import collections
import json
import pathlib
import re
from typing import Optional, Tuple, Union
import nbformat # type: ignore
import unidecode # type: ignore
from nbconvert.preprocessors import ExecutePreprocessor # type: ignore
# Default tag patterns for `remove_cells`: a cell carrying a tag that matches
# any of these (tested with `re.match`) is dropped from the notebook.
TAGS_REGEX_PATTERNS_TO_IGNORE = ["hide", r"score:\d"]
# Default pattern for `remove_cells`: text enclosed by the BEGIN/END SOLUTION
# markers. `re.DOTALL` lets the enclosed text span several lines.
SOLUTION_REGEX = re.compile(
r"### BEGIN SOLUTION[\s\S](.*?)[\s\S]### END SOLUTION", re.DOTALL
)
# Default replacement for text matched by `SOLUTION_REGEX` in `remove_cells`:
# the two markers with nothing between them.
SOLUTION_REPL = r"""### BEGIN SOLUTION
### END SOLUTION"""
# Matches any single character, newlines included (because of `re.DOTALL`).
UNIVERSAL_REGEX = re.compile(r".", re.DOTALL)
# Default pattern identifying answer tags in `add_checks` and `check`.
# NOTE(review): as written this is "answer" followed by zero or more ":"
# characters, so with `re.match` any tag starting with "answer" matches --
# confirm that `answer:.*` was not the intent.
ANSWER_TAG_REGEX = r"answer:*"
# Captures the digits that follow "score:"; default pattern in `get_score`
# and `check`.
SCORE_REGEX = re.compile(r"score:(\d+)")
# Captures everything that follows "description:"; default pattern in
# `get_description`.
DESCRIPTION_REGEX = re.compile(r"description:(.*)")
[docs]
def read(nb_path: Union[pathlib.Path, str], as_version: int = 4) -> dict:
    """
    Read a jupyter notebook file at `nb_path`.

    `nb_path` is the location of the notebook file and `as_version` is
    passed on to `nbformat.read` as the notebook format version to load.

    Returns the python `dict` representation, or an empty `dict` when
    `nbformat` reports that the file is not valid JSON.
    """
    # Open as UTF-8 explicitly instead of relying on the platform's default
    # text encoding, which is not UTF-8 everywhere.
    with open(nb_path, "r", encoding="utf-8") as f:
        try:
            return nbformat.read(f, as_version=as_version)
        except nbformat.reader.NotJSONError:
            # Best effort: an unreadable notebook is reported as empty.
            return {}
[docs]
def remove_cells(
    nb_node, tags_regex_patterns_to_ignore=None, solution_regex=None, solution_repl=None
):  # TODO Add typing to this function
    """
    Strip ignored cells and solution text from a notebook dictionary.

    - A cell is dropped when any of its tags matches (with `re.match`) any
      pattern in `tags_regex_patterns_to_ignore`.
    - In every remaining cell, text matching `solution_regex` is replaced by
      `solution_repl`; when the replacement text is present in the resulting
      source, an existing "outputs" entry is emptied.

    Each argument left as `None` falls back to its module level default.
    `nb_node` is modified in place and also returned.
    """
    ignore_patterns = (
        TAGS_REGEX_PATTERNS_TO_IGNORE
        if tags_regex_patterns_to_ignore is None
        else tags_regex_patterns_to_ignore
    )
    pattern = SOLUTION_REGEX if solution_regex is None else solution_regex
    replacement = SOLUTION_REPL if solution_repl is None else solution_repl

    kept_cells = []
    for cell in nb_node["cells"]:
        metadata = cell["metadata"]
        cell_tags = metadata["tags"] if "tags" in metadata else ()
        # Skip the cell as soon as one tag matches one ignore pattern.
        if any(
            re.match(ignore_pattern, tag)
            for tag in cell_tags
            for ignore_pattern in ignore_patterns
        ):
            continue

        try:
            # The source is transliterated to ASCII before substitution.
            ascii_source = unidecode.unidecode("".join(cell["source"]))
            stripped_source = re.sub(pattern, replacement, ascii_source)
            cell["source"] = stripped_source
            if replacement in stripped_source and "outputs" in cell:
                cell["outputs"] = []
        except KeyError:  # pragma: no cover
            pass  # TODO Add test coverage for this statement
        kept_cells.append(cell)

    nb_node["cells"] = kept_cells
    return nb_node
[docs]
def write(output_path: Union[pathlib.Path, str], nb_node: dict) -> None:
    """
    Write the python dict representation of a notebook to `output_path`.

    `output_path` may be a `pathlib.Path` or a plain string path (the same
    kinds of path that `read` accepts). `nb_node` is serialised with
    `json.dumps` and written as UTF-8 text.
    """
    # Wrapping in `pathlib.Path` is a no-op for Path inputs and lets string
    # paths work too.
    pathlib.Path(output_path).write_text(json.dumps(nb_node), encoding="utf-8")
[docs]
def add_checks(nb_node: dict, source_nb_node: dict, answer_tag_regex=None) -> dict:
    """
    Put the answer cells of `nb_node` into `source_nb_node`.

    Cells of `nb_node` are indexed by each of their tags that matches
    `answer_tag_regex` (a later cell with the same tag wins). Every cell of
    `source_nb_node` is then visited tag by tag: a tag with an indexed
    answer swaps that answer cell in, while a tag that matches the pattern
    but has no answer blanks the "source" of the cell currently in that
    position.

    This is used to add a student's answers to the source notebook.
    `source_nb_node` is modified in place and also returned.
    """
    tag_pattern = ANSWER_TAG_REGEX if answer_tag_regex is None else answer_tag_regex

    def is_answer_tag(tag):
        return re.match(tag_pattern, tag) is not None

    submitted = {}
    for submitted_cell in nb_node["cells"]:
        for tag in submitted_cell["metadata"].get("tags", []):
            if is_answer_tag(tag):
                submitted[tag] = submitted_cell

    source_cells = source_nb_node["cells"]
    for position, source_cell in enumerate(source_cells):
        # Tags come from the cell originally at this position, even if an
        # earlier tag has already swapped another cell in.
        for tag in source_cell["metadata"].get("tags", []):
            if tag in submitted:
                source_cells[position] = submitted[tag]
            elif is_answer_tag(tag):
                source_cells[position]["source"] = ""
    return source_nb_node
[docs]
def get_score(cell: dict, score_regex_pattern=None) -> Optional[int]:
    """
    Given a `cell` of a notebook, return the score as defined by the
    `score_regex_pattern`.

    The pattern (module default `SCORE_REGEX` when `None`) is searched for
    in the value returned by `get_tags(cell)`, and its first group is
    converted to `int`.

    Returns `None` when `get_tags` gives back nothing (an empty string or
    `None`) or when the pattern is not found.
    """
    if score_regex_pattern is None:
        score_regex_pattern = SCORE_REGEX
    tags = get_tags(cell)
    if not tags:
        return None
    search = re.search(pattern=score_regex_pattern, string=tags)
    # Test the match explicitly rather than catching AttributeError, so an
    # unrelated AttributeError is never mistaken for "no score".
    if search is None:
        return None
    return int(search.group(1))
[docs]
def get_description(
    cell: dict, description_regex_pattern=None, tag_seperator: str = "|"
) -> str:
    """
    Return the human readable description attached to `cell`.

    The value of `get_tags(cell, tag_seperator=...)` is split on
    `tag_seperator` and each piece is searched with
    `description_regex_pattern` (module default when `None`). The first
    piece that yields a captured group wins: its hyphens become spaces and
    the result is capitalized. An empty string is returned when nothing is
    found.
    """
    pattern = (
        DESCRIPTION_REGEX
        if description_regex_pattern is None
        else description_regex_pattern
    )
    tags = get_tags(cell, tag_seperator=tag_seperator)
    if tags == "":
        return ""

    for tag in tags.split(tag_seperator):
        found = re.search(pattern, tag)
        if found is None:
            continue
        captured = found.group(1)
        if captured is None:
            # The group did not take part in the match: try the next tag.
            continue
        return captured.replace("-", " ").capitalize()
    return ""
[docs]
def check(
    nb_node: dict,
    timeout: int = 600,
    score_regex_pattern=None,
    answer_tag_pattern=None,
) -> Tuple[Optional[int], Optional[int], str, dict]:
    """
    Given a `nb_node`, it executes the notebook and keep track of the score.

    `timeout` is handed to the `ExecutePreprocessor`. `score_regex_pattern`
    and `answer_tag_pattern` fall back to `SCORE_REGEX` and
    `ANSWER_TAG_REGEX` when `None`.

    A cell for which `get_score` finds a score is a check: it passes when
    none of its outputs is an error output, and fails otherwise (the
    `evalue` of the first error output is used as feedback).

    This returns 4 things:
    - The student score
    - The total score obtainable
    - Some feedback in markdown format
    - A dictionary mapping check description tags to a boolean
    """
    if score_regex_pattern is None:
        score_regex_pattern = SCORE_REGEX
    if answer_tag_pattern is None:
        answer_tag_pattern = ANSWER_TAG_REGEX

    # `nb_node` is executed in place; errors are allowed so that failing
    # checks end up in the cell outputs instead of aborting the run.
    ep = ExecutePreprocessor(timeout=timeout, allow_errors=True)
    ep.preprocess(nb_node)

    total_score = 0
    maximum_score = 0
    passed_check = {}
    feedback_md = ""

    for cell in nb_node["cells"]:
        answer_tags = get_tags(cell=cell, tag_regex=answer_tag_pattern)
        if answer_tags:
            feedback_md += f"\n---\n## {answer_tags}\n"

        # Look the score up once, with the caller's pattern, so the value
        # that is tested is also the value that is added up.
        score = get_score(cell, score_regex_pattern=score_regex_pattern)
        if score is None:
            continue

        description = get_description(cell)
        if description != "":
            feedback_md += f"\n### {description}\n"
        maximum_score += score

        # Scan every output: an error may follow printed output, and a
        # check that only printed something has still passed.
        error_output = next(
            (
                output
                for output in cell.get("outputs", [])
                if output.get("output_type") == "error"
            ),
            None,
        )
        if error_output is not None:
            # TODO Add something here that outputs the error to a log.
            # Do this for errors that are not expected so not NbChkr...
            # errors.
            question_feedback = error_output.get("evalue", "")
            passed_check[description] = False
            feedback_md += f"\n{question_feedback}\n0 / {score}\n"
        else:
            feedback_md += f"\n{score} / {score}\n"
            total_score += score
            passed_check[description] = True
    return total_score, maximum_score, feedback_md, passed_check