Coverage for src/midgy/render.py: 97% (147 statements)
coverage.py v6.5.0, created at 2023-01-02 16:08 -0800
1"""render builds the machinery to translate markdown documents to code."""
3from dataclasses import dataclass, field
4from functools import partial
5from io import StringIO
6from re import compile
8__all__ = ()
10DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34}
11BLOCK, FENCE, PYCON = "code_block", "fence", "pycon"
12ESCAPE = {x: "\\" + x for x in "'\""}
13ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
14escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0)))
15SP, QUOTES = chr(32), (chr(34) * 3, chr(39) * 3)
# the Renderer is a special markdown renderer designed to produce
# line-for-line transformations of markdown to the converted code.
# not all languages require this, but for python it matters.
@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    # the markdown-it parser; replaced by get_parser() in __post_init__
    parser: object = None
    # an hr whose markup has more than this many non-space characters splits cells
    cell_hr_length: int = 9
    include_code: bool = True  # the nuclear option: disables all code translation
    # fence info strings whose bodies are rendered as code
    include_code_fences: set = field(default_factory=set)
    include_indented_code: bool = True
    include_doctest: bool = False
    # the front-matter key whose value overrides this renderer's fields
    config_key: str = "py"

    def __post_init__(self):
        # build the parser eagerly so parse/render can be called immediately
        self.parser = self.get_parser()

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer

        when ``stop`` is None the whole buffer is drained; otherwise lines are
        yielded until the buffer's line counter reaches the ``stop`` line number.
        """
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def get_cells(self, tokens, *, env=None, include_hr=True):
        """walk cells separated by mega-hrs

        yields ``(tokens, separator)`` pairs where ``separator`` is the hr
        token that closed the cell, or None for the trailing cell.
        """
        block = []
        for token in tokens:
            if token.type == "hr":
                # an hr with more non-space markup characters than
                # cell_hr_length acts as a cell separator (a "mega" hr)
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                if include_hr:
                    block.append(token)
                elif env is not None:
                    # consume the hr's source lines without emitting the token
                    # NOTE(review): get_block compares its ``stop`` to an int
                    # line counter, but a Token is passed here — looks like it
                    # should be ``token.map[0]``; confirm (branch may be the
                    # uncovered 3% in the coverage report).
                    list(self.get_block(env, token))
            else:
                block.append(token)
        if block:
            yield block, None

    def get_front_matter(self, tokens):
        """return parsed front-matter data from the leading tokens, if any."""
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                # parse once and memoize the result on the token's meta
                if "data" in token.meta:
                    return token.meta["data"]
                return token.meta.setdefault("data", load(token.content))
            # front matter is only valid before any other token; stop looking
            return

    def get_initial_env(self, src, tokens):
        """initialize the parser environment indents"""
        # env threads the raw source buffer and indent state through rendering
        env = dict(source=StringIO(src), last_line=0, last_indent=0)
        for token in filter(self.is_code_block, tokens):  # iterate through the tokens
            # min_indent is the smallest indent of any code token; it anchors
            # the line-for-line dedenting of indented code
            env["min_indent"] = min(env.get("min_indent", 9999), token.meta["min_indent"])
        env.setdefault("min_indent", 0)
        return env

    def get_parser(self):
        """build the default markdown-it parser for this renderer."""
        from markdown_it import MarkdownIt

        parser = MarkdownIt("gfm-like", options_update=dict(inline_definitions=True, langPrefix=""))
        return self.set_parser_defaults(parser)

    def get_updated_env(self, token, env, **kwargs):
        """update the state of the environment"""
        left = token.content.rstrip()
        env.update(
            continued=left.endswith("\\"),  # trailing backslash continues the line
            colon_block=left.endswith(":"),  # trailing colon opens a block
            quoted_block=left.endswith(QUOTES),  # trailing triple quote opens a string
        )
        env.update(kwargs)

    def is_code_block(self, token):
        """is the token a code block entry"""
        if self.include_code:
            if token.type == BLOCK:
                # doctests found inside indented code are opt-in
                if token.meta["is_doctest"]:
                    return self.include_doctest
                return self.include_indented_code
            elif token.type == FENCE:
                if token.info in self.include_code_fences:
                    return True
                # pycon fences are doctest sessions, also opt-in
                if token.info == PYCON:
                    return self.include_doctest
        return False

    def non_code(self, env, next=None):
        """yield the buffered non-code lines preceding the ``next`` token."""
        # NOTE: ``next`` shadows the builtin; kept for interface compatibility
        yield from self.get_block(env, next.map[0] if next else None)
        if next:
            env.update(last_indent=next.meta.get("last_indent", 0))

    def parse(self, src):
        """tokenize the source with the configured markdown parser."""
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_hr=True):
        """yield only the token lists for each cell in the body."""
        yield from (x[0] for x in self.get_cells(self.parse(body), include_hr=include_hr))

    def print(self, iter, io):
        """write an iterable of strings to ``io`` with no separators."""
        # NOTE: ``iter`` shadows the builtin; kept for interface compatibility
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        """read one line from the source buffer, advancing the line counter."""
        try:
            return env["source"].readline()
        finally:
            # the counter advances even if readline raises
            env["last_line"] += 1

    def render(self, src):
        """translate a markdown string to code."""
        return self.render_tokens(self.parse(src), src=src)

    def render_cells(self, src, *, include_hr=True):
        # cells allow different parsers in a single pass
        tokens = self.parse(src)
        # front matter may swap in a reconfigured renderer
        self = self.renderer_from_tokens(tokens)
        prior = self.get_initial_env(src, tokens)
        prior_token = None
        # one buffer is shared across all cells so line positions carry over
        source = prior.pop("source")
        for block, next_token in self.get_cells(tokens, env=prior, include_hr=include_hr):
            env = self.get_initial_env(src, block)
            env["source"], env["last_line"] = source, prior["last_line"]
            # carry the separating hr into the next cell so its lines render
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_token(self, token, env):
        """dispatch a token to the method named after its type, if defined."""
        if token:
            method = getattr(self, token.type, None)
            if method:
                # a handler may return None; treat that as "nothing to emit"
                yield from method(token, env) or ()

    def render_tokens(self, tokens, env=None, src=None, stop=None, target=None):
        """render parsed markdown tokens"""
        if target is None:
            target = StringIO()
        # front matter may swap in a reconfigured renderer
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self.get_initial_env(src, tokens)
        for token in tokens:
            if self.is_code_block(token):
                # remember the upcoming code token for handlers that need it
                env["next_code"] = token
            self.print(self.render_token(token, env), target)
        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)
        return target.getvalue()  # return the value of the target, a format string.

    def renderer_from_tokens(self, tokens):
        """return a renderer reconfigured by front matter, or self unchanged."""
        front_matter = self.get_front_matter(tokens)
        if front_matter:
            # front matter can reconfigure the parser and make a new one
            config = {k: getattr(self, k) for k in self.__dataclass_fields__}
            config.update(front_matter.get(self.config_key, {}))
            if config:
                return type(self)(**config)
        return self

    def set_parser_defaults(self, parser):
        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of document of following shebangs
        from mdit_py_plugins import deflist, footnote

        from .front_matter import _front_matter_lexer, _shebang_lexer
        from .lexers import code_fence_lexer, doctest_lexer, code_lexer

        parser.block.ruler.before("code", "doctest", doctest_lexer)
        parser.block.ruler.disable("code")
        # our indented code captures doctests in indented blocks
        parser.block.ruler.after("doctest", "code", code_lexer)
        parser.disable(FENCE)
        # our code fence captures indent information
        parser.block.ruler.after("code", FENCE, code_fence_lexer)
        # shebang because this markdown is code
        parser.block.ruler.before("table", "shebang", _shebang_lexer)
        parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
        parser.disable("footnote_tail")
        return parser