Coverage for src/midgy/render.py: 94%
237 statements
coverage.py v6.4.4, created at 2022-10-04 10:04 -0700
1"""render builds the machinery to translate markdown documents to code."""
3from dataclasses import dataclass, field
4from functools import partial
5from io import StringIO
6from re import compile
7from textwrap import dedent
9__all__ = ()
11DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34}
12DOCTEST_CHARS = DOCTEST_CHAR, DOCTEST_CHAR, DOCTEST_CHAR, 32
13ESCAPE = {x: "\\" + x for x in "'\""}
14ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
15ELLIPSIS_CHARS = (ord("."),) * 3 + (32,)
16escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0)))


# the Renderer is a special markdown renderer designed to produce
# line-for-line transformations of markdown into the converted code.
# not all languages require this, but for python it matters.
@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    from markdown_it import MarkdownIt

    parser: object = field(default_factory=partial(MarkdownIt, "gfm-like"))
    cell_hr_length: int = 9
    include_code_fences: set = field(default_factory=set)
    include_indented_code: bool = True
    config_key: str = "py"

    def __post_init__(self):
        from mdit_py_plugins import deflist, footnote

        from .front_matter import _front_matter_lexer, _shebang_lexer

        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front matter at the beginning of a document or following a shebang.
        self.parser.block.ruler.before("code", "doctest", _doctest_lexer)
        self.parser.block.ruler.disable("code")
        self.parser.block.ruler.after("doctest", "code", _code_lexer)
        self.parser.block.ruler.before("table", "shebang", _shebang_lexer)
        self.parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        self.parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)

    def code_block(self, token, env):
        if self.include_indented_code:
            yield from self.get_block(env, token.map[1])

    code_fence_block = code_block

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)
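
    # a minimal usage sketch (the markdown string below is illustrative; the
    # exact output depends on the configured subclass):
    #
    #     Renderer.code_from_string("some prose\n\n    print(1)\n")
    #
    # parses the markdown, keeps the indented code block, and returns the
    # line-for-line translation produced by ``render``.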

    def fence(self, token, env):
        if token.info in self.include_code_fences:
            return self.code_fence_block(token, env)
        method = getattr(self, f"fence_{token.info}", None)
        if method:
            return method(token, env)

    def format(self, body):
        """a function that consumers can use to format their code"""
        return body

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer"""
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def non_code(self, env, next=None):
        yield from self.get_block(env, next.map[0] if next else None)

    def parse(self, src):
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_cell_hr=True):
        yield from (
            x[0] for x in self.walk_cells(self.parse(body), include_cell_hr=include_cell_hr)
        )

    def print(self, iter, io):
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        try:
            return env["source"].readline()
        finally:
            env["last_line"] += 1

    def render(self, src, format=False):
        tokens = self.parse(src)
        out = self.render_tokens(tokens, src=src)
        return self.format(out) if format else out

    def render_cells(self, src, *, include_cell_hr=True):
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self._init_env(src, tokens)
        prior_token = None
        source = prior.pop("source")
        for block, next_token in self.walk_cells(
            tokens, env=prior, include_cell_hr=include_cell_hr
        ):
            env = self._init_env(src, block)
            env["source"], env["last_line"] = source, prior["last_line"]
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_lines(self, src):
        return dedent(self.render("".join(src))).splitlines(True)

    def renderer_from_tokens(self, tokens):
        front_matter = self._get_front_matter(tokens)
        if front_matter:
            config = front_matter.get(self.config_key, None)
            if config:
                return type(self)(**config)
        return self
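
    # a minimal sketch of how front matter reconfigures the renderer. the yaml
    # block below is illustrative (only the section named by ``config_key``,
    # "py" by default, is read):
    #
    #     ---
    #     py:
    #       include_indented_code: false
    #     ---
    #
    # would rebuild this renderer as ``type(self)(include_indented_code=False)``.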

    def render_tokens(self, tokens, env=None, src=None, stop=None):
        """render parsed markdown tokens"""
        target = StringIO()
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self._init_env(src, tokens)

        for generic, code in self._walk_code_blocks(tokens):
            # we walk pairs of the tokens preceding a code block and the code token.
            # the next code token is needed as a reference for indenting the
            # non-code blocks that precede the code.
            env["next_code"] = code
            for token in generic:
                # walk the non-code tokens for any markers the class defines
                # renderers for. such a renderer is responsible for taking care of
                # the preceding non-code block; this feature is needed for any
                # logical rendering conditions.
                f = getattr(self, token.type, None)
                f and self.print(f(token, env) or "", target)
            if code:
                # format and print the preceding non-code block
                self.print(self.non_code(env, code), target)

                # update the rendering environment
                env.update(
                    last_indent=code.meta["last_indent"],
                )

                # format and print the code block
                self.print(self.code_block(code, env), target)

        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)

        return target.getvalue()  # return the value of the target, a format string.

    def wrap_lines(self, lines, lead="", pre="", trail="", continuation=""):
        """a utility function to manipulate a buffer of content line-by-line."""
        ws, any, continued = "", False, False
        for line in lines:
            LL = len(line.rstrip())
            if LL:
                continued = line[LL - 1] == "\\"
                LL -= 1 * continued
                if any:
                    yield ws
                else:
                    for i, l in enumerate(StringIO(ws)):
                        yield l[:-1] + continuation + l[-1]
                yield from (lead, line[:LL])
                any, ws = True, line[LL:]
                lead = ""
            else:
                ws += line
        if any:
            yield trail
        if continued:
            for i, line in enumerate(StringIO(ws)):
                yield from (lead, line[:-1], i and "\\" or "", line[-1])
        else:
            yield ws
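
    # a minimal sketch of ``wrap_lines`` (the lead/trail strings are arbitrary
    # here): wrapping two lines in triple quotes,
    #
    #     "".join(Renderer().wrap_lines(["a\n", "b\n"], lead='"""', trail='"""'))
    #
    # is expected to yield '"""a\nb"""\n'; trailing whitespace is buffered and
    # re-emitted after the trail string.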

    def _init_env(self, src, tokens):
        env = dict(source=StringIO(src), last_line=0, min_indent=None, last_indent=0)
        include_doctest = getattr(self, "include_doctest", False)
        for token in tokens:
            doctest = False
            if token.type == "fence":
                if token.info in self.include_code_fences:
                    env["min_indent"] = 0
                    continue
                if include_doctest:
                    doctest = token.info == "pycon"
            if doctest or (token.type == "code_block"):
                if env["min_indent"] is None:
                    env["min_indent"] = token.meta["min_indent"]
                else:
                    env["min_indent"] = min(env["min_indent"], token.meta["min_indent"])

        if env["min_indent"] is None:
            env["min_indent"] = 0
        return env

    def _get_front_matter(self, tokens):
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                return load(token.content)
            return

    def walk_cells(self, tokens, *, env=None, include_cell_hr=True):
        block = []
        for token in tokens:
            if token.type == "hr":
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_cell_hr:
                        block.append(token)
                    elif env is not None:
                        list(self.get_block(env, token))
            else:
                block.append(token)
        if block:
            yield block, None
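
    # a sketch of the cell convention walked above (illustrative markdown): a
    # thematic break longer than ``cell_hr_length`` (9 by default), e.g.
    #
    #     ----------
    #
    # ends a cell and starts a new one, while a shorter rule does not split the
    # token stream.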

    def _walk_code_blocks(self, tokens):
        prior = []
        for token in tokens:
            if token.type == "code_block":
                yield list(prior), token
                prior.clear()
            else:
                prior.append(token)
        yield prior, None

    del MarkdownIt


@dataclass
class DedentCodeBlock(Renderer):
    def code_block(self, token, env):
        ref = env["min_indent"]
        for line in self.get_block(env, token.map[1]):
            right = line.lstrip()
            if right:
                yield line[ref:]
                last = right
            else:
                yield line


def _code_lexer(state, start, end, silent=False):
    """a code lexer that tracks indents in the token and is aware of doctests"""
    if state.sCount[start] - state.blkIndent >= 4:
        first_indent, last_indent, next, last_line = 0, 0, start, start
        while next < end:
            if state.isEmpty(next):
                next += 1
                continue
            if state.sCount[next] - state.blkIndent >= 4:
                begin = state.bMarks[next] + state.tShift[next]
                if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                    break
                if not first_indent:
                    first_indent = state.sCount[next]
                last_indent, last_line = state.sCount[next], next
                next += 1
            else:
                break
        state.line = last_line + 1
        token = state.push("code_block", "code", 0)
        token.content = state.getLines(start, state.line, 4 + state.blkIndent, True)
        token.map = [start, state.line]
        min_indent = min(
            state.sCount[i]
            for i in range(start, state.line)
            if not state.isEmpty(i) and state.sCount[i]
        )
        meta = dict(
            first_indent=first_indent,
            last_indent=last_indent,
            min_indent=min_indent,
        )
        token.meta.update(meta)
        return True
    return False
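
# a sketch of the metadata the code lexer attaches. an illustrative indented
# block (8 and 12 leading spaces, with a blkIndent of 0):
#
#         x = 1
#             y = 2
#
# tokenizes to a single code_block token whose meta holds
# first_indent=8, last_indent=12, and min_indent=8.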


def _doctest_lexer(state, startLine, end, silent=False):
    """a markdown-it-py plugin for doctests

    doctests are a literate programming convention in python that we include
    in the pidgy grammar. this avoids mixing python and doctest code together.

    the doctest blocks:
    * extend the indented code blocks
    * do not conflict with blockquotes
    * are implicit code fences with the `pycon` info
    * can be replaced with explicit code blocks.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]

    if (start - state.blkIndent) < 4:
        return False

    if state.srcCharCode[start : start + 4] == DOCTEST_CHARS:
        lead, extra, output, closed = startLine, startLine + 1, startLine + 1, False
        indent, next = state.sCount[startLine], startLine + 1
        while next < end:
            if state.isEmpty(next):
                break
            if state.sCount[next] < indent:
                break
            begin = state.bMarks[next] + state.tShift[next]
            if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                break

            next += 1
            if (not closed) and state.srcCharCode[begin : begin + 4] == ELLIPSIS_CHARS:
                extra = next
            else:
                closed = True
                output = next
        state.line = next
        token = state.push("fence", "code", 0)
        token.info = "pycon"
        token.content = state.getLines(startLine, next, 0, True)
        token.map = [startLine, state.line]
        token.meta.update(
            first_indent=indent,
            last_indent=indent,
            min_indent=indent,
        )

        token.meta.update(input=[lead, extra])
        token.meta.update(output=[extra, output] if extra < output else None)

        return True
    return False
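

# a sketch of the doctest convention recognized above (illustrative markdown,
# indented four spaces so it extends an indented code block):
#
#     >>> 1 + 1
#     2
#
# becomes a "fence" token with info "pycon"; meta["input"] spans the ">>>" and
# "..." continuation lines and meta["output"] spans the expected output lines.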