Coverage for src/midgy/render.py: 94%

237 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-10-04 10:04 -0700

1"""render builds the machinery to translate markdown documents to code.""" 

2 

3from dataclasses import dataclass, field 

4from functools import partial 

5from io import StringIO 

6from re import compile 

7from textwrap import dedent 

8 

__all__ = ()

# ordinal character codes used when scanning markdown-it's raw srcCharCode buffer:
# ">" (doctest prompt), "\" (line continuation), ":" and the quote characters.
DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34}
# the four character codes of the ">>> " doctest input prompt
DOCTEST_CHARS = DOCTEST_CHAR, DOCTEST_CHAR, DOCTEST_CHAR, 32
# map each quote character to its backslash-escaped form
ESCAPE = {x: "\\" + x for x in "'\""}
# pattern matching any character that needs escaping (single or double quote)
ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
# the four character codes of the "... " doctest continuation prompt
ELLIPSIS_CHARS = (ord("."),) * 3 + (32,)
# escape(text) backslash-escapes single and double quotes in text
escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0)))

17 

18 

19# the Renderer is special markdown renderer designed to produce 

20# line for line transformations of markdown to the converted code. 

21# not all languages require this, but for python it matters. 

@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    # imported in the class body so the name is available for the field default
    # below; it is deleted at the end of the class body so it does not linger
    # as a class attribute.
    from markdown_it import MarkdownIt

    # the markdown-it parser used to tokenize documents ("gfm-like" preset)
    parser: object = field(default_factory=partial(MarkdownIt, "gfm-like"))
    # an hr token whose markup has more than this many non-space characters
    # splits the document into cells (see walk_cells)
    cell_hr_length: int = 9
    # fence info strings whose contents are included as code
    include_code_fences: set = field(default_factory=set)
    # when False, code_block yields nothing for indented code
    include_indented_code: bool = True
    # front-matter key whose value, when present, configures a new renderer
    # instance (see renderer_from_tokens)
    config_key: str = "py"

    def __post_init__(self):
        from mdit_py_plugins import deflist, footnote

        from .front_matter import _front_matter_lexer, _shebang_lexer

        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of document of following shebangs
        self.parser.block.ruler.before("code", "doctest", _doctest_lexer)
        self.parser.block.ruler.disable("code")
        self.parser.block.ruler.after("doctest", "code", _code_lexer)
        self.parser.block.ruler.before("table", "shebang", _shebang_lexer)
        self.parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        self.parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)

    def code_block(self, token, env):
        """yield the source lines of an indented code block, up to the token's
        last mapped line, unless indented code is excluded."""
        if self.include_indented_code:
            yield from self.get_block(env, token.map[1])

    # included code fences are rendered exactly like indented code
    code_fence_block = code_block

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)

    def fence(self, token, env):
        """dispatch a fence token: included fences render as code blocks,
        otherwise defer to a ``fence_<info>`` method when one is defined.

        returns None (ie the fence is skipped) when neither applies.
        """
        if token.info in self.include_code_fences:
            return self.code_fence_block(token, env)
        method = getattr(self, f"fence_{token.info}", None)
        if method:
            return method(token, env)

    def format(self, body):
        """a function that consumers can use to format their code"""
        return body

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer

        with no ``stop``, drain the rest of the source buffer; otherwise read
        line-by-line (advancing ``env["last_line"]``) until the stop line.
        """
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def non_code(self, env, next=None):
        """yield the buffered lines preceding the ``next`` token, or everything
        remaining when there is no next token."""
        yield from self.get_block(env, next.map[0] if next else None)

    def parse(self, src):
        """tokenize a markdown source string with the configured parser."""
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_cell_hr=True):
        """yield the token blocks (first element of each walk_cells pair) for
        each cell in the document."""
        yield from (
            x[0] for x in self.walk_cells(self.parse(body), include_cell_hr=include_cell_hr)
        )

    def print(self, iter, io):
        """write every item of ``iter`` to ``io`` with no separators appended."""
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        """read one line from the source buffer, always advancing the
        ``last_line`` counter (even if the read raises)."""
        try:
            return env["source"].readline()
        finally:
            env["last_line"] += 1

    def render(self, src, format=False):
        """tokenize and render a source string; optionally post-process the
        result with ``self.format``."""
        tokens = self.parse(src)
        out = self.render_tokens(tokens, src=src)
        return self.format(out) if format else out

    def render_cells(self, src, *, include_cell_hr=True):
        """yield the rendered code for each cell of the document.

        a single source buffer is threaded through successive per-cell
        environments so line accounting stays continuous across cells.
        """
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self._init_env(src, tokens)
        prior_token = None
        # reuse one buffer across cells; each cell env gets it back below
        source = prior.pop("source")
        for block, next_token in self.walk_cells(
            tokens, env=prior, include_cell_hr=include_cell_hr
        ):
            env = self._init_env(src, block)
            # carry over the shared buffer and the line position reached so far
            env["source"], env["last_line"] = source, prior["last_line"]
            # re-inject the hr token that ended the previous cell, if any
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_lines(self, src):
        """render a sequence of lines and return the dedented result as a list
        of lines (keeping line endings)."""
        return dedent(self.render("".join(src))).splitlines(True)

    def renderer_from_tokens(self, tokens):
        """return a new renderer configured from the document's front matter
        (under ``config_key``), or ``self`` when no configuration is found."""
        front_matter = self._get_front_matter(tokens)
        if front_matter:
            config = front_matter.get(self.config_key, None)
            if config:
                return type(self)(**config)
        return self

    def render_tokens(self, tokens, env=None, src=None, stop=None):
        """render parsed markdown tokens"""
        target = StringIO()
        # front matter may reconfigure the renderer for this document
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self._init_env(src, tokens)

        for generic, code in self._walk_code_blocks(tokens):
            # we walk pairs of tokens preceding code and the code token
            # the next code token is needed as a reference for indenting
            # non-code blocks that precede the code.
            env["next_code"] = code
            for token in generic:
                # walk the non-code tokens for any markers the class defines
                # renderers for. the renderer is responsible for taking of the
                # preceding non-code blocks, this feature is needed for any logical
                # rendering conditions.
                f = getattr(self, token.type, None)
                f and self.print(f(token, env) or "", target)
            if code:
                # format and print the preceding non-code block
                self.print(self.non_code(env, code), target)

                # update the rendering environment
                env.update(
                    last_indent=code.meta["last_indent"],
                )

                # format and print
                self.print(self.code_block(code, env), target)

        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)

        return target.getvalue()  # return the value of the target, a format string.

    def wrap_lines(self, lines, lead="", pre="", trail="", continuation=""):
        """a utility function to manipulate a buffer of content line-by-line.

        ``lead`` is emitted once before the first non-blank line, ``trail``
        after the last, ``continuation`` is appended to buffered whitespace
        lines preceding the first content line. trailing "\\" characters are
        tracked so continuations can be re-emitted at the end.

        NOTE(review): the ``pre`` parameter is accepted but never used in this
        body — confirm whether callers rely on it.
        """
        # ws buffers trailing whitespace/blank lines; any marks that content
        # has been seen; continued tracks a trailing backslash on the last line
        ws, any, continued = "", False, False
        for line in lines:
            LL = len(line.rstrip())
            if LL:
                continued = line[LL - 1] == "\\"
                # drop the backslash from the emitted slice when continued
                LL -= 1 * continued
                if any:
                    # flush whitespace buffered between content lines
                    yield ws
                else:
                    # before the first content line, append the continuation
                    # marker to each buffered whitespace line (before its newline)
                    for i, l in enumerate(StringIO(ws)):
                        yield l[:-1] + continuation + l[-1]
                yield from (lead, line[:LL])
                # carry the stripped tail (whitespace/newline) into the buffer
                any, ws = True, line[LL:]
                lead = ""
            else:
                ws += line
        if any:
            yield trail
        if continued:
            # re-emit the trailing buffer, restoring "\\" on all but the first line
            for i, line in enumerate(StringIO(ws)):
                yield from (lead, line[:-1], i and "\\" or "", line[-1])
        else:
            yield ws

    def _init_env(self, src, tokens):
        """build the mutable rendering environment for ``src``.

        computes ``min_indent`` as the smallest ``min_indent`` recorded in the
        metadata of the code tokens (and, optionally, pycon fences).
        """
        env = dict(source=StringIO(src), last_line=0, min_indent=None, last_indent=0)
        # subclasses may define include_doctest; the base class does not
        include_doctest = getattr(self, "include_doctest", False)
        for token in tokens:
            doctest = False
            if token.type == "fence":
                if token.info in self.include_code_fences:
                    # an included fence anchors the minimum indent at zero
                    env["min_indent"] = 0
                    continue
                if include_doctest:
                    doctest = token.info == "pycon"
            if doctest or (token.type == "code_block"):
                if env["min_indent"] is None:
                    env["min_indent"] = token.meta["min_indent"]
                else:
                    env["min_indent"] = min(env["min_indent"], token.meta["min_indent"])

        if env["min_indent"] is None:
            # no code tokens were found
            env["min_indent"] = 0
        return env

    def _get_front_matter(self, tokens):
        """load and return the front matter mapping, skipping any leading
        shebang token; return None when the document has no front matter."""
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                return load(token.content)
            return

    def walk_cells(self, tokens, *, env=None, include_cell_hr=True):
        """split tokens into cells at hr tokens whose markup exceeds
        ``cell_hr_length`` non-space characters, yielding (block, hr) pairs.

        the final pair carries None in place of the hr token.
        """
        block = []
        for token in tokens:
            if token.type == "hr":
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_cell_hr:
                        block.append(token)
                    elif env is not None:
                        # consume the hr's source lines so they are not
                        # re-emitted by the next cell.
                        # NOTE(review): ``get_block`` compares last_line < stop,
                        # but a Token is passed here rather than a line number —
                        # presumably this should be token.map[1]; confirm.
                        list(self.get_block(env, token))
            else:
                block.append(token)
        if block:
            yield block, None

    def _walk_code_blocks(self, tokens):
        """yield (preceding-tokens, code_block-token) pairs; the final pair
        holds the trailing non-code tokens and None."""
        prior = []
        for token in tokens:
            if token.type == "code_block":
                yield list(prior), token
                prior.clear()
            else:
                prior.append(token)
        yield prior, None

    # drop the class-body import so MarkdownIt is not exposed as a class attribute
    del MarkdownIt

259 

260 

@dataclass
class DedentCodeBlock(Renderer):
    """a Renderer that emits indented code blocks with the document's common
    code indentation removed."""

    def code_block(self, token, env):
        """yield the lines of an indented code block, dedented by the
        document-wide minimum code indent (``env["min_indent"]``).

        blank lines are yielded untouched so the output stays line-for-line
        with the source.
        """
        ref = env["min_indent"]
        for line in self.get_block(env, token.map[1]):
            # (the original kept a dead ``last`` binding of the stripped line;
            # it was never read, so it is removed here)
            if line.lstrip():
                yield line[ref:]
            else:
                yield line

273 

def _code_lexer(state, start, end, silent=False):
    """a code lexer that tracks indents in the token and is aware of doctests

    replaces markdown-it's built-in indented-code rule: it consumes indented
    lines until a doctest prompt (">>> ") or a dedent, then records
    first/last/min indent metadata on the emitted code_block token.
    """
    # indented code requires at least 4 columns beyond the block indent
    if state.sCount[start] - state.blkIndent >= 4:
        first_indent, last_indent, next, last_line = 0, 0, start, start
        while next < end:
            if state.isEmpty(next):
                # blank lines are absorbed; last_line is not advanced so
                # trailing blanks are excluded from the block
                next += 1
                continue
            if state.sCount[next] - state.blkIndent >= 4:
                begin = state.bMarks[next] + state.tShift[next]
                # stop before a ">>> " prompt so the doctest rule can take over
                if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                    break
                if not first_indent:
                    # record the indent of the first non-empty code line
                    first_indent = state.sCount[next]
                last_indent, last_line = state.sCount[next], next
                next += 1
            else:
                # dedented line ends the code block
                break
        state.line = last_line + 1
        token = state.push("code_block", "code", 0)
        token.content = state.getLines(start, state.line, 4 + state.blkIndent, True)
        token.map = [start, state.line]
        # smallest indent over the block's non-empty, indented lines
        min_indent = min(
            state.sCount[i]
            for i in range(start, state.line)
            if not state.isEmpty(i) and state.sCount[i]
        )
        meta = dict(
            first_indent=first_indent,
            last_indent=last_indent,
            min_indent=min_indent,
        )
        token.meta.update(meta)
        return True
    return False

309 

310 

def _doctest_lexer(state, startLine, end, silent=False):
    """a markdown-it-py plugin for doctests

    doctest are a literate programming convention in python that we
    include in the pidgy grammar. this avoids a mixing python and doctest
    code together.

    the doctest blocks:
    * extend the indented code blocks
    * do not conflict with blockquotes
    * are implicit code fences with the `pycon` info
    * can be replaced with explicit code blocks.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]

    # doctests only occur inside indented-code territory (>= 4 columns)
    if (start - state.blkIndent) < 4:
        return False

    if state.srcCharCode[start : start + 4] == DOCTEST_CHARS:
        # lead..extra spans the ">>> "/"... " input lines; extra..output spans
        # the expected-output lines; closed flips once output begins
        lead, extra, output, closed = startLine, startLine + 1, startLine + 1, False
        indent, next = state.sCount[startLine], startLine + 1
        while next < end:
            if state.isEmpty(next):
                break
            if state.sCount[next] < indent:
                # a dedent ends the doctest block
                break
            begin = state.bMarks[next] + state.tShift[next]
            if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                # a new ">>> " prompt starts a separate doctest token
                break

            next += 1
            # note: ``begin`` still refers to the line consumed just above
            if (not closed) and state.srcCharCode[begin : begin + 4] == ELLIPSIS_CHARS:
                # "... " continuation extends the input section
                extra = next
            else:
                # anything else is expected output; input is now closed
                closed = True
                output = next
        state.line = next
        # emit an implicit fence with the pycon info string
        token = state.push("fence", "code", 0)
        token.info = "pycon"
        token.content = state.getLines(startLine, next, 0, True)
        token.map = [startLine, state.line]
        token.meta.update(
            first_indent=indent,
            last_indent=indent,
            min_indent=indent,
        )

        # input is the [lead, extra) line window; output is [extra, output)
        # or None when the doctest has no expected output
        token.meta.update(input=[lead, extra])
        token.meta.update(output=[extra, output] if extra < output else None)

        return True
    return False