From 906a5e04eaa76ab2781b0807afb78847f218c767 Mon Sep 17 00:00:00 2001 From: Collin Lefeber Date: Wed, 19 Jun 2024 16:08:40 -0400 Subject: [PATCH] build_a_blog: safer markdown --- main.py | 19 ++++++------ posts/build_a_blog.md | 67 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/main.py b/main.py index d88c8c2..a3d92d1 100644 --- a/main.py +++ b/main.py @@ -14,12 +14,12 @@ from markdown.extensions.toc import TocExtension destpath_re = re.compile(r'\.md$') logging.basicConfig(encoding='utf-8', level=logging.INFO) -md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)]) cpu_count = os.cpu_count() def convert(text): - md.reset() - return md.convert(text) + md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)]) + res = md.convert(text) + return res, md.Meta def render_post(fpath): destpath = destpath_re.sub('.html', fpath) @@ -30,15 +30,16 @@ def render_post(fpath): text = input_file.read() logging.info("parsing %s", fpath) - out = convert(text) + out, meta = convert(text) - title = md.Meta.get('title')[0] - date = md.Meta.get('date')[0] + title = meta.get('title')[0] + date = meta.get('date')[0] draft = False - if md.Meta.get('draft'): + if meta.get('draft'): draft = True - out = convert('# ' + title) + out + title_out, _ = convert(text) + out = title_out + out logging.info("writing to %s", destpath) render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title}) @@ -105,7 +106,7 @@ def rss_post_xml(post): text = inf.read() - converted = convert(text) + converted, _ = convert(text) pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date'])) subs = { diff --git a/posts/build_a_blog.md b/posts/build_a_blog.md index f528154..ed8e807 100644 --- a/posts/build_a_blog.md +++ b/posts/build_a_blog.md @@ -812,7 +812,7 @@ So here is the diff to make that happen: ```diff diff --git a/main.py b/main.py -index 
52ce57b..95b650d 100644 +index 52ce57b..a3d92d1 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,11 @@ @@ -827,15 +827,45 @@ index 52ce57b..95b650d 100644 from string import Template import markdown -@@ -13,6 +15,7 @@ destpath_re = re.compile(r'\.md$') +@@ -12,11 +14,12 @@ from markdown.extensions.toc import TocExtension + destpath_re = re.compile(r'\.md$') logging.basicConfig(encoding='utf-8', level=logging.INFO) - md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)]) +-md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)]) +cpu_count = os.cpu_count() def convert(text): - md.reset() -@@ -52,11 +55,11 @@ def render_posts(): +- md.reset() +- return md.convert(text) ++ md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)]) ++ res = md.convert(text) ++ return res, md.Meta + + def render_post(fpath): + destpath = destpath_re.sub('.html', fpath) +@@ -27,15 +30,16 @@ def render_post(fpath): + text = input_file.read() + + logging.info("parsing %s", fpath) +- out = convert(text) ++ out, meta = convert(text) + +- title = md.Meta.get('title')[0] +- date = md.Meta.get('date')[0] ++ title = meta.get('title')[0] ++ date = meta.get('date')[0] + draft = False +- if md.Meta.get('draft'): ++ if meta.get('draft'): + draft = True + +- out = convert('# ' + title) + out ++ title_out, _ = convert(text) ++ out = title_out + out + + logging.info("writing to %s", destpath) + render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title}) +@@ -52,11 +56,11 @@ def render_posts(): files = glob.glob('posts/*.md') logging.info('found post files %s', files) posts = [] @@ -851,8 +881,21 @@ index 52ce57b..95b650d 100644 return posts def posts_list_html(posts): +@@ -102,7 +106,7 @@ def rss_post_xml(post): + text = inf.read() + + +- converted = convert(text) ++ converted, _ = convert(text) + + pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date'])) + subs = { 
``` +The biggest note is that `convert()` now creates a `Markdown` instance on each call. This protects against multiple processes trying to use the same module-level `md` instance. + +See the Python-Markdown library reference for notes on `Markdown.reset()` and thread safety. + And re-run the benchmarks: ```shell @@ -862,9 +905,9 @@ INFO: removing old __bench files INFO: number of *.md files 102 INFO: number of *.html files 2 INFO: running -real 0.27 -user 1.82 -sys 0.15 +real 0.45 +user 4.21 +sys 0.32 INFO: number of *.html files 102 INFO: cleanup __bench files @@ -873,14 +916,14 @@ INFO: removing old __bench files INFO: number of *.md files 1002 INFO: number of *.html files 2 INFO: running -real 1.25 -user 16.68 -sys 0.52 +real 2.52 +user 35.69 +sys 0.96 INFO: number of *.html files 1002 INFO: cleanup __bench files ``` -Down to 1.25s for 1000 posts 🎉 +So that's down to 2.5s for 1000 posts. Not a bad start! [1]: https://crystal-lang.org/ [2]: https://github.com/crystal-lang/crystal/releases/tag/0.31.0