build_a_blog: safer markdown

This commit is contained in:
Collin Lefeber 2024-06-19 16:08:40 -04:00
parent 3e06bfd9ae
commit 906a5e04ea
2 changed files with 65 additions and 21 deletions

19
main.py
View file

@ -14,12 +14,12 @@ from markdown.extensions.toc import TocExtension
destpath_re = re.compile(r'\.md$')
logging.basicConfig(encoding='utf-8', level=logging.INFO)
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
cpu_count = os.cpu_count()
def convert(text):
md.reset()
return md.convert(text)
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
res = md.convert(text)
return res, md.Meta
def render_post(fpath):
destpath = destpath_re.sub('.html', fpath)
@ -30,15 +30,16 @@ def render_post(fpath):
text = input_file.read()
logging.info("parsing %s", fpath)
out = convert(text)
out, meta = convert(text)
title = md.Meta.get('title')[0]
date = md.Meta.get('date')[0]
title = meta.get('title')[0]
date = meta.get('date')[0]
draft = False
if md.Meta.get('draft'):
if meta.get('draft'):
draft = True
out = convert('# ' + title) + out
title_out, _ = convert(text)
out = title_out + out
logging.info("writing to %s", destpath)
render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title})
@ -105,7 +106,7 @@ def rss_post_xml(post):
text = inf.read()
converted = convert(text)
converted, _ = convert(text)
pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date']))
subs = {

View file

@ -812,7 +812,7 @@ So here is the diff to make that happen:
```diff
diff --git a/main.py b/main.py
index 52ce57b..95b650d 100644
index 52ce57b..a3d92d1 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,11 @@
@ -827,15 +827,45 @@ index 52ce57b..95b650d 100644
from string import Template
import markdown
@@ -13,6 +15,7 @@ destpath_re = re.compile(r'\.md$')
@@ -12,11 +14,12 @@ from markdown.extensions.toc import TocExtension
destpath_re = re.compile(r'\.md$')
logging.basicConfig(encoding='utf-8', level=logging.INFO)
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
-md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
+cpu_count = os.cpu_count()
def convert(text):
md.reset()
@@ -52,11 +55,11 @@ def render_posts():
- md.reset()
- return md.convert(text)
+ md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
+ res = md.convert(text)
+ return res, md.Meta
def render_post(fpath):
destpath = destpath_re.sub('.html', fpath)
@@ -27,15 +30,16 @@ def render_post(fpath):
text = input_file.read()
logging.info("parsing %s", fpath)
- out = convert(text)
+ out, meta = convert(text)
- title = md.Meta.get('title')[0]
- date = md.Meta.get('date')[0]
+ title = meta.get('title')[0]
+ date = meta.get('date')[0]
draft = False
- if md.Meta.get('draft'):
+ if meta.get('draft'):
draft = True
- out = convert('# ' + title) + out
+ title_out, _ = convert(text)
+ out = title_out + out
logging.info("writing to %s", destpath)
render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title})
@@ -52,11 +56,11 @@ def render_posts():
files = glob.glob('posts/*.md')
logging.info('found post files %s', files)
posts = []
@ -851,8 +881,21 @@ index 52ce57b..95b650d 100644
return posts
def posts_list_html(posts):
@@ -102,7 +106,7 @@ def rss_post_xml(post):
text = inf.read()
- converted = convert(text)
+ converted, _ = convert(text)
pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date']))
subs = {
```
The biggest note is that `convert()` now creates a `Markdown` instance on each call. This protects against multiple processes trying to use the same module-level `md`.
See <https://python-markdown.github.io/reference/#Markdown> for notes on `Markdown.reset()` and thread safety.
And re-run the benchmarks:
```shell
@ -862,9 +905,9 @@ INFO: removing old __bench files
INFO: number of *.md files 102
INFO: number of *.html files 2
INFO: running
real 0.27
user 1.82
sys 0.15
real 0.45
user 4.21
sys 0.32
INFO: number of *.html files 102
INFO: cleanup __bench files
@ -873,14 +916,14 @@ INFO: removing old __bench files
INFO: number of *.md files 1002
INFO: number of *.html files 2
INFO: running
real 1.25
user 16.68
sys 0.52
real 2.52
user 35.69
sys 0.96
INFO: number of *.html files 1002
INFO: cleanup __bench files
```
Down to 1.25s for 1000 posts 🎉
So that's down to 2.5s for 1000 posts. Not a bad start!
[1]: https://crystal-lang.org/
[2]: https://github.com/crystal-lang/crystal/releases/tag/0.31.0