build_a_blog: safer markdown
This commit is contained in:
parent
3e06bfd9ae
commit
906a5e04ea
2 changed files with 65 additions and 21 deletions
19
main.py
19
main.py
|
@ -14,12 +14,12 @@ from markdown.extensions.toc import TocExtension
|
||||||
destpath_re = re.compile(r'\.md$')
|
destpath_re = re.compile(r'\.md$')
|
||||||
logging.basicConfig(encoding='utf-8', level=logging.INFO)
|
logging.basicConfig(encoding='utf-8', level=logging.INFO)
|
||||||
|
|
||||||
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
|
|
||||||
cpu_count = os.cpu_count()
|
cpu_count = os.cpu_count()
|
||||||
|
|
||||||
def convert(text):
|
def convert(text):
|
||||||
md.reset()
|
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
|
||||||
return md.convert(text)
|
res = md.convert(text)
|
||||||
|
return res, md.Meta
|
||||||
|
|
||||||
def render_post(fpath):
|
def render_post(fpath):
|
||||||
destpath = destpath_re.sub('.html', fpath)
|
destpath = destpath_re.sub('.html', fpath)
|
||||||
|
@ -30,15 +30,16 @@ def render_post(fpath):
|
||||||
text = input_file.read()
|
text = input_file.read()
|
||||||
|
|
||||||
logging.info("parsing %s", fpath)
|
logging.info("parsing %s", fpath)
|
||||||
out = convert(text)
|
out, meta = convert(text)
|
||||||
|
|
||||||
title = md.Meta.get('title')[0]
|
title = meta.get('title')[0]
|
||||||
date = md.Meta.get('date')[0]
|
date = meta.get('date')[0]
|
||||||
draft = False
|
draft = False
|
||||||
if md.Meta.get('draft'):
|
if meta.get('draft'):
|
||||||
draft = True
|
draft = True
|
||||||
|
|
||||||
out = convert('# ' + title) + out
|
title_out, _ = convert(text)
|
||||||
|
out = title_out + out
|
||||||
|
|
||||||
logging.info("writing to %s", destpath)
|
logging.info("writing to %s", destpath)
|
||||||
render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title})
|
render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title})
|
||||||
|
@ -105,7 +106,7 @@ def rss_post_xml(post):
|
||||||
text = inf.read()
|
text = inf.read()
|
||||||
|
|
||||||
|
|
||||||
converted = convert(text)
|
converted, _ = convert(text)
|
||||||
|
|
||||||
pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date']))
|
pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date']))
|
||||||
subs = {
|
subs = {
|
||||||
|
|
|
@ -812,7 +812,7 @@ So here is the diff to make that happen:
|
||||||
|
|
||||||
```diff
|
```diff
|
||||||
diff --git a/main.py b/main.py
|
diff --git a/main.py b/main.py
|
||||||
index 52ce57b..95b650d 100644
|
index 52ce57b..a3d92d1 100644
|
||||||
--- a/main.py
|
--- a/main.py
|
||||||
+++ b/main.py
|
+++ b/main.py
|
||||||
@@ -1,9 +1,11 @@
|
@@ -1,9 +1,11 @@
|
||||||
|
@ -827,15 +827,45 @@ index 52ce57b..95b650d 100644
|
||||||
from string import Template
|
from string import Template
|
||||||
|
|
||||||
import markdown
|
import markdown
|
||||||
@@ -13,6 +15,7 @@ destpath_re = re.compile(r'\.md$')
|
@@ -12,11 +14,12 @@ from markdown.extensions.toc import TocExtension
|
||||||
|
destpath_re = re.compile(r'\.md$')
|
||||||
logging.basicConfig(encoding='utf-8', level=logging.INFO)
|
logging.basicConfig(encoding='utf-8', level=logging.INFO)
|
||||||
|
|
||||||
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
|
-md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
|
||||||
+cpu_count = os.cpu_count()
|
+cpu_count = os.cpu_count()
|
||||||
|
|
||||||
def convert(text):
|
def convert(text):
|
||||||
md.reset()
|
- md.reset()
|
||||||
@@ -52,11 +55,11 @@ def render_posts():
|
- return md.convert(text)
|
||||||
|
+ md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(anchorlink=True)])
|
||||||
|
+ res = md.convert(text)
|
||||||
|
+ return res, md.Meta
|
||||||
|
|
||||||
|
def render_post(fpath):
|
||||||
|
destpath = destpath_re.sub('.html', fpath)
|
||||||
|
@@ -27,15 +30,16 @@ def render_post(fpath):
|
||||||
|
text = input_file.read()
|
||||||
|
|
||||||
|
logging.info("parsing %s", fpath)
|
||||||
|
- out = convert(text)
|
||||||
|
+ out, meta = convert(text)
|
||||||
|
|
||||||
|
- title = md.Meta.get('title')[0]
|
||||||
|
- date = md.Meta.get('date')[0]
|
||||||
|
+ title = meta.get('title')[0]
|
||||||
|
+ date = meta.get('date')[0]
|
||||||
|
draft = False
|
||||||
|
- if md.Meta.get('draft'):
|
||||||
|
+ if meta.get('draft'):
|
||||||
|
draft = True
|
||||||
|
|
||||||
|
- out = convert('# ' + title) + out
|
||||||
|
+ title_out, _ = convert(text)
|
||||||
|
+ out = title_out + out
|
||||||
|
|
||||||
|
logging.info("writing to %s", destpath)
|
||||||
|
render_template('index.html.tmpl', destpath, {'content': out, 'more_title': ' - ' + title})
|
||||||
|
@@ -52,11 +56,11 @@ def render_posts():
|
||||||
files = glob.glob('posts/*.md')
|
files = glob.glob('posts/*.md')
|
||||||
logging.info('found post files %s', files)
|
logging.info('found post files %s', files)
|
||||||
posts = []
|
posts = []
|
||||||
|
@ -851,8 +881,21 @@ index 52ce57b..95b650d 100644
|
||||||
return posts
|
return posts
|
||||||
|
|
||||||
def posts_list_html(posts):
|
def posts_list_html(posts):
|
||||||
|
@@ -102,7 +106,7 @@ def rss_post_xml(post):
|
||||||
|
text = inf.read()
|
||||||
|
|
||||||
|
|
||||||
|
- converted = convert(text)
|
||||||
|
+ converted, _ = convert(text)
|
||||||
|
|
||||||
|
pubdate = email.utils.format_datetime(datetime.datetime.fromisoformat(post['date']))
|
||||||
|
subs = {
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The biggest change is that `convert()` now creates a new `Markdown` instance on each call. This protects against multiple processes trying to use the same module-level `md` instance.
|
||||||
|
|
||||||
|
See <https://python-markdown.github.io/reference/#Markdown> for notes on `Markdown.reset()` and thread safety.
|
||||||
|
|
||||||
And re-run the benchmarks:
|
And re-run the benchmarks:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
@ -862,9 +905,9 @@ INFO: removing old __bench files
|
||||||
INFO: number of *.md files 102
|
INFO: number of *.md files 102
|
||||||
INFO: number of *.html files 2
|
INFO: number of *.html files 2
|
||||||
INFO: running
|
INFO: running
|
||||||
real 0.27
|
real 0.45
|
||||||
user 1.82
|
user 4.21
|
||||||
sys 0.15
|
sys 0.32
|
||||||
INFO: number of *.html files 102
|
INFO: number of *.html files 102
|
||||||
INFO: cleanup __bench files
|
INFO: cleanup __bench files
|
||||||
|
|
||||||
|
@ -873,14 +916,14 @@ INFO: removing old __bench files
|
||||||
INFO: number of *.md files 1002
|
INFO: number of *.md files 1002
|
||||||
INFO: number of *.html files 2
|
INFO: number of *.html files 2
|
||||||
INFO: running
|
INFO: running
|
||||||
real 1.25
|
real 2.52
|
||||||
user 16.68
|
user 35.69
|
||||||
sys 0.52
|
sys 0.96
|
||||||
INFO: number of *.html files 1002
|
INFO: number of *.html files 1002
|
||||||
INFO: cleanup __bench files
|
INFO: cleanup __bench files
|
||||||
```
|
```
|
||||||
|
|
||||||
Down to 1.25s for 1000 posts 🎉
|
So that's down to 2.5s for 1000 posts. Not a bad start!
|
||||||
|
|
||||||
[1]: https://crystal-lang.org/
|
[1]: https://crystal-lang.org/
|
||||||
[2]: https://github.com/crystal-lang/crystal/releases/tag/0.31.0
|
[2]: https://github.com/crystal-lang/crystal/releases/tag/0.31.0
|
||||||
|
|
Loading…
Reference in a new issue