blog/main.py

174 lines
3.9 KiB
Python
Raw Permalink Normal View History

2024-06-19 19:26:09 +00:00
import os
2024-06-17 23:52:21 +00:00
import re
import glob
import html
import email
import logging
import datetime
from multiprocessing import Pool
2024-06-17 23:52:21 +00:00
from string import Template
import markdown
from markdown.extensions.toc import TocExtension
2024-11-22 20:39:52 +00:00
# Matches the trailing ".md" extension of a post path so it can be swapped
# for ".html" when computing the output file name.
destpath_re = re.compile(r"\.md$")
logging.basicConfig(encoding="utf-8", level=logging.INFO)

# os.cpu_count() may return None when the count cannot be determined; fall
# back to a single worker so the value is always an int (it is logged with
# "%d" and passed to multiprocessing.Pool).
cpu_count = os.cpu_count() or 1
2024-06-18 14:02:26 +00:00
2024-11-22 20:39:52 +00:00
2024-06-18 14:02:26 +00:00
def convert(text):
    """Convert Markdown *text* to HTML.

    A fresh ``markdown.Markdown`` instance is built on every call with the
    "extra" and "meta" extensions plus a ``TocExtension`` that has anchor
    links enabled.

    Returns:
        A ``(html, meta)`` tuple: the rendered HTML string and the parser's
        ``Meta`` attribute as populated during conversion.
    """
    extensions = ["extra", "meta", TocExtension(anchorlink=True)]
    parser = markdown.Markdown(extensions=extensions)
    rendered = parser.convert(text)
    return rendered, parser.Meta
2024-06-18 14:02:26 +00:00
2024-11-22 20:39:52 +00:00
2024-06-17 23:52:21 +00:00
# Front-matter values that explicitly mean "this post is NOT a draft".
_FALSE_META_VALUES = frozenset({"false", "no", "0", "off"})


def _required_meta(meta, key, fpath):
    """Return the first value of front-matter *key* or raise ``ValueError``."""
    values = meta.get(key)
    if not values:
        raise ValueError(f"{fpath}: missing required front-matter key {key!r}")
    return values[0]


def _is_draft(meta):
    """Return True when the front matter marks the post as a draft."""
    values = meta.get("draft")
    if not values:
        return False
    # Meta values are lists of strings, so "draft: false" would be truthy if
    # only the presence of the key were tested; honour explicit false values.
    return values[0].strip().lower() not in _FALSE_META_VALUES


def render_post(fpath):
    """Render one Markdown post to HTML under ``public/``.

    The post is read from *fpath*, converted with ``convert``, prefixed with
    its title rendered as a level-one heading, and written through
    ``render_template`` using ``index.html.tmpl``. The destination path is
    *fpath* with a trailing ``.md`` replaced by ``.html``.

    Args:
        fpath: Path to the Markdown source file.

    Returns:
        A dict with the keys ``title``, ``date``, ``fpath``, ``destpath`` and
        ``draft`` describing the rendered post.

    Raises:
        ValueError: If the post has no ``title`` or ``date`` front matter.
    """
    destpath = destpath_re.sub(".html", fpath)
    logging.info("opening %s for parsing, dest %s", fpath, destpath)
    # from: https://python-markdown.github.io/reference/
    with open(fpath, "r", encoding="utf-8") as input_file:
        logging.info("reading %s", fpath)
        text = input_file.read()
    logging.info("parsing %s", fpath)
    out, meta = convert(text)

    title = _required_meta(meta, "title", fpath)
    date = _required_meta(meta, "date", fpath)
    draft = _is_draft(meta)

    # The title lives in the front matter, not the body, so render it as an
    # <h1> and put it in front of the converted body.
    title_out, _ = convert("# " + title)
    out = title_out + out

    logging.info("writing to %s", destpath)
    render_template(
        "index.html.tmpl", destpath, {"content": out, "more_title": " - " + title}
    )
    return {
        "title": title,
        "date": date,
        "fpath": fpath,
        "destpath": destpath,
        "draft": draft,
    }
2024-11-22 20:39:52 +00:00
2024-06-17 23:52:21 +00:00
def render_posts():
    """Render every ``posts/*.md`` file in parallel.

    Each file is handed to ``render_post`` through a ``multiprocessing.Pool``
    created with ``cpu_count`` processes.

    Returns:
        The list of results returned by ``render_post``, one per file.
    """
    post_files = glob.glob("posts/*.md")
    logging.info("found post files %s", post_files)
    logging.info("starting render posts with cpu_count: %d", cpu_count)

    with Pool(processes=cpu_count) as workers:
        rendered = workers.map(render_post, post_files)

    logging.info("render_posts result: %s", rendered)
    return rendered
2024-11-22 20:39:52 +00:00
2024-06-17 23:52:21 +00:00
def posts_list_html(posts):
    """Build the HTML ``<ul>`` listing for the index page.

    Args:
        posts: Iterable of post dicts providing ``title``, ``date`` (an ISO
            8601 string) and ``destpath``.

    Returns:
        An HTML string with one ``<li>`` per post. The title, link and date
        are HTML-escaped so characters such as ``&`` or ``<`` in a title
        cannot break the page markup (the RSS output escapes them as well).
    """
    post_tpl = """<li>
<a href="{href}">{title}</a>
<time datetime="{date}">{disp_date}</time>
</li>"""
    items = []
    for post in posts:
        date = post.get("date")
        disp_date = datetime.datetime.fromisoformat(date).strftime("%Y-%m-%d")
        items.append(
            post_tpl.format(
                href=html.escape(str(post.get("destpath"))),
                title=html.escape(str(post.get("title"))),
                date=html.escape(str(date)),
                disp_date=disp_date,
            )
        )
    return '<ul class="blog-posts-list">' + "".join(items) + "</ul>"
2024-06-17 23:52:21 +00:00
def render_template(tpl_fname, out_fname, subs):
    """Fill a ``string.Template`` file and write the result under ``public/``.

    Args:
        tpl_fname: Path of the template file to read.
        out_fname: Output path, joined onto ``public/``.
        subs: Mapping of placeholder names to replacement strings, passed to
            ``Template.substitute``.
    """
    with open(tpl_fname, "r", encoding="utf-8") as tpl_file:
        rendered = Template(tpl_file.read()).substitute(subs)

    target = os.path.join("public/", out_fname)
    with open(target, "w", encoding="utf-8") as out_file:
        out_file.write(rendered)
2024-11-22 20:39:52 +00:00
2024-06-17 23:52:21 +00:00
def render_index(posts):
    """Write the site index page listing *posts*.

    The list markup comes from ``posts_list_html`` and is rendered into
    ``index.html`` with the ``index.html.tmpl`` template and an empty
    ``more_title``.
    """
    substitutions = {"content": posts_list_html(posts), "more_title": ""}
    render_template("index.html.tmpl", "index.html", substitutions)
2024-06-17 23:52:21 +00:00
def rss_post_xml(post):
    """Build the RSS ``<item>`` fragment for one post.

    The post source is re-read from ``post["fpath"]`` and converted with
    ``convert``; the resulting HTML becomes the item description. Every
    substituted value is HTML-escaped before being placed in the XML.

    Args:
        post: Post dict with the keys ``fpath``, ``destpath``, ``title`` and
            ``date`` (an ISO 8601 string).

    Returns:
        The ``<item>...</item>`` XML string.
    """
    # Import the submodule explicitly: a bare ``import email`` does not
    # guarantee that ``email.utils`` has been loaded.
    from email.utils import format_datetime

    tpl = """
<item>
<title>{title}</title>
<link>{link}</link>
<pubDate>{pubdate}</pubDate>
<guid>{link}</guid>
<description>{description}</description>
</item>
"""
    link = "https://cfebs.com/" + post["destpath"]

    with open(post["fpath"], "r", encoding="utf-8") as inf:
        text = inf.read()
    converted, _ = convert(text)

    pubdate = format_datetime(datetime.datetime.fromisoformat(post["date"]))
    subs = {
        "title": post["title"],
        "link": link,
        "pubdate": pubdate,
        "description": converted,
    }
    # Build a new dict rather than rewriting the one being iterated.
    escaped = {key: html.escape(value) for key, value in subs.items()}
    return tpl.format(**escaped)
2024-11-22 20:39:52 +00:00
2024-06-17 23:52:21 +00:00
def render_rss_index(posts):
    """Write the RSS feed ``index.xml`` for the first five of *posts*.

    Args:
        posts: Sequence of post dicts; only ``posts[:5]`` are included, each
            rendered with ``rss_post_xml``.
    """
    # Import the submodule explicitly: a bare ``import email`` does not
    # guarantee that ``email.utils`` has been loaded.
    from email.utils import format_datetime

    items = "".join(rss_post_xml(post) for post in posts[:5])

    # Use an aware UTC timestamp: a naive datetime.now() is local wall-clock
    # time, which format_datetime would emit with a "-0000" (UTC) offset.
    build_time = datetime.datetime.now(datetime.timezone.utc)
    subs = {
        "site_title": "cfebs.com",
        "site_link": "https://cfebs.com",
        "self_full_link": "https://cfebs.com/index.xml",
        "description": "Recent content from cfebs.com",
        "last_build_date": format_datetime(build_time),
    }
    subs = {key: html.escape(value) for key, value in subs.items()}
    # The items are already-escaped XML fragments, so they are added after
    # the escaping pass.
    subs["items"] = items
    render_template("index.xml.tmpl", "index.xml", subs)
2024-06-17 23:52:21 +00:00
def main():
    """Build the site: render all posts, then the index page and RSS feed.

    Posts flagged as drafts are still rendered by ``render_posts`` but are
    left out of the index and the feed. The remaining posts are ordered
    newest first by their ISO ``date``.
    """
    os.makedirs("public/posts/", exist_ok=True)

    all_posts = render_posts()
    logging.info("rendered posts: %s", all_posts)

    published = [entry for entry in all_posts if not entry["draft"]]
    published.sort(
        key=lambda entry: datetime.datetime.fromisoformat(entry["date"]),
        reverse=True,
    )

    render_index(published)
    render_rss_index(published)
2024-11-22 20:39:52 +00:00
# Run the site build only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()