Adding Notion script for initial construction of syllabus

This commit is contained in:
Sean Dockray 2020-06-04 11:28:52 +10:00
parent d26e775764
commit dcf4cac458
1 changed file with 251 additions and 0 deletions

View File

@@ -0,0 +1,251 @@
import argparse
import json
import os
import re
import shutil
import stat
import subprocess
import sys

from notion.client import NotionClient
from notion.markdown import notion_to_markdown
# Settings this script understands. Every key is registered as a `--<key>`
# command line option and is also looked up as an upper-cased environment
# variable; the value listed here is the fallback when neither is given.
vars = dict(
    syllabus_db="https://www.notion.so/memoryspace/594e40c85f2844b5911f14e7db21850f?v=2a2e8a86e8a74d29844f5eef41d18b3f",
    syllabus_title="Pirate Care",
    token_v2="Find the value of 'token_v2' in your browser cookies when logged into Notion",
    hugo_site_path="/mnt/d/dev/websites/arche-syllabus-test",  # The Hugo site
    website_path="/mnt/d/tmp/notion",
    base_url="http://127.0.0.1:8000",
)
# Markdown markup written in front of a Notion block's title, keyed by the
# block's `type`. Block types that are not listed get no prefix.
prefixes = dict([
    ("header", "# "),
    ("sub_header", "## "),
    ("sub_sub_header", "### "),
    ("bulleted_list", "+ "),
    ("numbered_list", "1. "),
    ("toggle", "[ ]"),
    ("quote", "> "),
])
def get_value(name, default=False):
    """Resolve one setting from the command line, the environment or a default.

    The lookup order is:

    1. the attribute `name.lower()` on the module-level `args` namespace,
       when it exists and is not None;
    2. the environment variable `name.upper()`;
    3. `default`.

    Whichever source wins is printed together with the value, which is then
    returned.
    """
    cli_key = name.lower()
    env_key = name.upper()

    # A missing attribute and an attribute set to None both mean "not given".
    cli_value = getattr(args, cli_key, None)
    if cli_value is not None:
        print('CLI:', name, cli_value)
        return cli_value

    if env_key in os.environ:
        env_value = os.environ[env_key]
        print('env:', name, env_value)
        return env_value

    print('default:', name, default)
    return default
def rmrf(path):
    """Recursively delete a file or a directory tree.

    Nothing is removed when `path` does not exist or when it resolves to a
    filesystem root; a message is printed instead.

    @TODO: make it safer
    """
    def remove_readonly(func, failed_path, excinfo):
        # rmtree calls this with the operation that failed; make the entry
        # writable and retry that same operation once.
        os.chmod(failed_path, stat.S_IWRITE)
        func(failed_path)

    real_path = os.path.realpath(path)
    # A root directory is its own parent ("/" on POSIX, "C:\\" on Windows).
    is_root = len(real_path) <= 1 or os.path.dirname(real_path) == real_path

    if os.path.exists(path) and not is_root:
        if os.path.isdir(path):
            shutil.rmtree(path, onerror=remove_readonly)
        elif os.path.isfile(path):
            os.remove(path)
    else:
        print("Either the path doesn't exist or you are trying to delete the root directory(?!):", path)
def cmd(parts, cwd=None, env=None):
    """Run an external program and return what it wrote to stdout.

    `parts` is the argument list (program first), `cwd` the working directory
    and `env` the environment handed to the child process. The command is
    printed before it runs; the captured text is returned with surrounding
    whitespace stripped.
    """
    command_line = ' '.join(parts)
    print(f"Command: {command_line} ({cwd})")
    captured = subprocess.check_output(
        parts,
        cwd=cwd,
        env=env,
        universal_newlines=True,
    )
    return captured.strip()
def hugo(hugo_site_path, dest, hugo_environment='gitea', base_url=None):
    """Build the Hugo site found at `hugo_site_path` into `dest`.

    `dest` is removed first and then recreated, so the build starts from an
    empty directory. `hugo -e <hugo_environment> -d <dest>` is run with
    `hugo_site_path` as working directory. When `base_url` is given it is
    passed to hugo through the `HUGO_PARAMS_BASEURL` environment variable.

    Returns the text hugo printed to stdout.
    """
    rmrf(dest)
    try:
        os.makedirs(dest, exist_ok=True)
    except OSError as err:
        # Best effort: report the problem and still let hugo have a go.
        print(f"Error creating the directory: {dest} ({err})")

    # overriding hugo config for development environments
    env = os.environ.copy()
    if base_url:
        env["HUGO_PARAMS_BASEURL"] = base_url

    # run the hugo command
    return cmd(['hugo', '-e', hugo_environment, '-d', dest], cwd=hugo_site_path, env=env)
def get_record_text(post):
    """Generate the Markdown text of a Notion page.

    Walks `post.children` and renders each child by its `type`:

    - "page": rendered recursively and inlined;
    - "image": a Markdown image using `display_source` when set, otherwise
      `source`, with the caption as alt text;
    - "divider": a horizontal rule;
    - anything else: the prefix registered for the type in `prefixes`
      (none when the type is not listed) followed by the child's title.

    Every rendered element ends with a blank line so that neighbouring
    elements cannot run into each other. Inline code of the form
    `bib:some-key` is rewritten to `![](bib:some-key)`.

    see: https://github.com/brentbaum/brentbaum-notion-publishing/blob/master/notion/get_posts.py
    """
    chunks = []
    for child in post.children:
        if child.type == "page":
            chunks.append(get_record_text(child))
        elif child.type == "image":
            # @todo: Download the image and get into static directory for Hugo
            caption = child.caption if child.caption else ""
            source = child.display_source if child.display_source else child.source
            chunks.append(f"![{caption}]({source})\n\n")
        elif child.type == "divider":
            chunks.append("---\n\n")
        else:
            prefix = prefixes.get(child.type, "")
            # Tolerate children that carry no title (or an empty one).
            title = getattr(child, "title", None) or ""
            chunks.append(prefix + title + "\n\n")
    return re.sub(r'`bib:([a-zA-Z0-9-]+)`', r'![](bib:\1)', "".join(chunks))
def yaml_home(record):
    """Return the YAML front matter for the syllabus home page.

    `record` must provide "name" (the title) and
    "related_to_syllabus_topics_syllabus" (an iterable of objects with a
    `slug` attribute). Each topic is listed in `has_topics` as `<slug>.md`.

    The title is written as a double-quoted scalar so that characters with a
    meaning in YAML (such as ": " or "#") cannot break the front matter.
    """
    topics = [f"{t.slug}.md" for t in record["related_to_syllabus_topics_syllabus"]]
    # A JSON string literal is also a valid YAML double-quoted scalar.
    title = json.dumps(record["name"], ensure_ascii=False)
    return (
        "---\n"
        f"title: {title}\n"
        f"has_topics: [ {', '.join(topics)} ]\n"
        "---\n\n"
    )
def yaml_topic(record):
    """Return the YAML front matter for a topic page.

    `record` must provide "name" (the title) and
    "related_to_syllabus_sessions_topic" (an iterable of objects with a
    `slug` attribute). Each session is listed in `has_sessions` as
    `<slug>.md`.

    The title is written as a double-quoted scalar so that characters with a
    meaning in YAML (such as ": " or "#") cannot break the front matter.
    """
    sessions = [f"{s.slug}.md" for s in record["related_to_syllabus_sessions_topic"]]
    # A JSON string literal is also a valid YAML double-quoted scalar.
    title = json.dumps(record["name"], ensure_ascii=False)
    return (
        "---\n"
        f"title: {title}\n"
        f"has_sessions: [ {', '.join(sessions)} ]\n"
        "---\n\n"
    )
def yaml_default(record):
    """Return minimal YAML front matter holding only the page title.

    `record` must provide "name". The title is written as a double-quoted
    scalar so that characters with a meaning in YAML (such as ": " or "#")
    cannot break the front matter.
    """
    # A JSON string literal is also a valid YAML double-quoted scalar.
    title = json.dumps(record["name"], ensure_ascii=False)
    return f"---\ntitle: {title}\n---\n\n"
def resolve_fields(id, yaml_func=None):
    """Convert a page id into Markdown, metadata, and header.

    The block is fetched through the module-level Notion `client`.

    Args:
        id: id of the Notion block to fetch.
        yaml_func: callable that receives the block's properties and returns
            the front matter string. `yaml_default` is used when omitted.

    Returns:
        A dict with every property of the block plus "markdown" (the rendered
        body) and "header" (the front matter).
    """
    if yaml_func is None:
        # The signature allows omitting the template; calling None would fail.
        yaml_func = yaml_default

    record = client.get_block(id)
    markdown = get_record_text(record)
    post = record.get_all_properties()
    header = yaml_func(post)
    # NOTE: "publish_date" and "updated" were once sketched as extra keys here
    # but are not exported.
    return {
        **post,
        "markdown": markdown,
        "header": header,
    }
def notion_to_md(id, filepath=None, yaml_func=None):
    """Render a Notion page and optionally write it to a file.

    Args:
        id: id of the Notion block (usually a page) to render.
        filepath: when given, the front matter followed by the Markdown body
            is written there as UTF-8, replacing any existing file.
        yaml_func: front matter template, forwarded to `resolve_fields`.

    Returns:
        The dict produced by `resolve_fields` for the page.
    """
    page_data = resolve_fields(id, yaml_func=yaml_func)
    page_content = page_data["header"] + page_data["markdown"]
    if filepath:
        # Be explicit about the encoding: page text is not limited to ASCII
        # and the platform default is not UTF-8 everywhere.
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(page_content)
    return page_data
def rm_syllabus(content_dir):
    """Delete everything this script generates inside `content_dir`.

    That is the home page `_index.md` and the `topic` and `session`
    directories.
    """
    for generated in ("_index.md", "topic", "session"):
        rmrf(os.path.join(content_dir, generated))
def fetch_syllabus_md(syllabi, syllabus_name, content_dir):
    """Export one syllabus from Notion as Markdown files for Hugo.

    Previously generated files in `content_dir` are removed first. The first
    row of the collection view `syllabi` whose title equals `syllabus_name`
    is then exported as:

    - `_index.md`: the syllabus home page;
    - `topic/<slug>.md`: one file per topic of the syllabus;
    - `session/<slug>.md`: one file per session referenced by those topics
      (a session shared by several topics is written once).

    A message is printed when no row matches. Uses the module-level Notion
    `client`.
    """
    rm_syllabus(content_dir)
    # Load the Syllabus collection
    cv = client.get_collection_view(syllabi)
    for row in cv.collection.get_rows():
        # This only builds one syllabus at a time
        if row.title != syllabus_name:
            continue
        print(row)
        # Shows the property ids of the collection, handy for looking up
        # identifiers such as the one passed to get_property() below.
        print(row.collection.get_schema_properties())

        # Home page
        notion_to_md(row.id,
                     filepath=os.path.join(content_dir, "_index.md"),
                     yaml_func=yaml_home)

        # handle topics
        topics_dir = os.path.join(content_dir, "topic")
        print("creating: ", topics_dir)
        os.makedirs(topics_dir, exist_ok=False)
        # NOTE(review): "c~x$" is presumably the schema id of the relation
        # from a syllabus to its topics - confirm against the printed schema.
        topics = row.get_property("c~x$")
        sessions = []
        seen_session_ids = set()
        for topic in topics:
            print(" - ", f"{topic.slug}.md")
            notion_to_md(topic.id,
                         filepath=os.path.join(topics_dir, f"{topic.slug}.md"),
                         yaml_func=yaml_topic)
            for session in topic.related_to_syllabus_sessions_topic:
                # Keep first-seen order, but export each session only once.
                if session.id not in seen_session_ids:
                    seen_session_ids.add(session.id)
                    sessions.append(session)

        # handle sessions
        sessions_dir = os.path.join(content_dir, "session")
        print("creating: ", sessions_dir)
        os.makedirs(sessions_dir, exist_ok=False)
        for session in sessions:
            print(" - ", f"{session.slug}.md")
            notion_to_md(session.id,
                         filepath=os.path.join(sessions_dir, f"{session.slug}.md"),
                         yaml_func=yaml_default)

        # Stop at the first match: a second row with the same title would
        # fail on the directories that now exist.
        return

    print(f"No syllabus titled {syllabus_name!r} was found in {syllabi}")
if __name__ == "__main__":
    # Parsing command line arguments: one optional `--<name>` per setting
    parser = argparse.ArgumentParser()
    for v in vars:
        parser.add_argument(f"--{v.lower()}")
    args = parser.parse_args()

    # Load all variables from command line arguments or environment variables.
    # Each setting becomes an upper-cased module global (e.g. TOKEN_V2).
    for v in vars:
        globals()[v.upper()] = get_value(v.lower(), vars[v])

    # Obtain the `token_v2` value by inspecting your browser cookies on a logged-in session on Notion.so
    try:
        client = NotionClient(token_v2=TOKEN_V2)
    except Exception as err:
        # Without a client nothing below can work, so stop with a failure code.
        print("Notion connect didn't work using token: ", TOKEN_V2, err)
        sys.exit(1)

    hugo_content_path = os.path.join(HUGO_SITE_PATH, 'content')
    # Does the Hugo site exist ?
    if not os.path.exists(hugo_content_path):
        print(hugo_content_path, "doesn't exist. Make sure it is the location of a Hugo site.")
        sys.exit(1)

    # Grab the files from Notion
    fetch_syllabus_md(SYLLABUS_DB, SYLLABUS_TITLE, hugo_content_path)
    # Now build the Hugo site
    hugo(HUGO_SITE_PATH, WEBSITE_PATH, base_url=BASE_URL)