# Provenance: this module was recovered from a whitespace-collapsed git patch.
#   Commit:  dcf4cac4588b90c5458f6e0ffb59d1838d452f22
#   From:    Sean Dockray
#   Date:    Thu, 4 Jun 2020 11:28:52 +1000
#   Subject: [PATCH] Adding Notion script for initial construction of syllabus
#   File:    custom_syadmin/build_from_notion.py (new file, index e69de29..a1ce181)
"""Build a Hugo syllabus site from a Notion database.

The script looks up one syllabus row (by title) in a Notion collection view,
writes the syllabus home page, its topics and their sessions as Markdown files
with YAML front matter into the ``content`` directory of a Hugo site, and then
runs ``hugo`` to build the website.

Every setting listed in ``vars`` can be given as a command line option
(``--token_v2 ...``) or as an environment variable (``TOKEN_V2=...``).
"""
import argparse
import json
import os
import re
import shutil
import stat
import subprocess
import sys

try:
    from notion.client import NotionClient
    from notion.markdown import notion_to_markdown  # noqa: F401 - unused, kept from the original import list
except ImportError:
    # Let the pure helpers below be imported without notion-py installed;
    # main() refuses to run when the client class is missing.
    NotionClient = None
    notion_to_markdown = None


# Set the variables we'll be allowing for (as CL arg or environment variable).
# NOTE: the name shadows the builtin ``vars``; it is kept unchanged so that
# anything importing this module-level name keeps working.
vars = {
    "syllabus_db": "https://www.notion.so/memoryspace/594e40c85f2844b5911f14e7db21850f?v=2a2e8a86e8a74d29844f5eef41d18b3f",
    "syllabus_title": "Pirate Care",
    "token_v2": "Find the value of 'token_v2' in your browser cookies when logged into Notion",
    "hugo_site_path": "/mnt/d/dev/websites/arche-syllabus-test",  # The Hugo site
    "website_path": "/mnt/d/tmp/notion",
    "base_url": "http://127.0.0.1:8000",
}

# Markdown prefix emitted in front of a block's title, keyed by block type.
prefixes = {
    "header": "# ",
    "sub_header": "## ",
    "sub_sub_header": "### ",
    "bulleted_list": "+ ",
    "numbered_list": "1. ",
    "toggle": "[ ]",
    "quote": "> ",
}

# `bib:some-key` (in backticks) is rewritten to the image-style reference ![](bib:some-key).
_BIB_REFERENCE = re.compile(r'`bib:([a-zA-Z0-9-]+)`')


def _loggable(name, value):
    """Return *value* for logging, hiding it when *name* looks like a secret."""
    return "***" if "token" in name.lower() else value


def get_value(name, default=False):
    """Resolve one setting: command line first, then environment, then default.

    The parsed command line is read from the module-level ``args`` namespace
    when it exists; before ``main()`` has run only the environment and the
    default are consulted.  The chosen source is printed, with token values
    masked so the Notion credential does not end up in logs.
    """
    cli_args = globals().get("args")
    cli_value = getattr(cli_args, name.lower(), None)
    if cli_value is not None:
        print('CLI:', name, _loggable(name, cli_value))
        return cli_value
    env_key = name.upper()
    if env_key in os.environ:
        print('env:', name, _loggable(name, os.environ[env_key]))
        return os.environ[env_key]
    print('default:', name, default)
    return default


def rmrf(path):
    """Recursively remove a file or directory.

    Read-only entries are made writable and retried.  A path whose resolved
    form is a single character (i.e. ``/``) is refused.
    @TODO: make it safer (the length check does not protect e.g. ``C:\\``).
    """
    def remove_readonly(func, path, excinfo):
        # Clear the read-only bit, then retry the operation that failed.
        os.chmod(path, stat.S_IWRITE)
        func(path)

    if os.path.exists(path) and len(os.path.realpath(path)) > 1:
        if os.path.isdir(path):
            # ``onerror`` is deprecated since Python 3.12 in favour of
            # ``onexc``; both call the handler with three arguments.
            handler_kw = "onexc" if sys.version_info >= (3, 12) else "onerror"
            shutil.rmtree(path, **{handler_kw: remove_readonly})
        elif os.path.isfile(path):
            os.remove(path)
    else:
        print("Either the path doesn't exist or you are trying to delete the root directory(?!):", path)


def cmd(parts, cwd=None, env=None):
    """Run a command (argument list, no shell) and return its stripped stdout.

    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    print(f"Command: {' '.join(parts)} ({cwd})")
    return subprocess.check_output(parts, cwd=cwd, env=env, universal_newlines=True).strip()


def hugo(hugo_site_path, dest, hugo_environment='gitea', base_url=None):
    """Build the Hugo site at *hugo_site_path* into a freshly emptied *dest*.

    *base_url*, when given, is passed to Hugo through the
    ``HUGO_PARAMS_BASEURL`` environment variable.  Returns the output of the
    ``hugo`` command.
    """
    rmrf(dest)
    try:
        os.makedirs(dest, exist_ok=True)
    except OSError as err:
        # Best effort, as before: report and let the hugo command run anyway.
        print(f"Error creating the directory: {dest} ({err})")
    # overriding hugo config for development environments
    env = os.environ.copy()
    if base_url:
        env["HUGO_PARAMS_BASEURL"] = base_url
    return cmd(['hugo', '-e', hugo_environment, '-d', dest], cwd=hugo_site_path, env=env)


def get_record_text(post):
    """Generate the Markdown text of a Notion page from its child blocks.

    Sub-pages are inlined recursively, images become Markdown images, dividers
    become horizontal rules, and every other block contributes its title with
    the prefix registered for its type in ``prefixes``.
    see: https://github.com/brentbaum/brentbaum-notion-publishing/blob/master/notion/get_posts.py
    """
    parts = []
    for child in post.children:
        if child.type == "page":
            parts.append(get_record_text(child))
        elif child.type == "image":
            # @todo: Download the image and get into static directory for Hugo
            caption = child.caption if child.caption else ""
            source = child.display_source if child.display_source else child.source
            parts.append(f"![{caption}]({source})\n")
        elif child.type == "divider":
            # The blank line keeps the rule from fusing with the next block.
            parts.append("---\n\n")
        else:
            # Some block types carry no title (or a None title); emit an empty one.
            title = getattr(child, "title", None) or ""
            parts.append(prefixes.get(child.type, "") + title + "\n\n")

    return _BIB_REFERENCE.sub(r'![](bib:\1)', "".join(parts))


def _front_matter(title, extra=()):
    """Render a YAML front matter block; the title is emitted as a quoted scalar."""
    # A JSON string is a valid YAML double-quoted scalar, so titles containing
    # ':' or '#' no longer break the front matter.
    lines = ["---", "title: " + json.dumps(str(title), ensure_ascii=False)]
    lines.extend(f"{key}: {value}" for key, value in extra)
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def yaml_home(record):
    """YAML template for syllabus home page (title plus its topic files)."""
    topics = [f"{t.slug}.md" for t in record["related_to_syllabus_topics_syllabus"]]
    return _front_matter(record["name"], [("has_topics", "[ %s ]" % ', '.join(topics))])


def yaml_topic(record):
    """YAML template for topic (title plus its session files)."""
    sessions = [f"{s.slug}.md" for s in record["related_to_syllabus_sessions_topic"]]
    return _front_matter(record["name"], [("has_sessions", "[ %s ]" % ', '.join(sessions))])


def yaml_default(record):
    """YAML template default (title only)."""
    return _front_matter(record["name"])


def resolve_fields(id, yaml_func=None):
    """Convert a page id into its properties plus Markdown and front matter.

    Uses the module-level ``client``.  *yaml_func* receives the page's
    property dict and returns the front matter; ``yaml_default`` is used when
    it is omitted.  Returns the property dict extended with ``"markdown"`` and
    ``"header"``.
    """
    if yaml_func is None:
        yaml_func = yaml_default
    record = client.get_block(id)
    markdown = get_record_text(record)
    post = record.get_all_properties()
    header = yaml_func(post)

    return {
        **post,
        "markdown": markdown,
        "header": header,
    }


def notion_to_md(id, filepath=None, yaml_func=None):
    """Take an id (usually a Notion page), optionally write it to *filepath*.

    The file receives the front matter followed by the Markdown body, encoded
    as UTF-8.  Returns the dict produced by ``resolve_fields``.
    """
    page_data = resolve_fields(id, yaml_func=yaml_func)
    page_content = page_data["header"] + page_data["markdown"]
    if filepath:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(page_content)
    return page_data


def rm_syllabus(content_dir):
    """Remove all the files that this script writes."""
    for name in ("_index.md", "topic", "session"):
        rmrf(os.path.join(content_dir, name))


def _write_pages(records, out_dir, yaml_func):
    """Write one ``<slug>.md`` file per record into *out_dir*."""
    print("creating: ", out_dir)
    os.makedirs(out_dir, exist_ok=True)
    for record in records:
        filename = f"{record.slug}.md"
        print(" - ", filename)
        notion_to_md(record.id,
                     filepath=os.path.join(out_dir, filename),
                     yaml_func=yaml_func)


def fetch_syllabus_md(syllabi, syllabus_name, content_dir):
    """Grab the files from Notion.

    *syllabi* is the URL of the collection view listing the syllabi.  The first
    row whose title equals *syllabus_name* is written into *content_dir* as
    ``_index.md``, ``topic/<slug>.md`` and ``session/<slug>.md``; earlier
    output of this script is removed first.  Uses the module-level ``client``.
    """
    rm_syllabus(content_dir)

    # Load the Syllabus collection
    cv = client.get_collection_view(syllabi)

    for row in cv.collection.get_rows():
        # This only builds one syllabus at a time
        if row.title != syllabus_name:
            continue
        print(row)
        print(row.collection.get_schema_properties())
        # Home page
        notion_to_md(row.id,
                     filepath=os.path.join(content_dir, "_index.md"),
                     yaml_func=yaml_home)
        # handle topics
        # "c~x$" is a raw Notion property id; presumably the syllabus -> topics
        # relation that yaml_home reads by name - TODO confirm.
        topics = list(row.get_property("c~x$") or [])
        _write_pages(topics, os.path.join(content_dir, "topic"), yaml_topic)
        # handle sessions; one shared by several topics is written only once
        sessions = []
        seen_ids = set()
        for topic in topics:
            for session in topic.related_to_syllabus_sessions_topic:
                if session.id not in seen_ids:
                    seen_ids.add(session.id)
                    sessions.append(session)
        _write_pages(sessions, os.path.join(content_dir, "session"), yaml_default)
        return

    print(f"No syllabus titled {syllabus_name!r} was found in {syllabi}")


def main():
    """Command line entry point: fetch the syllabus from Notion, then build the site."""
    # get_value() reads ``args``; resolve_fields()/fetch_syllabus_md() read ``client``.
    global args, client

    # Parsing command line arguments
    parser = argparse.ArgumentParser(
        description="Build a Hugo syllabus site from a Notion database.")
    for name in vars:
        parser.add_argument(f"--{name.lower()}")
    args = parser.parse_args()

    # Load all variables from command line arguments or environment variables
    settings = {name: get_value(name.lower(), default) for name, default in vars.items()}

    if NotionClient is None:
        sys.exit("The 'notion' package (notion-py) is not installed.")
    # The default "token" is only a hint on where to find the real one.
    if settings["token_v2"] == vars["token_v2"]:
        sys.exit("No token_v2 given. " + vars["token_v2"])

    # Obtain the `token_v2` value by inspecting your browser cookies on a logged-in session on Notion.so
    try:
        client = NotionClient(token_v2=settings["token_v2"])
    except Exception as err:
        # Nothing below can work without a client; never echo the token itself.
        sys.exit(f"Notion connect didn't work using the given token: {err}")

    hugo_content_path = os.path.join(settings["hugo_site_path"], 'content')

    # Does the Hugo site exist ?
    if not os.path.exists(hugo_content_path):
        sys.exit(f"{hugo_content_path} doesn't exist. Make sure it is the location of a Hugo site.")

    # Grab the files from Notion
    fetch_syllabus_md(settings["syllabus_db"], settings["syllabus_title"], hugo_content_path)

    # Now build the Hugo site
    hugo(settings["hugo_site_path"], settings["website_path"], base_url=settings["base_url"])


if __name__ == "__main__":
    main()