import argparse
import sys
import os
import shutil
import subprocess
import re
import stat
from urllib.parse import urlparse, parse_qs

from notion.client import NotionClient
from notion.markdown import notion_to_markdown

# Set the variables we'll be allowing for (as CL arg or environment variable).
# Each key becomes a `--<key>` command line option, an upper-cased environment
# variable name, and (when run as a script) an upper-cased module global.
vars = {
    "syllabus_db": "https://www.notion.so/memoryspace/594e40c85f2844b5911f14e7db21850f?v=2a2e8a86e8a74d29844f5eef41d18b3f",
    "syllabus_title": "Machine Listening",
    "token_v2": "Find the value of 'token_v2' in your browser cookies when logged into Notion",
    "hugo_path": "hugo",
    "hugo_site_path": "/mnt/d/dev/websites/arche-syllabus-test",  # The Hugo site
    "website_path": "/mnt/d/tmp/notion",
    "base_url": "http://127.0.0.1:8000",
}

# Markdown prefix written in front of the title of each Notion block type.
prefixes = {
    "header": "# ",
    "sub_header": "## ",
    "sub_sub_header": "### ",
    "bulleted_list": "+ ",
    "numbered_list": "1. ",
    "toggle": "[ ]",
    "quote": "> ",
}

# Inline-code markers such as `bib:some-id` are rewritten to ![](bib:some-id).
_REFERENCE_RE = re.compile(r'`(bib|session|topic):([a-zA-Z0-9-]+)`')


def get_value(name, default=False):
    """Resolve a setting from the command line, the environment, or a default.

    Lookup order: the parsed command line arguments (the module-level `args`),
    then the environment variable named `name.upper()`, then `default`.
    The chosen source and value are printed; a `token_v2` value coming from the
    command line or the environment is masked in that output.

    Args:
        name: Setting name (case-insensitive).
        default: Value returned when neither source provides one.

    Returns:
        The resolved value.
    """
    key = name.lower()
    is_secret = key == "token_v2"

    cli_value = getattr(args, key, None)
    if cli_value is not None:
        print('CLI:', name, "XXXX" if is_secret else cli_value)
        return cli_value

    env_key = name.upper()
    if env_key in os.environ:
        env_value = os.environ[env_key]
        # The label used to read 'CLI:' here when masking the token.
        print('env:', name, "XXXX" if is_secret else env_value)
        return env_value

    print('default:', name, default)
    return default


def rmrf(path, keep_root=False):
    """Recursively remove a file or directory.

    @TODO: make it safer

    Args:
        path: File or directory to remove.
        keep_root: For a directory, delete only the files found while walking
            it; the directory itself and its sub-directories are left in place.

    Nothing is removed (and a message is printed) when `path` does not exist
    or when its resolved path is a single character long (e.g. "/").
    """
    def remove_readonly(func, path, excinfo):
        # Make the entry writable, then retry the operation that failed.
        os.chmod(path, stat.S_IWRITE)
        func(path)

    if not (os.path.exists(path) and len(os.path.realpath(path)) > 1):
        print("Either the path doesn't exist or you are trying to delete the root directory(?!):", path)
        return

    if os.path.isdir(path):
        if keep_root:
            for root, dirs, files in os.walk(path):
                for f in files:
                    os.remove(os.path.join(root, f))
        else:
            shutil.rmtree(path, onerror=remove_readonly)
    elif os.path.isfile(path):
        os.remove(path)


def cmd(parts, cwd=None, env=None):
    """Run a command (without a shell) and return its exit status.

    Args:
        parts: Program and arguments as a list of strings.
        cwd: Working directory for the child process.
        env: Environment mapping for the child process.

    Returns:
        The integer return code reported by `subprocess.call`.
    """
    print(f"Command: {' '.join(parts)} ({cwd})")
    return subprocess.call(parts, cwd=cwd, env=env, universal_newlines=True)


def hugo(hugo_site_path, dest, hugo_environment='gitea', base_url=None):
    """Build the Hugo site located at `hugo_site_path` into `dest`.

    Existing files under `dest` are deleted first (directories are kept).
    The Hugo executable is taken from the module-level `HUGO_PATH`.

    Args:
        hugo_site_path: Directory of the Hugo site; used as working directory.
        dest: Output directory passed to Hugo with `-d`.
        hugo_environment: Value passed to Hugo with `-e`.
        base_url: When truthy, exported to the Hugo process as the
            `HUGO_PARAMS_BASEURL` environment variable.

    Returns:
        The exit status of the Hugo command.
    """
    rmrf(dest, keep_root=True)
    try:
        os.makedirs(dest, exist_ok=True)
    except OSError as err:
        # Best effort: report and let Hugo itself fail if the path is unusable.
        print(f"Error creating the directory: {dest} ({err})")

    # overriding hugo config for development environments
    env = os.environ.copy()
    if base_url:
        env["HUGO_PARAMS_BASEURL"] = base_url

    # run the hugo command
    return cmd(
        [HUGO_PATH, '-e', hugo_environment, '-d', dest, '--noTimes'],
        cwd=hugo_site_path,
        env=env,
    )


def hugo_video_shortcode(url):
    """Return the Hugo shortcode for a YouTube or Vimeo url.

    Falls back to a plain Markdown link when the url is for neither service
    or when no video id can be extracted from it.
    """
    try:
        if "youtube.com" in url:
            video = parse_qs(urlparse(url).query)["v"][0]
            return "{{< youtube " + video + " >}}"
        if "vimeo.com" in url:
            video = urlparse(url).path.lstrip("/")
            if video:
                return "{{< vimeo " + video + " >}}"
    except (KeyError, IndexError, ValueError):
        # Missing "v" parameter or an unparsable url: use the link fallback.
        pass
    return f"[{url}]({url})"


def _block_title(block):
    """Return the block's title, or "" when it has none."""
    return getattr(block, "title", None) or ""


def get_record_text(post, level=0):
    """Generate the Markdown text of a Notion page.

    see: https://github.com/brentbaum/brentbaum-notion-publishing/blob/master/notion/get_posts.py

    Args:
        post: Block whose `children` are rendered.
        level: Nesting depth, used to indent nested bulleted lists by four
            spaces per level.

    Returns:
        The Markdown text, with `bib:`/`session:`/`topic:` inline-code markers
        rewritten to empty-alt image references.
    """
    parts = []
    for child in post.children:
        kind = child.type
        if kind == "page":
            parts.append(get_record_text(child))
        elif kind == "image":
            # @todo: Download the image and get into static directory for Hugo
            caption = child.caption if child.caption else ""
            source = child.display_source or child.source
            # Blank line after the image so it does not merge with what follows.
            parts.append(f"![{caption}]({source})\n\n")
        elif kind == "video":
            caption = f"\n_{child.caption}_" if child.caption else ""
            parts.append(f"{hugo_video_shortcode(child.source)}{caption}\n\n")
        elif kind == "bookmark":
            caption = f"\n_{child.caption}_" if child.caption else ""
            parts.append(f"[{child.title}]({child.link}){caption}\n\n")
        elif kind == "divider":
            # Without the trailing blank line the next block was glued onto
            # the rule, e.g. "---# Heading".
            parts.append("---\n\n")
        elif kind == "bulleted_list":
            indent = ' ' * (level * 4)
            parts.append(indent + prefixes.get(kind, "") + _block_title(child) + "\n\n")
            if len(child.children):
                parts.append(get_record_text(child, level + 1))
        else:
            parts.append(prefixes.get(kind, "") + _block_title(child) + "\n\n")

    return _REFERENCE_RE.sub(r'![](\1:\2)', "".join(parts))


def _front_matter(record, **extra):
    """Build a YAML front matter block: title, draft flag, then `extra` keys."""
    draft = "false" if record.get("published", True) else "true"
    lines = [
        "---",
        "title: >",
        f"    {record['name']}",
        f"draft: {draft}",
    ]
    lines.extend(f"{key}: {value}" for key, value in extra.items())
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def yaml_home(record):
    """YAML template for syllabus home page (lists the related topic files)."""
    topics = [f"{t.slug}.md" for t in record["related_to_syllabus_topics_syllabus"]]
    return _front_matter(record, has_topics="[ %s ]" % ', '.join(topics))


def yaml_topic(record):
    """YAML template for topic (lists the related session files)."""
    sessions = [f"{s.slug}.md" for s in record["related_to_syllabus_sessions_topic"]]
    return _front_matter(record, has_sessions="[ %s ]" % ', '.join(sessions))


def yaml_default(record):
    """YAML template default (title and draft flag only)."""
    return _front_matter(record)


def resolve_fields(id, yaml_func=None):
    """Convert a page id into Markdown, metadata, and header.

    Uses the module-level Notion `client`.

    Args:
        id: Notion block id to fetch.
        yaml_func: Callable turning the page properties into a front matter
            string; `yaml_default` is used when omitted.

    Returns:
        The page properties plus two extra keys: "markdown" and "header".
    """
    if yaml_func is None:
        # The declared default used to be called directly and raise TypeError.
        yaml_func = yaml_default

    record = client.get_block(id)
    markdown = get_record_text(record)
    post = record.get_all_properties()
    return {
        **post,
        "markdown": markdown,
        "header": yaml_func(post),
    }


def notion_to_md(id, filepath=None, yaml_func=None):
    """Take an id (usually a Notion page) and optionally write it to a file.

    Args:
        id: Notion block id to convert.
        filepath: Destination file; nothing is written when falsy.
        yaml_func: Front matter builder forwarded to `resolve_fields`.

    Returns:
        The dictionary produced by `resolve_fields`.
    """
    page_data = resolve_fields(id, yaml_func=yaml_func)
    page_content = page_data["header"] + page_data["markdown"]

    if filepath:
        # NOTE: this clears the umask for the rest of the process so that the
        # file (and anything created afterwards) gets the requested mode.
        os.umask(0)
        # O_TRUNC: without it, overwriting a longer existing file would leave
        # the tail of the old content behind.
        fd = os.open(filepath, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o777)
        with open(fd, 'w', encoding="utf-8") as f:
            f.write(page_content)
    return page_data


def rm_syllabus(content_dir):
    """Remove all the files that this script writes under `content_dir`."""
    for entry in ("_index.md", "topic", "session"):
        rmrf(os.path.join(content_dir, entry))


def fetch_syllabus_md(syllabi, syllabus_name, content_dir):
    """Grab the files from Notion and write them as Markdown.

    Clears previous output, then for every row of the `syllabi` collection
    view whose title equals `syllabus_name` writes `_index.md`, one file per
    topic under `topic/` and one file per session under `session/`.
    Uses the module-level Notion `client`.

    Args:
        syllabi: URL (or id) of the Notion collection view listing syllabi.
        syllabus_name: Title of the syllabus row to export.
        content_dir: Hugo content directory to write into.
    """
    rm_syllabus(content_dir)

    # Load the Syllabus collection
    cv = client.get_collection_view(syllabi)
    found = False
    for row in cv.collection.get_rows():
        # This only builds one syllabus at a time
        if row.title != syllabus_name:
            continue
        found = True
        print(row)
        print(row.collection.get_schema_properties())

        # Home page
        notion_to_md(row.id, filepath=os.path.join(content_dir, "_index.md"), yaml_func=yaml_home)

        # handle topics
        topics_dir = os.path.join(content_dir, "topic")
        print("creating: ", topics_dir)
        os.makedirs(topics_dir, exist_ok=False)
        # "c~x$" is the raw property id read from the row
        # (presumably the related-topics relation -- TODO confirm).
        topics = row.get_property("c~x$")
        sessions = []
        for topic in topics:
            print(" - ", f"{topic.slug}.md")
            notion_to_md(topic.id, filepath=os.path.join(topics_dir, f"{topic.slug}.md"), yaml_func=yaml_topic)
            sessions.extend(topic.related_to_syllabus_sessions_topic)

        # handle sessions
        sessions_dir = os.path.join(content_dir, "session")
        print("creating: ", sessions_dir)
        os.makedirs(sessions_dir, exist_ok=False)
        for session in sessions:
            print(" - ", f"{session.slug}.md")
            notion_to_md(session.id, filepath=os.path.join(sessions_dir, f"{session.slug}.md"), yaml_func=yaml_default)

    if not found:
        print("No syllabus found with title:", syllabus_name)


if __name__ == "__main__":
    # Parsing command line arguments
    parser = argparse.ArgumentParser()
    for v in vars:
        parser.add_argument(f"--{v.lower()}")
    args = parser.parse_args()

    # Load all variables from command line arguments or environment variables
    for v in vars:
        globals()[v.upper()] = get_value(v.lower(), vars[v])

    # Obtain the `token_v2` value by inspecting your browser cookies on a logged-in session on Notion.so
    try:
        client = NotionClient(token_v2=TOKEN_V2)
    except Exception as err:
        # Do not echo the token, and stop: nothing below works without a client.
        print("Notion connect didn't work; check the token_v2 value:", err)
        sys.exit(1)

    hugo_content_path = os.path.join(HUGO_SITE_PATH, 'content')

    # Does the Hugo site exist ?
    if not os.path.exists(hugo_content_path):
        print(hugo_content_path, "doesn't exist. Make sure it is the location of a Hugo site.")
        sys.exit(1)

    # Grab the files from Notion
    fetch_syllabus_md(SYLLABUS_DB, SYLLABUS_TITLE, hugo_content_path)

    # Now build the Hugo site
    hugo(HUGO_SITE_PATH, WEBSITE_PATH, base_url=BASE_URL)