2020-06-04 01:28:52 +00:00
|
|
|
import argparse
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import subprocess
|
|
|
|
import re
|
2020-06-04 05:42:03 +00:00
|
|
|
import stat
|
2020-06-08 13:02:09 +00:00
|
|
|
from urllib.parse import urlparse, parse_qs
|
2020-06-04 01:28:52 +00:00
|
|
|
|
|
|
|
from notion.client import NotionClient
|
|
|
|
from notion.markdown import notion_to_markdown
|
|
|
|
|
|
|
|
|
|
|
|
# Configuration knobs and their defaults. Each key can be overridden with a
# command-line option (--<key>) or an environment variable (<KEY> upper-cased).
vars = dict(
    syllabus_db="https://www.notion.so/memoryspace/594e40c85f2844b5911f14e7db21850f?v=2a2e8a86e8a74d29844f5eef41d18b3f",
    syllabus_title="Machine Listening",
    token_v2="Find the value of 'token_v2' in your browser cookies when logged into Notion",
    hugo_path="hugo",
    hugo_site_path="/mnt/d/dev/websites/arche-syllabus-test",  # The Hugo site
    website_path="/mnt/d/tmp/notion",
    base_url="http://127.0.0.1:8000",
)
|
|
|
|
|
|
|
|
# Markdown prefix emitted in front of the title of each Notion block type.
# Block types that are not listed here get no prefix.
prefixes = dict(
    header="# ",
    sub_header="## ",
    sub_sub_header="### ",
    bulleted_list="+ ",
    numbered_list="1. ",
    toggle="[ ]",
    quote="> ",
)
|
|
|
|
|
|
|
|
|
|
|
|
def get_value(name, default=False):
    """Resolve a setting from the command line, the environment, or a default.

    Lookup order:

    1. attribute ``name.lower()`` of the module-level ``args`` namespace, when
       it exists and is not ``None``;
    2. environment variable ``name.upper()``;
    3. ``default``.

    The source and the value are printed for every lookup. A ``token_v2``
    value coming from the command line or the environment is printed as
    ``XXXX`` so the secret does not end up in logs.

    Returns the resolved value (a string for CLI/environment sources,
    ``default`` unchanged otherwise).
    """
    key = name.lower()
    env_key = name.upper()

    # ``args`` is only created by the script entry point; tolerate its absence
    # so the function also works before/without command-line parsing.
    cli_value = getattr(globals().get("args"), key, None)

    if cli_value is not None:
        source, value = "CLI:", cli_value
    elif env_key in os.environ:
        source, value = "env:", os.environ[env_key]
    else:
        print('default:', name, default)
        return default

    # Never echo the Notion session token.
    shown = "XXXX" if key == "token_v2" else value
    print(source, name, shown)
    return value
|
|
|
|
|
|
|
|
|
2020-06-04 05:52:22 +00:00
|
|
|
def rmrf(path, keep_root=False):
    """Recursively remove ``path`` (a file or a directory tree).

    With ``keep_root=True`` and ``path`` being a directory, the directory
    itself is kept and only its contents (files *and* subdirectories) are
    removed.

    Nothing is removed, and a message is printed, when ``path`` does not exist
    or resolves to a one-character real path (i.e. the filesystem root).

    @TODO: make it safer
    """
    def remove_readonly(func, target, excinfo):
        # shutil.rmtree error hook: make the entry writable and retry the
        # operation that failed.
        os.chmod(target, stat.S_IWRITE)
        func(target)

    def remove_file(target):
        try:
            os.remove(target)
        except PermissionError:
            # Same recovery as remove_readonly, for plain files.
            os.chmod(target, stat.S_IWRITE)
            os.remove(target)

    if not os.path.exists(path) or len(os.path.realpath(path)) <= 1:
        print("Either the path doesn't exist or you are trying to delete the root directory(?!):", path)
        return

    if os.path.isfile(path):
        os.remove(path)
        return
    if not os.path.isdir(path):
        return

    if not keep_root:
        shutil.rmtree(path, onerror=remove_readonly)
        return

    # Empty the directory but leave the directory itself in place.
    for name in os.listdir(path):
        child = os.path.join(path, name)
        if os.path.isdir(child) and not os.path.islink(child):
            shutil.rmtree(child, onerror=remove_readonly)
        else:
            # Regular files and symlinks (the link is removed, not its target).
            remove_file(child)
|
|
|
|
|
|
|
|
|
|
|
|
def cmd(parts, cwd=None, env=None):
    """Run a command and return its exit status.

    ``parts`` is the argument list handed to ``subprocess.call`` (no shell is
    involved). ``cwd`` and ``env`` are passed through unchanged. The command
    line is printed before it runs. The child's output is not captured; it goes
    to this process's stdout/stderr.

    Returns the integer return code of the process.
    """
    # str() each part so path-like arguments do not break the log line.
    print(f"Command: {' '.join(str(part) for part in parts)} ({cwd})")
    return subprocess.call(parts, cwd=cwd, env=env, universal_newlines=True)
|
2020-06-04 01:28:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
def hugo(hugo_site_path, dest, hugo_environment='gitea', base_url=None):
    """Build the Hugo site located at ``hugo_site_path`` into ``dest``.

    ``dest`` is emptied first (the directory itself is kept) and created if it
    is missing. Hugo is run from ``hugo_site_path`` with the given
    ``hugo_environment``; when ``base_url`` is set it is exported to Hugo as
    the ``HUGO_PARAMS_BASEURL`` environment variable. The executable is taken
    from the module-level ``HUGO_PATH``.

    Returns whatever ``cmd`` returns for the hugo invocation.
    """
    rmrf(dest, keep_root=True)
    try:
        os.makedirs(dest, exist_ok=True)
    except OSError as err:
        # Best effort, as before: report and let hugo itself fail if it must.
        print(f"Error creating the directory: {dest}", err)

    # overriding hugo config for development environments
    env = os.environ.copy()
    if base_url:
        env["HUGO_PARAMS_BASEURL"] = base_url

    # run the hugo command
    return cmd(
        [HUGO_PATH, '-e', hugo_environment, '-d', dest, '--noTimes'],
        cwd=hugo_site_path,
        env=env,
    )
|
2020-06-04 01:28:52 +00:00
|
|
|
|
|
|
|
|
2020-06-08 13:02:09 +00:00
|
|
|
def hugo_video_shortcode(url):
    """Return the Hugo shortcode for a YouTube or Vimeo url.

    * ``youtube.com`` urls use the ``v`` query parameter as the video id;
    * ``youtu.be`` short links and ``vimeo.com`` urls use the url path
      (without surrounding slashes) as the video id.

    When the url is for another site, cannot be parsed, or carries no video
    id, a plain Markdown link ``[url](url)`` is returned instead.
    """
    plain_link = f"[{url}]({url})"
    try:
        parsed = urlparse(url)
    except ValueError:
        # Malformed url (e.g. unbalanced IPv6 brackets): just link to it.
        return plain_link

    shortcode, video = None, ""
    if "youtube.com" in url:
        # parse_qs drops blank values, so a missing/empty "v" gives no id.
        shortcode = "youtube"
        video = parse_qs(parsed.query).get("v", [""])[0]
    elif "youtu.be" in url:
        shortcode, video = "youtube", parsed.path.strip("/")
    elif "vimeo.com" in url:
        shortcode, video = "vimeo", parsed.path.strip("/")

    if shortcode and video:
        return "{{< " + shortcode + " " + video + " >}}"
    return plain_link
|
|
|
|
|
|
|
|
|
2020-06-06 14:08:12 +00:00
|
|
|
def get_record_text(post, level=0):
    """Generate the Markdown text of a Notion page.

    Walks ``post.children`` and renders each child according to its ``type``:
    sub-pages are rendered recursively, images/videos/bookmarks/dividers have
    dedicated output, bulleted lists are indented by ``level`` (4 spaces per
    level) and recurse into their children, and every other block becomes its
    title preceded by the matching entry of ``prefixes`` (if any).

    Finally, inline code of the form `` `bib:x` ``, `` `session:x` `` or
    `` `topic:x` `` is rewritten to ``![](bib:x)`` etc.

    see: https://github.com/brentbaum/brentbaum-notion-publishing/blob/master/notion/get_posts.py
    """
    chunks = []

    for child in post.children:
        kind = child.type
        if kind == "page":
            chunks.append(get_record_text(child))
        elif kind == "image":
            # @todo: Download the image and get into static directory for Hugo
            caption = child.caption if child.caption else ""
            source = child.display_source if child.display_source else child.source
            chunks.append(f"![{caption}]({source})\n")
        elif kind == "video":
            caption = f"\n_{child.caption}_" if child.caption else ""
            chunks.append(f"{hugo_video_shortcode(child.source)}{caption}\n\n")
        elif kind == "bookmark":
            caption = f"\n_{child.caption}_" if child.caption else ""
            chunks.append(f"[{child.title}]({child.link}){caption}\n\n")
        elif kind == "divider":
            # Terminate the rule so the next block does not end up on the
            # same line as the dashes.
            chunks.append("---\n\n")
        elif kind == "bulleted_list":
            indent = ' ' * (level * 4)
            chunks.append(indent + prefixes.get(kind, "") + child.title + "\n\n")
            if len(child.children):
                chunks.append(get_record_text(child, level + 1))
        else:
            chunks.append(prefixes.get(kind, "") + child.title + "\n\n")

    text = "".join(chunks)
    return re.sub(r'`(bib|session|topic):([a-zA-Z0-9-]+)`', r'![](\1:\2)', text)
|
|
|
|
|
|
|
|
|
|
|
|
def yaml_home(record):
    """YAML front matter for the syllabus home page.

    ``record`` is a mapping with at least ``name`` and
    ``related_to_syllabus_topics_syllabus`` (an iterable of objects with a
    ``slug``); ``published`` is optional and defaults to published.

    Returns the front matter block, terminated by a blank line.
    """
    topics = [f"{t.slug}.md" for t in record["related_to_syllabus_topics_syllabus"]]
    draft = "false" if record.get("published", True) else "true"
    # The title is a folded block scalar, so every line of it must be
    # indented; the 4-space indent is a reconstruction, any width folds the same.
    title = "\n    ".join(str(record["name"]).splitlines())
    return (
        "---\n"
        "title: >\n"
        f"    {title}\n"
        f"draft: {draft}\n"
        f"has_topics: [ {', '.join(topics)} ]\n"
        "---\n\n"
    )
|
|
|
|
|
|
|
|
|
|
|
|
def yaml_topic(record):
    """YAML front matter for a topic page.

    ``record`` is a mapping with at least ``name`` and
    ``related_to_syllabus_sessions_topic`` (an iterable of objects with a
    ``slug``); ``published`` is optional and defaults to published.

    Returns the front matter block, terminated by a blank line.
    """
    sessions = [f"{s.slug}.md" for s in record["related_to_syllabus_sessions_topic"]]
    draft = "false" if record.get("published", True) else "true"
    # The title is a folded block scalar, so every line of it must be
    # indented; the 4-space indent is a reconstruction, any width folds the same.
    title = "\n    ".join(str(record["name"]).splitlines())
    return (
        "---\n"
        "title: >\n"
        f"    {title}\n"
        f"draft: {draft}\n"
        f"has_sessions: [ {', '.join(sessions)} ]\n"
        "---\n\n"
    )
|
|
|
|
|
|
|
|
|
|
|
|
def yaml_default(record):
    """Default YAML front matter: title and draft flag only.

    ``record`` is a mapping with at least ``name``; ``published`` is optional
    and defaults to published.

    Returns the front matter block, terminated by a blank line.
    """
    draft = "false" if record.get("published", True) else "true"
    # The title is a folded block scalar, so every line of it must be
    # indented; the 4-space indent is a reconstruction, any width folds the same.
    title = "\n    ".join(str(record["name"]).splitlines())
    return (
        "---\n"
        "title: >\n"
        f"    {title}\n"
        f"draft: {draft}\n"
        "---\n\n"
    )
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_fields(id, yaml_func=None):
    """Convert a page id into Markdown, metadata, and header.

    Fetches the block ``id`` through the module-level ``client``, renders its
    children to Markdown and builds the front matter by calling
    ``yaml_func`` with the block's properties. When ``yaml_func`` is ``None``
    the header is an empty string.

    Returns a dict holding every property of the block plus two extra keys,
    ``markdown`` and ``header`` (these win over same-named properties).
    """
    record = client.get_block(id)
    markdown = get_record_text(record)
    post = record.get_all_properties()
    # yaml_func is optional in the signature, so do not call None.
    header = yaml_func(post) if yaml_func is not None else ""

    return {
        **post,
        #"publish_date": post["publish_date"].start.isoformat(),
        #"updated": post["updated"].isoformat(),
        "markdown": markdown,
        "header": header,
    }
|
|
|
|
|
|
|
|
|
|
|
|
def notion_to_md(id, filepath=None, yaml_func=None):
    """Render a Notion block (usually a page) and optionally write it to a file.

    The page is resolved with ``resolve_fields``; when ``filepath`` is given,
    the front matter followed by the Markdown body is written there as UTF-8,
    replacing any previous content. The file is created with mode ``0o777``.

    Returns the dict produced by ``resolve_fields``.
    """
    page_data = resolve_fields(id, yaml_func=yaml_func)
    page_content = page_data["header"] + page_data["markdown"]

    if filepath:
        # Clear the umask only while creating the file so the requested mode
        # is applied as-is, then put the process umask back.
        previous_umask = os.umask(0)
        try:
            # O_TRUNC: without it, overwriting a longer file leaves its tail.
            fd = os.open(filepath, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o777)
            with open(fd, 'w', encoding="utf-8") as f:
                f.write(page_content)
        finally:
            os.umask(previous_umask)

    return page_data
|
|
|
|
|
|
|
|
|
|
|
|
def rm_syllabus(content_dir):
    """Delete everything this script generates inside ``content_dir``.

    That is the home page ``_index.md`` and the ``topic`` and ``session``
    directories, removed in that order.
    """
    for generated in ("_index.md", "topic", "session"):
        rmrf(os.path.join(content_dir, generated))
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_syllabus_md(syllabi, syllabus_name, content_dir):
    """Fetch one syllabus from Notion and write it as Markdown into ``content_dir``.

    ``syllabi`` is the url/id of the Notion collection view listing the
    syllabi. The first row whose title equals ``syllabus_name`` is exported:

    * the row itself becomes ``_index.md``;
    * each related topic becomes ``topic/<slug>.md``;
    * each session related to those topics becomes ``session/<slug>.md``
      (a session shared by several topics is written once).

    Previously generated files are removed first. Uses the module-level
    ``client``. Raises ``FileExistsError`` if the ``topic`` or ``session``
    directory still exists after the clean-up.
    """
    rm_syllabus(content_dir)

    # Load the Syllabus collection
    cv = client.get_collection_view(syllabi)

    for row in cv.collection.get_rows():
        # This only builds one syllabus at a time
        if row.title != syllabus_name:
            continue

        print(row)
        print(row.collection.get_schema_properties())

        # Home page
        notion_to_md(row.id,
                     filepath=os.path.join(content_dir, "_index.md"),
                     yaml_func=yaml_home)

        # handle topics
        topics_dir = os.path.join(content_dir, "topic")
        print("creating: ", topics_dir)
        os.makedirs(topics_dir, exist_ok=False)
        # NOTE(review): "c~x$" is a raw Notion property id — presumably the
        # syllabus -> topics relation; confirm against the collection schema.
        topics = row.get_property("c~x$")
        sessions = []
        seen_session_ids = set()
        for topic in topics:
            print(" - ", f"{topic.slug}.md")
            notion_to_md(topic.id,
                         filepath=os.path.join(topics_dir, f"{topic.slug}.md"),
                         yaml_func=yaml_topic)
            # Collect sessions in first-seen order, skipping repeats.
            for session in topic.related_to_syllabus_sessions_topic:
                if session.id not in seen_session_ids:
                    seen_session_ids.add(session.id)
                    sessions.append(session)

        # handle sessions
        sessions_dir = os.path.join(content_dir, "session")
        print("creating: ", sessions_dir)
        os.makedirs(sessions_dir, exist_ok=False)
        for session in sessions:
            print(" - ", f"{session.slug}.md")
            notion_to_md(session.id,
                         filepath=os.path.join(sessions_dir, f"{session.slug}.md"),
                         yaml_func=yaml_default)

        # Stop at the first match: a second row with the same title would
        # collide with the directories created above.
        return

    print("No syllabus found with title:", syllabus_name)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Parsing command line arguments: one optional --<name> per known setting
    parser = argparse.ArgumentParser()
    for v in vars:
        parser.add_argument(f"--{v.lower()}")
    args = parser.parse_args()

    # Load all variables from command line arguments or environment variables.
    # Each setting becomes an upper-cased module global (e.g. HUGO_PATH).
    for v in vars:
        globals()[v.upper()] = get_value(v.lower(), vars[v])

    # Obtain the `token_v2` value by inspecting your browser cookies on a logged-in session on Notion.so
    try:
        client = NotionClient(token_v2=TOKEN_V2)
    except Exception as err:
        # Do not echo the token, and stop: nothing below works without a client.
        print("Notion connect didn't work:", err)
        sys.exit(1)

    hugo_content_path = os.path.join(HUGO_SITE_PATH, 'content')

    # Does the Hugo site exist ?
    if not os.path.exists(hugo_content_path):
        print(hugo_content_path, "doesn't exist. Make sure it is the location of a Hugo site.")
        sys.exit(1)

    # Grab the files from Notion
    fetch_syllabus_md(SYLLABUS_DB, SYLLABUS_TITLE, hugo_content_path)

    # Now build the Hugo site; exit with whatever status the build reports
    # (a None result exits with status 0).
    sys.exit(hugo(HUGO_SITE_PATH, WEBSITE_PATH, base_url=BASE_URL))
|
|
|
|
|