initial zotero rdf support...

This commit is contained in:
Marcell Mars 2022-06-13 17:33:04 +02:00
parent cc2f69d598
commit 1b0b208dbc
1 changed files with 194 additions and 0 deletions

194
cmd/build_zotero_rdf.go Normal file
View File

@ -0,0 +1,194 @@
package cmd
import (
"accorder/pkg/calibre"
"encoding/xml"
"fmt"
"log"
"strconv"
"strings"
"github.com/araddon/dateparse"
"github.com/beevik/etree"
"github.com/spf13/cobra"
)
// ImportZoteroRDF reads the Zotero RDF export whose path is given by the
// "import-zotero" CLI flag and walks every attachment (link:type node) in it.
// For each bibliography item that references a non-HTML attachment it builds
// two records: a ZoteroItem (via an intermediate <zoteroItem> XML document)
// and a calibre.BookOpfW carrying the item's date, publisher, authors, title
// and description.
//
// Both records are currently only marshalled and then discarded; the lines
// that would print them are commented out.
//
// The function panics when the file cannot be read or has no rdf:RDF root,
// and exits through log.Fatal when a document cannot be (un)marshalled.
// Items or attachments with missing nodes or attributes are logged and
// skipped.
func ImportZoteroRDF(cmd *cobra.Command) {
	zoteroRDFPath := CliFlagValue(cmd, "import-zotero")
	doc := etree.NewDocument()
	if err := doc.ReadFromFile(zoteroRDFPath); err != nil {
		panic(err)
	}
	root := doc.SelectElement("rdf:RDF")
	if root == nil {
		panic(fmt.Sprintf("no rdf:RDF root element in %q", zoteroRDFPath))
	}

	// attachmentsIDs records attachments already handled as part of an item,
	// so an item with several attachments is only processed once.
	attachmentsIDs := map[string]bool{}
	for _, attachmentNode := range root.FindElements("[name()='link:type']") {
		var bookOpf calibre.BookOpfW
		var zoteroItem ZoteroItem

		attachmentParent := attachmentNode.Parent()
		if attachmentParent == nil {
			continue
		}
		aboutAttr := attachmentParent.SelectAttr("rdf:about")
		if aboutAttr == nil {
			log.Printf("skipping link:type node whose parent has no rdf:about attribute")
			continue
		}
		attachmentID := aboutAttr.Value
		if attachmentNode.Text() == "text/html" || attachmentsIDs[attachmentID] {
			continue
		}

		// The bibliography item is the parent of the first element that
		// points at this attachment through rdf:resource. An attachment that
		// nothing references has no item to import.
		referenceNode := root.FindElement(fmt.Sprintf("[@rdf:resource='%s']", attachmentID))
		if referenceNode == nil || referenceNode.Parent() == nil {
			log.Printf("skipping attachment %q: no element references it", attachmentID)
			continue
		}
		bibliographyNode := referenceNode.Parent().Copy()

		newDoc := etree.NewDocument()
		zoteroUnion := newDoc.CreateElement("zoteroItem")

		// attachments: keep one file per MIME type, ignoring HTML snapshots.
		itemAttachments := zoteroUnion.CreateElement("attachments")
		mimeTypeMap := make(map[string]bool)
		for _, link := range bibliographyNode.FindElements("[name()='link:link']") {
			resourceAttr := link.SelectAttr("rdf:resource")
			if resourceAttr == nil {
				continue
			}
			linkedID := resourceAttr.Value
			zAttachment := root.FindElement(fmt.Sprintf("[@rdf:about='%s']", linkedID))
			if zAttachment == nil {
				log.Printf("skipping link %q: attachment element not found", linkedID)
				continue
			}
			typeNode := zAttachment.FindElement("[name()='link:type']")
			if typeNode == nil {
				log.Printf("skipping attachment %q: no link:type element", linkedID)
				continue
			}
			mimeType := typeNode.Text()
			if mimeType == "text/html" {
				continue
			}
			// Mark every non-HTML attachment of this item as seen, including
			// duplicates, so the outer loop does not import the item again
			// when it reaches the item's other attachments.
			attachmentsIDs[linkedID] = true
			if mimeTypeMap[mimeType] {
				fmt.Println("DUPLICATE mimeType:", mimeType, linkedID)
				continue
			}
			fileNode := zAttachment.FindElement("[name()='rdf:resource']")
			if fileNode == nil {
				log.Printf("skipping attachment %q: no rdf:resource element", linkedID)
				continue
			}
			fileAttr := fileNode.SelectAttr("rdf:resource")
			if fileAttr == nil {
				log.Printf("skipping attachment %q: rdf:resource element has no rdf:resource attribute", linkedID)
				continue
			}
			entry := itemAttachments.CreateElement("attachment")
			entry.SetText(fileAttr.Value)
			entry.CreateAttr("mimeType", mimeType)
			mimeTypeMap[mimeType] = true
		}

		// date: the <date> element is created even when the value cannot be
		// parsed; it is then left empty.
		if dateNode := bibliographyNode.FindElement("[name()='dc:date']"); dateNode != nil {
			dateElement := zoteroUnion.CreateElement("date")
			published, err := normalizeZoteroDate(dateNode.Text())
			if err != nil {
				fmt.Println("ERROR parsing date...", err)
			} else {
				dateElement.CreateText(published)
				bookOpf.Metadata.Published = published
			}
		}

		// publisher: taken from the first foaf:name found under the item.
		if publisherNode := bibliographyNode.FindElement("[name()='foaf:name']"); publisherNode != nil {
			publisher := publisherNode.Text()
			zoteroUnion.CreateElement("publisher").CreateText(publisher)
			bookOpf.Metadata.Publisher = publisher
		}

		// authors: every foaf:Person under the item becomes an "aut" creator.
		authors := zoteroUnion.CreateElement("authors")
		for _, authorNode := range bibliographyNode.FindElements("[name()='foaf:Person']") {
			var nameParts []string
			if n := authorNode.FindElement("[name()='foaf:givenName']"); n != nil && n.Text() != "" {
				nameParts = append(nameParts, n.Text())
			}
			if n := authorNode.FindElement("[name()='foaf:surname']"); n != nil && n.Text() != "" {
				nameParts = append(nameParts, n.Text())
			}
			// Joining only the non-empty parts avoids a stray trailing space
			// when one of the name elements is present but empty.
			fullName := strings.Join(nameParts, " ")
			authors.CreateElement("author").SetText(fullName)
			bookOpf.Metadata.Creators = append(bookOpf.Metadata.Creators, calibre.Creator{
				Role: "aut",
				Name: fullName,
			})
		}

		// title
		if titleNode := bibliographyNode.FindElement("[name()='dc:title']"); titleNode != nil {
			title := titleNode.Text()
			zoteroUnion.CreateElement("title").CreateText(title)
			bookOpf.Metadata.Title = title
		}

		// description
		if abstractNode := bibliographyNode.FindElement("[name()='dcterms:abstract']"); abstractNode != nil {
			description := abstractNode.Text()
			zoteroUnion.CreateElement("description").CreateText(description)
			bookOpf.Metadata.Description = description
		}

		// identifiers: only printed for now, as "<TYPE> <VALUE>" pairs split
		// on single spaces.
		// NOTE(review): the trailing [not()] filter looks unusual — confirm
		// the etree version in use accepts it.
		for _, identifier := range bibliographyNode.FindElements("//[name()='dc:identifier'][not()]") {
			id := strings.Split(identifier.Text(), " ")
			if len(id) > 1 && id[0] != "" {
				fmt.Println(bookOpf.Metadata.Title, "TYPE:", id[0], "VALUE:", id[1])
			}
		}

		// Round-trip the assembled <zoteroItem> document into the typed struct.
		b, err := newDoc.WriteToBytes()
		if err != nil {
			log.Fatal(err)
		}
		if err := xml.Unmarshal(b, &zoteroItem); err != nil {
			log.Fatalln(err)
		}
		for _, a := range zoteroItem.Attachments.Attachment {
			// fmt.Println(a.Path, a.MimeType)
			_ = a
		}
		zi, err := xml.MarshalIndent(zoteroItem, " ", " ")
		if err != nil {
			log.Fatalln(err)
		}
		_ = zi
		// os.Stdout.Write(zi)
		// fmt.Println("\n~+~ ~ ~ ~ ~")

		bookOpf.Version = "2.0"
		bookOpf.Xmlns = "http://www.idpf.org/2007/opf"
		bookOpf.UniqueIdentifier = "uuid_id"
		bookOpf.Metadata.DC = "http://purl.org/dc/elements/1.1/"
		bookOpf.Metadata.OPF = "http://www.idpf.org/2007/opf"
		bookOpf.Metadata.Identifiers = append(bookOpf.Metadata.Identifiers, calibre.Identifier{
			Scheme: "calibre",
			Id:     "calibre_id",
			Value:  "-1",
		})
		bookOpfOutput, err := xml.MarshalIndent(bookOpf, " ", " ")
		if err != nil {
			log.Fatalln(err)
		}
		_ = bookOpfOutput
		// os.Stdout.Write(bookOpfOutput)
		// fmt.Println("\n ~ ~ ~ ~ ~")
	}
}

// normalizeZoteroDate converts a free-form dc:date value to "YYYY-MM-DD".
//
// It tries, in order: parsing raw as-is; parsing raw with "1 " prepended
// (presumably so month-and-year values gain a day — confirm against real
// exports); and finally reading the last four bytes of raw as a year, which
// yields "<year>-01-01". An error is returned when all three attempts fail,
// including when raw is too short to hold a four-character year.
func normalizeZoteroDate(raw string) (string, error) {
	const layout = "2006-01-02"
	if date, err := dateparse.ParseAny(raw); err == nil {
		return date.Format(layout), nil
	}
	if date, err := dateparse.ParseAny(fmt.Sprintf("1 %s", raw)); err == nil {
		return date.Format(layout), nil
	}
	// Guard the slice below: a value shorter than four bytes would otherwise
	// panic with a slice-bounds error.
	if len(raw) < 4 {
		return "", fmt.Errorf("date %q is too short to contain a year", raw)
	}
	year, err := strconv.Atoi(raw[len(raw)-4:])
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%d-01-01", year), nil
}