From 1b0b208dbc41ea139f897ba2d7e56eb58226c564 Mon Sep 17 00:00:00 2001 From: Marcell Mars Date: Mon, 13 Jun 2022 17:33:04 +0200 Subject: [PATCH] initial zotero rdf support... --- cmd/build_zotero_rdf.go | 194 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 cmd/build_zotero_rdf.go diff --git a/cmd/build_zotero_rdf.go b/cmd/build_zotero_rdf.go new file mode 100644 index 0000000..e0b39de --- /dev/null +++ b/cmd/build_zotero_rdf.go @@ -0,0 +1,194 @@ +package cmd + +import ( + "accorder/pkg/calibre" + "encoding/xml" + "fmt" + "log" + "strconv" + "strings" + + "github.com/araddon/dateparse" + "github.com/beevik/etree" + "github.com/spf13/cobra" +) + +func ImportZoteroRDF(cmd *cobra.Command) { + + zoteroRDFPath := CliFlagValue(cmd, "import-zotero") + doc := etree.NewDocument() + if err := doc.ReadFromFile(zoteroRDFPath); err != nil { + panic(err) + } + root := doc.SelectElement("rdf:RDF") + attachmentsIDs := map[string]bool{} + for _, attachmentNode := range root.FindElements("[name()='link:type']") { + var bookOpf calibre.BookOpfW + var zoteroItem ZoteroItem + + attachmentID := attachmentNode.Parent().SelectAttr("rdf:about").Value + if attachmentNode.Text() == "text/html" || attachmentsIDs[attachmentID] { + continue + } + + bibliographyNode := root.FindElement(fmt.Sprintf("[@rdf:resource='%s']", attachmentID)).Parent().Copy() + + newDoc := etree.NewDocument() + zoteroUnion := newDoc.CreateElement("zoteroItem") + + attachmentsQuery := bibliographyNode.FindElements("[name()='link:link']") + itemAttachments := zoteroUnion.CreateElement("attachments") + mimeTypeMap := make(map[string]bool) + for _, attachment := range attachmentsQuery { + attachmentID := attachment.SelectAttr("rdf:resource").Value + zAttachment := root.FindElement(fmt.Sprintf("[@rdf:about='%s']", attachmentID)) + mimeType := zAttachment.FindElement("[name()='link:type']").Text() + if mimeType != "text/html" { + if !mimeTypeMap[mimeType] { + attachmentsIDs[attachmentID] = true + filePath := zAttachment.FindElement("[name()='rdf:resource']").SelectAttr("rdf:resource").Value + attachment := itemAttachments.CreateElement("attachment") + attachment.SetText(filePath) + attachment.CreateAttr("mimeType", mimeType) + mimeTypeMap[mimeType] = true + } else { + fmt.Println("DUPLICATE mimeType:", mimeType, attachmentID) + } + } + } + + dateQuery := bibliographyNode.FindElement("[name()='dc:date']") + if dateQuery != nil { + dateElement := zoteroUnion.CreateElement("date") + date, err := dateparse.ParseAny(dateQuery.Text()) + if err == nil { + formattedDate := date.Format("2006-01-02") + dateElement.CreateText(formattedDate) + + bookOpf.Metadata.Published = formattedDate + } else { + newDateQuery := fmt.Sprintf("1 %s", dateQuery.Text()) + newDate, err := dateparse.ParseAny(newDateQuery) + if err == nil { + newFormattedDate := newDate.Format("2006-01-02") + dateElement.CreateText(newFormattedDate) + bookOpf.Metadata.Published = newFormattedDate + } else { + lastChanceDate := dateQuery.Text()[len(dateQuery.Text())-4:] + year, err := strconv.Atoi(lastChanceDate) + if err == nil { + justYear := fmt.Sprintf("%d-01-01", year) + dateElement.CreateText(justYear) + bookOpf.Metadata.Published = justYear + } else { + fmt.Println("ERROR parsing date...", err) + } + } + } + + } + + publisherQuery := bibliographyNode.FindElement("[name()='foaf:name']") + if publisherQuery != nil { + publisherElement := zoteroUnion.CreateElement("publisher") + publisher := publisherQuery.Text() + publisherElement.CreateText(publisher) + + bookOpf.Metadata.Publisher = publisher + } + + authorsQuery := bibliographyNode.FindElements("[name()='foaf:Person']") + authors := zoteroUnion.CreateElement("authors") + for _, authorNode := range authorsQuery { + var firstName, surName string + + firstNameNode := authorNode.FindElement("[name()='foaf:givenName']") + if firstNameNode != nil { + firstName = firstNameNode.Text() + " " + } + surNameNode := authorNode.FindElement("[name()='foaf:surname']") + if surNameNode != nil { + surName = surNameNode.Text() + " " + } + fullName := strings.TrimSuffix(fmt.Sprintf("%s%s", firstName, surName), " ") + authors.CreateElement("author").SetText(fullName) + + bookOpf.Metadata.Creators = append(bookOpf.Metadata.Creators, calibre.Creator{ + Role: "aut", + Name: fullName, + }) + } + + titleQuery := bibliographyNode.FindElement("[name()='dc:title']") + if titleQuery != nil { + titleNode := zoteroUnion.CreateElement("title") + title := titleQuery.Text() + titleNode.CreateText(title) + + bookOpf.Metadata.Title = title + } + + descriptionQuery := bibliographyNode.FindElement("[name()='dcterms:abstract']") + if descriptionQuery != nil { + descriptionNode := zoteroUnion.CreateElement("description") + description := descriptionQuery.Text() + descriptionNode.CreateText(description) + + bookOpf.Metadata.Description = description + } + + // identifiers + identifiersQuery := bibliographyNode.FindElements("//[name()='dc:identifier'][not()]") + if len(identifiersQuery) > 0 { + for _, identifier := range identifiersQuery { + identifierLine := identifier.Text() + var id []string + if strings.Contains(identifierLine, " ") { + id = strings.Split(identifier.Text(), " ") + } + if len(id) > 1 && id[0] != "" { + fmt.Println(bookOpf.Metadata.Title, "TYPE:", id[0], "VALUE:", id[1]) + } + } + } + + b, err := newDoc.WriteToBytes() + if err != nil { + log.Fatal(err) + } + + if err := xml.Unmarshal(b, &zoteroItem); err != nil { + log.Fatalln(err) + } + + for _, a := range zoteroItem.Attachments.Attachment { + // fmt.Println(a.Path, a.MimeType) + _ = a + } + + zi, _ := xml.MarshalIndent(zoteroItem, " ", " ") + _ = zi + // os.Stdout.Write(zi) + // fmt.Println("\n~+~ ~ ~ ~ ~") + + bookOpf.Version = "2.0" + bookOpf.Xmlns = "http://www.idpf.org/2007/opf" + bookOpf.UniqueIdentifier = "uuid_id" + bookOpf.Metadata.DC = "http://purl.org/dc/elements/1.1/" + bookOpf.Metadata.OPF = "http://www.idpf.org/2007/opf" + + bookOpf.Metadata.Identifiers = append(bookOpf.Metadata.Identifiers, calibre.Identifier{ + Scheme: "calibre", + Id: "calibre_id", + Value: "-1", + }) + + bookOpfOutput, err := xml.MarshalIndent(bookOpf, " ", " ") + if err != nil { + log.Fatalln(err) + } + _ = bookOpfOutput + // os.Stdout.Write(bookOpfOutput) + // fmt.Println("\n ~ ~ ~ ~ ~") + } +}