package cmd

import (
	"accorder/pkg/calibre"
	"encoding/xml"
	"fmt"
	"log"
	"strconv"
	"strings"

	"github.com/araddon/dateparse"
	"github.com/beevik/etree"
	"github.com/spf13/cobra"
)

// zoteroAttrValue returns the value of attribute key on el. The boolean is
// false when el is nil or el carries no such attribute, so callers can skip
// malformed nodes instead of dereferencing a nil *etree.Attr.
func zoteroAttrValue(el *etree.Element, key string) (string, bool) {
	if el == nil {
		return "", false
	}
	attr := el.SelectAttr(key)
	if attr == nil {
		return "", false
	}
	return attr.Value, true
}

// zoteroDateToISO converts a free-form date string to "YYYY-MM-DD".
//
// Three attempts are made in order: parse raw as-is, parse raw with "1 "
// prepended (presumably so that day-less values such as a month and year
// parse), and finally read the trailing four bytes of raw as a year, which
// yields "<year>-01-01". An error is returned when all three fail.
func zoteroDateToISO(raw string) (string, error) {
	if date, err := dateparse.ParseAny(raw); err == nil {
		return date.Format("2006-01-02"), nil
	}
	if date, err := dateparse.ParseAny(fmt.Sprintf("1 %s", raw)); err == nil {
		return date.Format("2006-01-02"), nil
	}
	// Slicing the last four bytes of a shorter string would panic.
	if len(raw) < 4 {
		return "", fmt.Errorf("date %q is too short to contain a year", raw)
	}
	year, err := strconv.Atoi(raw[len(raw)-4:])
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%d-01-01", year), nil
}

// ImportZoteroRDF reads the Zotero RDF file whose path is returned by
// CliFlagValue(cmd, "import-zotero") and walks every attachment (an element
// holding a link:type child) whose type is not "text/html". For the item that
// links to such an attachment it builds an intermediate <zoteroItem> XML
// document, decodes that into a ZoteroItem, and fills a calibre.BookOpfW
// record with the date, publisher, authors, title and description.
//
// The marshalled results are currently discarded; the statements that would
// print them are commented out. Diagnostics go to standard output.
//
// The function panics when the file cannot be read or has no rdf:RDF root,
// and terminates the process through log.Fatal when XML (un)marshalling
// fails. Nodes missing an expected attribute or child are reported and
// skipped rather than dereferenced.
//
// NOTE(review): attachment IDs are interpolated into etree paths unescaped;
// an ID containing a single quote presumably produces an invalid path, on
// which etree's Find* functions panic — confirm whether such IDs can occur.
func ImportZoteroRDF(cmd *cobra.Command) {
	zoteroRDFPath := CliFlagValue(cmd, "import-zotero")
	doc := etree.NewDocument()
	if err := doc.ReadFromFile(zoteroRDFPath); err != nil {
		panic(err)
	}

	root := doc.SelectElement("rdf:RDF")
	if root == nil {
		panic(fmt.Errorf("no rdf:RDF root element in %q", zoteroRDFPath))
	}

	// attachmentsIDs holds the attachments already emitted as part of an
	// item, so the item is not rebuilt when the outer loop reaches them.
	// NOTE(review): attachments reported as DUPLICATE are never recorded
	// here, so their item appears to be processed again later — confirm
	// whether that is intended.
	attachmentsIDs := map[string]bool{}
	for _, attachmentNode := range root.FindElements("[name()='link:type']") {
		var bookOpf calibre.BookOpfW
		var zoteroItem ZoteroItem

		attachmentID, ok := zoteroAttrValue(attachmentNode.Parent(), "rdf:about")
		if !ok {
			fmt.Println("SKIPPING link:type whose parent has no rdf:about attribute")
			continue
		}
		if attachmentNode.Text() == "text/html" || attachmentsIDs[attachmentID] {
			continue
		}

		// The item owning this attachment is the parent of the first
		// element whose rdf:resource attribute equals the attachment ID.
		linkNode := root.FindElement(fmt.Sprintf("[@rdf:resource='%s']", attachmentID))
		if linkNode == nil || linkNode.Parent() == nil {
			fmt.Println("SKIPPING attachment not referenced by any item:", attachmentID)
			continue
		}
		bibliographyNode := linkNode.Parent().Copy()

		newDoc := etree.NewDocument()
		zoteroUnion := newDoc.CreateElement("zoteroItem")

		// Attachments: keep the first non-HTML attachment per MIME type.
		itemAttachments := zoteroUnion.CreateElement("attachments")
		mimeTypeMap := make(map[string]bool)
		for _, link := range bibliographyNode.FindElements("[name()='link:link']") {
			linkedID, ok := zoteroAttrValue(link, "rdf:resource")
			if !ok {
				fmt.Println("SKIPPING link:link without rdf:resource attribute")
				continue
			}
			zAttachment := root.FindElement(fmt.Sprintf("[@rdf:about='%s']", linkedID))
			if zAttachment == nil {
				fmt.Println("SKIPPING unresolved attachment:", linkedID)
				continue
			}
			typeNode := zAttachment.FindElement("[name()='link:type']")
			if typeNode == nil {
				fmt.Println("SKIPPING attachment without link:type:", linkedID)
				continue
			}
			mimeType := typeNode.Text()
			if mimeType == "text/html" {
				continue
			}
			if mimeTypeMap[mimeType] {
				fmt.Println("DUPLICATE mimeType:", mimeType, linkedID)
				continue
			}

			attachmentsIDs[linkedID] = true
			filePath, ok := zoteroAttrValue(zAttachment.FindElement("[name()='rdf:resource']"), "rdf:resource")
			if !ok {
				fmt.Println("SKIPPING attachment without file path:", linkedID)
				continue
			}
			entry := itemAttachments.CreateElement("attachment")
			entry.SetText(filePath)
			entry.CreateAttr("mimeType", mimeType)
			mimeTypeMap[mimeType] = true
		}

		// Publication date; the <date> element is created even when the
		// value cannot be parsed, in which case it stays empty.
		if dateNode := bibliographyNode.FindElement("[name()='dc:date']"); dateNode != nil {
			dateElement := zoteroUnion.CreateElement("date")
			if published, err := zoteroDateToISO(dateNode.Text()); err != nil {
				fmt.Println("ERROR parsing date...", err)
			} else {
				dateElement.CreateText(published)
				bookOpf.Metadata.Published = published
			}
		}

		// Publisher: the first foaf:name found under the item.
		if publisherNode := bibliographyNode.FindElement("[name()='foaf:name']"); publisherNode != nil {
			publisher := publisherNode.Text()
			zoteroUnion.CreateElement("publisher").CreateText(publisher)
			bookOpf.Metadata.Publisher = publisher
		}

		// Authors: "<givenName> <surname>", using whichever parts exist.
		authors := zoteroUnion.CreateElement("authors")
		for _, authorNode := range bibliographyNode.FindElements("[name()='foaf:Person']") {
			var nameParts []string
			if givenNode := authorNode.FindElement("[name()='foaf:givenName']"); givenNode != nil {
				nameParts = append(nameParts, givenNode.Text())
			}
			if surNode := authorNode.FindElement("[name()='foaf:surname']"); surNode != nil {
				nameParts = append(nameParts, surNode.Text())
			}
			fullName := strings.Join(nameParts, " ")

			authors.CreateElement("author").SetText(fullName)
			bookOpf.Metadata.Creators = append(bookOpf.Metadata.Creators, calibre.Creator{
				Role: "aut",
				Name: fullName,
			})
		}

		if titleNode := bibliographyNode.FindElement("[name()='dc:title']"); titleNode != nil {
			title := titleNode.Text()
			zoteroUnion.CreateElement("title").CreateText(title)
			bookOpf.Metadata.Title = title
		}

		if abstractNode := bibliographyNode.FindElement("[name()='dcterms:abstract']"); abstractNode != nil {
			description := abstractNode.Text()
			zoteroUnion.CreateElement("description").CreateText(description)
			bookOpf.Metadata.Description = description
		}

		// Identifiers are only printed, as "<type> <value>" pairs split on
		// spaces.
		// NOTE(review): the trailing "[not()]" filter is kept exactly as
		// found; its intent is unclear and it may not be a filter etree
		// accepts — verify against the etree version in use.
		for _, identifier := range bibliographyNode.FindElements("//[name()='dc:identifier'][not()]") {
			id := strings.Split(identifier.Text(), " ")
			if len(id) > 1 && id[0] != "" {
				fmt.Println(bookOpf.Metadata.Title, "TYPE:", id[0], "VALUE:", id[1])
			}
		}

		// Round-trip the intermediate document into the typed ZoteroItem.
		b, err := newDoc.WriteToBytes()
		if err != nil {
			log.Fatal(err)
		}
		if err := xml.Unmarshal(b, &zoteroItem); err != nil {
			log.Fatalln(err)
		}

		for _, a := range zoteroItem.Attachments.Attachment {
			// fmt.Println(a.Path, a.MimeType)
			_ = a
		}

		// Debug aid: the re-serialized item is discarded, so a marshalling
		// error is deliberately ignored here.
		zi, _ := xml.MarshalIndent(zoteroItem, " ", " ")
		_ = zi
		// os.Stdout.Write(zi)
		// fmt.Println("\n~+~ ~ ~ ~ ~")

		bookOpf.Version = "2.0"
		bookOpf.Xmlns = "http://www.idpf.org/2007/opf"
		bookOpf.UniqueIdentifier = "uuid_id"
		bookOpf.Metadata.DC = "http://purl.org/dc/elements/1.1/"
		bookOpf.Metadata.OPF = "http://www.idpf.org/2007/opf"
		bookOpf.Metadata.Identifiers = append(bookOpf.Metadata.Identifiers, calibre.Identifier{
			Scheme: "calibre",
			Id:     "calibre_id",
			Value:  "-1",
		})

		bookOpfOutput, err := xml.MarshalIndent(bookOpf, " ", " ")
		if err != nil {
			log.Fatalln(err)
		}
		_ = bookOpfOutput
		// os.Stdout.Write(bookOpfOutput)
		// fmt.Println("\n ~ ~ ~ ~ ~")
	}
}