initial zotero rdf support...
This commit is contained in:
parent
aaf9f0aa66
commit
bbebb50dc8
|
@ -0,0 +1,194 @@
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"accorder/pkg/calibre"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/araddon/dateparse"
|
||||||
|
"github.com/beevik/etree"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ImportZoteroRDF(cmd *cobra.Command) {
|
||||||
|
|
||||||
|
zoteroRDFPath := CliFlagValue(cmd, "import-zotero")
|
||||||
|
doc := etree.NewDocument()
|
||||||
|
if err := doc.ReadFromFile(zoteroRDFPath); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
root := doc.SelectElement("rdf:RDF")
|
||||||
|
attachmentsIDs := map[string]bool{}
|
||||||
|
for _, attachmentNode := range root.FindElements("[name()='link:type']") {
|
||||||
|
var bookOpf calibre.BookOpfW
|
||||||
|
var zoteroItem ZoteroItem
|
||||||
|
|
||||||
|
attachmentID := attachmentNode.Parent().SelectAttr("rdf:about").Value
|
||||||
|
if attachmentNode.Text() == "text/html" || attachmentsIDs[attachmentID] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
bibliographyNode := root.FindElement(fmt.Sprintf("[@rdf:resource='%s']", attachmentID)).Parent().Copy()
|
||||||
|
|
||||||
|
newDoc := etree.NewDocument()
|
||||||
|
zoteroUnion := newDoc.CreateElement("zoteroItem")
|
||||||
|
|
||||||
|
attachmentsQuery := bibliographyNode.FindElements("[name()='link:link']")
|
||||||
|
itemAttachments := zoteroUnion.CreateElement("attachments")
|
||||||
|
mimeTypeMap := make(map[string]bool)
|
||||||
|
for _, attachment := range attachmentsQuery {
|
||||||
|
attachmentID := attachment.SelectAttr("rdf:resource").Value
|
||||||
|
zAttachment := root.FindElement(fmt.Sprintf("[@rdf:about='%s']", attachmentID))
|
||||||
|
mimeType := zAttachment.FindElement("[name()='link:type']").Text()
|
||||||
|
if mimeType != "text/html" {
|
||||||
|
if !mimeTypeMap[mimeType] {
|
||||||
|
attachmentsIDs[attachmentID] = true
|
||||||
|
filePath := zAttachment.FindElement("[name()='rdf:resource']").SelectAttr("rdf:resource").Value
|
||||||
|
attachment := itemAttachments.CreateElement("attachment")
|
||||||
|
attachment.SetText(filePath)
|
||||||
|
attachment.CreateAttr("mimeType", mimeType)
|
||||||
|
mimeTypeMap[mimeType] = true
|
||||||
|
} else {
|
||||||
|
fmt.Println("DUPLICATE mimeType:", mimeType, attachmentID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dateQuery := bibliographyNode.FindElement("[name()='dc:date']")
|
||||||
|
if dateQuery != nil {
|
||||||
|
dateElement := zoteroUnion.CreateElement("date")
|
||||||
|
date, err := dateparse.ParseAny(dateQuery.Text())
|
||||||
|
if err == nil {
|
||||||
|
formattedDate := date.Format("2006-01-02")
|
||||||
|
dateElement.CreateText(formattedDate)
|
||||||
|
|
||||||
|
bookOpf.Metadata.Published = formattedDate
|
||||||
|
} else {
|
||||||
|
newDateQuery := fmt.Sprintf("1 %s", dateQuery.Text())
|
||||||
|
newDate, err := dateparse.ParseAny(newDateQuery)
|
||||||
|
if err == nil {
|
||||||
|
newFormattedDate := newDate.Format("2006-01-02")
|
||||||
|
dateElement.CreateText(newFormattedDate)
|
||||||
|
bookOpf.Metadata.Published = newFormattedDate
|
||||||
|
} else {
|
||||||
|
lastChanceDate := dateQuery.Text()[len(dateQuery.Text())-4:]
|
||||||
|
year, err := strconv.Atoi(lastChanceDate)
|
||||||
|
if err == nil {
|
||||||
|
justYear := fmt.Sprintf("%d-01-01", year)
|
||||||
|
dateElement.CreateText(justYear)
|
||||||
|
bookOpf.Metadata.Published = justYear
|
||||||
|
} else {
|
||||||
|
fmt.Println("ERROR parsing date...", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
publisherQuery := bibliographyNode.FindElement("[name()='foaf:name']")
|
||||||
|
if publisherQuery != nil {
|
||||||
|
publisherElement := zoteroUnion.CreateElement("publisher")
|
||||||
|
publisher := publisherQuery.Text()
|
||||||
|
publisherElement.CreateText(publisher)
|
||||||
|
|
||||||
|
bookOpf.Metadata.Publisher = publisher
|
||||||
|
}
|
||||||
|
|
||||||
|
authorsQuery := bibliographyNode.FindElements("[name()='foaf:Person']")
|
||||||
|
authors := zoteroUnion.CreateElement("authors")
|
||||||
|
for _, authorNode := range authorsQuery {
|
||||||
|
var firstName, surName string
|
||||||
|
|
||||||
|
firstNameNode := authorNode.FindElement("[name()='foaf:givenName']")
|
||||||
|
if firstNameNode != nil {
|
||||||
|
firstName = firstNameNode.Text() + " "
|
||||||
|
}
|
||||||
|
surNameNode := authorNode.FindElement("[name()='foaf:surname']")
|
||||||
|
if surNameNode != nil {
|
||||||
|
surName = surNameNode.Text() + " "
|
||||||
|
}
|
||||||
|
fullName := strings.TrimSuffix(fmt.Sprintf("%s%s", firstName, surName), " ")
|
||||||
|
authors.CreateElement("author").SetText(fullName)
|
||||||
|
|
||||||
|
bookOpf.Metadata.Creators = append(bookOpf.Metadata.Creators, calibre.Creator{
|
||||||
|
Role: "aut",
|
||||||
|
Name: fullName,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
titleQuery := bibliographyNode.FindElement("[name()='dc:title']")
|
||||||
|
if titleQuery != nil {
|
||||||
|
titleNode := zoteroUnion.CreateElement("title")
|
||||||
|
title := titleQuery.Text()
|
||||||
|
titleNode.CreateText(title)
|
||||||
|
|
||||||
|
bookOpf.Metadata.Title = title
|
||||||
|
}
|
||||||
|
|
||||||
|
descriptionQuery := bibliographyNode.FindElement("[name()='dcterms:abstract']")
|
||||||
|
if descriptionQuery != nil {
|
||||||
|
descriptionNode := zoteroUnion.CreateElement("description")
|
||||||
|
description := descriptionQuery.Text()
|
||||||
|
descriptionNode.CreateText(description)
|
||||||
|
|
||||||
|
bookOpf.Metadata.Description = description
|
||||||
|
}
|
||||||
|
|
||||||
|
// identifiers
|
||||||
|
identifiersQuery := bibliographyNode.FindElements("//[name()='dc:identifier'][not()]")
|
||||||
|
if len(identifiersQuery) > 0 {
|
||||||
|
for _, identifier := range identifiersQuery {
|
||||||
|
identifierLine := identifier.Text()
|
||||||
|
var id []string
|
||||||
|
if strings.Contains(identifierLine, " ") {
|
||||||
|
id = strings.Split(identifier.Text(), " ")
|
||||||
|
}
|
||||||
|
if len(id) > 1 && id[0] != "" {
|
||||||
|
fmt.Println(bookOpf.Metadata.Title, "TYPE:", id[0], "VALUE:", id[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := newDoc.WriteToBytes()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := xml.Unmarshal(b, &zoteroItem); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, a := range zoteroItem.Attachments.Attachment {
|
||||||
|
// fmt.Println(a.Path, a.MimeType)
|
||||||
|
_ = a
|
||||||
|
}
|
||||||
|
|
||||||
|
zi, _ := xml.MarshalIndent(zoteroItem, " ", " ")
|
||||||
|
_ = zi
|
||||||
|
// os.Stdout.Write(zi)
|
||||||
|
// fmt.Println("\n~+~ ~ ~ ~ ~")
|
||||||
|
|
||||||
|
bookOpf.Version = "2.0"
|
||||||
|
bookOpf.Xmlns = "http://www.idpf.org/2007/opf"
|
||||||
|
bookOpf.UniqueIdentifier = "uuid_id"
|
||||||
|
bookOpf.Metadata.DC = "http://purl.org/dc/elements/1.1/"
|
||||||
|
bookOpf.Metadata.OPF = "http://www.idpf.org/2007/opf"
|
||||||
|
|
||||||
|
bookOpf.Metadata.Identifiers = append(bookOpf.Metadata.Identifiers, calibre.Identifier{
|
||||||
|
Scheme: "calibre",
|
||||||
|
Id: "calibre_id",
|
||||||
|
Value: "-1",
|
||||||
|
})
|
||||||
|
|
||||||
|
bookOpfOutput, err := xml.MarshalIndent(bookOpf, " ", " ")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
_ = bookOpfOutput
|
||||||
|
// os.Stdout.Write(bookOpfOutput)
|
||||||
|
// fmt.Println("\n ~ ~ ~ ~ ~")
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue