initial zotero rdf support...
This commit is contained in:
parent
cc2f69d598
commit
1b0b208dbc
|
@ -0,0 +1,194 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"accorder/pkg/calibre"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/araddon/dateparse"
|
||||
"github.com/beevik/etree"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func ImportZoteroRDF(cmd *cobra.Command) {
|
||||
|
||||
zoteroRDFPath := CliFlagValue(cmd, "import-zotero")
|
||||
doc := etree.NewDocument()
|
||||
if err := doc.ReadFromFile(zoteroRDFPath); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
root := doc.SelectElement("rdf:RDF")
|
||||
attachmentsIDs := map[string]bool{}
|
||||
for _, attachmentNode := range root.FindElements("[name()='link:type']") {
|
||||
var bookOpf calibre.BookOpfW
|
||||
var zoteroItem ZoteroItem
|
||||
|
||||
attachmentID := attachmentNode.Parent().SelectAttr("rdf:about").Value
|
||||
if attachmentNode.Text() == "text/html" || attachmentsIDs[attachmentID] {
|
||||
continue
|
||||
}
|
||||
|
||||
bibliographyNode := root.FindElement(fmt.Sprintf("[@rdf:resource='%s']", attachmentID)).Parent().Copy()
|
||||
|
||||
newDoc := etree.NewDocument()
|
||||
zoteroUnion := newDoc.CreateElement("zoteroItem")
|
||||
|
||||
attachmentsQuery := bibliographyNode.FindElements("[name()='link:link']")
|
||||
itemAttachments := zoteroUnion.CreateElement("attachments")
|
||||
mimeTypeMap := make(map[string]bool)
|
||||
for _, attachment := range attachmentsQuery {
|
||||
attachmentID := attachment.SelectAttr("rdf:resource").Value
|
||||
zAttachment := root.FindElement(fmt.Sprintf("[@rdf:about='%s']", attachmentID))
|
||||
mimeType := zAttachment.FindElement("[name()='link:type']").Text()
|
||||
if mimeType != "text/html" {
|
||||
if !mimeTypeMap[mimeType] {
|
||||
attachmentsIDs[attachmentID] = true
|
||||
filePath := zAttachment.FindElement("[name()='rdf:resource']").SelectAttr("rdf:resource").Value
|
||||
attachment := itemAttachments.CreateElement("attachment")
|
||||
attachment.SetText(filePath)
|
||||
attachment.CreateAttr("mimeType", mimeType)
|
||||
mimeTypeMap[mimeType] = true
|
||||
} else {
|
||||
fmt.Println("DUPLICATE mimeType:", mimeType, attachmentID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dateQuery := bibliographyNode.FindElement("[name()='dc:date']")
|
||||
if dateQuery != nil {
|
||||
dateElement := zoteroUnion.CreateElement("date")
|
||||
date, err := dateparse.ParseAny(dateQuery.Text())
|
||||
if err == nil {
|
||||
formattedDate := date.Format("2006-01-02")
|
||||
dateElement.CreateText(formattedDate)
|
||||
|
||||
bookOpf.Metadata.Published = formattedDate
|
||||
} else {
|
||||
newDateQuery := fmt.Sprintf("1 %s", dateQuery.Text())
|
||||
newDate, err := dateparse.ParseAny(newDateQuery)
|
||||
if err == nil {
|
||||
newFormattedDate := newDate.Format("2006-01-02")
|
||||
dateElement.CreateText(newFormattedDate)
|
||||
bookOpf.Metadata.Published = newFormattedDate
|
||||
} else {
|
||||
lastChanceDate := dateQuery.Text()[len(dateQuery.Text())-4:]
|
||||
year, err := strconv.Atoi(lastChanceDate)
|
||||
if err == nil {
|
||||
justYear := fmt.Sprintf("%d-01-01", year)
|
||||
dateElement.CreateText(justYear)
|
||||
bookOpf.Metadata.Published = justYear
|
||||
} else {
|
||||
fmt.Println("ERROR parsing date...", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
publisherQuery := bibliographyNode.FindElement("[name()='foaf:name']")
|
||||
if publisherQuery != nil {
|
||||
publisherElement := zoteroUnion.CreateElement("publisher")
|
||||
publisher := publisherQuery.Text()
|
||||
publisherElement.CreateText(publisher)
|
||||
|
||||
bookOpf.Metadata.Publisher = publisher
|
||||
}
|
||||
|
||||
authorsQuery := bibliographyNode.FindElements("[name()='foaf:Person']")
|
||||
authors := zoteroUnion.CreateElement("authors")
|
||||
for _, authorNode := range authorsQuery {
|
||||
var firstName, surName string
|
||||
|
||||
firstNameNode := authorNode.FindElement("[name()='foaf:givenName']")
|
||||
if firstNameNode != nil {
|
||||
firstName = firstNameNode.Text() + " "
|
||||
}
|
||||
surNameNode := authorNode.FindElement("[name()='foaf:surname']")
|
||||
if surNameNode != nil {
|
||||
surName = surNameNode.Text() + " "
|
||||
}
|
||||
fullName := strings.TrimSuffix(fmt.Sprintf("%s%s", firstName, surName), " ")
|
||||
authors.CreateElement("author").SetText(fullName)
|
||||
|
||||
bookOpf.Metadata.Creators = append(bookOpf.Metadata.Creators, calibre.Creator{
|
||||
Role: "aut",
|
||||
Name: fullName,
|
||||
})
|
||||
}
|
||||
|
||||
titleQuery := bibliographyNode.FindElement("[name()='dc:title']")
|
||||
if titleQuery != nil {
|
||||
titleNode := zoteroUnion.CreateElement("title")
|
||||
title := titleQuery.Text()
|
||||
titleNode.CreateText(title)
|
||||
|
||||
bookOpf.Metadata.Title = title
|
||||
}
|
||||
|
||||
descriptionQuery := bibliographyNode.FindElement("[name()='dcterms:abstract']")
|
||||
if descriptionQuery != nil {
|
||||
descriptionNode := zoteroUnion.CreateElement("description")
|
||||
description := descriptionQuery.Text()
|
||||
descriptionNode.CreateText(description)
|
||||
|
||||
bookOpf.Metadata.Description = description
|
||||
}
|
||||
|
||||
// identifiers
|
||||
identifiersQuery := bibliographyNode.FindElements("//[name()='dc:identifier'][not()]")
|
||||
if len(identifiersQuery) > 0 {
|
||||
for _, identifier := range identifiersQuery {
|
||||
identifierLine := identifier.Text()
|
||||
var id []string
|
||||
if strings.Contains(identifierLine, " ") {
|
||||
id = strings.Split(identifier.Text(), " ")
|
||||
}
|
||||
if len(id) > 1 && id[0] != "" {
|
||||
fmt.Println(bookOpf.Metadata.Title, "TYPE:", id[0], "VALUE:", id[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b, err := newDoc.WriteToBytes()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if err := xml.Unmarshal(b, &zoteroItem); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
for _, a := range zoteroItem.Attachments.Attachment {
|
||||
// fmt.Println(a.Path, a.MimeType)
|
||||
_ = a
|
||||
}
|
||||
|
||||
zi, _ := xml.MarshalIndent(zoteroItem, " ", " ")
|
||||
_ = zi
|
||||
// os.Stdout.Write(zi)
|
||||
// fmt.Println("\n~+~ ~ ~ ~ ~")
|
||||
|
||||
bookOpf.Version = "2.0"
|
||||
bookOpf.Xmlns = "http://www.idpf.org/2007/opf"
|
||||
bookOpf.UniqueIdentifier = "uuid_id"
|
||||
bookOpf.Metadata.DC = "http://purl.org/dc/elements/1.1/"
|
||||
bookOpf.Metadata.OPF = "http://www.idpf.org/2007/opf"
|
||||
|
||||
bookOpf.Metadata.Identifiers = append(bookOpf.Metadata.Identifiers, calibre.Identifier{
|
||||
Scheme: "calibre",
|
||||
Id: "calibre_id",
|
||||
Value: "-1",
|
||||
})
|
||||
|
||||
bookOpfOutput, err := xml.MarshalIndent(bookOpf, " ", " ")
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
_ = bookOpfOutput
|
||||
// os.Stdout.Write(bookOpfOutput)
|
||||
// fmt.Println("\n ~ ~ ~ ~ ~")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue