mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Make output OPF pretty and fix various setting bugs
This commit is contained in:
parent
a1e74e3531
commit
2c5802676d
@ -18,6 +18,7 @@ import sys, re, os, glob
|
|||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
import xml.dom.minidom as dom
|
import xml.dom.minidom as dom
|
||||||
|
from itertools import repeat
|
||||||
|
|
||||||
from libprs500.ebooks.metadata import MetaInformation
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
||||||
@ -193,16 +194,19 @@ class OPF(MetaInformation):
|
|||||||
rating = standard_field('rating')
|
rating = standard_field('rating')
|
||||||
tags = standard_field('tags')
|
tags = standard_field('tags')
|
||||||
|
|
||||||
|
HEADER = '''\
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE package
|
||||||
|
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.2 Package//EN"
|
||||||
|
"http://openebook.org/dtds/oeb-1.2/oebpkg12.dtd">
|
||||||
|
'''
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
raise NotImplementedError('Abstract base class')
|
raise NotImplementedError('Abstract base class')
|
||||||
|
|
||||||
def _initialize(self):
|
def _initialize(self):
|
||||||
if not hasattr(self, 'soup'):
|
if not hasattr(self, 'soup'):
|
||||||
self.soup = BeautifulStoneSoup(u'''\
|
self.soup = BeautifulStoneSoup(u'''\
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
%s
|
||||||
<!DOCTYPE package
|
|
||||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.2 Package//EN"
|
|
||||||
"http://openebook.org/dtds/oeb-1.2/oebpkg12.dtd">
|
|
||||||
<package unique-identifier="libprs_id">
|
<package unique-identifier="libprs_id">
|
||||||
<metadata>
|
<metadata>
|
||||||
<dc-metadata
|
<dc-metadata
|
||||||
@ -210,7 +214,7 @@ class OPF(MetaInformation):
|
|||||||
xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/" />
|
xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/" />
|
||||||
</metadata>
|
</metadata>
|
||||||
</package>
|
</package>
|
||||||
''')
|
'''%self.HEADER)
|
||||||
|
|
||||||
def _commit(self, doc):
|
def _commit(self, doc):
|
||||||
self.soup = BeautifulStoneSoup(doc.toxml('utf-8'), fromEncoding='utf-8')
|
self.soup = BeautifulStoneSoup(doc.toxml('utf-8'), fromEncoding='utf-8')
|
||||||
@ -297,8 +301,8 @@ class OPF(MetaInformation):
|
|||||||
def set_authors(self, authors):
|
def set_authors(self, authors):
|
||||||
if not authors:
|
if not authors:
|
||||||
authors = ['Unknown']
|
authors = ['Unknown']
|
||||||
attrs = [[('role', 'aut')] for a in authors]
|
attrs = list(repeat([('role', 'aut')], len(authors)))
|
||||||
self._set_metadata_element('dc:Creator', authors, attrs)
|
self._set_metadata_element('dc:creator', authors, attrs)
|
||||||
|
|
||||||
def get_author_sort(self):
|
def get_author_sort(self):
|
||||||
creators = self.soup.package.metadata.findAll('dc:creator')
|
creators = self.soup.package.metadata.findAll('dc:creator')
|
||||||
@ -319,7 +323,7 @@ class OPF(MetaInformation):
|
|||||||
self.set_authors([])
|
self.set_authors([])
|
||||||
doc = dom.parseString(self.soup.__str__('UTF-8'))
|
doc = dom.parseString(self.soup.__str__('UTF-8'))
|
||||||
package = doc.documentElement
|
package = doc.documentElement
|
||||||
aut = package.getElementsByTagName('dc:Creator')[0]
|
aut = package.getElementsByTagName('dc:creator')[0]
|
||||||
aut.setAttribute('file-as', aus)
|
aut.setAttribute('file-as', aus)
|
||||||
self._commit(doc)
|
self._commit(doc)
|
||||||
|
|
||||||
@ -338,7 +342,7 @@ class OPF(MetaInformation):
|
|||||||
self.title = None
|
self.title = None
|
||||||
doc = dom.parseString(self.soup.__str__('UTF-8'))
|
doc = dom.parseString(self.soup.__str__('UTF-8'))
|
||||||
package = doc.documentElement
|
package = doc.documentElement
|
||||||
tit = package.getElementsByTagName('dc:Title')[0]
|
tit = package.getElementsByTagName('dc:title')[0]
|
||||||
tit.setAttribute('file-as', title_sort)
|
tit.setAttribute('file-as', title_sort)
|
||||||
self._commit(doc)
|
self._commit(doc)
|
||||||
|
|
||||||
@ -351,7 +355,7 @@ class OPF(MetaInformation):
|
|||||||
def set_comments(self, comments):
|
def set_comments(self, comments):
|
||||||
if not comments:
|
if not comments:
|
||||||
comments = ''
|
comments = ''
|
||||||
self._set_metadata_element('dc:Description', comments)
|
self._set_metadata_element('dc:description', comments)
|
||||||
|
|
||||||
def get_uid(self):
|
def get_uid(self):
|
||||||
package = self.soup.find('package')
|
package = self.soup.find('package')
|
||||||
@ -371,7 +375,7 @@ class OPF(MetaInformation):
|
|||||||
def set_category(self, category):
|
def set_category(self, category):
|
||||||
if not category:
|
if not category:
|
||||||
category = ''
|
category = ''
|
||||||
self._set_metadata_element('dc:Type', category)
|
self._set_metadata_element('dc:type', category)
|
||||||
|
|
||||||
def get_publisher(self):
|
def get_publisher(self):
|
||||||
publisher = self.soup.find('dc:publisher')
|
publisher = self.soup.find('dc:publisher')
|
||||||
@ -382,7 +386,7 @@ class OPF(MetaInformation):
|
|||||||
def set_publisher(self, category):
|
def set_publisher(self, category):
|
||||||
if not category:
|
if not category:
|
||||||
category = 'Unknown'
|
category = 'Unknown'
|
||||||
self._set_metadata_element('dc:Publisher', category)
|
self._set_metadata_element('dc:publisher', category)
|
||||||
|
|
||||||
|
|
||||||
def get_isbn(self):
|
def get_isbn(self):
|
||||||
@ -396,7 +400,7 @@ class OPF(MetaInformation):
|
|||||||
|
|
||||||
def set_isbn(self, isbn):
|
def set_isbn(self, isbn):
|
||||||
if isbn:
|
if isbn:
|
||||||
self._set_metadata_element('dc:Identifier', isbn, [('scheme', 'ISBN')],
|
self._set_metadata_element('dc:identifier', isbn, [('scheme', 'ISBN')],
|
||||||
replace=True)
|
replace=True)
|
||||||
|
|
||||||
def get_libprs_id(self):
|
def get_libprs_id(self):
|
||||||
@ -407,7 +411,7 @@ class OPF(MetaInformation):
|
|||||||
|
|
||||||
def set_libprs_id(self, val):
|
def set_libprs_id(self, val):
|
||||||
if val:
|
if val:
|
||||||
self._set_metadata_element('dc:Identifier', str(val), [('scheme', 'libprs'), ('id', 'libprs_id')],
|
self._set_metadata_element('dc:identifier', str(val), [('scheme', 'libprs'), ('id', 'libprs_id')],
|
||||||
replace=True)
|
replace=True)
|
||||||
|
|
||||||
def get_cover(self):
|
def get_cover(self):
|
||||||
@ -509,15 +513,40 @@ class OPF(MetaInformation):
|
|||||||
return [unicode(a).strip() for a in ans]
|
return [unicode(a).strip() for a in ans]
|
||||||
|
|
||||||
def set_tags(self, tags):
|
def set_tags(self, tags):
|
||||||
self._set_metadata_element('dc:Subject', tags)
|
self._set_metadata_element('dc:subject', tags)
|
||||||
|
|
||||||
def write(self, stream):
|
def write(self, stream):
|
||||||
src = unicode(self.soup)
|
from lxml import etree
|
||||||
src = re.sub(r'>\s*</item(ref)*>', ' />\n', src)
|
root = etree.fromstring(unicode(self.soup))
|
||||||
src = re.sub(r'<manifest><', '<manifest>\n<', src)
|
root.text = '\n%4s'%' '
|
||||||
src = re.sub(r'<spine><', '<spine>\n<', src)
|
for child in root:
|
||||||
src = re.sub(r'^<item', ' <item', src)
|
child.text = '\n%8s'%' '
|
||||||
stream.write(src.encode('utf-8')+'\n')
|
child.tail = '\n%4s'%' ' if child is not root[-1] else '\n'
|
||||||
|
for grandchild in child:
|
||||||
|
grandchild.tail = '\n%8s'%' ' if grandchild is not child[-1] else '\n%4s'%' '
|
||||||
|
|
||||||
|
metadata = root.find('metadata')
|
||||||
|
if metadata is not None:
|
||||||
|
for parent in ['dc-metadata', 'x-metadata']:
|
||||||
|
parent = metadata.find(parent)
|
||||||
|
if parent is None:
|
||||||
|
continue
|
||||||
|
parent.text = '\n%12s'%' '
|
||||||
|
for child in parent:
|
||||||
|
child.tail = '\n%8s'%' ' if child is parent[-1] else '\n%12s'%' '
|
||||||
|
|
||||||
|
def fix_self_closing_tags(el):
|
||||||
|
''' Makes tags that have only whitespace content self closing '''
|
||||||
|
if len(el) == 0 and (el.text is None or el.text.strip() == ''):
|
||||||
|
el.text = None
|
||||||
|
for child in el:
|
||||||
|
fix_self_closing_tags(child)
|
||||||
|
|
||||||
|
fix_self_closing_tags(root)
|
||||||
|
|
||||||
|
raw = self.HEADER + etree.tostring(root, encoding='UTF-8')
|
||||||
|
|
||||||
|
stream.write(raw+'\n')
|
||||||
|
|
||||||
class OPFReader(OPF):
|
class OPFReader(OPF):
|
||||||
|
|
||||||
@ -621,9 +650,7 @@ def main(args=sys.argv):
|
|||||||
if opts.comment is not None:
|
if opts.comment is not None:
|
||||||
mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
||||||
print mi
|
print mi
|
||||||
res = str(mi.soup)
|
mi.write(open(args[1], 'wb'))
|
||||||
del mi
|
|
||||||
open(args[1], 'wb').write(res)
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user