mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #315
This commit is contained in:
parent
f97f1c91d2
commit
a433be5ba5
@ -59,6 +59,7 @@ class MetaInformation(object):
|
||||
self.series_index = None
|
||||
self.rating = None
|
||||
self.isbn = None
|
||||
self.tags = []
|
||||
|
||||
def __str__(self):
|
||||
ans = ''
|
||||
|
@ -17,6 +17,7 @@
|
||||
import sys, re, os
|
||||
from urllib import unquote
|
||||
from urlparse import urlparse
|
||||
import xml.dom.minidom as dom
|
||||
|
||||
from libprs500.ebooks.metadata import MetaInformation
|
||||
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
||||
@ -92,41 +93,116 @@ class TOC(list):
|
||||
pass
|
||||
|
||||
|
||||
class OPFReader(MetaInformation):
|
||||
class standard_field(object):
    """Descriptor that routes attribute access to accessor methods.

    Declaring ``title = standard_field('title')`` on a class makes
    ``obj.title`` call ``obj.get_title()`` and ``obj.title = value`` call
    ``obj.set_title(value)``.
    """

    def __init__(self, name):
        # Suffix appended to 'get_' / 'set_' to locate the accessor methods.
        self.name = name

    def __get__(self, obj, typ=None):
        # Class-level access (``Owner.title``) passes obj=None; there is no
        # instance to delegate to, so hand back the descriptor itself
        # instead of failing with an AttributeError on None.
        if obj is None:
            return self
        return getattr(obj, 'get_' + self.name)()

    def __set__(self, obj, val):
        getattr(obj, 'set_' + self.name)(val)
|
||||
|
||||
class OPF(MetaInformation):
|
||||
|
||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
def __init__(self, stream, dir=os.getcwd()):
|
||||
manage = False
|
||||
if not hasattr(stream, 'read'):
|
||||
manage = True
|
||||
dir = os.path.dirname(stream)
|
||||
stream = open(stream, 'rb')
|
||||
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
|
||||
if hasattr(stream, 'seek'):
|
||||
stream.seek(0)
|
||||
self.soup = BeautifulStoneSoup(stream.read())
|
||||
if manage:
|
||||
stream.close()
|
||||
self.title = self.get_title()
|
||||
self.authors = self.get_authors()
|
||||
self.title_sort = self.get_title_sort()
|
||||
self.author_sort = self.get_author_sort()
|
||||
self.comments = self.get_comments()
|
||||
self.category = self.get_category()
|
||||
self.publisher = self.get_publisher()
|
||||
self.isbn = self.get_isbn()
|
||||
self.series = self.series_index = self.rating = None
|
||||
self.manifest = Manifest(self.soup, dir)
|
||||
self.spine = Spine(self.soup, self.manifest)
|
||||
self.toc = TOC(self, dir)
|
||||
self.cover = self.get_cover()
|
||||
libprs_id = standard_field('libprs_id')
|
||||
title = standard_field('title')
|
||||
authors = standard_field('authors')
|
||||
title_sort = standard_field('title_sort')
|
||||
author_sort = standard_field('author_sort')
|
||||
comments = standard_field('comments')
|
||||
category = standard_field('category')
|
||||
publisher = standard_field('publisher')
|
||||
isbn = standard_field('isbn')
|
||||
cover = standard_field('cover')
|
||||
series = standard_field('series')
|
||||
series_index = standard_field('series_index')
|
||||
rating = standard_field('rating')
|
||||
tags = standard_field('tags')
|
||||
|
||||
def __init__(self):
    """Always raise NotImplementedError: this class is not built directly."""
    message = 'Abstract base class'
    raise NotImplementedError(message)
|
||||
|
||||
def _initialize(self):
|
||||
if not hasattr(self, 'soup'):
|
||||
self.soup = BeautifulStoneSoup(u'''\
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.2 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.2/oebpkg12.dtd">
|
||||
<package unique-identifier="libprs_id">
|
||||
<metadata>
|
||||
<dc-metadata
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/" />
|
||||
</metadata>
|
||||
</package>
|
||||
''')
|
||||
|
||||
def _commit(self, doc):
    """Replace ``self.soup`` with a re-parse of the DOM document *doc*.

    *doc* is serialized with ``toxml('utf-8')`` and the result is parsed
    again by BeautifulStoneSoup with ``fromEncoding='utf-8'``.
    """
    serialized = doc.toxml('utf-8')
    self.soup = BeautifulStoneSoup(serialized, fromEncoding='utf-8')
|
||||
|
||||
def _find_element(self, package, name, attrs=[]):
    """Return the first *name* element under *package* matching *attrs*.

    *attrs* is an iterable of ``(attribute, value)`` pairs; an element
    matches when ``getAttribute(attribute)`` equals ``value`` for every
    pair. With no pairs the first element named *name* is returned.
    Returns None when nothing matches.
    """
    for candidate in package.getElementsByTagName(name):
        if all(candidate.getAttribute(key) == wanted for key, wanted in attrs):
            return candidate
    return None
|
||||
|
||||
def _set_metadata_element(self, name, value, attrs=[],
                          type='dc-metadata', replace=False):
    """Write one or more *name* elements into the OPF metadata section.

    name    -- tag name of the element(s) to create.
    value   -- a string, or a sequence of strings; one element is created
               per string, with the string as its text.
    attrs   -- for a string *value*: a list of ``(attribute, value)``
               pairs for the element. For a sequence *value*: a parallel
               list holding one such list per entry. When it is empty,
               every element is created without attributes.
    type    -- tag name of the child of ``<metadata>`` that receives the
               elements; it is created when missing.
    replace -- when false, every existing *name* element inside the
               container is removed first. When true, only an existing
               element whose attributes match the new element's *attrs*
               (searched in the whole package) is removed.

    The document is re-parsed from ``self.soup``, edited through minidom
    and stored back with ``self._commit``.
    """
    self._initialize()
    if isinstance(value, basestring):
        value = [value]
        attrs = [attrs]
    elif not attrs:
        # A sequence of values without attribute lists: zip() below would
        # pair nothing and silently drop every value, so give each value
        # an empty attribute list of its own.
        value = list(value)
        attrs = [[] for _ in value]
    doc = dom.parseString(self.soup.__str__('UTF-8'))
    package = doc.documentElement
    metadata = package.getElementsByTagName('metadata')[0]

    containers = metadata.getElementsByTagName(type)
    if containers:
        dcm = containers[0]
    else:
        dcm = doc.createElement(type)
        metadata.appendChild(dcm)

    if not replace:
        # Start from a clean slate: drop all current *name* elements.
        for tag in dcm.getElementsByTagName(name):
            tag.parentNode.removeChild(tag)
            tag.unlink()

    for val, vattrs in zip(value, attrs):
        if replace:
            # Remove only the element this value supersedes.
            el = self._find_element(package, name, vattrs)
            if el:
                el.parentNode.removeChild(el)
                el.unlink()
        el = doc.createElement(name)
        el.appendChild(doc.createTextNode(val))
        for attr, vattr in vattrs:
            el.setAttribute(attr, vattr)
        dcm.appendChild(el)
    self._commit(doc)
|
||||
|
||||
|
||||
def get_title(self):
|
||||
title = self.soup.package.metadata.find('dc:title')
|
||||
if title:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string)
|
||||
return self.default_title
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
|
||||
return self.default_title.strip()
|
||||
|
||||
def set_title(self, title):
    """Store *title* in the ``dc:title`` element; a false value becomes 'Unknown'."""
    self._set_metadata_element('dc:title', title if title else 'Unknown')
|
||||
|
||||
def get_authors(self):
|
||||
creators = self.soup.package.metadata.findAll('dc:creator')
|
||||
@ -142,9 +218,15 @@ class OPFReader(MetaInformation):
|
||||
ans = []
|
||||
for i in au:
|
||||
ans.extend(i.split('&'))
|
||||
return ans
|
||||
return [a.strip() for a in ans]
|
||||
return []
|
||||
|
||||
def set_authors(self, authors):
    """Write one ``dc:Creator`` element per author, each with ``role="aut"``.

    A false *authors* value is replaced by ``['Unknown']``.
    """
    names = authors if authors else ['Unknown']
    # One attribute list per author, parallel to *names*.
    roles = [[('role', 'aut')] for _ in names]
    self._set_metadata_element('dc:Creator', names, roles)
|
||||
|
||||
def get_author_sort(self):
|
||||
creators = self.soup.package.metadata.findAll('dc:creator')
|
||||
for elem in creators:
|
||||
@ -153,32 +235,73 @@ class OPFReader(MetaInformation):
|
||||
role = elem.get('opf:role')
|
||||
if role == 'aut':
|
||||
fa = elem.get('file-as')
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, fa) if fa else None
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None
|
||||
return None
|
||||
|
||||
def set_author_sort(self, aus):
    """Set the ``file-as`` attribute of the first ``dc:Creator`` element.

    A false *aus* is stored as the empty string. When ``self.authors`` is
    false, ``set_authors([])`` is called first.
    """
    sort_key = aus if aus else ''
    self._initialize()
    if not self.authors:
        self.set_authors([])
    doc = dom.parseString(self.soup.__str__('UTF-8'))
    first_creator = doc.documentElement.getElementsByTagName('dc:Creator')[0]
    first_creator.setAttribute('file-as', sort_key)
    self._commit(doc)
|
||||
|
||||
def get_title_sort(self):
    """Return the stripped ``file-as`` attribute of ``dc:title``, or None.

    None is returned when there is no ``dc:title`` element or when it
    carries no ``file-as`` attribute.
    """
    node = self.soup.package.find('dc:title')
    if node and node.has_key('file-as'):
        return node['file-as'].strip()
    return None
|
||||
|
||||
def set_title_sort(self, title_sort):
    """Set the ``file-as`` attribute of the first ``dc:Title`` element.

    A false *title_sort* is stored as the empty string. When
    ``self.title`` is false, None is assigned to ``self.title`` before the
    document is edited.
    """
    sort_key = title_sort if title_sort else ''
    self._initialize()
    if not self.title:
        self.title = None
    doc = dom.parseString(self.soup.__str__('UTF-8'))
    first_title = doc.documentElement.getElementsByTagName('dc:Title')[0]
    first_title.setAttribute('file-as', sort_key)
    self._commit(doc)
|
||||
|
||||
def get_comments(self):
|
||||
comments = self.soup.find('dc:description')
|
||||
if comments:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string)
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip()
|
||||
return None
|
||||
|
||||
def set_comments(self, comments):
    """Store *comments* in ``dc:Description``; a false value becomes ''."""
    self._set_metadata_element('dc:Description', comments if comments else '')
|
||||
|
||||
def get_category(self):
|
||||
category = self.soup.find('dc:type')
|
||||
if category:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string)
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip()
|
||||
return None
|
||||
|
||||
def set_category(self, category):
    """Store *category* in ``dc:Type``; a false value becomes ''."""
    self._set_metadata_element('dc:Type', category if category else '')
|
||||
|
||||
def get_publisher(self):
|
||||
publisher = self.soup.find('dc:publisher')
|
||||
if publisher:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string)
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip()
|
||||
return None
|
||||
|
||||
def set_publisher(self, category):
    """Store the publisher in ``dc:Publisher``; a false value becomes 'Unknown'.

    The parameter is named *category* but carries the publisher name.
    """
    publisher = category if category else 'Unknown'
    self._set_metadata_element('dc:Publisher', publisher)
|
||||
|
||||
|
||||
def get_isbn(self):
|
||||
for item in self.soup.package.metadata.findAll('dc:identifier'):
|
||||
@ -186,9 +309,25 @@ class OPFReader(MetaInformation):
|
||||
if not scheme:
|
||||
scheme = item.get('opf:scheme')
|
||||
if scheme is not None and scheme.lower() == 'isbn':
|
||||
return item.string
|
||||
return str(item.string).strip()
|
||||
return None
|
||||
|
||||
def set_isbn(self, isbn):
    """Write *isbn* as a ``dc:Identifier`` with ``scheme="ISBN"``.

    Does nothing when *isbn* is false. The element is written with
    ``replace=True``.
    """
    if not isbn:
        return
    scheme = [('scheme', 'ISBN')]
    self._set_metadata_element('dc:Identifier', isbn, scheme, replace=True)
|
||||
|
||||
def get_libprs_id(self):
    """Return the text of the ``dc:identifier`` whose scheme is 'libprs'.

    The text is converted with ``str`` and stripped. Returns None when no
    such identifier exists.
    """
    identifiers = self.soup.package.metadata.findAll('dc:identifier')
    for ident in identifiers:
        if not ident.has_key('scheme'):
            continue
        if ident['scheme'] == 'libprs':
            return str(ident.string).strip()
    return None
|
||||
|
||||
def set_libprs_id(self, val):
    """Write *val* as the ``dc:Identifier`` with scheme 'libprs'.

    Does nothing when *val* is false. The value is converted with ``str``
    and the element also gets ``id="libprs_id"``; it is written with
    ``replace=True``.
    """
    if not val:
        return
    identity = [('scheme', 'libprs'), ('id', 'libprs_id')]
    self._set_metadata_element('dc:Identifier', str(val), identity,
                               replace=True)
|
||||
|
||||
def get_cover(self):
|
||||
guide = self.soup.package.find('guide')
|
||||
if guide:
|
||||
@ -201,6 +340,23 @@ class OPFReader(MetaInformation):
|
||||
return reference.get('href')
|
||||
return None
|
||||
|
||||
def set_cover(self, path):
    """Point the guide's cover reference at *path*.

    The first ``guide`` element is used, or one is appended to the package
    when none exists. Inside it, the ``reference`` with ``type="cover"``
    is reused or created, and its ``href`` is set to *path*.
    """
    self._initialize()
    doc = dom.parseString(self.soup.__str__('UTF-8'))
    package = doc.documentElement

    guides = package.getElementsByTagName('guide')
    if not guides:
        guide = doc.createElement('guide')
        package.appendChild(guide)
    else:
        guide = guides[0]

    ref = self._find_element(guide, 'reference', [('type', 'cover')])
    if not ref:
        ref = doc.createElement('reference')
        guide.appendChild(ref)
    ref.setAttribute('type', 'cover')
    ref.setAttribute('href', path)
    self._commit(doc)
|
||||
|
||||
def possible_cover_prefixes(self):
|
||||
isbn, ans = [], []
|
||||
@ -213,6 +369,107 @@ class OPFReader(MetaInformation):
|
||||
ans.append(item[1].replace('-', ''))
|
||||
return ans
|
||||
|
||||
def get_series(self):
    """Return the stripped text of ``x-metadata/series``, or None.

    None is returned when either the ``x-metadata`` element or its
    ``series`` child is missing.
    """
    xmeta = self.soup.package.metadata.find('x-metadata')
    node = xmeta.find('series') if xmeta else None
    if node:
        return str(node.string).strip()
    return None
|
||||
|
||||
def set_series(self, val):
    """Store *val* in ``x-metadata/series``; a false value becomes ''."""
    series = val if val else ''
    self._set_metadata_element('series', series, type='x-metadata')
|
||||
|
||||
def get_series_index(self):
    """Return ``x-metadata/series-index`` as an int, or None.

    None is returned when the ``x-metadata`` element or its
    ``series-index`` child is missing, or when the text is not a valid
    integer.
    """
    xm = self.soup.package.metadata.find('x-metadata')
    if not xm:
        return None
    s = xm.find('series-index')
    if not s:
        return None
    try:
        return int(str(s.string).strip())
    except (ValueError, TypeError):
        # Only conversion failures mean "no usable index"; anything else
        # (KeyboardInterrupt, SystemExit, real bugs) must propagate.
        return None
|
||||
|
||||
def set_series_index(self, val):
    """Store ``str(val)`` in ``x-metadata/series-index``; a false value becomes 1."""
    index = val if val else 1
    self._set_metadata_element('series-index', str(index), type='x-metadata')
|
||||
|
||||
def get_rating(self):
    """Return ``x-metadata/rating`` as an int, or None.

    None is returned when the ``x-metadata`` element or its ``rating``
    child is missing, or when the text is not a valid integer.
    """
    xm = self.soup.package.metadata.find('x-metadata')
    if not xm:
        return None
    s = xm.find('rating')
    if not s:
        return None
    try:
        return int(str(s.string).strip())
    except (ValueError, TypeError):
        # Only conversion failures mean "no usable rating"; anything else
        # (KeyboardInterrupt, SystemExit, real bugs) must propagate.
        return None
|
||||
|
||||
def set_rating(self, val):
    """Store ``str(val)`` in ``x-metadata/rating``; a false value becomes 0."""
    rating = val if val else 0
    self._set_metadata_element('rating', str(rating), type='x-metadata')
|
||||
|
||||
def get_tags(self):
    """Return the stripped text of every ``dc:subject`` element as a list.

    Elements whose ``string`` is false are skipped; the remaining values
    are converted with ``unicode`` before stripping.
    """
    texts = [sub.string for sub in self.soup.findAll('dc:subject')]
    return [unicode(text).strip() for text in texts if text]
|
||||
|
||||
def set_tags(self, tags):
    """Pass *tags* to ``_set_metadata_element`` as ``dc:Subject`` values."""
    element_name = 'dc:Subject'
    self._set_metadata_element(element_name, tags)
|
||||
|
||||
def write(self, stream):
    """Write ``self.soup.prettify('utf-8')`` to *stream*."""
    rendered = self.soup.prettify('utf-8')
    stream.write(rendered)
|
||||
|
||||
class OPFReader(OPF):
    """OPF metadata parsed from an existing file or file-like object."""

    def __init__(self, stream, dir=os.getcwd()):
        """Parse *stream* into ``self.soup``.

        stream -- an object with a ``read`` method, or a path that is
                  opened in binary mode and closed again by this method.
        dir    -- kept for backward compatibility; this initializer does
                  not use it.

        ``self.default_title`` is the stream's ``name`` attribute when it
        has one, otherwise 'Unknown'.
        """
        manage = False
        if not hasattr(stream, 'read'):
            # A path was given: this method owns the file it opens.
            manage = True
            stream = open(stream, 'rb')
        try:
            self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
            if hasattr(stream, 'seek'):
                stream.seek(0)
            self.soup = BeautifulStoneSoup(stream.read())
        finally:
            # Close our own file even when reading or parsing fails.
            if manage:
                stream.close()
|
||||
|
||||
class OPFCreator(OPF):
    """OPF metadata populated from an existing metadata object."""

    # Fields copied from the source object only when their value is true.
    OPTIONAL_FIELDS = ('category', 'comments', 'publisher', 'rating',
                       'series', 'series_index', 'tags', 'isbn')

    def __init__(self, mi):
        """Copy the metadata of *mi* onto this object.

        ``title`` and ``authors`` are always copied. The fields listed in
        ``OPTIONAL_FIELDS`` are copied when true, and ``libprs_id`` is
        copied whenever *mi* has that attribute.
        """
        self.title = mi.title
        self.authors = mi.authors
        for field in self.OPTIONAL_FIELDS:
            value = getattr(mi, field)
            if value:
                setattr(self, field, value)
        if hasattr(mi, 'libprs_id'):
            self.libprs_id = mi.libprs_id
|
||||
|
||||
def main(args=sys.argv):
|
||||
print OPFReader(open(args[1], 'rb'))
|
||||
|
@ -15,7 +15,7 @@
|
||||
"""
|
||||
Edit metadata in RTF files.
|
||||
"""
|
||||
import re, cStringIO, sys, copy
|
||||
import re, cStringIO, sys
|
||||
|
||||
from libprs500.ebooks.metadata import MetaInformation, get_parser
|
||||
|
||||
@ -118,13 +118,7 @@ def create_metadata(stream, options):
|
||||
stream.seek(0)
|
||||
stream.write(ans)
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
mi = copy.deepcopy(mi)
|
||||
mi.authors = ', '.join(mi.authors)
|
||||
mi.comment = mi.comments
|
||||
set_metadata_(stream, mi)
|
||||
|
||||
def set_metadata_(stream, options):
|
||||
def set_metadata(stream, options):
|
||||
'''
|
||||
Modify/add RTF metadata in stream
|
||||
@param options: Object with metadata attributes title, author, comment, category
|
||||
@ -147,7 +141,7 @@ def set_metadata_(stream, options):
|
||||
src = pat.sub(r'{\\title ' + title + r'}', src)
|
||||
else:
|
||||
src = add_metadata_item(src, 'title', title)
|
||||
comment = options.comment
|
||||
comment = options.comments
|
||||
if comment != None:
|
||||
comment = comment.encode('ascii', 'replace')
|
||||
pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
|
||||
@ -157,6 +151,7 @@ def set_metadata_(stream, options):
|
||||
src = add_metadata_item(src, 'subject', comment)
|
||||
author = options.authors
|
||||
if author != None:
|
||||
author = ', '.join(author)
|
||||
author = author.encode('ascii', 'ignore')
|
||||
pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
|
||||
if pat.search(src):
|
||||
@ -186,7 +181,10 @@ def main(args=sys.argv):
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
stream = open(args[1], 'r+b')
|
||||
set_metadata_(stream, options)
|
||||
if options.authors:
|
||||
options.authors = options.authors.split(',')
|
||||
options.comments = options.comment
|
||||
set_metadata(stream, options)
|
||||
mi = get_metadata(stream)
|
||||
return mi
|
||||
|
||||
|
@ -21,6 +21,7 @@ from zlib import compress, decompress
|
||||
|
||||
from libprs500 import sanitize_file_name
|
||||
from libprs500.ebooks.metadata.meta import set_metadata
|
||||
from libprs500.ebooks.metadata.opf import OPFCreator
|
||||
from libprs500.ebooks.metadata import MetaInformation
|
||||
|
||||
class Concatenate(object):
|
||||
@ -1087,6 +1088,25 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
||||
self.conn.execute('DELETE FROM books WHERE id=?', (id,))
|
||||
self.conn.commit()
|
||||
|
||||
def get_metadata(self, idx):
    """Build a MetaInformation object for the book at index *idx*.

    The authors string is split on ',' and the tags string is split on ','
    with each tag stripped. ``series_index`` is only filled in when the
    book has a series. ``libprs_id`` is set to the value returned by
    ``self.id(idx)``.
    """
    authors = self.authors(idx)
    if authors:
        authors = authors.split(',')
    mi = MetaInformation(self.title(idx), authors)
    mi.author_sort = self.author_sort(idx)
    mi.comments = self.comments(idx)
    mi.publisher = self.publisher(idx)

    raw_tags = self.tags(idx)
    if raw_tags:
        mi.tags = [tag.strip() for tag in raw_tags.split(',')]

    mi.series = self.series(idx)
    if mi.series:
        mi.series_index = self.series_index(idx)
    mi.rating = self.rating(idx)

    # NOTE(review): isbn() receives the id rather than idx, unlike the
    # other accessors called here -- confirm this is intended.
    book_id = self.id(idx)
    mi.isbn = self.isbn(book_id)
    mi.libprs_id = book_id
    return mi
|
||||
|
||||
def export_to_dir(self, dir, indices, byauthor=False):
|
||||
if not os.path.exists(dir):
|
||||
raise IOError('Target directory does not exist: '+dir)
|
||||
@ -1113,6 +1133,17 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
||||
id = str(self.id(idx))
|
||||
if not os.path.exists(tpath):
|
||||
os.mkdir(tpath)
|
||||
mi = OPFCreator(self.get_metadata(idx))
|
||||
cover = self.cover(idx)
|
||||
if cover is not None:
|
||||
f = open(os.path.join(tpath, 'cover.jpg'), 'wb')
|
||||
f.write(cover)
|
||||
mi.cover = 'cover.jpg'
|
||||
f.close()
|
||||
f = open(os.path.join(tpath, 'metadata.opf'), 'wb')
|
||||
mi.write(f)
|
||||
f.close()
|
||||
|
||||
for fmt in self.formats(idx).split(','):
|
||||
data = self.format(idx, fmt)
|
||||
name = au + ' - ' + title if byauthor else title + ' - ' + au
|
||||
@ -1120,15 +1151,12 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
||||
f = open(os.path.join(tpath, sanitize_file_name(fname)), 'w+b')
|
||||
f.write(data)
|
||||
f.flush()
|
||||
aum = self.authors(idx)
|
||||
if aum: aum = aum.split(',')
|
||||
mi = MetaInformation(self.title(idx), aum)
|
||||
mi.author_sort = self.author_sort(idx)
|
||||
try:
|
||||
set_metadata(f, mi, fmt.lower())
|
||||
except:
|
||||
print 'Error setting metadata for book:', mi.title
|
||||
traceback.print_exc()
|
||||
f.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user