mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More robust OPF parsing and improved TOC creation in html2epub
This commit is contained in:
parent
e3b8a1b3bf
commit
501cc90bfa
@ -62,10 +62,11 @@ def config(defaults=None):
|
|||||||
c.add_opt('override_css', ['--override-css'], default=None,
|
c.add_opt('override_css', ['--override-css'], default=None,
|
||||||
help=_('Either the path to a CSS stylesheet or raw CSS. This CSS will override any existing CSS declarations in the source files.'))
|
help=_('Either the path to a CSS stylesheet or raw CSS. This CSS will override any existing CSS declarations in the source files.'))
|
||||||
structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
|
structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
|
||||||
structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section|part', 'i')]",
|
structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section|part', 'i')] | //*[@class = 'chapter']",
|
||||||
help=_('''\
|
help=_('''\
|
||||||
An XPath expression to detect chapter titles. The default is to consider <h1> or
|
An XPath expression to detect chapter titles. The default is to consider <h1> or
|
||||||
<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles.
|
<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
|
||||||
|
well as any tags that have class="chapter".
|
||||||
The expression used must evaluate to a list of elements. To disable chapter detection,
|
The expression used must evaluate to a list of elements. To disable chapter detection,
|
||||||
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
|
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
|
||||||
help on using this feature.
|
help on using this feature.
|
||||||
@ -84,12 +85,12 @@ Control the automatic generation of a Table of Contents. If an OPF file is detec
|
|||||||
and it specifies a Table of Contents, then that will be used rather than trying
|
and it specifies a Table of Contents, then that will be used rather than trying
|
||||||
to auto-generate a Table of Contents.
|
to auto-generate a Table of Contents.
|
||||||
''').replace('\n', ' '))
|
''').replace('\n', ' '))
|
||||||
toc('max_toc_recursion', ['--max-toc-recursion'], default=1,
|
toc('max_toc_links', ['--max-toc-links'], default=50,
|
||||||
help=_('Number of levels of HTML files to try to autodetect TOC entries from. Set to 0 to disable all TOC autodetection. Default is %default.'))
|
help=_('Maximum number of links to insert into the TOC. Set to 0 to disable. Default is: %default. Links are only added to the TOC if less than the --toc-threshold number of chapters were detected.'))
|
||||||
toc('max_toc_links', ['--max-toc-links'], default=40,
|
|
||||||
help=_('Maximum number of links from each HTML file to insert into the TOC. Set to 0 to disable. Default is: %default.'))
|
|
||||||
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
||||||
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
||||||
|
toc('toc_threshold', ['--toc-threshold'], default=6,
|
||||||
|
help=_('If fewer than this number of chapters is detected, then links are added to the Table of Contents.'))
|
||||||
toc('use_auto_toc', ['--use-auto-toc'], default=False,
|
toc('use_auto_toc', ['--use-auto-toc'], default=False,
|
||||||
help=_('Normally, if the source file already has a Table of Contents, it is used in preference to the autodetected one. With this option, the autodetected one is always used.'))
|
help=_('Normally, if the source file already has a Table of Contents, it is used in preference to the autodetected one. With this option, the autodetected one is always used.'))
|
||||||
|
|
||||||
|
@ -69,13 +69,19 @@ the <spine> element of the OPF file.
|
|||||||
def parse_content(filelist, opts, tdir):
|
def parse_content(filelist, opts, tdir):
|
||||||
os.makedirs(os.path.join(tdir, 'content', 'resources'))
|
os.makedirs(os.path.join(tdir, 'content', 'resources'))
|
||||||
resource_map = {}
|
resource_map = {}
|
||||||
toc = TOC(base_path=tdir)
|
toc = TOC(base_path=tdir, type='root')
|
||||||
for htmlfile in filelist:
|
for htmlfile in filelist:
|
||||||
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
|
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
|
||||||
resource_map, filelist)
|
resource_map, filelist)
|
||||||
hp.populate_toc(toc)
|
hp.populate_toc(toc)
|
||||||
hp.save()
|
hp.save()
|
||||||
|
|
||||||
|
if toc.count('chapter') > opts.toc_threshold:
|
||||||
|
toc.purge(['file', 'link', 'unknown'])
|
||||||
|
if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
|
||||||
|
toc.purge(['link', 'unknown'])
|
||||||
|
toc.purge(['link'], max=opts.max_toc_links)
|
||||||
|
|
||||||
return resource_map, hp.htmlfile_map, toc
|
return resource_map, hp.htmlfile_map, toc
|
||||||
|
|
||||||
def convert(htmlfile, opts, notification=None):
|
def convert(htmlfile, opts, notification=None):
|
||||||
|
@ -118,7 +118,7 @@ class HTMLFile(object):
|
|||||||
raise IgnoreFile(msg, err.errno)
|
raise IgnoreFile(msg, err.errno)
|
||||||
|
|
||||||
self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
|
self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
|
||||||
|
self.title = None
|
||||||
if not self.is_binary:
|
if not self.is_binary:
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||||
@ -126,8 +126,7 @@ class HTMLFile(object):
|
|||||||
|
|
||||||
src = src.decode(encoding, 'replace')
|
src = src.decode(encoding, 'replace')
|
||||||
match = self.TITLE_PAT.search(src)
|
match = self.TITLE_PAT.search(src)
|
||||||
if match is not None:
|
self.title = match.group(1) if match is not None else None
|
||||||
self.title = match.group(1)
|
|
||||||
self.find_links(src)
|
self.find_links(src)
|
||||||
|
|
||||||
|
|
||||||
@ -460,8 +459,28 @@ class Processor(Parser):
|
|||||||
return Parser.save(self)
|
return Parser.save(self)
|
||||||
|
|
||||||
def populate_toc(self, toc):
|
def populate_toc(self, toc):
|
||||||
if self.level >= self.opts.max_toc_recursion:
|
|
||||||
return
|
def add_item(href, fragment, text, target, type='link'):
|
||||||
|
for entry in toc.flat():
|
||||||
|
if entry.href == href and entry.fragment == fragment:
|
||||||
|
return entry
|
||||||
|
if len(text) > 50:
|
||||||
|
text = text[:50] + u'\u2026'
|
||||||
|
return target.add_item(href, fragment, text, type=type)
|
||||||
|
|
||||||
|
# Add chapters to TOC
|
||||||
|
counter = 0
|
||||||
|
if not self.opts.no_chapters_in_toc:
|
||||||
|
for elem in getattr(self, 'detected_chapters', []):
|
||||||
|
text = (u''.join(elem.xpath('string()'))).strip()
|
||||||
|
if text:
|
||||||
|
name = self.htmlfile_map[self.htmlfile.path]
|
||||||
|
href = 'content/'+name
|
||||||
|
counter += 1
|
||||||
|
id = elem.get('id', 'calibre_chapter_%d'%counter)
|
||||||
|
elem.set('id', id)
|
||||||
|
add_item(href, id, text, toc, type='chapter')
|
||||||
|
|
||||||
|
|
||||||
referrer = toc
|
referrer = toc
|
||||||
if self.htmlfile.referrer is not None:
|
if self.htmlfile.referrer is not None:
|
||||||
@ -472,20 +491,13 @@ class Processor(Parser):
|
|||||||
referrer = i
|
referrer = i
|
||||||
break
|
break
|
||||||
|
|
||||||
def add_item(href, fragment, text, target):
|
|
||||||
for entry in toc.flat():
|
|
||||||
if entry.href == href and entry.fragment == fragment:
|
|
||||||
return entry
|
|
||||||
if len(text) > 50:
|
|
||||||
text = text[:50] + u'\u2026'
|
|
||||||
return target.add_item(href, fragment, text)
|
|
||||||
|
|
||||||
name = self.htmlfile_map[self.htmlfile.path]
|
name = self.htmlfile_map[self.htmlfile.path]
|
||||||
href = 'content/'+name
|
href = 'content/'+name
|
||||||
|
|
||||||
if referrer.href != href: # Happens for root file
|
if referrer.href != href: # Happens for root file
|
||||||
target = add_item(href, None, self.htmlfile.title, referrer)
|
target = add_item(href, None, unicode(self.htmlfile.title), referrer, type='file')
|
||||||
|
|
||||||
# Add links to TOC
|
# Add links to TOC
|
||||||
if int(self.opts.max_toc_links) > 0:
|
if int(self.opts.max_toc_links) > 0:
|
||||||
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
||||||
@ -502,18 +514,6 @@ class Processor(Parser):
|
|||||||
name = self.htmlfile_map[self.htmlfile.referrer.path]
|
name = self.htmlfile_map[self.htmlfile.referrer.path]
|
||||||
add_item(href, fragment, text, target)
|
add_item(href, fragment, text, target)
|
||||||
|
|
||||||
# Add chapters to TOC
|
|
||||||
if not self.opts.no_chapters_in_toc:
|
|
||||||
counter = 0
|
|
||||||
for elem in getattr(self, 'detected_chapters', []):
|
|
||||||
text = (u''.join(elem.xpath('string()'))).strip()
|
|
||||||
if text:
|
|
||||||
name = self.htmlfile_map[self.htmlfile.path]
|
|
||||||
href = 'content/'+name
|
|
||||||
counter += 1
|
|
||||||
id = elem.get('id', 'calibre_chapter_%d'%counter)
|
|
||||||
elem.set('id', id)
|
|
||||||
add_item(href, id, text, target)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_css(self):
|
def extract_css(self):
|
||||||
|
@ -393,7 +393,9 @@ class OPF(object):
|
|||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
None : "http://www.idpf.org/2007/opf",
|
None : "http://www.idpf.org/2007/opf",
|
||||||
'dc' : "http://purl.org/dc/elements/1.1/",
|
'dc' : "http://purl.org/dc/elements/1.1/",
|
||||||
|
'dc1' : 'http://purl.org/dc/elements/1.0/',
|
||||||
'opf' : "http://www.idpf.org/2007/opf",
|
'opf' : "http://www.idpf.org/2007/opf",
|
||||||
|
'oebpackage' : 'http://openebook.org/namespaces/oeb-package/1.0/',
|
||||||
}
|
}
|
||||||
xpn = NAMESPACES.copy()
|
xpn = NAMESPACES.copy()
|
||||||
xpn.pop(None)
|
xpn.pop(None)
|
||||||
@ -402,16 +404,15 @@ class OPF(object):
|
|||||||
TEXT = XPath('string()')
|
TEXT = XPath('string()')
|
||||||
|
|
||||||
|
|
||||||
metadata_path = XPath('/opf:package/opf:metadata')
|
metadata_path = XPath('descendant::*[re:match(name(), "metadata", "i")]')
|
||||||
metadata_elem_path = XPath('/opf:package/opf:metadata/*[re:match(name(), $name, "i")]')
|
metadata_elem_path = XPath('descendant::*[re:match(name(), $name, "i")]')
|
||||||
authors_path = XPath('/opf:package/opf:metadata/*' + \
|
authors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut")]')
|
||||||
'[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut")]')
|
tags_path = XPath('descendant::*[re:match(name(), "subject", "i")]')
|
||||||
tags_path = XPath('/opf:package/opf:metadata/*[re:match(name(), "subject", "i")]')
|
isbn_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '+
|
||||||
isbn_path = XPath('/opf:package/opf:metadata/*[re:match(name(), "identifier", "i") and '+
|
|
||||||
'(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
|
'(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
|
||||||
manifest_path = XPath('/opf:package/*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
|
manifest_path = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
|
||||||
spine_path = XPath('/opf:package/*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
|
spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
|
||||||
guide_path = XPath('/opf:package/*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
|
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
|
||||||
|
|
||||||
title = MetadataField('title')
|
title = MetadataField('title')
|
||||||
publisher = MetadataField('publisher')
|
publisher = MetadataField('publisher')
|
||||||
@ -424,25 +425,27 @@ class OPF(object):
|
|||||||
|
|
||||||
|
|
||||||
def __init__(self, stream, basedir=os.getcwdu()):
|
def __init__(self, stream, basedir=os.getcwdu()):
|
||||||
|
if not hasattr(stream, 'read'):
|
||||||
|
stream = open(stream, 'rb')
|
||||||
self.basedir = self.base_dir = basedir
|
self.basedir = self.base_dir = basedir
|
||||||
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True)
|
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True)
|
||||||
|
|
||||||
self.tree = etree.fromstring(raw, self.PARSER)
|
self.root = etree.fromstring(raw, self.PARSER)
|
||||||
self.metadata = self.metadata_path(self.tree)
|
self.metadata = self.metadata_path(self.root)
|
||||||
if not self.metadata:
|
if not self.metadata:
|
||||||
raise ValueError('Malformed OPF file: No <metadata> element')
|
raise ValueError('Malformed OPF file: No <metadata> element')
|
||||||
self.metadata = self.metadata[0]
|
self.metadata = self.metadata[0]
|
||||||
self.unquote_urls()
|
self.unquote_urls()
|
||||||
self.manifest = Manifest()
|
self.manifest = Manifest()
|
||||||
m = self.manifest_path(self.tree)
|
m = self.manifest_path(self.root)
|
||||||
if m:
|
if m:
|
||||||
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
|
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
|
||||||
self.spine = None
|
self.spine = None
|
||||||
s = self.spine_path(self.tree)
|
s = self.spine_path(self.root)
|
||||||
if s:
|
if s:
|
||||||
self.spine = Spine.from_opf_spine_element(s, self.manifest)
|
self.spine = Spine.from_opf_spine_element(s, self.manifest)
|
||||||
self.guide = None
|
self.guide = None
|
||||||
guide = self.guide_path(self.tree)
|
guide = self.guide_path(self.root)
|
||||||
if guide:
|
if guide:
|
||||||
self.guide = Guide.from_opf_guide(guide, basedir)
|
self.guide = Guide.from_opf_guide(guide, basedir)
|
||||||
self.cover_data = (None, None)
|
self.cover_data = (None, None)
|
||||||
@ -452,7 +455,7 @@ class OPF(object):
|
|||||||
return u''.join(self.TEXT(elem))
|
return u''.join(self.TEXT(elem))
|
||||||
|
|
||||||
def itermanifest(self):
|
def itermanifest(self):
|
||||||
return self.manifest_path(self.tree)
|
return self.manifest_path(self.root)
|
||||||
|
|
||||||
def create_manifest_item(self, href, media_type):
|
def create_manifest_item(self, href, media_type):
|
||||||
ids = [i.get('id', None) for i in self.itermanifest()]
|
ids = [i.get('id', None) for i in self.itermanifest()]
|
||||||
@ -478,7 +481,7 @@ class OPF(object):
|
|||||||
return [i.get('id') for i in items]
|
return [i.get('id') for i in items]
|
||||||
|
|
||||||
def iterspine(self):
|
def iterspine(self):
|
||||||
return self.spine_path(self.tree)
|
return self.spine_path(self.root)
|
||||||
|
|
||||||
def create_spine_item(self, idref):
|
def create_spine_item(self, idref):
|
||||||
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
|
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
|
||||||
@ -487,14 +490,14 @@ class OPF(object):
|
|||||||
|
|
||||||
def replace_spine_items_by_idref(self, idref, new_idrefs):
|
def replace_spine_items_by_idref(self, idref, new_idrefs):
|
||||||
items = list(map(self.create_spine_item, new_idrefs))
|
items = list(map(self.create_spine_item, new_idrefs))
|
||||||
spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.tree)[0]
|
spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.root)[0]
|
||||||
old = [i for i in self.iterspine() if i.get('idref', None) == idref]
|
old = [i for i in self.iterspine() if i.get('idref', None) == idref]
|
||||||
for x in old:
|
for x in old:
|
||||||
i = spine.index(x)
|
i = spine.index(x)
|
||||||
spine[i:i+1] = items
|
spine[i:i+1] = items
|
||||||
|
|
||||||
def iterguide(self):
|
def iterguide(self):
|
||||||
return self.guide_path(self.tree)
|
return self.guide_path(self.root)
|
||||||
|
|
||||||
def unquote_urls(self):
|
def unquote_urls(self):
|
||||||
for item in self.itermanifest():
|
for item in self.itermanifest():
|
||||||
@ -507,12 +510,12 @@ class OPF(object):
|
|||||||
|
|
||||||
def fget(self):
|
def fget(self):
|
||||||
ans = []
|
ans = []
|
||||||
for elem in self.authors_path(self.tree):
|
for elem in self.authors_path(self.metadata):
|
||||||
ans.extend([x.strip() for x in self.get_text(elem).split(',')])
|
ans.extend([x.strip() for x in self.get_text(elem).split(',')])
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
remove = list(self.authors_path(self.tree))
|
remove = list(self.authors_path(self.metadata))
|
||||||
for elem in remove:
|
for elem in remove:
|
||||||
self.metadata.remove(elem)
|
self.metadata.remove(elem)
|
||||||
for author in val:
|
for author in val:
|
||||||
@ -526,13 +529,13 @@ class OPF(object):
|
|||||||
def author_sort():
|
def author_sort():
|
||||||
|
|
||||||
def fget(self):
|
def fget(self):
|
||||||
matches = self.authors_path(self.tree)
|
matches = self.authors_path(self.metadata)
|
||||||
if matches:
|
if matches:
|
||||||
ans = matches[0].get('opf:file-as', None)
|
ans = matches[0].get('opf:file-as', None)
|
||||||
return ans if ans else matches[0].get('file-as', None)
|
return ans if ans else matches[0].get('file-as', None)
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
matches = self.authors_path(self.tree)
|
matches = self.authors_path(self.metadata)
|
||||||
if matches:
|
if matches:
|
||||||
matches[0].set('file-as', unicode(val))
|
matches[0].set('file-as', unicode(val))
|
||||||
|
|
||||||
@ -543,12 +546,12 @@ class OPF(object):
|
|||||||
|
|
||||||
def fget(self):
|
def fget(self):
|
||||||
ans = []
|
ans = []
|
||||||
for tag in self.tags_path(self.tree):
|
for tag in self.tags_path(self.metadata):
|
||||||
ans.append(self.get_text(tag))
|
ans.append(self.get_text(tag))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
for tag in list(self.tags_path(self.tree)):
|
for tag in list(self.tags_path(self.metadata)):
|
||||||
self.metadata.remove(tag)
|
self.metadata.remove(tag)
|
||||||
for tag in val:
|
for tag in val:
|
||||||
elem = self.create_metadata_element('subject', ns='dc')
|
elem = self.create_metadata_element('subject', ns='dc')
|
||||||
@ -560,11 +563,11 @@ class OPF(object):
|
|||||||
def isbn():
|
def isbn():
|
||||||
|
|
||||||
def fget(self):
|
def fget(self):
|
||||||
for match in self.isbn_path(self.tree):
|
for match in self.isbn_path(self.metadata):
|
||||||
return match.text if match.text else None
|
return match.text if match.text else None
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
matches = self.isbn_path(self.tree)
|
matches = self.isbn_path(self.metadata)
|
||||||
if not matches:
|
if not matches:
|
||||||
matches = [self.create_metadata_element('identifier', ns='dc',
|
matches = [self.create_metadata_element('identifier', ns='dc',
|
||||||
attrib={'{%s}scheme'%self.NAMESPACES['opf']:'ISBN'})]
|
attrib={'{%s}scheme'%self.NAMESPACES['opf']:'ISBN'})]
|
||||||
@ -572,9 +575,9 @@ class OPF(object):
|
|||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
def get_metadata_element(self, name):
|
def get_metadata_element(self, name):
|
||||||
matches = self.metadata_elem_path(self.tree, name=name)
|
matches = self.metadata_elem_path(self.metadata, name=name)
|
||||||
if matches:
|
if matches:
|
||||||
return matches[0]
|
return matches[-1]
|
||||||
|
|
||||||
def create_metadata_element(self, name, attrib=None, ns='opf'):
|
def create_metadata_element(self, name, attrib=None, ns='opf'):
|
||||||
elem = etree.SubElement(self.metadata, '{%s}%s'%(self.NAMESPACES[ns], name),
|
elem = etree.SubElement(self.metadata, '{%s}%s'%(self.NAMESPACES[ns], name),
|
||||||
@ -583,7 +586,7 @@ class OPF(object):
|
|||||||
return elem
|
return elem
|
||||||
|
|
||||||
def render(self, encoding='utf-8'):
|
def render(self, encoding='utf-8'):
|
||||||
return etree.tostring(self.tree, encoding='utf-8', pretty_print=True)
|
return etree.tostring(self.root, encoding='utf-8', pretty_print=True)
|
||||||
|
|
||||||
def smart_update(self, mi):
|
def smart_update(self, mi):
|
||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
@ -716,7 +719,13 @@ class OPFTest(unittest.TestCase):
|
|||||||
<creator opf:role="aut" file-as="Monkey">Monkey Kitchen, Next</creator>
|
<creator opf:role="aut" file-as="Monkey">Monkey Kitchen, Next</creator>
|
||||||
<dc:subject>One</dc:subject><dc:subject>Two</dc:subject>
|
<dc:subject>One</dc:subject><dc:subject>Two</dc:subject>
|
||||||
<dc:identifier scheme="ISBN">123456789</dc:identifier>
|
<dc:identifier scheme="ISBN">123456789</dc:identifier>
|
||||||
|
<x-metadata>
|
||||||
|
<series>A one book series</series>
|
||||||
|
</x-metadata>
|
||||||
</metadata>
|
</metadata>
|
||||||
|
<manifest>
|
||||||
|
<item id="1" href="a%20%7E%20b" media-type="text/txt" />
|
||||||
|
</manifest>
|
||||||
</package>
|
</package>
|
||||||
'''
|
'''
|
||||||
)
|
)
|
||||||
@ -729,14 +738,14 @@ class OPFTest(unittest.TestCase):
|
|||||||
self.assertEqual(opf.author_sort, 'Monkey')
|
self.assertEqual(opf.author_sort, 'Monkey')
|
||||||
self.assertEqual(opf.tags, ['One', 'Two'])
|
self.assertEqual(opf.tags, ['One', 'Two'])
|
||||||
self.assertEqual(opf.isbn, '123456789')
|
self.assertEqual(opf.isbn, '123456789')
|
||||||
self.assertEqual(opf.series, None)
|
self.assertEqual(opf.series, 'A one book series')
|
||||||
self.assertEqual(opf.series_index, None)
|
self.assertEqual(opf.series_index, None)
|
||||||
|
self.assertEqual(list(opf.itermanifest())[0].get('href'), 'a ~ b')
|
||||||
|
|
||||||
def testWriting(self):
|
def testWriting(self):
|
||||||
for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
|
for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
|
||||||
('author_sort', "Kitchen"), ('tags', ['Three']),
|
('author_sort', "Kitchen"), ('tags', ['Three']),
|
||||||
('isbn', 'a'), ('rating', 3)]:
|
('isbn', 'a'), ('rating', 3), ('series_index', 1)]:
|
||||||
setattr(self.opf, *test)
|
setattr(self.opf, *test)
|
||||||
self.assertEqual(getattr(self.opf, test[0]), test[1])
|
self.assertEqual(getattr(self.opf, test[0]), test[1])
|
||||||
|
|
||||||
@ -748,10 +757,5 @@ def suite():
|
|||||||
def test():
|
def test():
|
||||||
unittest.TextTestRunner(verbosity=2).run(suite())
|
unittest.TextTestRunner(verbosity=2).run(suite())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(test())
|
sys.exit(test())
|
@ -21,7 +21,7 @@ class NCXSoup(BeautifulStoneSoup):
|
|||||||
class TOC(list):
|
class TOC(list):
|
||||||
|
|
||||||
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
|
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
|
||||||
base_path=os.getcwd()):
|
base_path=os.getcwd(), type='unknown'):
|
||||||
self.href = href
|
self.href = href
|
||||||
self.fragment = fragment
|
self.fragment = fragment
|
||||||
if not self.fragment:
|
if not self.fragment:
|
||||||
@ -30,12 +30,32 @@ class TOC(list):
|
|||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.base_path = base_path
|
self.base_path = base_path
|
||||||
self.play_order = play_order
|
self.play_order = play_order
|
||||||
|
self.type = type
|
||||||
|
|
||||||
def add_item(self, href, fragment, text, play_order=None):
|
def count(self, type):
|
||||||
|
return len([i for i in self.flat() if i.type == type])
|
||||||
|
|
||||||
|
def purge(self, types, max=0):
|
||||||
|
remove = []
|
||||||
|
for entry in self.flat():
|
||||||
|
if entry.type in types:
|
||||||
|
remove.append(entry)
|
||||||
|
remove = remove[max:]
|
||||||
|
for entry in remove:
|
||||||
|
if entry.parent is None:
|
||||||
|
continue
|
||||||
|
entry.parent.remove(entry)
|
||||||
|
return remove
|
||||||
|
|
||||||
|
def remove(self, entry):
|
||||||
|
list.remove(self, entry)
|
||||||
|
entry.parent = None
|
||||||
|
|
||||||
|
def add_item(self, href, fragment, text, play_order=None, type='unknown'):
|
||||||
if play_order is None:
|
if play_order is None:
|
||||||
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
|
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
|
||||||
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
|
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
|
||||||
base_path=self.base_path, play_order=play_order))
|
base_path=self.base_path, play_order=play_order, type=type))
|
||||||
return self[-1]
|
return self[-1]
|
||||||
|
|
||||||
def top_level_items(self):
|
def top_level_items(self):
|
||||||
|
@ -77,7 +77,7 @@
|
|||||||
<item>
|
<item>
|
||||||
<widget class="QStackedWidget" name="stack" >
|
<widget class="QStackedWidget" name="stack" >
|
||||||
<property name="currentIndex" >
|
<property name="currentIndex" >
|
||||||
<number>0</number>
|
<number>3</number>
|
||||||
</property>
|
</property>
|
||||||
<widget class="QWidget" name="metadata_page" >
|
<widget class="QWidget" name="metadata_page" >
|
||||||
<layout class="QGridLayout" name="gridLayout_4" >
|
<layout class="QGridLayout" name="gridLayout_4" >
|
||||||
@ -619,15 +619,15 @@ p, li { white-space: pre-wrap; }
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="1" >
|
<item row="3" column="1" >
|
||||||
<widget class="QSpinBox" name="opt_max_toc_recursion" />
|
<widget class="QSpinBox" name="opt_toc_threshold" />
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0" >
|
<item row="3" column="0" >
|
||||||
<widget class="QLabel" name="label_16" >
|
<widget class="QLabel" name="label_16" >
|
||||||
<property name="text" >
|
<property name="text" >
|
||||||
<string>Table of Contents &recursion</string>
|
<string>Chapter &threshold</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="buddy" >
|
<property name="buddy" >
|
||||||
<cstring>opt_max_toc_recursion</cstring>
|
<cstring>opt_toc_threshold</cstring>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import re
|
import re, collections
|
||||||
|
|
||||||
from PyQt4.QtGui import QStatusBar, QMovie, QLabel, QFrame, QHBoxLayout, QPixmap, \
|
from PyQt4.QtGui import QStatusBar, QMovie, QLabel, QFrame, QHBoxLayout, QPixmap, \
|
||||||
QVBoxLayout, QSizePolicy, QToolButton, QIcon
|
QVBoxLayout, QSizePolicy, QToolButton, QIcon
|
||||||
@ -47,6 +47,13 @@ class BookInfoDisplay(QFrame):
|
|||||||
|
|
||||||
def mouseReleaseEvent(self, ev):
|
def mouseReleaseEvent(self, ev):
|
||||||
self.emit(SIGNAL('mr(int)'), 1)
|
self.emit(SIGNAL('mr(int)'), 1)
|
||||||
|
|
||||||
|
WEIGHTS = collections.defaultdict(lambda : 100)
|
||||||
|
WEIGHTS[_('Path')] = 0
|
||||||
|
WEIGHTS[_('Formats')] = 1
|
||||||
|
WEIGHTS[_('Comments')] = 2
|
||||||
|
WEIGHTS[_('Series')] = 3
|
||||||
|
WEIGHTS[_('Tags')] = 4
|
||||||
|
|
||||||
def __init__(self, clear_message):
|
def __init__(self, clear_message):
|
||||||
QFrame.__init__(self)
|
QFrame.__init__(self)
|
||||||
@ -74,7 +81,9 @@ class BookInfoDisplay(QFrame):
|
|||||||
rows = u''
|
rows = u''
|
||||||
self.book_data.setText('')
|
self.book_data.setText('')
|
||||||
self.data = data.copy()
|
self.data = data.copy()
|
||||||
for key in data.keys():
|
keys = data.keys()
|
||||||
|
keys.sort(cmp=lambda x, y: cmp(self.WEIGHTS[x], self.WEIGHTS[y]))
|
||||||
|
for key in keys:
|
||||||
txt = data[key]
|
txt = data[key]
|
||||||
#txt = '<br />\n'.join(textwrap.wrap(txt, 120))
|
#txt = '<br />\n'.join(textwrap.wrap(txt, 120))
|
||||||
if isinstance(key, str):
|
if isinstance(key, str):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user