Support for splitting HTML files to respect maximum flow size limit for EPUB on the SONY Reader.
Commit: 35c8db2dd7
Parent: 5c37760a27
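In outline, the change splits any HTML flow whose on-disk size exceeds the reader profile's flow size before the EPUB is assembled. A minimal sketch of that size check, as an editorial illustration only (names follow the diff below; the 300 KB figure is an invented placeholder, the real limit comes from opts.profile.flow_size):

import os

FLOW_SIZE = 300 * 1024   # hypothetical per-file limit; the real value comes from the device profile

def needs_split(path, flow_size=FLOW_SIZE):
    # A flow (one HTML file inside the EPUB) is only split when it exceeds the limit.
    return os.stat(path).st_size > flow_size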
@@ -170,6 +170,19 @@ def fit_image(width, height, pwidth, pheight):
    return scaled, int(width), int(height)

class CurrentDir(object):

    def __init__(self, path):
        self.path = path
        self.cwd = None

    def __enter__(self, *args):
        self.cwd = os.getcwd()
        os.chdir(self.path)
        return self.cwd

    def __exit__(self, *args):
        os.chdir(self.cwd)

def sanitize_file_name(name):
    '''
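CurrentDir above is a plain context manager, so callers can hop into a book's directory and be returned to the previous working directory when the block ends. A small usage sketch (the from calibre import matches the import added later in this commit; the directory name is made up):

import os
from calibre import CurrentDir

def list_book_files(book_dir):
    # Work inside book_dir for the duration of the block; the previous cwd is restored on exit.
    with CurrentDir(book_dir):
        return sorted(os.listdir('.'))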
@@ -105,5 +105,8 @@ to auto-generate a Table of Contents.
              help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
              help=_('Print generated NCX file to stdout'))
    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', default=False,
              help=_('Keep intermediate files during processing by html2epub'))
    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
              help=_('Extract the contents of the produced EPUB file to the specified directory.'))
    return c
@@ -97,7 +97,9 @@ def convert(htmlfile, opts, notification=None):
    opts.chapter = XPath(opts.chapter,
                         namespaces={'re':'http://exslt.org/regular-expressions'})

    with TemporaryDirectory('_html2epub') as tdir:
    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
        if opts.keep_intermediate:
            print 'Intermediate files in', tdir
        resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]

@@ -159,6 +161,8 @@ def convert(htmlfile, opts, notification=None):
        epub = initialize_container(opts.output)
        epub.add_dir(tdir)
        print 'Output written to', opts.output
        if opts.extract_to is not None:
            epub.extractall(opts.extract_to)


def main(args=sys.argv):
@@ -1,4 +1,4 @@
#!/usr/bin/env python
from __future__ import with_statement
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
@@ -7,108 +7,212 @@ __docformat__ = 'restructuredtext en'
Split the flows in an epub file to conform to size limitations.
'''

import sys, os, math, copy
import os, math, copy, logging, functools
from urllib import unquote

from lxml.etree import parse, XMLParser
from lxml.etree import XPath as _XPath
from lxml import etree, html
from lxml.cssselect import CSSSelector
from cssutils import CSSParser

from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import tostring
from calibre import CurrentDir, LoggingInterface

PARSER = XMLParser(recover=True)
XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
content = functools.partial(os.path.join, 'content')

SPLIT_ATTR = 'cs'
SPLIT_POINT_ATTR = 'csp'

class SplitError(ValueError):

    def __init__(self, path):
        ValueError.__init__(self, _('Could not find reasonable point at which to split: ')+os.path.basename(path))
    def __init__(self, path, root):
        size = len(tostring(root))/1024.
        ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
                            (os.path.basename(path), size))

def split_tree(tree, split_point, before, opts, filepath):
    trees = set([])

class Splitter(LoggingInterface):

    def __init__(self, path, opts, always_remove=False):
        LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
        self.setup_cli_handler(opts.verbose)
        self.path = path
        self.always_remove = always_remove
        self.base = os.path.splitext(path)[0] + '_split_%d.html'
        self.opts = opts
        self.log_info('\tSplitting %s (%d KB)', path, os.stat(content(path)).st_size/1024.)
        root = html.fromstring(open(content(path)).read())

        css = XPath('//link[@type = "text/css" and @rel = "stylesheet"]')(root)
        if css:
            cssp = os.path.join('content', *(css[0].get('href').split('/')))
            self.log_debug('\t\tParsing stylesheet...')
            stylesheet = CSSParser().parseString(open(cssp, 'rb').read())
        else:
            stylesheet = None
        self.page_breaks = []
        if stylesheet is not None:
            self.find_page_breaks(stylesheet, root)

        self.trees = self.split(root.getroottree())
        self.commit()
        self.log_info('\t\tSplit into %d parts.', len(self.trees))
        if self.opts.verbose:
            for f in self.files:
                self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
        self.trees = None
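The XPath helper bound with functools.partial above pre-wires the EXSLT regular-expressions namespace, which is what lets selectors like the heading match in find_split_point further down use re:match directly. A standalone illustration (the sample markup is invented):

import functools
from lxml import html
from lxml.etree import XPath as _XPath

XPath = functools.partial(_XPath, namespaces={'re': 'http://exslt.org/regular-expressions'})

root = html.fromstring('<body><h2>Title</h2><p>text</p></body>')
# Matches any heading tag h1-h6, case-insensitively, via the EXSLT regexp extension.
headings = XPath('//*[re:match(name(), "h[1-6]", "i")]')(root)
print([h.tag for h in headings])   # ['h2']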
    def split(self, tree):
        '''
        Split ``tree`` into a *before* and *after* tree, preserving tag structure,
        but not duplicating any text. All tags that have had their text and tail
        removed have the attribute ``calibre_split`` set to 1.
        '''
        self.log_debug('\t\tSplitting...')
        root = tree.getroot()
        split_point, before = self.find_split_point(root)
        if split_point is None:
            if not self.always_remove:
                self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
            raise SplitError(self.path, root)
        tree2 = copy.deepcopy(tree)
        root2 = tree2.getroot()
        body, body2 = root.body, root2.body
        trees = []
        path = tree.getpath(split_point)
        root, root2 = tree.getroot(), tree2.getroot()
        body, body2 = root.xpath('//body')[0], root2.xpath('//body')[0]
        split_point2 = root2.xpath(path)[0]

        def nix_element(elem, top=True):
            if self.always_remove:
                parent = elem.getparent()
                index = parent.index(elem)
                if top:
                    parent.remove(elem)
                else:
                    index = parent.index(elem)
                    parent[index:index+1] = list(elem.iterchildren())
            else:
                elem.text = u''
                elem.tail = u''
                elem.set(SPLIT_ATTR, '1')
                if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
                    elem.set('style', 'display:none;')

        def fix_split_point(sp):
            sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')

        # Tree 1
        hit_split_point = False
        for elem in body.iterdescendants():
        for elem in list(body.iterdescendants(etree.Element)):
            if elem.get(SPLIT_ATTR, '0') == '1':
                continue
            if elem is split_point:
                hit_split_point = True
                if before:
                    elem.text = u''
                    elem.tail = u''
                    elem.set('calibre_split', '1')
                    nix_element(elem)
                fix_split_point(elem)
                continue
            if hit_split_point:
                elem.text = u''
                elem.tail = u''
            elem.set('calibre_split', '1' if hit_split_point else '0')
                nix_element(elem)

        # Tree 2
        hit_split_point = False
        for elem in body2.iterdescendants():
        for elem in list(body2.iterdescendants(etree.Element)):
            if elem.get(SPLIT_ATTR, '0') == '1':
                continue
            if elem is split_point2:
                hit_split_point = True
                if not before:
                    elem.text = u''
                    elem.tail = u''
                    elem.set('calibre_split', '1')
                    nix_element(elem, top=False)
                fix_split_point(elem)
                continue
            if not hit_split_point:
                elem.text = u''
                elem.tail = u''
            elem.set('calibre_split', '0' if hit_split_point else '1')
                nix_element(elem, top=False)

        for t, r in [(tree, root), (tree2, root2)]:
            if len(tostring(r)) < opts.profile.flow_size:
            size = len(tostring(r))
            if size <= self.opts.profile.flow_size:
                trees.append(t)
                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(trees), size/1024.)
            else:
                new_split_point, before = find_split_point(t)
                if new_split_point is None:
                    raise SplitError(filepath)
                trees.extend(split_tree(t, new_split_point, before, opts, filepath))
                trees.extend(self.split(t))

        return trees
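The method above works by deep-copying the tree and then blanking element text on opposite sides of the chosen split point in each copy, recursing while a copy still serializes larger than the flow limit. A toy version of the duplicate-and-blank idea (not the calibre code; the markup is invented):

import copy
from lxml import etree

body = etree.fromstring('<body><p>one</p><p>two</p><p>three</p></body>')
split_point = body[1]            # split at the middle <p>
body2 = copy.deepcopy(body)
split_point2 = body2[1]

def blank(elem):
    elem.text, elem.tail = '', ''

# First copy keeps everything before the split point.
hit = False
for elem in body.iter('p'):
    if elem is split_point:
        hit = True
    if hit:
        blank(elem)

# Second copy keeps the split point and everything after it.
hit = False
for elem in body2.iter('p'):
    if elem is split_point2:
        hit = True
        continue
    if not hit:
        blank(elem)

parts = [etree.tostring(body), etree.tostring(body2)]
# parts[0] now renders only "one"; parts[1] renders "two" and "three".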
def find_split_point(tree):
    root = tree.getroot()
    css = root.xpath('//style[@type="text/css"]')
    if css:

        def pick_elem(elems):
            if elems:
                elems = [i for i in elems if elem.get('calibre_split', '0') != '1']
                if elems:
                    i = int(math.floor(len(elems)/2.))
                    return elems[i]

        def selector_element(rule):
            try:
                selector = CSSSelector(rule.selectorText)
                return pick_elem(selector(root))
            except:
                return None

        css = css[0].text
        from cssutils import CSSParser
        stylesheet = CSSParser().parseString(css)
    def find_page_breaks(self, stylesheet, root):
        '''
        Find all elements that have either page-break-before or page-break-after set.
        '''
        page_break_selectors = set([])
        for rule in stylesheet:
            if rule.type != rule.STYLE_RULE:
                continue
            before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
            if before and before != 'avoid':
                elem = selector_element(rule)
                if elem is not None:
                    return elem, True
            after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
            try:
                if before and before != 'avoid':
                    page_break_selectors.add((CSSSelector(rule.selectorText), True))
            except:
                pass
            try:
                if after and after != 'avoid':
                    elem = selector_element(rule)
                    if elem is not None:
                        return elem, False
                    page_break_selectors.add((CSSSelector(rule.selectorText), False))
            except:
                pass

        for path in ('//*[re:match(name(), "h[1-6]", "i")', '/body/div', '//p'):
        page_breaks = set([])
        for selector, before in page_break_selectors:
            for elem in selector(root):
                elem.pb_before = before
                page_breaks.add(elem)

        for i, elem in enumerate(root.iter()):
            elem.pb_order = i

        page_breaks = list(page_breaks)
        page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
        tree = root.getroottree()
        self.page_breaks = [(XPath(tree.getpath(x)), x.pb_before) for x in page_breaks]
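find_page_breaks boils down to: parse the stylesheet with cssutils, keep the selectors of rules that force a page break, and later apply them to the document with lxml's CSSSelector. A self-contained sketch of that pattern (the stylesheet and markup here are invented):

from cssutils import CSSParser
from lxml import html
from lxml.cssselect import CSSSelector

raw_css = 'h1 { page-break-before: always } .note { page-break-after: avoid }'
root = html.fromstring('<body><h1>Chapter</h1><p class="note">text</p></body>')

selectors = []
for rule in CSSParser().parseString(raw_css):
    if rule.type != rule.STYLE_RULE:
        continue
    before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '')
    if before.strip().lower() not in ('', 'avoid'):
        selectors.append((CSSSelector(rule.selectorText), True))

page_breaks = [elem for sel, is_before in selectors for elem in sel(root)]
print([e.tag for e in page_breaks])   # ['h1']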
    def find_split_point(self, root):
        '''
        Find the tag at which to split the tree rooted at `root`.
        Search order is:
            * page breaks
            * Heading tags
            * <div> tags
            * <p> tags

        We try to split in the "middle" of the file (as defined by tag counts).
        '''
        def pick_elem(elems):
            if elems:
                elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') != '1'\
                         and i.get(SPLIT_ATTR, '0') != '1']
                if elems:
                    i = int(math.floor(len(elems)/2.))
                    elems[i].set(SPLIT_POINT_ATTR, '1')
                    return elems[i]

        page_breaks = []
        for x in self.page_breaks:
            pb = x[0](root)
            if pb:
                page_breaks.append(pb[0])

        elem = pick_elem(page_breaks)
        if elem is not None:
            i = page_breaks.index(elem)
            return elem, self.page_breaks[i][1]

        for path in ('//*[re:match(name(), "h[1-6]", "i")]', '/html/body/div', '//p'):
            elems = root.xpath(path)
            elem = pick_elem(elems)
            if elem is not None:
@@ -116,67 +220,134 @@ def find_split_point(tree):

    return None, True

def do_split(path, opts):
    tree = parse(path, parser=PARSER)
    split_point, before = find_split_point(tree)
    if split_point is None:
        raise SplitError(path)
    trees = split_tree(tree, split_point, before, opts, path)
    base = os.path.splitext(os.path.basename(path))[0] + '_split_%d.html'
    anchor_map = {None:base%0}
    files = []
    for i, tree in enumerate(trees):
    def commit(self):
        '''
        Commit all changes caused by the split. This removes the previously
        introduced ``calibre_split`` attribute and calculates an *anchor_map* for
        all anchors in the original tree. Internal links are re-directed. The
        original file is deleted and the split files are saved.
        '''
        self.anchor_map = {None:self.base%0}
        self.files = []

        for i, tree in enumerate(self.trees):
            root = tree.getroot()
            files.append(base%i)
            for elem in root.xpath('//*[@id and @calibre_split = "1"]'):
                anchor_map[elem.get('id')] = files[-1]
                elem.attrib.pop('calibre_split')
            for elem in root.xpath('//*[@calibre_split]'):
                elem.attrib.pop('calibre_split')
            open(os.path.join(os.path.dirname(path), files[-1]), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
    os.remove(path)
    return path, files, anchor_map
            self.files.append(self.base%i)
            for elem in root.xpath('//*[@id]'):
                if elem.get(SPLIT_ATTR, '0') == '0':
                    self.anchor_map[elem.get('id')] = self.files[-1]
            for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
                elem.attrib.pop(SPLIT_ATTR, None)
                elem.attrib.pop(SPLIT_POINT_ATTR, '0')

def fix_opf(opf, orig_file, files, anchor_map):
    orig = None
    for item in opf.manifest:
        if os.path.samefile(orig_file, item.path):
            orig = item
            break
    opf.manifest.remove(orig)
    ids = []
    for f in files:
        ids.append(opf.manifest.add_item(f))
    index = None
    for i, item in enumerate(opf.spine):
        if item.id == orig.id:
            index = i
            break
        for current, tree in zip(self.files, self.trees):
            for a in tree.getroot().xpath('//a[@href]'):
                href = a.get('href').strip()
                if href.startswith('#'):
                    anchor = href[1:]
                    file = self.anchor_map[anchor]
                    if file != current:
                        a.set('href', file+href)
            open(content(current), 'wb').\
                write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))

        os.remove(content(self.path))
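commit() builds anchor_map so that every id ends up pointing at the part that actually contains it, and internal '#...' links are then rewritten against that map. A toy illustration of the redirect step (file names are invented):

anchor_map = {None: 'chapter_split_0.html', 'sec2': 'chapter_split_1.html'}

def redirect(href, current_file):
    # Rewrite an internal link so it targets the split part that now holds the anchor.
    if not href.startswith('#'):
        return href
    target = anchor_map.get(href[1:], anchor_map[None])
    return href if target == current_file else target + href

print(redirect('#sec2', 'chapter_split_0.html'))   # chapter_split_1.html#sec2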
    def fix_opf(self, opf):
        '''
        Fix references to the split file in the OPF.
        '''
        items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
        new_items = [('content/'+f, None) for f in self.files]
        id_map = {}
        for item in items:
            id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)

        for id in id_map.keys():
            opf.replace_spine_items_by_idref(id, id_map[id])

        for ref in opf.iterguide():
            href = ref.get('href', '')
            if href.startswith('content/'+self.path):
                href = href.split('#')
                frag = None
                if len(href) > 1:
                    frag = href[1]
                new_file = self.anchor_map[frag]
                ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
def fix_content_links(html_files, changes, opts):
    split_files = [f.path for f in changes]
    anchor_maps = [f.anchor_map for f in changes]
    files = list(html_files)
    for j, f in enumerate(split_files):
        try:
            i = files.index(f)
            files[i:i+1] = changes[j].files
        except ValueError:
            continue

    for htmlfile in files:
        changed = False
        root = html.fromstring(open(content(htmlfile), 'rb').read())
        for a in root.xpath('//a[@href]'):
            href = a.get('href')
            if not href.startswith('#'):
                href = href.split('#')
                anchor = href[1] if len(href) > 1 else None
                href = href[0]
                if href in split_files:
                    newf = anchor_maps[split_files.index(href)][anchor]
                    frag = ('#'+anchor) if anchor else ''
                    a.set('href', newf+frag)
                    changed = True

        if changed:
            open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
def fix_ncx(path, changes):
    split_files = [f.path for f in changes]
    anchor_maps = [f.anchor_map for f in changes]
    tree = etree.parse(path)
    changed = False
    for content in tree.getroot().xpath('//x:content[@src]', namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
        href = content.get('src')
        if not href.startswith('#'):
            href = href.split('#')
            anchor = href[1] if len(href) > 1 else None
            href = href[0].split('/')[-1]
            if href in split_files:
                newf = anchor_maps[split_files.index(href)][anchor]
                frag = ('#'+anchor) if anchor else ''
                content.set('src', 'content/'+newf+frag)
                changed = True
    if changed:
        open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
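fix_ncx depends on one detail that is easy to miss: NCX <content> elements live in the DAISY namespace, so the lookup needs an explicit prefix mapping. A minimal sketch of that namespaced rewrite (the NCX snippet and target file name are invented):

from lxml import etree

NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
ncx = etree.fromstring(
    '<ncx xmlns="%s"><navMap><navPoint><content src="a.html#sec2"/></navPoint></navMap></ncx>' % NCX_NS)

for content in ncx.xpath('//x:content[@src]', namespaces={'x': NCX_NS}):
    src, frag = (content.get('src').split('#') + [None])[:2]
    if src == 'a.html':   # pretend a.html was split and sec2 landed in part 1
        content.set('src', 'content/a_split_1.html' + ('#' + frag if frag else ''))

print(etree.tostring(ncx))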
def split(pathtoopf, opts):
    return
    pathtoopf = os.path.abspath(pathtoopf)
    with CurrentDir(os.path.dirname(pathtoopf)):
        opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
        html_files = []
        for item in opf.manifest:
            if 'html' in item.mime_type.lower():
                html_files.append(item.path)
        for item in opf.itermanifest():
            if 'html' in item.get('media-type', '').lower():
                html_files.append(unquote(item.get('href')).split('/')[-1])
        changes = []
        for f in html_files:
            if os.stat(f).st_size > opts.profile.flow_size:
                fix_opf(opf, *do_split(f, opts))
        if changes:
            pass
            if os.stat(content(f)).st_size > opts.profile.flow_size:
                try:
                    changes.append(Splitter(f, opts))
                except SplitError:
                    changes.append(Splitter(f, opts, always_remove=True))
                changes[-1].fix_opf(opf)

        open(pathtoopf, 'wb').write(opf.render())
        fix_content_links(html_files, changes, opts)


def main(args=sys.argv):
    return 0

if __name__ == '__main__':
    sys.exit(main())
        for item in opf.itermanifest():
            if item.get('media-type', '') == 'application/x-dtbncx+xml':
                fix_ncx(item.get('href'), changes)
                break
@@ -228,8 +228,14 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
        raise ValueError('OPF does not have a spine')
    flat = []
    for path in opf_reader.spine.items():
        path = os.path.abspath(path)
        if path not in flat:
            flat.append(os.path.abspath(path))
    for item in opf_reader.manifest:
        if 'html' in item.mime_type:
            path = os.path.abspath(item.path)
            if path not in flat:
                flat.append(path)
    flat = [HTMLFile(path, 0, encoding, verbose) for path in flat]
    return flat
@@ -329,14 +335,15 @@ class Parser(PreProcessor, LoggingInterface):
            if self.root.get(bad, None) is not None:
                self.root.attrib.pop(bad)


    def save_path(self):
        return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])

    def save(self):
        '''
        Save processed HTML into the content directory.
        Should be called after all HTML processing is finished.
        '''
        with open(os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]), 'wb') as f:
        with open(self.save_path(), 'wb') as f:
            ans = tostring(self.root, pretty_print=self.opts.pretty_print)
            ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
            ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
@@ -390,21 +397,26 @@ class Parser(PreProcessor, LoggingInterface):
        if not isinstance(olink, unicode):
            olink = olink.decode(self.htmlfile.encoding)
        link = self.htmlfile.resolve(olink)
        frag = (('#'+link.fragment) if link.fragment else '')
        if link.path == self.htmlfile.path:
            return frag if frag else '#'
        if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
            return olink
        if link.path in self.htmlfiles:
            return self.htmlfile_map[link.path]
            return self.htmlfile_map[link.path] + frag
        if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
            return olink # This happens when --max-levels is used
        if link.path in self.resource_map.keys():
            return self.resource_map[link.path]
            return self.resource_map[link.path] + frag
        name = os.path.basename(link.path)
        name, ext = os.path.splitext(name)
        name += ('_%d'%len(self.resource_map)) + ext
        shutil.copyfile(link.path, os.path.join(self.resource_dir, name))
        name = 'resources/' + name
        self.resource_map[link.path] = name
        return name
        return name + frag


class Processor(Parser):
    '''
@@ -438,9 +450,12 @@ class Processor(Parser):

    def save(self):
        head = self.head if self.head is not None else self.body
        style = etree.SubElement(head, 'style', attrib={'type':'text/css'})
        style.text='\n'+self.css
        style_path = os.path.basename(self.save_path())+'.css'
        style = etree.SubElement(head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
                                                       'href':'resources/'+style_path})
        style.tail = '\n\n'
        style_path = os.path.join(os.path.dirname(self.save_path()), 'resources', style_path)
        open(style_path, 'wb').write(self.css.encode('utf-8'))
        return Parser.save(self)

    def populate_toc(self, toc):
@@ -530,6 +545,8 @@ class Processor(Parser):
                css.append('\n'.join(style.xpath('./text()')))
                style.getparent().remove(style)

        cache = {}
        class_counter = 0
        for font in self.root.xpath('//font'):
            try:
                size = int(font.attrib.pop('size', '3'))
@@ -542,16 +559,33 @@ class Processor(Parser):
            color = font.attrib.pop('color', None)
            if color is not None:
                setting += 'color:%s'%color
            id = get_id(font, counter)
            counter += 1
            css.append('#%s { %s }'%(id, setting))
            classname = cache.get(setting, None)
            if classname is None:
                classname = 'calibre_class_%d'%class_counter
                class_counter += 1
                cache[setting] = classname
            cn = font.get('class', '')
            if cn: cn += ' '
            cn += classname
            font.set('class', cn)

        for elem in self.root.xpath('//*[@style]'):
            id = get_id(elem, counter)
            counter += 1
            css.append('#%s {%s}'%(id, elem.get('style')))
            setting = elem.get('style')
            classname = cache.get(setting, None)
            if classname is None:
                classname = 'calibre_class_%d'%class_counter
                class_counter += 1
                cache[setting] = classname
            cn = elem.get('class', '')
            if cn: cn += ' '
            cn += classname
            elem.set('class', cn)
            elem.attrib.pop('style')

        for setting, cn in cache.items():
            css.append('.%s {%s}'%(cn, setting))


        self.raw_css = '\n\n'.join(css)
        self.css = unicode(self.raw_css)
        if self.opts.override_css:
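The change above replaces the old one-#id-rule-per-element scheme with a cache that collapses identical inline style strings into shared classes. A self-contained sketch of that pattern (markup invented):

from lxml import html

root = html.fromstring('<div><p style="color:red">a</p><p style="color:red">b</p></div>')
cache, css = {}, []

for elem in root.xpath('//*[@style]'):
    setting = elem.attrib.pop('style')
    # Identical style strings share one generated class name.
    classname = cache.setdefault(setting, 'calibre_class_%d' % len(cache))
    elem.set('class', (elem.get('class', '') + ' ' + classname).strip())

for setting, cn in cache.items():
    css.append('.%s {%s}' % (cn, setting))

print('\n'.join(css))   # one rule, shared by both paragraphs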
@@ -688,6 +722,9 @@ def create_metadata(basepath, mi, filelist, resources):
    '''
    mi = OPFCreator(basepath, mi)
    entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources]
    for f in filelist:
        if os.path.exists(os.path.join(basepath, 'content', 'resources', f+'.css')):
            entries.append(('content/resources/'+f+'.css', 'text/css'))
    mi.create_manifest(entries)
    mi.create_spine(['content/'+f for f in filelist])
    return mi
@@ -143,7 +143,8 @@ class ResourceCollection(object):
        self._resources.remove(resource)

    def replace(self, start, end, items):
        pass
        'Same as list[start:end] = items'
        self._resources[start:end] = items

    @staticmethod
    def from_directory_contents(top, topdown=True):
@@ -156,6 +156,19 @@ class Spine(ResourceCollection):
            self.manifest = manifest


    def replace(self, start, end, ids):
        '''
        Replace the items between start (inclusive) and end (not inclusive) with
        the items identified by ids. ids can be a list of any length.
        '''
        items = []
        for id in ids:
            path = self.manifest.path_for_id(id)
            if path is None:
                raise ValueError('id %s not in manifest')
            items.append(Spine.Item(lambda x: id, path, is_path=True))
        ResourceCollection.replace(start, end, items)

    def linear_items(self):
        for r in self:
            if r.is_linear:
@@ -297,6 +310,55 @@ class OPF(object):
    def get_text(self, elem):
        return u''.join(self.TEXT(elem))

    def itermanifest(self):
        return self.manifest_path(self.tree)

    def create_manifest_item(self, href, media_type):
        ids = [i.get('id', None) for i in self.itermanifest()]
        id = None
        for c in xrange(1, sys.maxint):
            id = 'id%d'%c
            if id not in ids:
                break
        if not media_type:
            media_type = 'application/xhtml+xml'
        ans = etree.Element('{%s}item'%self.NAMESPACES['opf'],
                            attrib={'id':id, 'href':href, 'media-type':media_type})
        ans.tail = '\n\t\t'
        return ans

    def replace_manifest_item(self, item, items):
        items = [self.create_manifest_item(*i) for i in items]
        for i, item2 in enumerate(items):
            item2.set('id', item.get('id')+'.%d'%(i+1))
        manifest = item.getparent()
        index = manifest.index(item)
        manifest[index:index+1] = items
        return [i.get('id') for i in items]

    def iterspine(self):
        return self.spine_path(self.tree)

    def create_spine_item(self, idref):
        ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
        ans.tail = '\n\t\t'
        return ans

    def replace_spine_items_by_idref(self, idref, new_idrefs):
        items = list(map(self.create_spine_item, new_idrefs))
        spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.tree)[0]
        old = [i for i in self.iterspine() if i.get('idref', None) == idref]
        for x in old:
            i = spine.index(x)
            spine[i:i+1] = items

    def iterguide(self):
        return self.guide_path(self.tree)

    def render(self):
        return etree.tostring(self.tree, encoding='UTF-8', xml_declaration=True,
                              pretty_print=True)

    @apply
    def authors():
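replace_manifest_item swaps one manifest <item> for several, deriving the new ids from the old one (id.1, id.2, ...) so the spine can then be patched by idref. A hedged lxml illustration of that surgery (the OPF fragment and file names are invented):

from lxml import etree

OPF_NS = 'http://www.idpf.org/2007/opf'
manifest = etree.fromstring(
    '<manifest xmlns="%s"><item id="id1" href="content/a.html" media-type="application/xhtml+xml"/></manifest>' % OPF_NS)

old = manifest[0]
new_items = []
for i, href in enumerate(['content/a_split_0.html', 'content/a_split_1.html']):
    item = etree.Element('{%s}item' % OPF_NS,
                         attrib={'id': '%s.%d' % (old.get('id'), i + 1),
                                 'href': href,
                                 'media-type': 'application/xhtml+xml'})
    new_items.append(item)

index = manifest.index(old)
manifest[index:index + 1] = new_items   # the old item is replaced in place
print(etree.tostring(manifest, pretty_print=True))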
@@ -24,6 +24,8 @@ class TOC(list):
                 base_path=os.getcwd()):
        self.href = href
        self.fragment = fragment
        if not self.fragment:
            self.fragment = None
        self.text = text
        self.parent = parent
        self.base_path = base_path
@@ -153,7 +155,19 @@ class TOC(list):
                continue
            purl = urlparse(unquote(a['href']))
            href, fragment = purl[2], purl[5]
            if not fragment:
                fragment = None
            else:
                fragment = fragment.strip()
            href = href.strip()

            txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
            add = True
            for i in self.flat():
                if i.href == href and i.fragment == fragment:
                    add = False
                    break
            if add:
                self.add_item(href, fragment, txt)

    def render(self, stream, uid):
@@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Miscellaneous widgets used in the GUI
'''
import re, os
import re, os, traceback
from PyQt4.QtGui import QListView, QIcon, QFont, QLabel, QListWidget, \
                        QListWidgetItem, QTextCharFormat, QApplication, \
                        QSyntaxHighlighter, QCursor, QColor, QWidget, QDialog, \
@@ -254,7 +254,12 @@ class FontFamilyModel(QAbstractListModel):

    def __init__(self, *args):
        QAbstractListModel.__init__(self, *args)
        try:
            self.families = find_font_families()
        except:
            self.families = []
            print 'WARNING: Could not load fonts'
            traceback.print_exc()
        self.families.sort()
        self.families[:0] = ['None']
@@ -278,7 +278,7 @@ def download_tarball():

def main(args=sys.argv):
    defdir = '/opt/calibre'
    destdir = raw_input('Enter the installation directory for calibre [%s]: '%defdir).strip()
    destdir = raw_input('Enter the installation directory for calibre (Its contents will be deleted!)[%s]: '%defdir).strip()
    if not destdir:
        destdir = defdir
    if os.path.exists(destdir):
@@ -147,6 +147,7 @@ def cli_docs(app):
        info(bold('creating docs for %s...'%cmd))
        open(os.path.join('cli', cmd+'.rst'), 'wb').write(raw)


def auto_member(dirname, arguments, options, content, lineno,
                content_offset, block_text, state, state_machine):
    name = arguments[0]

@@ -196,8 +197,7 @@ def auto_member(dirname, arguments, options, content, lineno,
    node = nodes.paragraph()
    state.nested_parse(result, content_offset, node)

    return node

    return list(node)
@@ -134,6 +134,7 @@ There can be several causes for this:
* **Any windows version**: If this happens during an initial run of calibre, try deleting the folder you chose for your ebooks and restarting calibre.
* **Windows Vista**: If the folder :file:`C:\\Users\\Your User Name\\AppData\\Local\\VirtualStore\\Program Files\\calibre` exists, delete it. Uninstall |app|. Reboot. Re-install.
* **Any windows version**: Search your computer for a folder named :file:`_ipython`. Delete it and try again.
* **Any windows version**: Try disabling any antivirus program you have running and see if that fixes it. Also try disabling any firewall software that prevents connections to the local computer.

If it still won't launch, start a command prompt (press the windows key and R; then type :command:`cmd.exe` in the Run dialog that appears). At the command prompt type the following command and press Enter::
@@ -57,19 +57,21 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
    atexit.register(shutil.rmtree, tdir, True)
    return tdir

class TemporaryDirectory(str):
class TemporaryDirectory(object):
    '''
    A temporary directory to be used in a with statement.
    '''
    def __init__(self, suffix='', prefix='', dir=None):
    def __init__(self, suffix='', prefix='', dir=None, keep=False):
        self.suffix = suffix
        self.prefix = prefix
        self.dir = dir
        self.keep = keep

    def __enter__(self):
        self.tdir = tempfile.mkdtemp(self.suffix, __appname__+"_"+ __version__+"_" +self.prefix, self.dir)
        return self.tdir

    def __exit__(self, *args):
        if not self.keep:
            shutil.rmtree(self.tdir)
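A usage note for the new keep flag: when keep is True the directory survives the with block, which is what --keep-intermediate-files relies on. A short sketch (the calibre.ptempfile module path is an assumption on my part):

from calibre.ptempfile import TemporaryDirectory   # module path is an assumption

with TemporaryDirectory('_demo', keep=True) as tdir:
    intermediate = tdir            # do work inside tdir here
# Because keep=True, __exit__ skips shutil.rmtree, so the directory survives for inspection.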
@@ -53,6 +53,9 @@ def import_from_launchpad(url):
            open(out, 'wb').write(tf.extractfile(next).read())
        next = tf.next()
    check_for_critical_bugs()
    path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
    print path
    subprocess.check_call('python setup.py translations'.split(), dir=path)
    return 0

def check_for_critical_bugs():