Support for splitting HTML files to respect maximum flow size limit for EPUB on the SONY Reader.

This commit is contained in:
Kovid Goyal 2008-09-21 22:47:43 -07:00
parent 5c37760a27
commit 35c8db2dd7
14 changed files with 515 additions and 199 deletions

View File

@ -170,6 +170,19 @@ def fit_image(width, height, pwidth, pheight):
return scaled, int(width), int(height) return scaled, int(width), int(height)
class CurrentDir(object):
    '''
    Context manager that changes the process working directory to `path`
    on entry and restores the previous working directory on exit.
    '''

    def __init__(self, path):
        self.path = path  # directory to switch into on __enter__
        self.cwd = None   # previous working directory, recorded on entry

    def __enter__(self, *args):
        self.cwd = os.getcwd()
        os.chdir(self.path)
        # Return the old directory so callers can refer to it in the with block
        return self.cwd

    def __exit__(self, *args):
        # Restore whatever directory was current when we entered
        os.chdir(self.cwd)
def sanitize_file_name(name): def sanitize_file_name(name):
''' '''

View File

@ -105,5 +105,8 @@ to auto-generate a Table of Contents.
help=_('Print generated OPF file to stdout')) help=_('Print generated OPF file to stdout'))
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug', c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
help=_('Print generated NCX file to stdout')) help=_('Print generated NCX file to stdout'))
c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', default=False,
help=_('Keep intermediate files during processing by html2epub'))
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
help=_('Extract the contents of the produced EPUB file to the specified directory.'))
return c return c

View File

@ -97,7 +97,9 @@ def convert(htmlfile, opts, notification=None):
opts.chapter = XPath(opts.chapter, opts.chapter = XPath(opts.chapter,
namespaces={'re':'http://exslt.org/regular-expressions'}) namespaces={'re':'http://exslt.org/regular-expressions'})
with TemporaryDirectory('_html2epub') as tdir: with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
if opts.keep_intermediate:
print 'Intermediate files in', tdir
resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir) resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()] resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
@ -159,6 +161,8 @@ def convert(htmlfile, opts, notification=None):
epub = initialize_container(opts.output) epub = initialize_container(opts.output)
epub.add_dir(tdir) epub.add_dir(tdir)
print 'Output written to', opts.output print 'Output written to', opts.output
if opts.extract_to is not None:
epub.extractall(opts.extract_to)
def main(args=sys.argv): def main(args=sys.argv):

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -7,176 +7,347 @@ __docformat__ = 'restructuredtext en'
Split the flows in an epub file to conform to size limitations. Split the flows in an epub file to conform to size limitations.
''' '''
import sys, os, math, copy import os, math, copy, logging, functools
from urllib import unquote
from lxml.etree import parse, XMLParser from lxml.etree import XPath as _XPath
from lxml import etree, html
from lxml.cssselect import CSSSelector from lxml.cssselect import CSSSelector
from cssutils import CSSParser
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import tostring from calibre.ebooks.epub import tostring
from calibre import CurrentDir, LoggingInterface
PARSER = XMLParser(recover=True) XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
content = functools.partial(os.path.join, 'content')
SPLIT_ATTR = 'cs'
SPLIT_POINT_ATTR = 'csp'
class SplitError(ValueError): class SplitError(ValueError):
def __init__(self, path): def __init__(self, path, root):
ValueError.__init__(self, _('Could not find reasonable point at which to split: ')+os.path.basename(path)) size = len(tostring(root))/1024.
ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
(os.path.basename(path), size))
def split_tree(tree, split_point, before, opts, filepath):
trees = set([])
tree2 = copy.deepcopy(tree)
path = tree.getpath(split_point)
root, root2 = tree.getroot(), tree2.getroot()
body, body2 = root.xpath('//body')[0], root2.xpath('//body')[0]
split_point2 = root2.xpath(path)[0]
# Tree 1
hit_split_point = False
for elem in body.iterdescendants():
if elem is split_point:
hit_split_point = True
if before:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1')
continue
if hit_split_point:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1' if hit_split_point else '0')
# Tree 2 class Splitter(LoggingInterface):
hit_split_point = False
for elem in body2.iterdescendants():
if elem is split_point2:
hit_split_point = True
if not before:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1')
continue
if not hit_split_point:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '0' if hit_split_point else '1')
for t, r in [(tree, root), (tree2, root2)]: def __init__(self, path, opts, always_remove=False):
if len(tostring(r)) < opts.profile.flow_size: LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
trees.append(t) self.setup_cli_handler(opts.verbose)
self.path = path
self.always_remove = always_remove
self.base = os.path.splitext(path)[0] + '_split_%d.html'
self.opts = opts
self.log_info('\tSplitting %s (%d KB)', path, os.stat(content(path)).st_size/1024.)
root = html.fromstring(open(content(path)).read())
css = XPath('//link[@type = "text/css" and @rel = "stylesheet"]')(root)
if css:
cssp = os.path.join('content', *(css[0].get('href').split('/')))
self.log_debug('\t\tParsing stylesheet...')
stylesheet = CSSParser().parseString(open(cssp, 'rb').read())
else: else:
new_split_point, before = find_split_point(t) stylesheet = None
if new_split_point is None: self.page_breaks = []
raise SplitError(filepath) if stylesheet is not None:
trees.extend(split_tree(t, new_split_point, before, opts, filepath)) self.find_page_breaks(stylesheet, root)
return trees self.trees = self.split(root.getroottree())
self.commit()
self.log_info('\t\tSplit into %d parts.', len(self.trees))
if self.opts.verbose:
for f in self.files:
self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
self.trees = None
def split(self, tree):
'''
Split ``tree`` into a *before* and *after* tree, preserving tag structure,
but not duplicating any text. All tags that have had their text and tail
removed have the attribute ``calibre_split`` set to 1.
'''
self.log_debug('\t\tSplitting...')
root = tree.getroot()
split_point, before = self.find_split_point(root)
if split_point is None:
if not self.always_remove:
self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
raise SplitError(self.path, root)
tree2 = copy.deepcopy(tree)
root2 = tree2.getroot()
body, body2 = root.body, root2.body
trees = []
path = tree.getpath(split_point)
split_point2 = root2.xpath(path)[0]
def nix_element(elem, top=True):
if self.always_remove:
parent = elem.getparent()
index = parent.index(elem)
if top:
parent.remove(elem)
else:
index = parent.index(elem)
parent[index:index+1] = list(elem.iterchildren())
else:
elem.text = u''
elem.tail = u''
elem.set(SPLIT_ATTR, '1')
if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
elem.set('style', 'display:none;')
def fix_split_point(sp):
sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')
# Tree 1
hit_split_point = False
for elem in list(body.iterdescendants(etree.Element)):
if elem.get(SPLIT_ATTR, '0') == '1':
continue
if elem is split_point:
hit_split_point = True
if before:
nix_element(elem)
fix_split_point(elem)
continue
if hit_split_point:
nix_element(elem)
def find_split_point(tree): # Tree 2
root = tree.getroot() hit_split_point = False
css = root.xpath('//style[@type="text/css"]') for elem in list(body2.iterdescendants(etree.Element)):
if css: if elem.get(SPLIT_ATTR, '0') == '1':
continue
if elem is split_point2:
hit_split_point = True
if not before:
nix_element(elem, top=False)
fix_split_point(elem)
continue
if not hit_split_point:
nix_element(elem, top=False)
def pick_elem(elems): for t, r in [(tree, root), (tree2, root2)]:
if elems: size = len(tostring(r))
elems = [i for i in elems if elem.get('calibre_split', '0') != '1'] if size <= self.opts.profile.flow_size:
if elems: trees.append(t)
i = int(math.floor(len(elems)/2.)) self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(trees), size/1024.)
return elems[i] else:
trees.extend(self.split(t))
def selector_element(rule): return trees
try:
selector = CSSSelector(rule.selectorText)
return pick_elem(selector(root))
except:
return None
css = css[0].text def find_page_breaks(self, stylesheet, root):
from cssutils import CSSParser '''
stylesheet = CSSParser().parseString(css) Find all elements that have either page-break-before or page-break-after set.
'''
page_break_selectors = set([])
for rule in stylesheet: for rule in stylesheet:
if rule.type != rule.STYLE_RULE: if rule.type != rule.STYLE_RULE:
continue continue
before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower() before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
if before and before != 'avoid':
elem = selector_element(rule)
if elem is not None:
return elem, True
after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower() after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
if after and after != 'avoid': try:
elem = selector_element(rule) if before and before != 'avoid':
if elem is not None: page_break_selectors.add((CSSSelector(rule.selectorText), True))
return elem, False except:
pass
try:
if after and after != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText), False))
except:
pass
for path in ('//*[re:match(name(), "h[1-6]", "i")', '/body/div', '//p'): page_breaks = set([])
elems = root.xpath(path) for selector, before in page_break_selectors:
elem = pick_elem(elems) for elem in selector(root):
elem.pb_before = before
page_breaks.add(elem)
for i, elem in enumerate(root.iter()):
elem.pb_order = i
page_breaks = list(page_breaks)
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
tree = root.getroottree()
self.page_breaks = [(XPath(tree.getpath(x)), x.pb_before) for x in page_breaks]
def find_split_point(self, root):
'''
Find the tag at which to split the tree rooted at `root`.
Search order is:
* page breaks
* Heading tags
* <div> tags
* <p> tags
We try to split in the "middle" of the file (as defined by tag counts.
'''
def pick_elem(elems):
if elems:
elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') != '1'\
and i.get(SPLIT_ATTR, '0') != '1']
if elems:
i = int(math.floor(len(elems)/2.))
elems[i].set(SPLIT_POINT_ATTR, '1')
return elems[i]
page_breaks = []
for x in self.page_breaks:
pb = x[0](root)
if pb:
page_breaks.append(pb[0])
elem = pick_elem(page_breaks)
if elem is not None: if elem is not None:
return elem, True i = page_breaks.index(elem)
return elem, self.page_breaks[i][1]
return None, True
def do_split(path, opts):
tree = parse(path, parser=PARSER)
split_point, before = find_split_point(tree)
if split_point is None:
raise SplitError(path)
trees = split_tree(tree, split_point, before, opts, path)
base = os.path.splitext(os.path.basename(path))[0] + '_split_%d.html'
anchor_map = {None:base%0}
files = []
for i, tree in enumerate(trees):
root = tree.getroot()
files.append(base%i)
for elem in root.xpath('//*[@id and @calibre_split = "1"]'):
anchor_map[elem.get('id')] = files[-1]
elem.attrib.pop('calibre_split')
for elem in root.xpath('//*[@calibre_split]'):
elem.attrib.pop('calibre_split')
open(os.path.join(os.path.dirname(path), files[-1]), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
os.remove(path)
return path, files, anchor_map
def fix_opf(opf, orig_file, files, anchor_map):
orig = None
for item in opf.manifest:
if os.path.samefile(orig_file, item.path):
orig = item
break
opf.manifest.remove(orig)
ids = []
for f in files:
ids.append(opf.manifest.add_item(f))
index = None
for i, item in enumerate(opf.spine):
if item.id == orig.id:
index = i
break
for path in ('//*[re:match(name(), "h[1-6]", "i")]', '/html/body/div', '//p'):
elems = root.xpath(path)
elem = pick_elem(elems)
if elem is not None:
return elem, True
return None, True
def commit(self):
'''
Commit all changes caused by the split. This removes the previously
introduced ``calibre_split`` attribute and calculates an *anchor_map* for
all anchors in the original tree. Internal links are re-directed. The
original file is deleted and the split files are saved.
'''
self.anchor_map = {None:self.base%0}
self.files = []
for i, tree in enumerate(self.trees):
root = tree.getroot()
self.files.append(self.base%i)
for elem in root.xpath('//*[@id]'):
if elem.get(SPLIT_ATTR, '0') == '0':
self.anchor_map[elem.get('id')] = self.files[-1]
for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
elem.attrib.pop(SPLIT_ATTR, None)
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
for current, tree in zip(self.files, self.trees):
for a in tree.getroot().xpath('//a[@href]'):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]
file = self.anchor_map[anchor]
if file != current:
a.set('href', file+href)
open(content(current), 'wb').\
write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
os.remove(content(self.path))
def fix_opf(self, opf):
'''
Fix references to the split file in the OPF.
'''
items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
new_items = [('content/'+f, None) for f in self.files]
id_map = {}
for item in items:
id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
for id in id_map.keys():
opf.replace_spine_items_by_idref(id, id_map[id])
for ref in opf.iterguide():
href = ref.get('href', '')
if href.startswith('content/'+self.path):
href = href.split('#')
frag = None
if len(href) > 1:
frag = href[1]
new_file = self.anchor_map[frag]
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
def fix_content_links(html_files, changes, opts):
split_files = [f.path for f in changes]
anchor_maps = [f.anchor_map for f in changes]
files = list(html_files)
for j, f in enumerate(split_files):
try:
i = files.index(f)
files[i:i+1] = changes[j].files
except ValueError:
continue
for htmlfile in files:
changed = False
root = html.fromstring(open(content(htmlfile), 'rb').read())
for a in root.xpath('//a[@href]'):
href = a.get('href')
if not href.startswith('#'):
href = href.split('#')
anchor = href[1] if len(href) > 1 else None
href = href[0]
if href in split_files:
newf = anchor_maps[split_files.index(href)][anchor]
frag = ('#'+anchor) if anchor else ''
a.set('href', newf+frag)
changed = True
if changed:
open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
def fix_ncx(path, changes):
split_files = [f.path for f in changes]
anchor_maps = [f.anchor_map for f in changes]
tree = etree.parse(path)
changed = False
for content in tree.getroot().xpath('//x:content[@src]', namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
href = content.get('src')
if not href.startswith('#'):
href = href.split('#')
anchor = href[1] if len(href) > 1 else None
href = href[0].split('/')[-1]
if href in split_files:
newf = anchor_maps[split_files.index(href)][anchor]
frag = ('#'+anchor) if anchor else ''
content.set('src', 'content/'+newf+frag)
changed = True
if changed:
open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
def split(pathtoopf, opts): def split(pathtoopf, opts):
return
pathtoopf = os.path.abspath(pathtoopf) pathtoopf = os.path.abspath(pathtoopf)
opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf)) with CurrentDir(os.path.dirname(pathtoopf)):
html_files = [] opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
for item in opf.manifest: html_files = []
if 'html' in item.mime_type.lower(): for item in opf.itermanifest():
html_files.append(item.path) if 'html' in item.get('media-type', '').lower():
changes = [] html_files.append(unquote(item.get('href')).split('/')[-1])
for f in html_files: changes = []
if os.stat(f).st_size > opts.profile.flow_size: for f in html_files:
fix_opf(opf, *do_split(f, opts)) if os.stat(content(f)).st_size > opts.profile.flow_size:
if changes: try:
pass changes.append(Splitter(f, opts))
except SplitError:
changes.append(Splitter(f, opts, always_remove=True))
changes[-1].fix_opf(opf)
open(pathtoopf, 'wb').write(opf.render())
fix_content_links(html_files, changes, opts)
for item in opf.itermanifest():
if item.get('media-type', '') == 'application/x-dtbncx+xml':
fix_ncx(item.get('href'), changes)
def main(args=sys.argv): break
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -228,8 +228,14 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
raise ValueError('OPF does not have a spine') raise ValueError('OPF does not have a spine')
flat = [] flat = []
for path in opf_reader.spine.items(): for path in opf_reader.spine.items():
path = os.path.abspath(path)
if path not in flat: if path not in flat:
flat.append(os.path.abspath(path)) flat.append(os.path.abspath(path))
for item in opf_reader.manifest:
if 'html' in item.mime_type:
path = os.path.abspath(item.path)
if path not in flat:
flat.append(path)
flat = [HTMLFile(path, 0, encoding, verbose) for path in flat] flat = [HTMLFile(path, 0, encoding, verbose) for path in flat]
return flat return flat
@ -329,14 +335,15 @@ class Parser(PreProcessor, LoggingInterface):
if self.root.get(bad, None) is not None: if self.root.get(bad, None) is not None:
self.root.attrib.pop(bad) self.root.attrib.pop(bad)
def save_path(self):
    # Absolute path this processed HTML file will be written to: the
    # working temp dir joined with the (possibly renamed) output name
    # that was assigned to this input file in htmlfile_map.
    return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
def save(self): def save(self):
''' '''
Save processed HTML into the content directory. Save processed HTML into the content directory.
Should be called after all HTML processing is finished. Should be called after all HTML processing is finished.
''' '''
with open(os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]), 'wb') as f: with open(self.save_path(), 'wb') as f:
ans = tostring(self.root, pretty_print=self.opts.pretty_print) ans = tostring(self.root, pretty_print=self.opts.pretty_print)
ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans) ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans) ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
@ -390,21 +397,26 @@ class Parser(PreProcessor, LoggingInterface):
if not isinstance(olink, unicode): if not isinstance(olink, unicode):
olink = olink.decode(self.htmlfile.encoding) olink = olink.decode(self.htmlfile.encoding)
link = self.htmlfile.resolve(olink) link = self.htmlfile.resolve(olink)
frag = (('#'+link.fragment) if link.fragment else '')
if link.path == self.htmlfile.path:
return frag if frag else '#'
if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path): if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
return olink return olink
if link.path in self.htmlfiles: if link.path in self.htmlfiles:
return self.htmlfile_map[link.path] return self.htmlfile_map[link.path] + frag
if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None: if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
return olink # This happens when --max-levels is used return olink # This happens when --max-levels is used
if link.path in self.resource_map.keys(): if link.path in self.resource_map.keys():
return self.resource_map[link.path] return self.resource_map[link.path] + frag
name = os.path.basename(link.path) name = os.path.basename(link.path)
name, ext = os.path.splitext(name) name, ext = os.path.splitext(name)
name += ('_%d'%len(self.resource_map)) + ext name += ('_%d'%len(self.resource_map)) + ext
shutil.copyfile(link.path, os.path.join(self.resource_dir, name)) shutil.copyfile(link.path, os.path.join(self.resource_dir, name))
name = 'resources/' + name name = 'resources/' + name
self.resource_map[link.path] = name self.resource_map[link.path] = name
return name return name + frag
class Processor(Parser): class Processor(Parser):
''' '''
@ -438,9 +450,12 @@ class Processor(Parser):
def save(self): def save(self):
head = self.head if self.head is not None else self.body head = self.head if self.head is not None else self.body
style = etree.SubElement(head, 'style', attrib={'type':'text/css'}) style_path = os.path.basename(self.save_path())+'.css'
style.text='\n'+self.css style = etree.SubElement(head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
'href':'resources/'+style_path})
style.tail = '\n\n' style.tail = '\n\n'
style_path = os.path.join(os.path.dirname(self.save_path()), 'resources', style_path)
open(style_path, 'wb').write(self.css.encode('utf-8'))
return Parser.save(self) return Parser.save(self)
def populate_toc(self, toc): def populate_toc(self, toc):
@ -530,6 +545,8 @@ class Processor(Parser):
css.append('\n'.join(style.xpath('./text()'))) css.append('\n'.join(style.xpath('./text()')))
style.getparent().remove(style) style.getparent().remove(style)
cache = {}
class_counter = 0
for font in self.root.xpath('//font'): for font in self.root.xpath('//font'):
try: try:
size = int(font.attrib.pop('size', '3')) size = int(font.attrib.pop('size', '3'))
@ -542,16 +559,33 @@ class Processor(Parser):
color = font.attrib.pop('color', None) color = font.attrib.pop('color', None)
if color is not None: if color is not None:
setting += 'color:%s'%color setting += 'color:%s'%color
id = get_id(font, counter) classname = cache.get(setting, None)
counter += 1 if classname is None:
css.append('#%s { %s }'%(id, setting)) classname = 'calibre_class_%d'%class_counter
class_counter += 1
cache[setting] = classname
cn = font.get('class', '')
if cn: cn += ' '
cn += classname
font.set('class', cn)
for elem in self.root.xpath('//*[@style]'): for elem in self.root.xpath('//*[@style]'):
id = get_id(elem, counter) setting = elem.get('style')
counter += 1 classname = cache.get(setting, None)
css.append('#%s {%s}'%(id, elem.get('style'))) if classname is None:
classname = 'calibre_class_%d'%class_counter
class_counter += 1
cache[setting] = classname
cn = elem.get('class', '')
if cn: cn += ' '
cn += classname
elem.set('class', cn)
elem.attrib.pop('style') elem.attrib.pop('style')
for setting, cn in cache.items():
css.append('.%s {%s}'%(cn, setting))
self.raw_css = '\n\n'.join(css) self.raw_css = '\n\n'.join(css)
self.css = unicode(self.raw_css) self.css = unicode(self.raw_css)
if self.opts.override_css: if self.opts.override_css:
@ -688,6 +722,9 @@ def create_metadata(basepath, mi, filelist, resources):
''' '''
mi = OPFCreator(basepath, mi) mi = OPFCreator(basepath, mi)
entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources] entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources]
for f in filelist:
if os.path.exists(os.path.join(basepath, 'content', 'resources', f+'.css')):
entries.append(('content/resources/'+f+'.css', 'text/css'))
mi.create_manifest(entries) mi.create_manifest(entries)
mi.create_spine(['content/'+f for f in filelist]) mi.create_spine(['content/'+f for f in filelist])
return mi return mi

View File

@ -143,7 +143,8 @@ class ResourceCollection(object):
self._resources.remove(resource) self._resources.remove(resource)
def replace(self, start, end, items): def replace(self, start, end, items):
pass 'Same as list[start:end] = items'
self._resources[start:end] = items
@staticmethod @staticmethod
def from_directory_contents(top, topdown=True): def from_directory_contents(top, topdown=True):

View File

@ -156,6 +156,19 @@ class Spine(ResourceCollection):
self.manifest = manifest self.manifest = manifest
def replace(self, start, end, ids):
    '''
    Replace the items between start (inclusive) and end (not inclusive)
    with the items identified by ids. ids can be a list of any length.

    :param start: Index of the first spine item to replace.
    :param end:   Index one past the last spine item to replace.
    :param ids:   Manifest ids of the replacement items.
    :raises ValueError: If an id is not present in the manifest.
    '''
    items = []
    for id in ids:
        path = self.manifest.path_for_id(id)
        if path is None:
            # Interpolate the offending id (the original format string
            # was never applied, so the message always showed '%s')
            raise ValueError('id %s not in manifest'%id)
        # Bind id as a default argument: a plain `lambda x: id` late-binds
        # and every Item would report the id of the *last* loop iteration.
        items.append(Spine.Item(lambda x, id=id: id, path, is_path=True))
    # replace() is an instance method on the base class; it must be
    # invoked with self (the original call omitted it -> TypeError).
    ResourceCollection.replace(self, start, end, items)
def linear_items(self): def linear_items(self):
for r in self: for r in self:
if r.is_linear: if r.is_linear:
@ -297,6 +310,55 @@ class OPF(object):
def get_text(self, elem): def get_text(self, elem):
return u''.join(self.TEXT(elem)) return u''.join(self.TEXT(elem))
def itermanifest(self):
    # Iterate over all <item> elements in the OPF <manifest>
    # (manifest_path is presumably a precompiled XPath — it is not
    # visible here; confirm in the class definition).
    return self.manifest_path(self.tree)
def create_manifest_item(self, href, media_type):
    '''
    Create a new manifest <item> element with a unique auto-generated id.

    :param href: Value of the href attribute of the new item.
    :param media_type: MIME type of the item; falsy values default to
                       ``application/xhtml+xml``.
    :return: A new element, not yet attached to the manifest.
    '''
    # Use a set for O(1) membership tests while probing for a free id;
    # the original list made the probe loop quadratic in manifest size.
    ids = set(i.get('id', None) for i in self.itermanifest())
    id = None
    for c in xrange(1, sys.maxint):
        id = 'id%d'%c
        if id not in ids:
            break
    if not media_type:
        media_type = 'application/xhtml+xml'
    ans = etree.Element('{%s}item'%self.NAMESPACES['opf'],
            attrib={'id':id, 'href':href, 'media-type':media_type})
    ans.tail = '\n\t\t'  # keep the serialized manifest readable
    return ans
def replace_manifest_item(self, item, items):
    '''
    Replace the manifest element `item` with several new items.

    :param item:  An existing <item> element from the manifest.
    :param items: List of (href, media_type) tuples for the replacements.
    :return: The ids of the newly created manifest items; each id is the
             original item's id with a ``.N`` suffix (N starting at 1).
    '''
    items = [self.create_manifest_item(*i) for i in items]
    for i, item2 in enumerate(items):
        item2.set('id', item.get('id')+'.%d'%(i+1))
    manifest = item.getparent()
    index = manifest.index(item)
    # Splice the new items into the manifest at the old item's position,
    # removing the old item in the same operation.
    manifest[index:index+1] = items
    return [i.get('id') for i in items]
def iterspine(self):
    # Iterate over all <itemref> elements in the OPF <spine>
    # (spine_path is presumably a precompiled XPath — not visible here).
    return self.spine_path(self.tree)
def create_spine_item(self, idref):
    # Build a spine <itemref> element pointing at the manifest item `idref`.
    ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
    ans.tail = '\n\t\t'  # keep the serialized spine readable
    return ans
def replace_spine_items_by_idref(self, idref, new_idrefs):
    '''
    Replace every spine <itemref> whose idref equals `idref` with new
    <itemref> elements, one per id in `new_idrefs`.
    '''
    items = list(map(self.create_spine_item, new_idrefs))
    spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.tree)[0]
    old = [i for i in self.iterspine() if i.get('idref', None) == idref]
    for x in old:
        i = spine.index(x)
        # NOTE(review): if `idref` occurs more than once, the *same*
        # element objects in `items` are re-inserted on each iteration
        # (lxml moves elements rather than copying) — confirm spines
        # never contain duplicate idrefs.
        spine[i:i+1] = items
def iterguide(self):
    # Iterate over all <reference> elements in the OPF <guide>
    # (guide_path is presumably a precompiled XPath — not visible here).
    return self.guide_path(self.tree)
def render(self):
    '''Serialize the OPF tree to pretty-printed UTF-8 XML with an XML declaration.'''
    return etree.tostring(self.tree, encoding='UTF-8', xml_declaration=True,
                          pretty_print=True)
@apply @apply
def authors(): def authors():

View File

@ -24,6 +24,8 @@ class TOC(list):
base_path=os.getcwd()): base_path=os.getcwd()):
self.href = href self.href = href
self.fragment = fragment self.fragment = fragment
if not self.fragment:
self.fragment = None
self.text = text self.text = text
self.parent = parent self.parent = parent
self.base_path = base_path self.base_path = base_path
@ -153,8 +155,20 @@ class TOC(list):
continue continue
purl = urlparse(unquote(a['href'])) purl = urlparse(unquote(a['href']))
href, fragment = purl[2], purl[5] href, fragment = purl[2], purl[5]
if not fragment:
fragment = None
else:
fragment = fragment.strip()
href = href.strip()
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)]) txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
self.add_item(href, fragment, txt) add = True
for i in self.flat():
if i.href == href and i.fragment == fragment:
add = False
break
if add:
self.add_item(href, fragment, txt)
def render(self, stream, uid): def render(self, stream, uid):
from calibre.resources import ncx_template from calibre.resources import ncx_template

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Miscellaneous widgets used in the GUI Miscellaneous widgets used in the GUI
''' '''
import re, os import re, os, traceback
from PyQt4.QtGui import QListView, QIcon, QFont, QLabel, QListWidget, \ from PyQt4.QtGui import QListView, QIcon, QFont, QLabel, QListWidget, \
QListWidgetItem, QTextCharFormat, QApplication, \ QListWidgetItem, QTextCharFormat, QApplication, \
QSyntaxHighlighter, QCursor, QColor, QWidget, QDialog, \ QSyntaxHighlighter, QCursor, QColor, QWidget, QDialog, \
@ -254,7 +254,12 @@ class FontFamilyModel(QAbstractListModel):
def __init__(self, *args): def __init__(self, *args):
QAbstractListModel.__init__(self, *args) QAbstractListModel.__init__(self, *args)
self.families = find_font_families() try:
self.families = find_font_families()
except:
self.families = []
print 'WARNING: Could not load fonts'
traceback.print_exc()
self.families.sort() self.families.sort()
self.families[:0] = ['None'] self.families[:0] = ['None']

View File

@ -278,7 +278,7 @@ def download_tarball():
def main(args=sys.argv): def main(args=sys.argv):
defdir = '/opt/calibre' defdir = '/opt/calibre'
destdir = raw_input('Enter the installation directory for calibre [%s]: '%defdir).strip() destdir = raw_input('Enter the installation directory for calibre (Its contents will be deleted!)[%s]: '%defdir).strip()
if not destdir: if not destdir:
destdir = defdir destdir = defdir
if os.path.exists(destdir): if os.path.exists(destdir):

View File

@ -147,6 +147,7 @@ def cli_docs(app):
info(bold('creating docs for %s...'%cmd)) info(bold('creating docs for %s...'%cmd))
open(os.path.join('cli', cmd+'.rst'), 'wb').write(raw) open(os.path.join('cli', cmd+'.rst'), 'wb').write(raw)
def auto_member(dirname, arguments, options, content, lineno, def auto_member(dirname, arguments, options, content, lineno,
content_offset, block_text, state, state_machine): content_offset, block_text, state, state_machine):
name = arguments[0] name = arguments[0]
@ -196,8 +197,7 @@ def auto_member(dirname, arguments, options, content, lineno,
node = nodes.paragraph() node = nodes.paragraph()
state.nested_parse(result, content_offset, node) state.nested_parse(result, content_offset, node)
return node return list(node)

View File

@ -134,6 +134,7 @@ There can be several causes for this:
* **Any windows version**: If this happens during an initial run of calibre, try deleting the folder you chose for your ebooks and restarting calibre. * **Any windows version**: If this happens during an initial run of calibre, try deleting the folder you chose for your ebooks and restarting calibre.
* **Windows Vista**: If the folder :file:`C:\\Users\\Your User Name\\AppData\\Local\\VirtualStore\\Program Files\\calibre` exists, delete it. Uninstall |app|. Reboot. Re-install. * **Windows Vista**: If the folder :file:`C:\\Users\\Your User Name\\AppData\\Local\\VirtualStore\\Program Files\\calibre` exists, delete it. Uninstall |app|. Reboot. Re-install.
* **Any windows version**: Search your computer for a folder named :file:`_ipython`. Delete it and try again. * **Any windows version**: Search your computer for a folder named :file:`_ipython`. Delete it and try again.
* **Any windows version**: Try disabling any antivirus program you have running and see if that fixes it. Also try disabling any firewall software that prevents connections to the local computer.
If it still wont launch, start a command prompt (press the windows key and R; then type :command:`cmd.exe` in the Run dialog that appears). At the command prompt type the following command and press Enter:: If it still wont launch, start a command prompt (press the windows key and R; then type :command:`cmd.exe` in the Run dialog that appears). At the command prompt type the following command and press Enter::

View File

@ -57,19 +57,21 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
atexit.register(shutil.rmtree, tdir, True) atexit.register(shutil.rmtree, tdir, True)
return tdir return tdir
class TemporaryDirectory(str): class TemporaryDirectory(object):
''' '''
A temporary directory to be used ina with statement. A temporary directory to be used in a with statement.
''' '''
def __init__(self, suffix='', prefix='', dir=None): def __init__(self, suffix='', prefix='', dir=None, keep=False):
self.suffix = suffix self.suffix = suffix
self.prefix = prefix self.prefix = prefix
self.dir = dir self.dir = dir
self.keep = keep
def __enter__(self): def __enter__(self):
self.tdir = tempfile.mkdtemp(self.suffix, __appname__+"_"+ __version__+"_" +self.prefix, self.dir) self.tdir = tempfile.mkdtemp(self.suffix, __appname__+"_"+ __version__+"_" +self.prefix, self.dir)
return self.tdir return self.tdir
def __exit__(self, *args): def __exit__(self, *args):
shutil.rmtree(self.tdir) if not self.keep:
shutil.rmtree(self.tdir)

View File

@ -53,6 +53,9 @@ def import_from_launchpad(url):
open(out, 'wb').write(tf.extractfile(next).read()) open(out, 'wb').write(tf.extractfile(next).read())
next = tf.next() next = tf.next()
check_for_critical_bugs() check_for_critical_bugs()
path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
print path
subprocess.check_call('python setup.py translations'.split(), dir=path)
return 0 return 0
def check_for_critical_bugs(): def check_for_critical_bugs():