Support for splitting HTML files to respect maximum flow size limit for EPUB on the SONY Reader.

This commit is contained in:
Kovid Goyal 2008-09-21 22:47:43 -07:00
parent 5c37760a27
commit 35c8db2dd7
14 changed files with 515 additions and 199 deletions

View File

@ -170,6 +170,19 @@ def fit_image(width, height, pwidth, pheight):
return scaled, int(width), int(height)
class CurrentDir(object):
    '''
    Context manager that switches the process working directory to ``path``
    for the duration of a ``with`` block and restores the previous working
    directory on exit.  ``__enter__`` returns the directory that was current
    before the switch.
    '''

    def __init__(self, path):
        self.path = path
        self.cwd = None  # filled in with the pre-switch directory on entry

    def __enter__(self, *args):
        previous = os.getcwd()
        self.cwd = previous
        os.chdir(self.path)
        return previous

    def __exit__(self, *args):
        # Go back to wherever we were before entering the block.
        os.chdir(self.cwd)
def sanitize_file_name(name):
'''

View File

@ -105,5 +105,8 @@ to auto-generate a Table of Contents.
help=_('Print generated OPF file to stdout'))
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
help=_('Print generated NCX file to stdout'))
c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', default=False,
help=_('Keep intermediate files during processing by html2epub'))
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
help=_('Extract the contents of the produced EPUB file to the specified directory.'))
return c

View File

@ -97,7 +97,9 @@ def convert(htmlfile, opts, notification=None):
opts.chapter = XPath(opts.chapter,
namespaces={'re':'http://exslt.org/regular-expressions'})
with TemporaryDirectory('_html2epub') as tdir:
with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
if opts.keep_intermediate:
print 'Intermediate files in', tdir
resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
@ -159,6 +161,8 @@ def convert(htmlfile, opts, notification=None):
epub = initialize_container(opts.output)
epub.add_dir(tdir)
print 'Output written to', opts.output
if opts.extract_to is not None:
epub.extractall(opts.extract_to)
def main(args=sys.argv):

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
@ -7,108 +7,212 @@ __docformat__ = 'restructuredtext en'
Split the flows in an epub file to conform to size limitations.
'''
import sys, os, math, copy
import os, math, copy, logging, functools
from urllib import unquote
from lxml.etree import parse, XMLParser
from lxml.etree import XPath as _XPath
from lxml import etree, html
from lxml.cssselect import CSSSelector
from cssutils import CSSParser
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import tostring
from calibre import CurrentDir, LoggingInterface
PARSER = XMLParser(recover=True)
XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
content = functools.partial(os.path.join, 'content')
SPLIT_ATTR = 'cs'
SPLIT_POINT_ATTR = 'csp'
class SplitError(ValueError):
    '''
    Raised when no suitable point can be found at which to split an HTML
    flow.  The message reports the offending file and the size of the
    sub-tree that could not be split, to aid debugging.
    '''

    # The span contained both the superseded one-argument __init__ and its
    # two-argument replacement; only the final form is kept.
    def __init__(self, path, root):
        # Serialized size of the unsplittable sub-tree, in KB.
        size = len(tostring(root))/1024.
        ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
                            (os.path.basename(path), size))
def split_tree(tree, split_point, before, opts, filepath):
trees = set([])
class Splitter(LoggingInterface):
def __init__(self, path, opts, always_remove=False):
LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
self.setup_cli_handler(opts.verbose)
self.path = path
self.always_remove = always_remove
self.base = os.path.splitext(path)[0] + '_split_%d.html'
self.opts = opts
self.log_info('\tSplitting %s (%d KB)', path, os.stat(content(path)).st_size/1024.)
root = html.fromstring(open(content(path)).read())
css = XPath('//link[@type = "text/css" and @rel = "stylesheet"]')(root)
if css:
cssp = os.path.join('content', *(css[0].get('href').split('/')))
self.log_debug('\t\tParsing stylesheet...')
stylesheet = CSSParser().parseString(open(cssp, 'rb').read())
else:
stylesheet = None
self.page_breaks = []
if stylesheet is not None:
self.find_page_breaks(stylesheet, root)
self.trees = self.split(root.getroottree())
self.commit()
self.log_info('\t\tSplit into %d parts.', len(self.trees))
if self.opts.verbose:
for f in self.files:
self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
self.trees = None
def split(self, tree):
'''
Split ``tree`` into a *before* and *after* tree, preserving tag structure,
but not duplicating any text. All tags that have had their text and tail
removed have the attribute ``calibre_split`` set to 1.
'''
self.log_debug('\t\tSplitting...')
root = tree.getroot()
split_point, before = self.find_split_point(root)
if split_point is None:
if not self.always_remove:
self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
raise SplitError(self.path, root)
tree2 = copy.deepcopy(tree)
root2 = tree2.getroot()
body, body2 = root.body, root2.body
trees = []
path = tree.getpath(split_point)
root, root2 = tree.getroot(), tree2.getroot()
body, body2 = root.xpath('//body')[0], root2.xpath('//body')[0]
split_point2 = root2.xpath(path)[0]
def nix_element(elem, top=True):
if self.always_remove:
parent = elem.getparent()
index = parent.index(elem)
if top:
parent.remove(elem)
else:
index = parent.index(elem)
parent[index:index+1] = list(elem.iterchildren())
else:
elem.text = u''
elem.tail = u''
elem.set(SPLIT_ATTR, '1')
if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
elem.set('style', 'display:none;')
def fix_split_point(sp):
sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')
# Tree 1
hit_split_point = False
for elem in body.iterdescendants():
for elem in list(body.iterdescendants(etree.Element)):
if elem.get(SPLIT_ATTR, '0') == '1':
continue
if elem is split_point:
hit_split_point = True
if before:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1')
nix_element(elem)
fix_split_point(elem)
continue
if hit_split_point:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1' if hit_split_point else '0')
nix_element(elem)
# Tree 2
hit_split_point = False
for elem in body2.iterdescendants():
for elem in list(body2.iterdescendants(etree.Element)):
if elem.get(SPLIT_ATTR, '0') == '1':
continue
if elem is split_point2:
hit_split_point = True
if not before:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '1')
nix_element(elem, top=False)
fix_split_point(elem)
continue
if not hit_split_point:
elem.text = u''
elem.tail = u''
elem.set('calibre_split', '0' if hit_split_point else '1')
nix_element(elem, top=False)
for t, r in [(tree, root), (tree2, root2)]:
if len(tostring(r)) < opts.profile.flow_size:
size = len(tostring(r))
if size <= self.opts.profile.flow_size:
trees.append(t)
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(trees), size/1024.)
else:
new_split_point, before = find_split_point(t)
if new_split_point is None:
raise SplitError(filepath)
trees.extend(split_tree(t, new_split_point, before, opts, filepath))
trees.extend(self.split(t))
return trees
def find_split_point(tree):
root = tree.getroot()
css = root.xpath('//style[@type="text/css"]')
if css:
def pick_elem(elems):
if elems:
elems = [i for i in elems if elem.get('calibre_split', '0') != '1']
if elems:
i = int(math.floor(len(elems)/2.))
return elems[i]
def selector_element(rule):
try:
selector = CSSSelector(rule.selectorText)
return pick_elem(selector(root))
except:
return None
css = css[0].text
from cssutils import CSSParser
stylesheet = CSSParser().parseString(css)
def find_page_breaks(self, stylesheet, root):
'''
Find all elements that have either page-break-before or page-break-after set.
'''
page_break_selectors = set([])
for rule in stylesheet:
if rule.type != rule.STYLE_RULE:
continue
before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
if before and before != 'avoid':
elem = selector_element(rule)
if elem is not None:
return elem, True
after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText), True))
except:
pass
try:
if after and after != 'avoid':
elem = selector_element(rule)
if elem is not None:
return elem, False
page_break_selectors.add((CSSSelector(rule.selectorText), False))
except:
pass
for path in ('//*[re:match(name(), "h[1-6]", "i")', '/body/div', '//p'):
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(root):
elem.pb_before = before
page_breaks.add(elem)
for i, elem in enumerate(root.iter()):
elem.pb_order = i
page_breaks = list(page_breaks)
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
tree = root.getroottree()
self.page_breaks = [(XPath(tree.getpath(x)), x.pb_before) for x in page_breaks]
def find_split_point(self, root):
'''
Find the tag at which to split the tree rooted at `root`.
Search order is:
* page breaks
* Heading tags
* <div> tags
* <p> tags
We try to split in the "middle" of the file (as defined by tag counts.
'''
def pick_elem(elems):
if elems:
elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') != '1'\
and i.get(SPLIT_ATTR, '0') != '1']
if elems:
i = int(math.floor(len(elems)/2.))
elems[i].set(SPLIT_POINT_ATTR, '1')
return elems[i]
page_breaks = []
for x in self.page_breaks:
pb = x[0](root)
if pb:
page_breaks.append(pb[0])
elem = pick_elem(page_breaks)
if elem is not None:
i = page_breaks.index(elem)
return elem, self.page_breaks[i][1]
for path in ('//*[re:match(name(), "h[1-6]", "i")]', '/html/body/div', '//p'):
elems = root.xpath(path)
elem = pick_elem(elems)
if elem is not None:
@ -116,67 +220,134 @@ def find_split_point(tree):
return None, True
def do_split(path, opts):
tree = parse(path, parser=PARSER)
split_point, before = find_split_point(tree)
if split_point is None:
raise SplitError(path)
trees = split_tree(tree, split_point, before, opts, path)
base = os.path.splitext(os.path.basename(path))[0] + '_split_%d.html'
anchor_map = {None:base%0}
files = []
for i, tree in enumerate(trees):
def commit(self):
'''
Commit all changes caused by the split. This removes the previously
introduced ``calibre_split`` attribute and calculates an *anchor_map* for
all anchors in the original tree. Internal links are re-directed. The
original file is deleted and the split files are saved.
'''
self.anchor_map = {None:self.base%0}
self.files = []
for i, tree in enumerate(self.trees):
root = tree.getroot()
files.append(base%i)
for elem in root.xpath('//*[@id and @calibre_split = "1"]'):
anchor_map[elem.get('id')] = files[-1]
elem.attrib.pop('calibre_split')
for elem in root.xpath('//*[@calibre_split]'):
elem.attrib.pop('calibre_split')
open(os.path.join(os.path.dirname(path), files[-1]), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
os.remove(path)
return path, files, anchor_map
self.files.append(self.base%i)
for elem in root.xpath('//*[@id]'):
if elem.get(SPLIT_ATTR, '0') == '0':
self.anchor_map[elem.get('id')] = self.files[-1]
for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
elem.attrib.pop(SPLIT_ATTR, None)
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
def fix_opf(opf, orig_file, files, anchor_map):
orig = None
for item in opf.manifest:
if os.path.samefile(orig_file, item.path):
orig = item
break
opf.manifest.remove(orig)
ids = []
for f in files:
ids.append(opf.manifest.add_item(f))
index = None
for i, item in enumerate(opf.spine):
if item.id == orig.id:
index = i
break
for current, tree in zip(self.files, self.trees):
for a in tree.getroot().xpath('//a[@href]'):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]
file = self.anchor_map[anchor]
if file != current:
a.set('href', file+href)
open(content(current), 'wb').\
write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
os.remove(content(self.path))
def fix_opf(self, opf):
'''
Fix references to the split file in the OPF.
'''
items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
new_items = [('content/'+f, None) for f in self.files]
id_map = {}
for item in items:
id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
for id in id_map.keys():
opf.replace_spine_items_by_idref(id, id_map[id])
for ref in opf.iterguide():
href = ref.get('href', '')
if href.startswith('content/'+self.path):
href = href.split('#')
frag = None
if len(href) > 1:
frag = href[1]
new_file = self.anchor_map[frag]
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
def fix_content_links(html_files, changes, opts):
    '''
    Rewrite <a href> links in the HTML files so that links pointing into a
    file that was split now point at the correct split part.

    :param html_files: list of HTML file names (relative, as stored in the OPF)
    :param changes:    list of Splitter objects (each has .path, .files and
                       .anchor_map for the file it split)
    :param opts:       conversion options (only pretty_print is used here)
    '''
    split_files = [f.path for f in changes]
    anchor_maps = [f.anchor_map for f in changes]
    # Build the post-split file list: each split file is replaced, in place,
    # by the parts it was split into.
    files = list(html_files)
    for j, f in enumerate(split_files):
        try:
            i = files.index(f)
            files[i:i+1] = changes[j].files
        except ValueError:
            # File not in the list; nothing to substitute.
            continue
    for htmlfile in files:
        changed = False
        root = html.fromstring(open(content(htmlfile), 'rb').read())
        for a in root.xpath('//a[@href]'):
            href = a.get('href')
            # Pure fragment links ('#anchor') stay within the same file and
            # need no rewriting.
            if not href.startswith('#'):
                href = href.split('#')
                anchor = href[1] if len(href) > 1 else None
                href = href[0]
                if href in split_files:
                    # anchor_map maps anchor (or None for "start of file")
                    # to the split part that now contains it.
                    newf = anchor_maps[split_files.index(href)][anchor]
                    frag = ('#'+anchor) if anchor else ''
                    a.set('href', newf+frag)
                    changed = True
        if changed:
            open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
def fix_ncx(path, changes):
    '''
    Rewrite <content src> references in the NCX table of contents so that
    references into files that were split now point at the correct split
    part.  The file at *path* is rewritten in place only if something
    actually changed.

    :param path:    path to the NCX file
    :param changes: list of Splitter objects (each has .path and .anchor_map)
    '''
    split_files = [f.path for f in changes]
    anchor_maps = [f.anchor_map for f in changes]
    tree = etree.parse(path)
    changed = False
    # Loop variable renamed from ``content`` — the original shadowed the
    # module-level ``content`` path helper defined at the top of this file.
    for ref in tree.getroot().xpath('//x:content[@src]',
            namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
        src = ref.get('src')
        # Pure fragment references need no rewriting.
        if not src.startswith('#'):
            parts = src.split('#')
            anchor = parts[1] if len(parts) > 1 else None
            # Compare only the basename, as split_files holds basenames.
            base = parts[0].split('/')[-1]
            if base in split_files:
                # anchor_map maps anchor (None = start of file) to the
                # split part that now contains it.
                newf = anchor_maps[split_files.index(base)][anchor]
                frag = ('#'+anchor) if anchor else ''
                ref.set('src', 'content/'+newf+frag)
                changed = True
    if changed:
        open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8',
            xml_declaration=True))
def split(pathtoopf, opts):
return
pathtoopf = os.path.abspath(pathtoopf)
with CurrentDir(os.path.dirname(pathtoopf)):
opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
html_files = []
for item in opf.manifest:
if 'html' in item.mime_type.lower():
html_files.append(item.path)
for item in opf.itermanifest():
if 'html' in item.get('media-type', '').lower():
html_files.append(unquote(item.get('href')).split('/')[-1])
changes = []
for f in html_files:
if os.stat(f).st_size > opts.profile.flow_size:
fix_opf(opf, *do_split(f, opts))
if changes:
pass
if os.stat(content(f)).st_size > opts.profile.flow_size:
try:
changes.append(Splitter(f, opts))
except SplitError:
changes.append(Splitter(f, opts, always_remove=True))
changes[-1].fix_opf(opf)
open(pathtoopf, 'wb').write(opf.render())
fix_content_links(html_files, changes, opts)
def main(args=sys.argv):
return 0
if __name__ == '__main__':
sys.exit(main())
for item in opf.itermanifest():
if item.get('media-type', '') == 'application/x-dtbncx+xml':
fix_ncx(item.get('href'), changes)
break

View File

@ -228,8 +228,14 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
raise ValueError('OPF does not have a spine')
flat = []
for path in opf_reader.spine.items():
path = os.path.abspath(path)
if path not in flat:
flat.append(os.path.abspath(path))
for item in opf_reader.manifest:
if 'html' in item.mime_type:
path = os.path.abspath(item.path)
if path not in flat:
flat.append(path)
flat = [HTMLFile(path, 0, encoding, verbose) for path in flat]
return flat
@ -329,14 +335,15 @@ class Parser(PreProcessor, LoggingInterface):
if self.root.get(bad, None) is not None:
self.root.attrib.pop(bad)
def save_path(self):
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
def save(self):
'''
Save processed HTML into the content directory.
Should be called after all HTML processing is finished.
'''
with open(os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]), 'wb') as f:
with open(self.save_path(), 'wb') as f:
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
@ -390,21 +397,26 @@ class Parser(PreProcessor, LoggingInterface):
if not isinstance(olink, unicode):
olink = olink.decode(self.htmlfile.encoding)
link = self.htmlfile.resolve(olink)
frag = (('#'+link.fragment) if link.fragment else '')
if link.path == self.htmlfile.path:
return frag if frag else '#'
if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
return olink
if link.path in self.htmlfiles:
return self.htmlfile_map[link.path]
return self.htmlfile_map[link.path] + frag
if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
return olink # This happens when --max-levels is used
if link.path in self.resource_map.keys():
return self.resource_map[link.path]
return self.resource_map[link.path] + frag
name = os.path.basename(link.path)
name, ext = os.path.splitext(name)
name += ('_%d'%len(self.resource_map)) + ext
shutil.copyfile(link.path, os.path.join(self.resource_dir, name))
name = 'resources/' + name
self.resource_map[link.path] = name
return name
return name + frag
class Processor(Parser):
'''
@ -438,9 +450,12 @@ class Processor(Parser):
def save(self):
head = self.head if self.head is not None else self.body
style = etree.SubElement(head, 'style', attrib={'type':'text/css'})
style.text='\n'+self.css
style_path = os.path.basename(self.save_path())+'.css'
style = etree.SubElement(head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
'href':'resources/'+style_path})
style.tail = '\n\n'
style_path = os.path.join(os.path.dirname(self.save_path()), 'resources', style_path)
open(style_path, 'wb').write(self.css.encode('utf-8'))
return Parser.save(self)
def populate_toc(self, toc):
@ -530,6 +545,8 @@ class Processor(Parser):
css.append('\n'.join(style.xpath('./text()')))
style.getparent().remove(style)
cache = {}
class_counter = 0
for font in self.root.xpath('//font'):
try:
size = int(font.attrib.pop('size', '3'))
@ -542,16 +559,33 @@ class Processor(Parser):
color = font.attrib.pop('color', None)
if color is not None:
setting += 'color:%s'%color
id = get_id(font, counter)
counter += 1
css.append('#%s { %s }'%(id, setting))
classname = cache.get(setting, None)
if classname is None:
classname = 'calibre_class_%d'%class_counter
class_counter += 1
cache[setting] = classname
cn = font.get('class', '')
if cn: cn += ' '
cn += classname
font.set('class', cn)
for elem in self.root.xpath('//*[@style]'):
id = get_id(elem, counter)
counter += 1
css.append('#%s {%s}'%(id, elem.get('style')))
setting = elem.get('style')
classname = cache.get(setting, None)
if classname is None:
classname = 'calibre_class_%d'%class_counter
class_counter += 1
cache[setting] = classname
cn = elem.get('class', '')
if cn: cn += ' '
cn += classname
elem.set('class', cn)
elem.attrib.pop('style')
for setting, cn in cache.items():
css.append('.%s {%s}'%(cn, setting))
self.raw_css = '\n\n'.join(css)
self.css = unicode(self.raw_css)
if self.opts.override_css:
@ -688,6 +722,9 @@ def create_metadata(basepath, mi, filelist, resources):
'''
mi = OPFCreator(basepath, mi)
entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources]
for f in filelist:
if os.path.exists(os.path.join(basepath, 'content', 'resources', f+'.css')):
entries.append(('content/resources/'+f+'.css', 'text/css'))
mi.create_manifest(entries)
mi.create_spine(['content/'+f for f in filelist])
return mi

View File

@ -143,7 +143,8 @@ class ResourceCollection(object):
self._resources.remove(resource)
def replace(self, start, end, items):
pass
'Same as list[start:end] = items'
self._resources[start:end] = items
@staticmethod
def from_directory_contents(top, topdown=True):

View File

@ -156,6 +156,19 @@ class Spine(ResourceCollection):
self.manifest = manifest
def replace(self, start, end, ids):
'''
Replace the items between start (inclusive) and end (not inclusive) with
with the items identified by ids. ids can be a list of any length.
'''
items = []
for id in ids:
path = self.manifest.path_for_id(id)
if path is None:
raise ValueError('id %s not in manifest')
items.append(Spine.Item(lambda x: id, path, is_path=True))
ResourceCollection.replace(start, end, items)
def linear_items(self):
for r in self:
if r.is_linear:
@ -297,6 +310,55 @@ class OPF(object):
    def get_text(self, elem):
        # Join all fragments selected by self.TEXT for *elem* into one
        # unicode string.
        # NOTE(review): self.TEXT is defined outside this view; it appears to
        # be a precompiled XPath selecting text nodes — confirm.
        return u''.join(self.TEXT(elem))
    def itermanifest(self):
        '''Iterate over the <item> elements of the OPF manifest.'''
        return self.manifest_path(self.tree)
def create_manifest_item(self, href, media_type):
ids = [i.get('id', None) for i in self.itermanifest()]
id = None
for c in xrange(1, sys.maxint):
id = 'id%d'%c
if id not in ids:
break
if not media_type:
media_type = 'application/xhtml+xml'
ans = etree.Element('{%s}item'%self.NAMESPACES['opf'],
attrib={'id':id, 'href':href, 'media-type':media_type})
ans.tail = '\n\t\t'
return ans
def replace_manifest_item(self, item, items):
items = [self.create_manifest_item(*i) for i in items]
for i, item2 in enumerate(items):
item2.set('id', item.get('id')+'.%d'%(i+1))
manifest = item.getparent()
index = manifest.index(item)
manifest[index:index+1] = items
return [i.get('id') for i in items]
    def iterspine(self):
        '''Iterate over the <itemref> elements of the OPF spine.'''
        return self.spine_path(self.tree)
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'
return ans
def replace_spine_items_by_idref(self, idref, new_idrefs):
items = list(map(self.create_spine_item, new_idrefs))
spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.tree)[0]
old = [i for i in self.iterspine() if i.get('idref', None) == idref]
for x in old:
i = spine.index(x)
spine[i:i+1] = items
    def iterguide(self):
        '''Iterate over the elements of the OPF <guide> section.'''
        return self.guide_path(self.tree)
    def render(self):
        '''Serialize the OPF tree to a pretty-printed UTF-8 XML byte string
        with an XML declaration.'''
        return etree.tostring(self.tree, encoding='UTF-8', xml_declaration=True,
                pretty_print=True)
@apply
def authors():

View File

@ -24,6 +24,8 @@ class TOC(list):
base_path=os.getcwd()):
self.href = href
self.fragment = fragment
if not self.fragment:
self.fragment = None
self.text = text
self.parent = parent
self.base_path = base_path
@ -153,7 +155,19 @@ class TOC(list):
continue
purl = urlparse(unquote(a['href']))
href, fragment = purl[2], purl[5]
if not fragment:
fragment = None
else:
fragment = fragment.strip()
href = href.strip()
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
add = True
for i in self.flat():
if i.href == href and i.fragment == fragment:
add = False
break
if add:
self.add_item(href, fragment, txt)
def render(self, stream, uid):

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Miscellaneous widgets used in the GUI
'''
import re, os
import re, os, traceback
from PyQt4.QtGui import QListView, QIcon, QFont, QLabel, QListWidget, \
QListWidgetItem, QTextCharFormat, QApplication, \
QSyntaxHighlighter, QCursor, QColor, QWidget, QDialog, \
@ -254,7 +254,12 @@ class FontFamilyModel(QAbstractListModel):
    def __init__(self, *args):
        QAbstractListModel.__init__(self, *args)
        # Font discovery is best-effort: fall back to an empty list so the
        # GUI can still start, but log the failure for debugging.
        try:
            self.families = find_font_families()
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt and
            # SystemExit; consider narrowing to ``except Exception``.
            self.families = []
            print 'WARNING: Could not load fonts'
            traceback.print_exc()
        self.families.sort()
        # Always offer 'None' (no specific font) as the first choice.
        self.families[:0] = ['None']

View File

@ -278,7 +278,7 @@ def download_tarball():
def main(args=sys.argv):
defdir = '/opt/calibre'
destdir = raw_input('Enter the installation directory for calibre [%s]: '%defdir).strip()
destdir = raw_input('Enter the installation directory for calibre (Its contents will be deleted!)[%s]: '%defdir).strip()
if not destdir:
destdir = defdir
if os.path.exists(destdir):

View File

@ -147,6 +147,7 @@ def cli_docs(app):
info(bold('creating docs for %s...'%cmd))
open(os.path.join('cli', cmd+'.rst'), 'wb').write(raw)
def auto_member(dirname, arguments, options, content, lineno,
content_offset, block_text, state, state_machine):
name = arguments[0]
@ -196,8 +197,7 @@ def auto_member(dirname, arguments, options, content, lineno,
node = nodes.paragraph()
state.nested_parse(result, content_offset, node)
return node
return list(node)

View File

@ -134,6 +134,7 @@ There can be several causes for this:
* **Any windows version**: If this happens during an initial run of calibre, try deleting the folder you chose for your ebooks and restarting calibre.
* **Windows Vista**: If the folder :file:`C:\\Users\\Your User Name\\AppData\\Local\\VirtualStore\\Program Files\\calibre` exists, delete it. Uninstall |app|. Reboot. Re-install.
* **Any windows version**: Search your computer for a folder named :file:`_ipython`. Delete it and try again.
* **Any windows version**: Try disabling any antivirus program you have running and see if that fixes it. Also try disabling any firewall software that prevents connections to the local computer.
If it still won't launch, start a command prompt (press the windows key and R; then type :command:`cmd.exe` in the Run dialog that appears). At the command prompt type the following command and press Enter::

View File

@ -57,19 +57,21 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
atexit.register(shutil.rmtree, tdir, True)
return tdir
class TemporaryDirectory(object):
    '''
    A temporary directory to be used in a with statement.

    If ``keep`` is True the directory is not deleted on exit (useful for
    inspecting intermediate files while debugging).
    '''
    # The span interleaved the superseded version (a ``str`` subclass with
    # an "ina with statement" typo and no ``keep`` flag) with its
    # replacement; only the final form is kept.

    def __init__(self, suffix='', prefix='', dir=None, keep=False):
        self.suffix = suffix
        self.prefix = prefix
        self.dir = dir
        self.keep = keep

    def __enter__(self):
        self.tdir = tempfile.mkdtemp(self.suffix,
                __appname__+"_"+ __version__+"_" +self.prefix, self.dir)
        return self.tdir

    def __exit__(self, *args):
        if not self.keep:
            shutil.rmtree(self.tdir)

View File

@ -53,6 +53,9 @@ def import_from_launchpad(url):
open(out, 'wb').write(tf.extractfile(next).read())
next = tf.next()
check_for_critical_bugs()
path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
print path
subprocess.check_call('python setup.py translations'.split(), dir=path)
return 0
def check_for_critical_bugs():