mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk
This commit is contained in:
commit
a30c638e53
@ -43,7 +43,11 @@ def update_module(mod, path):
|
||||
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
|
||||
elif isosx:
|
||||
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
|
||||
'Resources', 'lib', 'python2.5', 'site-packages.zip')
|
||||
'Resources', 'lib',
|
||||
'python'+'.'.join(map(str, sys.version_info[:2])),
|
||||
'site-packages.zip')
|
||||
else:
|
||||
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
|
||||
if zp is not None:
|
||||
update_zipfile(zp, mod, path)
|
||||
else:
|
||||
|
@ -335,7 +335,7 @@ class PreProcessor(object):
|
||||
# Fix pdftohtml markup
|
||||
PDFTOHTML = [
|
||||
# Remove <hr> tags
|
||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
|
||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
|
||||
# Remove page numbers
|
||||
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
||||
# Remove <br> and replace <br><br> with <p>
|
||||
|
@ -267,6 +267,14 @@ class Manifest(object):
|
||||
return result
|
||||
return cmp(self.id, other.id)
|
||||
|
||||
def abshref(self, href):
|
||||
if '/' not in self.href:
|
||||
return href
|
||||
dirname = os.path.dirname(self.href)
|
||||
href = os.path.join(dirname, href)
|
||||
href = os.path.normpath(href).replace('\\', '/')
|
||||
return href
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.items = {}
|
||||
|
@ -143,17 +143,16 @@ def warn(x):
|
||||
class ReBinary(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||
|
||||
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
|
||||
self.path = path
|
||||
def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
|
||||
self.item = item
|
||||
self.logger = logger
|
||||
self.dir = os.path.dirname(path)
|
||||
self.manifest = oeb.manifest
|
||||
self.tags, self.tattrs = map
|
||||
self.buf = StringIO()
|
||||
self.anchors = []
|
||||
self.page_breaks = []
|
||||
self.is_html = is_html = map is HTML_MAP
|
||||
self.stylizer = Stylizer(root, path, oeb) if is_html else None
|
||||
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
|
||||
self.tree_to_binary(root)
|
||||
self.content = self.buf.getvalue()
|
||||
self.ahc = self.build_ahc() if is_html else None
|
||||
@ -210,6 +209,8 @@ class ReBinary(object):
|
||||
if attr in ('href', 'src'):
|
||||
value = urlnormalize(value)
|
||||
path, frag = urldefrag(value)
|
||||
if self.item:
|
||||
path = self.item.abshref(path)
|
||||
prefix = unichr(3)
|
||||
if path in self.manifest.hrefs:
|
||||
prefix = unichr(2)
|
||||
@ -222,7 +223,7 @@ class ReBinary(object):
|
||||
elif attr.startswith('ms--'):
|
||||
attr = '%' + attr[4:]
|
||||
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
|
||||
value = OEB_CSS_MIME
|
||||
value = CSS_MIME
|
||||
if attr in tattrs:
|
||||
self.write(tattrs[attr])
|
||||
else:
|
||||
@ -275,7 +276,7 @@ class ReBinary(object):
|
||||
def build_ahc(self):
|
||||
if len(self.anchors) > 6:
|
||||
self.logger.log_warn("More than six anchors in file %r. " \
|
||||
"Some links may not work properly." % self.path)
|
||||
"Some links may not work properly." % self.item.href)
|
||||
data = StringIO()
|
||||
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
||||
for anchor, offset in self.anchors:
|
||||
@ -479,7 +480,7 @@ class LitWriter(object):
|
||||
secnum = 0
|
||||
if not isinstance(data, basestring):
|
||||
self._add_folder(name)
|
||||
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
|
||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
|
||||
logger=self._logger)
|
||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||
self._add_file(name + '/aht', rebin.aht, 0)
|
||||
@ -559,7 +560,7 @@ class LitWriter(object):
|
||||
meta.attrib['ms--minimum_level'] = '0'
|
||||
meta.attrib['ms--attr5'] = '1'
|
||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
|
||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
|
||||
logger=self._logger)
|
||||
meta = rebin.content
|
||||
self._meta = meta
|
||||
|
@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
|
||||
# Remove self closing script tags as they also mess up BeautifulSoup
|
||||
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
|
||||
|
||||
# BeautifulSoup treats self closing <div> tags as open <div> tags
|
||||
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
|
||||
lambda match: '<div%s></div>'%match.group(1))
|
||||
|
||||
]
|
||||
# Fix Baen markup
|
||||
BAEN = [
|
||||
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
|
||||
# Fix pdftohtml markup
|
||||
PDFTOHTML = [
|
||||
# Remove <hr> tags
|
||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
|
||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
|
||||
# Remove page numbers
|
||||
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
||||
# Remove <br> and replace <br><br> with <p>
|
||||
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
|
||||
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
||||
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
|
||||
return ''
|
||||
text = u''
|
||||
text, alt_text = u'', u''
|
||||
for c in tag.contents:
|
||||
if limit != None and len(text) > limit:
|
||||
break
|
||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||
return u''
|
||||
continue
|
||||
if isinstance(c, NavigableString):
|
||||
text += unicode(c)
|
||||
elif isinstance(c, Tag):
|
||||
if c.name.lower() == 'img' and c.has_key('alt'):
|
||||
text += c['alt']
|
||||
return text
|
||||
alt_text += c['alt']
|
||||
continue
|
||||
text += self.get_text(c)
|
||||
return text
|
||||
return text if text.strip() else alt_text
|
||||
|
||||
def process_links(self):
|
||||
def add_toc_entry(text, target):
|
||||
|
@ -799,18 +799,39 @@ class Text(LRFStream):
|
||||
length = len(self.stream)
|
||||
style = self.style.as_dict()
|
||||
current_style = style.copy()
|
||||
text_tags = set(list(TextAttr.tag_map.keys()) + \
|
||||
list(Text.text_tags.keys()) + \
|
||||
list(ruby_tags.keys()))
|
||||
text_tags -= set([0xf500+i for i in range(10)])
|
||||
text_tags.add(0xf5cc)
|
||||
|
||||
while stream.tell() < length:
|
||||
|
||||
# Is there some text beofre a tag?
|
||||
pos = self.stream.find('\xf5', stream.tell()) - 1
|
||||
if pos > 0:
|
||||
self.add_text(self.stream[stream.tell():pos])
|
||||
stream.seek(pos)
|
||||
elif pos == -2: # No tags in this stream
|
||||
# Is there some text before a tag?
|
||||
def find_first_tag(start):
|
||||
pos = self.stream.find('\xf5', start)
|
||||
if pos == -1:
|
||||
return -1
|
||||
try:
|
||||
stream.seek(pos-1)
|
||||
_t = Tag(stream)
|
||||
if _t.id in text_tags:
|
||||
return pos-1
|
||||
return find_first_tag(pos+1)
|
||||
|
||||
|
||||
except:
|
||||
return find_first_tag(pos+1)
|
||||
|
||||
start_pos = stream.tell()
|
||||
tag_pos = find_first_tag(start_pos)
|
||||
if tag_pos >= start_pos:
|
||||
if tag_pos > start_pos:
|
||||
self.add_text(self.stream[start_pos:tag_pos])
|
||||
stream.seek(tag_pos)
|
||||
else: # No tags in this stream
|
||||
self.add_text(self.stream)
|
||||
stream.seek(0, 2)
|
||||
print repr(self.stream)
|
||||
break
|
||||
|
||||
tag = Tag(stream)
|
||||
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
|
||||
refpage = struct.unpack("<I", stream.read(4))[0]
|
||||
refobj = struct.unpack("<I", stream.read(4))[0]
|
||||
cnt = struct.unpack("<H", stream.read(2))[0]
|
||||
label = unicode(stream.read(cnt), "utf_16")
|
||||
raw = stream.read(cnt)
|
||||
label = raw.decode('utf_16_le')
|
||||
self._contents.append(TocLabel(refpage, refobj, label))
|
||||
c -= 1
|
||||
|
||||
|
@ -33,7 +33,6 @@ class EXTHHeader(object):
|
||||
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
|
||||
raw = raw[12:]
|
||||
pos = 0
|
||||
|
||||
self.mi = MetaInformation('Unknown', ['Unknown'])
|
||||
self.has_fake_cover = True
|
||||
|
||||
@ -49,9 +48,17 @@ class EXTHHeader(object):
|
||||
self.cover_offset, = struct.unpack('>L', content)
|
||||
elif id == 202:
|
||||
self.thumbnail_offset, = struct.unpack('>L', content)
|
||||
#else:
|
||||
# print 'unknown record', id, repr(content)
|
||||
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
|
||||
if title:
|
||||
self.mi.title = title.group(1).decode(codec, 'ignore')
|
||||
title = title.group(1).decode(codec, 'replace')
|
||||
if len(title) > 2:
|
||||
self.mi.title = title
|
||||
else:
|
||||
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
|
||||
if title:
|
||||
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
|
||||
|
||||
|
||||
def process_metadata(self, id, content, codec):
|
||||
@ -67,7 +74,8 @@ class EXTHHeader(object):
|
||||
if not self.mi.tags:
|
||||
self.mi.tags = []
|
||||
self.mi.tags.append(content.decode(codec, 'ignore'))
|
||||
|
||||
#else:
|
||||
# print 'unhandled metadata record', id, repr(content), codec
|
||||
|
||||
|
||||
class BookHeader(object):
|
||||
@ -466,6 +474,10 @@ def get_metadata(stream):
|
||||
cover = os.path.join(tdir, mi.cover)
|
||||
if os.access(cover, os.R_OK):
|
||||
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
|
||||
else:
|
||||
path = os.path.join(tdir, 'images', '00001.jpg')
|
||||
if os.access(path, os.R_OK):
|
||||
mi.cover_data = ('JPEG', open(path, 'rb').read())
|
||||
return mi
|
||||
|
||||
def option_parser():
|
||||
|
@ -1482,8 +1482,9 @@ in which you want to store your books files. Any existing books will be automati
|
||||
return True
|
||||
|
||||
|
||||
def shutdown(self):
|
||||
self.write_settings()
|
||||
def shutdown(self, write_settings=True):
|
||||
if write_settings:
|
||||
self.write_settings()
|
||||
self.job_manager.terminate_all_jobs()
|
||||
self.device_manager.keep_going = False
|
||||
self.cover_cache.stop()
|
||||
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati
|
||||
|
||||
|
||||
def closeEvent(self, e):
|
||||
self.write_settings()
|
||||
if self.system_tray_icon.isVisible():
|
||||
if not dynamic['systray_msg'] and not isosx:
|
||||
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
|
||||
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
|
||||
else:
|
||||
if self.confirm_quit():
|
||||
try:
|
||||
self.shutdown()
|
||||
self.shutdown(write_settings=False)
|
||||
except:
|
||||
pass
|
||||
e.accept()
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -338,7 +338,7 @@ class ZipInfo (object):
|
||||
if isinstance(self.filename, unicode):
|
||||
try:
|
||||
return self.filename.encode('ascii'), self.flag_bits
|
||||
except UnicodeEncodeError:
|
||||
except:
|
||||
return self.filename.encode('utf-8'), self.flag_bits | 0x800
|
||||
else:
|
||||
return self.filename, self.flag_bits
|
||||
|
Loading…
x
Reference in New Issue
Block a user