mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk
This commit is contained in:
commit
a30c638e53
@ -43,7 +43,11 @@ def update_module(mod, path):
|
|||||||
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
|
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
|
||||||
elif isosx:
|
elif isosx:
|
||||||
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
|
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
|
||||||
'Resources', 'lib', 'python2.5', 'site-packages.zip')
|
'Resources', 'lib',
|
||||||
|
'python'+'.'.join(map(str, sys.version_info[:2])),
|
||||||
|
'site-packages.zip')
|
||||||
|
else:
|
||||||
|
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
|
||||||
if zp is not None:
|
if zp is not None:
|
||||||
update_zipfile(zp, mod, path)
|
update_zipfile(zp, mod, path)
|
||||||
else:
|
else:
|
||||||
|
@ -335,7 +335,7 @@ class PreProcessor(object):
|
|||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
|
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
|
||||||
# Remove page numbers
|
# Remove page numbers
|
||||||
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <br> and replace <br><br> with <p>
|
# Remove <br> and replace <br><br> with <p>
|
||||||
|
@ -267,6 +267,14 @@ class Manifest(object):
|
|||||||
return result
|
return result
|
||||||
return cmp(self.id, other.id)
|
return cmp(self.id, other.id)
|
||||||
|
|
||||||
|
def abshref(self, href):
|
||||||
|
if '/' not in self.href:
|
||||||
|
return href
|
||||||
|
dirname = os.path.dirname(self.href)
|
||||||
|
href = os.path.join(dirname, href)
|
||||||
|
href = os.path.normpath(href).replace('\\', '/')
|
||||||
|
return href
|
||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.items = {}
|
self.items = {}
|
||||||
|
@ -143,17 +143,16 @@ def warn(x):
|
|||||||
class ReBinary(object):
|
class ReBinary(object):
|
||||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||||
|
|
||||||
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
|
def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
|
||||||
self.path = path
|
self.item = item
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.dir = os.path.dirname(path)
|
|
||||||
self.manifest = oeb.manifest
|
self.manifest = oeb.manifest
|
||||||
self.tags, self.tattrs = map
|
self.tags, self.tattrs = map
|
||||||
self.buf = StringIO()
|
self.buf = StringIO()
|
||||||
self.anchors = []
|
self.anchors = []
|
||||||
self.page_breaks = []
|
self.page_breaks = []
|
||||||
self.is_html = is_html = map is HTML_MAP
|
self.is_html = is_html = map is HTML_MAP
|
||||||
self.stylizer = Stylizer(root, path, oeb) if is_html else None
|
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
|
||||||
self.tree_to_binary(root)
|
self.tree_to_binary(root)
|
||||||
self.content = self.buf.getvalue()
|
self.content = self.buf.getvalue()
|
||||||
self.ahc = self.build_ahc() if is_html else None
|
self.ahc = self.build_ahc() if is_html else None
|
||||||
@ -210,6 +209,8 @@ class ReBinary(object):
|
|||||||
if attr in ('href', 'src'):
|
if attr in ('href', 'src'):
|
||||||
value = urlnormalize(value)
|
value = urlnormalize(value)
|
||||||
path, frag = urldefrag(value)
|
path, frag = urldefrag(value)
|
||||||
|
if self.item:
|
||||||
|
path = self.item.abshref(path)
|
||||||
prefix = unichr(3)
|
prefix = unichr(3)
|
||||||
if path in self.manifest.hrefs:
|
if path in self.manifest.hrefs:
|
||||||
prefix = unichr(2)
|
prefix = unichr(2)
|
||||||
@ -222,7 +223,7 @@ class ReBinary(object):
|
|||||||
elif attr.startswith('ms--'):
|
elif attr.startswith('ms--'):
|
||||||
attr = '%' + attr[4:]
|
attr = '%' + attr[4:]
|
||||||
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
|
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
|
||||||
value = OEB_CSS_MIME
|
value = CSS_MIME
|
||||||
if attr in tattrs:
|
if attr in tattrs:
|
||||||
self.write(tattrs[attr])
|
self.write(tattrs[attr])
|
||||||
else:
|
else:
|
||||||
@ -275,7 +276,7 @@ class ReBinary(object):
|
|||||||
def build_ahc(self):
|
def build_ahc(self):
|
||||||
if len(self.anchors) > 6:
|
if len(self.anchors) > 6:
|
||||||
self.logger.log_warn("More than six anchors in file %r. " \
|
self.logger.log_warn("More than six anchors in file %r. " \
|
||||||
"Some links may not work properly." % self.path)
|
"Some links may not work properly." % self.item.href)
|
||||||
data = StringIO()
|
data = StringIO()
|
||||||
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
||||||
for anchor, offset in self.anchors:
|
for anchor, offset in self.anchors:
|
||||||
@ -479,7 +480,7 @@ class LitWriter(object):
|
|||||||
secnum = 0
|
secnum = 0
|
||||||
if not isinstance(data, basestring):
|
if not isinstance(data, basestring):
|
||||||
self._add_folder(name)
|
self._add_folder(name)
|
||||||
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
|
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
|
||||||
logger=self._logger)
|
logger=self._logger)
|
||||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||||
self._add_file(name + '/aht', rebin.aht, 0)
|
self._add_file(name + '/aht', rebin.aht, 0)
|
||||||
@ -559,7 +560,7 @@ class LitWriter(object):
|
|||||||
meta.attrib['ms--minimum_level'] = '0'
|
meta.attrib['ms--minimum_level'] = '0'
|
||||||
meta.attrib['ms--attr5'] = '1'
|
meta.attrib['ms--attr5'] = '1'
|
||||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||||
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
|
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
|
||||||
logger=self._logger)
|
logger=self._logger)
|
||||||
meta = rebin.content
|
meta = rebin.content
|
||||||
self._meta = meta
|
self._meta = meta
|
||||||
|
@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
# Remove self closing script tags as they also mess up BeautifulSoup
|
# Remove self closing script tags as they also mess up BeautifulSoup
|
||||||
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
|
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
|
||||||
|
|
||||||
|
# BeautifulSoup treats self closing <div> tags as open <div> tags
|
||||||
|
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
|
||||||
|
lambda match: '<div%s></div>'%match.group(1))
|
||||||
|
|
||||||
]
|
]
|
||||||
# Fix Baen markup
|
# Fix Baen markup
|
||||||
BAEN = [
|
BAEN = [
|
||||||
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
|
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
|
||||||
# Remove page numbers
|
# Remove page numbers
|
||||||
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <br> and replace <br><br> with <p>
|
# Remove <br> and replace <br><br> with <p>
|
||||||
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
if (css.has_key('display') and css['display'].lower() == 'none') or \
|
||||||
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
|
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
|
||||||
return ''
|
return ''
|
||||||
text = u''
|
text, alt_text = u'', u''
|
||||||
for c in tag.contents:
|
for c in tag.contents:
|
||||||
if limit != None and len(text) > limit:
|
if limit != None and len(text) > limit:
|
||||||
break
|
break
|
||||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||||
return u''
|
continue
|
||||||
if isinstance(c, NavigableString):
|
if isinstance(c, NavigableString):
|
||||||
text += unicode(c)
|
text += unicode(c)
|
||||||
elif isinstance(c, Tag):
|
elif isinstance(c, Tag):
|
||||||
if c.name.lower() == 'img' and c.has_key('alt'):
|
if c.name.lower() == 'img' and c.has_key('alt'):
|
||||||
text += c['alt']
|
alt_text += c['alt']
|
||||||
return text
|
continue
|
||||||
text += self.get_text(c)
|
text += self.get_text(c)
|
||||||
return text
|
return text if text.strip() else alt_text
|
||||||
|
|
||||||
def process_links(self):
|
def process_links(self):
|
||||||
def add_toc_entry(text, target):
|
def add_toc_entry(text, target):
|
||||||
|
@ -799,18 +799,39 @@ class Text(LRFStream):
|
|||||||
length = len(self.stream)
|
length = len(self.stream)
|
||||||
style = self.style.as_dict()
|
style = self.style.as_dict()
|
||||||
current_style = style.copy()
|
current_style = style.copy()
|
||||||
|
text_tags = set(list(TextAttr.tag_map.keys()) + \
|
||||||
|
list(Text.text_tags.keys()) + \
|
||||||
|
list(ruby_tags.keys()))
|
||||||
|
text_tags -= set([0xf500+i for i in range(10)])
|
||||||
|
text_tags.add(0xf5cc)
|
||||||
|
|
||||||
while stream.tell() < length:
|
while stream.tell() < length:
|
||||||
|
|
||||||
# Is there some text beofre a tag?
|
# Is there some text before a tag?
|
||||||
pos = self.stream.find('\xf5', stream.tell()) - 1
|
def find_first_tag(start):
|
||||||
if pos > 0:
|
pos = self.stream.find('\xf5', start)
|
||||||
self.add_text(self.stream[stream.tell():pos])
|
if pos == -1:
|
||||||
stream.seek(pos)
|
return -1
|
||||||
elif pos == -2: # No tags in this stream
|
try:
|
||||||
|
stream.seek(pos-1)
|
||||||
|
_t = Tag(stream)
|
||||||
|
if _t.id in text_tags:
|
||||||
|
return pos-1
|
||||||
|
return find_first_tag(pos+1)
|
||||||
|
|
||||||
|
|
||||||
|
except:
|
||||||
|
return find_first_tag(pos+1)
|
||||||
|
|
||||||
|
start_pos = stream.tell()
|
||||||
|
tag_pos = find_first_tag(start_pos)
|
||||||
|
if tag_pos >= start_pos:
|
||||||
|
if tag_pos > start_pos:
|
||||||
|
self.add_text(self.stream[start_pos:tag_pos])
|
||||||
|
stream.seek(tag_pos)
|
||||||
|
else: # No tags in this stream
|
||||||
self.add_text(self.stream)
|
self.add_text(self.stream)
|
||||||
stream.seek(0, 2)
|
stream.seek(0, 2)
|
||||||
print repr(self.stream)
|
|
||||||
break
|
break
|
||||||
|
|
||||||
tag = Tag(stream)
|
tag = Tag(stream)
|
||||||
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
|
|||||||
refpage = struct.unpack("<I", stream.read(4))[0]
|
refpage = struct.unpack("<I", stream.read(4))[0]
|
||||||
refobj = struct.unpack("<I", stream.read(4))[0]
|
refobj = struct.unpack("<I", stream.read(4))[0]
|
||||||
cnt = struct.unpack("<H", stream.read(2))[0]
|
cnt = struct.unpack("<H", stream.read(2))[0]
|
||||||
label = unicode(stream.read(cnt), "utf_16")
|
raw = stream.read(cnt)
|
||||||
|
label = raw.decode('utf_16_le')
|
||||||
self._contents.append(TocLabel(refpage, refobj, label))
|
self._contents.append(TocLabel(refpage, refobj, label))
|
||||||
c -= 1
|
c -= 1
|
||||||
|
|
||||||
|
@ -33,7 +33,6 @@ class EXTHHeader(object):
|
|||||||
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
|
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
|
||||||
raw = raw[12:]
|
raw = raw[12:]
|
||||||
pos = 0
|
pos = 0
|
||||||
|
|
||||||
self.mi = MetaInformation('Unknown', ['Unknown'])
|
self.mi = MetaInformation('Unknown', ['Unknown'])
|
||||||
self.has_fake_cover = True
|
self.has_fake_cover = True
|
||||||
|
|
||||||
@ -49,9 +48,17 @@ class EXTHHeader(object):
|
|||||||
self.cover_offset, = struct.unpack('>L', content)
|
self.cover_offset, = struct.unpack('>L', content)
|
||||||
elif id == 202:
|
elif id == 202:
|
||||||
self.thumbnail_offset, = struct.unpack('>L', content)
|
self.thumbnail_offset, = struct.unpack('>L', content)
|
||||||
|
#else:
|
||||||
|
# print 'unknown record', id, repr(content)
|
||||||
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
|
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
|
||||||
if title:
|
if title:
|
||||||
self.mi.title = title.group(1).decode(codec, 'ignore')
|
title = title.group(1).decode(codec, 'replace')
|
||||||
|
if len(title) > 2:
|
||||||
|
self.mi.title = title
|
||||||
|
else:
|
||||||
|
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
|
||||||
|
if title:
|
||||||
|
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
|
||||||
|
|
||||||
|
|
||||||
def process_metadata(self, id, content, codec):
|
def process_metadata(self, id, content, codec):
|
||||||
@ -67,7 +74,8 @@ class EXTHHeader(object):
|
|||||||
if not self.mi.tags:
|
if not self.mi.tags:
|
||||||
self.mi.tags = []
|
self.mi.tags = []
|
||||||
self.mi.tags.append(content.decode(codec, 'ignore'))
|
self.mi.tags.append(content.decode(codec, 'ignore'))
|
||||||
|
#else:
|
||||||
|
# print 'unhandled metadata record', id, repr(content), codec
|
||||||
|
|
||||||
|
|
||||||
class BookHeader(object):
|
class BookHeader(object):
|
||||||
@ -466,6 +474,10 @@ def get_metadata(stream):
|
|||||||
cover = os.path.join(tdir, mi.cover)
|
cover = os.path.join(tdir, mi.cover)
|
||||||
if os.access(cover, os.R_OK):
|
if os.access(cover, os.R_OK):
|
||||||
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
|
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
|
||||||
|
else:
|
||||||
|
path = os.path.join(tdir, 'images', '00001.jpg')
|
||||||
|
if os.access(path, os.R_OK):
|
||||||
|
mi.cover_data = ('JPEG', open(path, 'rb').read())
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
|
@ -1482,7 +1482,8 @@ in which you want to store your books files. Any existing books will be automati
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self, write_settings=True):
|
||||||
|
if write_settings:
|
||||||
self.write_settings()
|
self.write_settings()
|
||||||
self.job_manager.terminate_all_jobs()
|
self.job_manager.terminate_all_jobs()
|
||||||
self.device_manager.keep_going = False
|
self.device_manager.keep_going = False
|
||||||
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati
|
|||||||
|
|
||||||
|
|
||||||
def closeEvent(self, e):
|
def closeEvent(self, e):
|
||||||
|
self.write_settings()
|
||||||
if self.system_tray_icon.isVisible():
|
if self.system_tray_icon.isVisible():
|
||||||
if not dynamic['systray_msg'] and not isosx:
|
if not dynamic['systray_msg'] and not isosx:
|
||||||
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
|
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
|
||||||
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
|
|||||||
else:
|
else:
|
||||||
if self.confirm_quit():
|
if self.confirm_quit():
|
||||||
try:
|
try:
|
||||||
self.shutdown()
|
self.shutdown(write_settings=False)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
e.accept()
|
e.accept()
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -338,7 +338,7 @@ class ZipInfo (object):
|
|||||||
if isinstance(self.filename, unicode):
|
if isinstance(self.filename, unicode):
|
||||||
try:
|
try:
|
||||||
return self.filename.encode('ascii'), self.flag_bits
|
return self.filename.encode('ascii'), self.flag_bits
|
||||||
except UnicodeEncodeError:
|
except:
|
||||||
return self.filename.encode('utf-8'), self.flag_bits | 0x800
|
return self.filename.encode('utf-8'), self.flag_bits | 0x800
|
||||||
else:
|
else:
|
||||||
return self.filename, self.flag_bits
|
return self.filename, self.flag_bits
|
||||||
|
Loading…
x
Reference in New Issue
Block a user