Sync to trunk

This commit is contained in:
John Schember 2009-01-14 07:36:51 -05:00
commit a30c638e53
29 changed files with 17823 additions and 14810 deletions

View File

@ -43,7 +43,11 @@ def update_module(mod, path):
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip') zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
elif isosx: elif isosx:
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
'Resources', 'lib', 'python2.5', 'site-packages.zip') 'Resources', 'lib',
'python'+'.'.join(map(str, sys.version_info[:2])),
'site-packages.zip')
else:
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
if zp is not None: if zp is not None:
update_zipfile(zp, mod, path) update_zipfile(zp, mod, path)
else: else:

View File

@ -335,7 +335,7 @@ class PreProcessor(object):
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [
# Remove <hr> tags # Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'), (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers # Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''), (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p> # Remove <br> and replace <br><br> with <p>

View File

@ -267,6 +267,14 @@ class Manifest(object):
return result return result
return cmp(self.id, other.id) return cmp(self.id, other.id)
def abshref(self, href):
    """Resolve ``href``, written relative to this item, against the root.

    ``self.href`` is this item's own path, using ``/`` as the separator.
    A reference found inside the item is relative to the item's directory,
    so that directory is prepended and the result is normalised (``.`` and
    ``..`` segments collapsed, separators forced back to ``/``).

    The reference is returned unchanged when:

    * this item has no directory component (nothing to prepend);
    * ``href`` is empty, e.g. what is left of a bare ``#fragment`` link
      once the fragment has been split off -- joining it would otherwise
      produce the directory name;
    * ``href`` carries a URL scheme (``http:``, ``mailto:`` ...) -- joining
      and normalising would mangle it (``content/http:/example.com``).
    """
    if not href or '/' not in self.href:
        return href
    # A colon in the first path segment can only be a scheme delimiter: a
    # relative-path reference is not allowed to contain one there.
    if ':' in href.split('/', 1)[0]:
        return href
    dirname = os.path.dirname(self.href)
    joined = os.path.join(dirname, href)
    # normpath may emit the platform separator; hrefs always use '/'.
    return os.path.normpath(joined).replace('\\', '/')
def __init__(self, oeb): def __init__(self, oeb):
self.oeb = oeb self.oeb = oeb
self.items = {} self.items = {}

View File

@ -143,17 +143,16 @@ def warn(x):
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()): def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
self.path = path self.item = item
self.logger = logger self.logger = logger
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest self.manifest = oeb.manifest
self.tags, self.tattrs = map self.tags, self.tattrs = map
self.buf = StringIO() self.buf = StringIO()
self.anchors = [] self.anchors = []
self.page_breaks = [] self.page_breaks = []
self.is_html = is_html = map is HTML_MAP self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, path, oeb) if is_html else None self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
self.tree_to_binary(root) self.tree_to_binary(root)
self.content = self.buf.getvalue() self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None self.ahc = self.build_ahc() if is_html else None
@ -210,6 +209,8 @@ class ReBinary(object):
if attr in ('href', 'src'): if attr in ('href', 'src'):
value = urlnormalize(value) value = urlnormalize(value)
path, frag = urldefrag(value) path, frag = urldefrag(value)
if self.item:
path = self.item.abshref(path)
prefix = unichr(3) prefix = unichr(3)
if path in self.manifest.hrefs: if path in self.manifest.hrefs:
prefix = unichr(2) prefix = unichr(2)
@ -222,7 +223,7 @@ class ReBinary(object):
elif attr.startswith('ms--'): elif attr.startswith('ms--'):
attr = '%' + attr[4:] attr = '%' + attr[4:]
elif tag == 'link' and attr == 'type' and value in OEB_STYLES: elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME value = CSS_MIME
if attr in tattrs: if attr in tattrs:
self.write(tattrs[attr]) self.write(tattrs[attr])
else: else:
@ -275,7 +276,7 @@ class ReBinary(object):
def build_ahc(self): def build_ahc(self):
if len(self.anchors) > 6: if len(self.anchors) > 6:
self.logger.log_warn("More than six anchors in file %r. " \ self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path) "Some links may not work properly." % self.item.href)
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(unichr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors: for anchor, offset in self.anchors:
@ -479,7 +480,7 @@ class LitWriter(object):
secnum = 0 secnum = 0
if not isinstance(data, basestring): if not isinstance(data, basestring):
self._add_folder(name) self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP, rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
logger=self._logger) logger=self._logger)
self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0) self._add_file(name + '/aht', rebin.aht, 0)
@ -559,7 +560,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1' meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
logger=self._logger) logger=self._logger)
meta = rebin.content meta = rebin.content
self._meta = meta self._meta = meta

View File

@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
# Remove self closing script tags as they also mess up BeautifulSoup # Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''), (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
# BeautifulSoup treats self closing <div> tags as open <div> tags
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
lambda match: '<div%s></div>'%match.group(1))
] ]
# Fix Baen markup # Fix Baen markup
BAEN = [ BAEN = [
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [
# Remove <hr> tags # Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'), (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers # Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''), (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p> # Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
if (css.has_key('display') and css['display'].lower() == 'none') or \ if (css.has_key('display') and css['display'].lower() == 'none') or \
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'): (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
return '' return ''
text = u'' text, alt_text = u'', u''
for c in tag.contents: for c in tag.contents:
if limit != None and len(text) > limit: if limit != None and len(text) > limit:
break break
if isinstance(c, HTMLConverter.IGNORED_TAGS): if isinstance(c, HTMLConverter.IGNORED_TAGS):
return u'' continue
if isinstance(c, NavigableString): if isinstance(c, NavigableString):
text += unicode(c) text += unicode(c)
elif isinstance(c, Tag): elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'): if c.name.lower() == 'img' and c.has_key('alt'):
text += c['alt'] alt_text += c['alt']
return text continue
text += self.get_text(c) text += self.get_text(c)
return text return text if text.strip() else alt_text
def process_links(self): def process_links(self):
def add_toc_entry(text, target): def add_toc_entry(text, target):

View File

@ -799,18 +799,39 @@ class Text(LRFStream):
length = len(self.stream) length = len(self.stream)
style = self.style.as_dict() style = self.style.as_dict()
current_style = style.copy() current_style = style.copy()
text_tags = set(list(TextAttr.tag_map.keys()) + \
list(Text.text_tags.keys()) + \
list(ruby_tags.keys()))
text_tags -= set([0xf500+i for i in range(10)])
text_tags.add(0xf5cc)
while stream.tell() < length: while stream.tell() < length:
# Is there some text beofre a tag? # Is there some text before a tag?
pos = self.stream.find('\xf5', stream.tell()) - 1 def find_first_tag(start):
if pos > 0: pos = self.stream.find('\xf5', start)
self.add_text(self.stream[stream.tell():pos]) if pos == -1:
stream.seek(pos) return -1
elif pos == -2: # No tags in this stream try:
stream.seek(pos-1)
_t = Tag(stream)
if _t.id in text_tags:
return pos-1
return find_first_tag(pos+1)
except:
return find_first_tag(pos+1)
start_pos = stream.tell()
tag_pos = find_first_tag(start_pos)
if tag_pos >= start_pos:
if tag_pos > start_pos:
self.add_text(self.stream[start_pos:tag_pos])
stream.seek(tag_pos)
else: # No tags in this stream
self.add_text(self.stream) self.add_text(self.stream)
stream.seek(0, 2) stream.seek(0, 2)
print repr(self.stream)
break break
tag = Tag(stream) tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
refpage = struct.unpack("<I", stream.read(4))[0] refpage = struct.unpack("<I", stream.read(4))[0]
refobj = struct.unpack("<I", stream.read(4))[0] refobj = struct.unpack("<I", stream.read(4))[0]
cnt = struct.unpack("<H", stream.read(2))[0] cnt = struct.unpack("<H", stream.read(2))[0]
label = unicode(stream.read(cnt), "utf_16") raw = stream.read(cnt)
label = raw.decode('utf_16_le')
self._contents.append(TocLabel(refpage, refobj, label)) self._contents.append(TocLabel(refpage, refobj, label))
c -= 1 c -= 1

View File

@ -33,7 +33,6 @@ class EXTHHeader(object):
self.length, self.num_items = struct.unpack('>LL', raw[4:12]) self.length, self.num_items = struct.unpack('>LL', raw[4:12])
raw = raw[12:] raw = raw[12:]
pos = 0 pos = 0
self.mi = MetaInformation('Unknown', ['Unknown']) self.mi = MetaInformation('Unknown', ['Unknown'])
self.has_fake_cover = True self.has_fake_cover = True
@ -49,9 +48,17 @@ class EXTHHeader(object):
self.cover_offset, = struct.unpack('>L', content) self.cover_offset, = struct.unpack('>L', content)
elif id == 202: elif id == 202:
self.thumbnail_offset, = struct.unpack('>L', content) self.thumbnail_offset, = struct.unpack('>L', content)
#else:
# print 'unknown record', id, repr(content)
title = re.search(r'\0+([^\0]+)\0+', raw[pos:]) title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
if title: if title:
self.mi.title = title.group(1).decode(codec, 'ignore') title = title.group(1).decode(codec, 'replace')
if len(title) > 2:
self.mi.title = title
else:
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
if title:
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
def process_metadata(self, id, content, codec): def process_metadata(self, id, content, codec):
@ -67,7 +74,8 @@ class EXTHHeader(object):
if not self.mi.tags: if not self.mi.tags:
self.mi.tags = [] self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore')) self.mi.tags.append(content.decode(codec, 'ignore'))
#else:
# print 'unhandled metadata record', id, repr(content), codec
class BookHeader(object): class BookHeader(object):
@ -466,6 +474,10 @@ def get_metadata(stream):
cover = os.path.join(tdir, mi.cover) cover = os.path.join(tdir, mi.cover)
if os.access(cover, os.R_OK): if os.access(cover, os.R_OK):
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read()) mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
else:
path = os.path.join(tdir, 'images', '00001.jpg')
if os.access(path, os.R_OK):
mi.cover_data = ('JPEG', open(path, 'rb').read())
return mi return mi
def option_parser(): def option_parser():

View File

@ -1482,8 +1482,9 @@ in which you want to store your books files. Any existing books will be automati
return True return True
def shutdown(self): def shutdown(self, write_settings=True):
self.write_settings() if write_settings:
self.write_settings()
self.job_manager.terminate_all_jobs() self.job_manager.terminate_all_jobs()
self.device_manager.keep_going = False self.device_manager.keep_going = False
self.cover_cache.stop() self.cover_cache.stop()
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati
def closeEvent(self, e): def closeEvent(self, e):
self.write_settings()
if self.system_tray_icon.isVisible(): if self.system_tray_icon.isVisible():
if not dynamic['systray_msg'] and not isosx: if not dynamic['systray_msg'] and not isosx:
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_() info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
else: else:
if self.confirm_quit(): if self.confirm_quit():
try: try:
self.shutdown() self.shutdown(write_settings=False)
except: except:
pass pass
e.accept() e.accept()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -338,7 +338,7 @@ class ZipInfo (object):
if isinstance(self.filename, unicode): if isinstance(self.filename, unicode):
try: try:
return self.filename.encode('ascii'), self.flag_bits return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError: except:
return self.filename.encode('utf-8'), self.flag_bits | 0x800 return self.filename.encode('utf-8'), self.flag_bits | 0x800
else: else:
return self.filename, self.flag_bits return self.filename, self.flag_bits