Sync to trunk

This commit is contained in:
John Schember 2009-01-14 07:36:51 -05:00
commit a30c638e53
29 changed files with 17823 additions and 14810 deletions

View File

@ -43,7 +43,11 @@ def update_module(mod, path):
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
elif isosx:
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
'Resources', 'lib', 'python2.5', 'site-packages.zip')
'Resources', 'lib',
'python'+'.'.join(map(str, sys.version_info[:2])),
'site-packages.zip')
else:
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
if zp is not None:
update_zipfile(zp, mod, path)
else:

View File

@ -335,7 +335,7 @@ class PreProcessor(object):
# Fix pdftohtml markup
PDFTOHTML = [
# Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p>

View File

@ -266,6 +266,14 @@ class Manifest(object):
if result != 0:
return result
return cmp(self.id, other.id)
def abshref(self, href):
if '/' not in self.href:
return href
dirname = os.path.dirname(self.href)
href = os.path.join(dirname, href)
href = os.path.normpath(href).replace('\\', '/')
return href
def __init__(self, oeb):
self.oeb = oeb

View File

@ -143,17 +143,16 @@ def warn(x):
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
self.path = path
def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
self.item = item
self.logger = logger
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest
self.tags, self.tattrs = map
self.buf = StringIO()
self.anchors = []
self.page_breaks = []
self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, path, oeb) if is_html else None
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
self.tree_to_binary(root)
self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None
@ -210,6 +209,8 @@ class ReBinary(object):
if attr in ('href', 'src'):
value = urlnormalize(value)
path, frag = urldefrag(value)
if self.item:
path = self.item.abshref(path)
prefix = unichr(3)
if path in self.manifest.hrefs:
prefix = unichr(2)
@ -222,7 +223,7 @@ class ReBinary(object):
elif attr.startswith('ms--'):
attr = '%' + attr[4:]
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME
value = CSS_MIME
if attr in tattrs:
self.write(tattrs[attr])
else:
@ -275,7 +276,7 @@ class ReBinary(object):
def build_ahc(self):
if len(self.anchors) > 6:
self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path)
"Some links may not work properly." % self.item.href)
data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors:
@ -479,7 +480,7 @@ class LitWriter(object):
secnum = 0
if not isinstance(data, basestring):
self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
logger=self._logger)
self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0)
@ -559,7 +560,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
logger=self._logger)
meta = rebin.content
self._meta = meta

View File

@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
# Remove self closing script tags as they also mess up BeautifulSoup
(re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
# BeautifulSoup treats self closing <div> tags as open <div> tags
(re.compile(r'(?i)<\s*div([^>]*)/\s*>'),
lambda match: '<div%s></div>'%match.group(1))
]
# Fix Baen markup
BAEN = [
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
# Fix pdftohtml markup
PDFTOHTML = [
# Remove <hr> tags
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
(re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
# Remove page numbers
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
# Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
if (css.has_key('display') and css['display'].lower() == 'none') or \
(css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
return ''
text = u''
text, alt_text = u'', u''
for c in tag.contents:
if limit != None and len(text) > limit:
break
if isinstance(c, HTMLConverter.IGNORED_TAGS):
return u''
continue
if isinstance(c, NavigableString):
text += unicode(c)
elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'):
text += c['alt']
return text
alt_text += c['alt']
continue
text += self.get_text(c)
return text
return text if text.strip() else alt_text
def process_links(self):
def add_toc_entry(text, target):

View File

@ -700,7 +700,7 @@ class Text(LRFStream):
def add_text(self, text):
s = unicode(text, "utf-16-le")
if s:
s = s.translate(self.text_map)
s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
def end_container(self, tag, stream):
@ -799,18 +799,39 @@ class Text(LRFStream):
length = len(self.stream)
style = self.style.as_dict()
current_style = style.copy()
text_tags = set(list(TextAttr.tag_map.keys()) + \
list(Text.text_tags.keys()) + \
list(ruby_tags.keys()))
text_tags -= set([0xf500+i for i in range(10)])
text_tags.add(0xf5cc)
while stream.tell() < length:
# Is there some text beofre a tag?
pos = self.stream.find('\xf5', stream.tell()) - 1
if pos > 0:
self.add_text(self.stream[stream.tell():pos])
stream.seek(pos)
elif pos == -2: # No tags in this stream
# Is there some text before a tag?
def find_first_tag(start):
pos = self.stream.find('\xf5', start)
if pos == -1:
return -1
try:
stream.seek(pos-1)
_t = Tag(stream)
if _t.id in text_tags:
return pos-1
return find_first_tag(pos+1)
except:
return find_first_tag(pos+1)
start_pos = stream.tell()
tag_pos = find_first_tag(start_pos)
if tag_pos >= start_pos:
if tag_pos > start_pos:
self.add_text(self.stream[start_pos:tag_pos])
stream.seek(tag_pos)
else: # No tags in this stream
self.add_text(self.stream)
stream.seek(0, 2)
print repr(self.stream)
break
tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
refpage = struct.unpack("<I", stream.read(4))[0]
refobj = struct.unpack("<I", stream.read(4))[0]
cnt = struct.unpack("<H", stream.read(2))[0]
label = unicode(stream.read(cnt), "utf_16")
raw = stream.read(cnt)
label = raw.decode('utf_16_le')
self._contents.append(TocLabel(refpage, refobj, label))
c -= 1

View File

@ -33,7 +33,6 @@ class EXTHHeader(object):
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
raw = raw[12:]
pos = 0
self.mi = MetaInformation('Unknown', ['Unknown'])
self.has_fake_cover = True
@ -49,9 +48,17 @@ class EXTHHeader(object):
self.cover_offset, = struct.unpack('>L', content)
elif id == 202:
self.thumbnail_offset, = struct.unpack('>L', content)
#else:
# print 'unknown record', id, repr(content)
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
if title:
self.mi.title = title.group(1).decode(codec, 'ignore')
title = title.group(1).decode(codec, 'replace')
if len(title) > 2:
self.mi.title = title
else:
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
if title:
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
def process_metadata(self, id, content, codec):
@ -67,7 +74,8 @@ class EXTHHeader(object):
if not self.mi.tags:
self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore'))
#else:
# print 'unhandled metadata record', id, repr(content), codec
class BookHeader(object):
@ -466,6 +474,10 @@ def get_metadata(stream):
cover = os.path.join(tdir, mi.cover)
if os.access(cover, os.R_OK):
mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
else:
path = os.path.join(tdir, 'images', '00001.jpg')
if os.access(path, os.R_OK):
mi.cover_data = ('JPEG', open(path, 'rb').read())
return mi
def option_parser():

View File

@ -1482,8 +1482,9 @@ in which you want to store your books files. Any existing books will be automati
return True
def shutdown(self):
self.write_settings()
def shutdown(self, write_settings=True):
if write_settings:
self.write_settings()
self.job_manager.terminate_all_jobs()
self.device_manager.keep_going = False
self.cover_cache.stop()
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati
def closeEvent(self, e):
self.write_settings()
if self.system_tray_icon.isVisible():
if not dynamic['systray_msg'] and not isosx:
info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
else:
if self.confirm_quit():
try:
self.shutdown()
self.shutdown(write_settings=False)
except:
pass
e.accept()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -338,7 +338,7 @@ class ZipInfo (object):
if isinstance(self.filename, unicode):
try:
return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError:
except:
return self.filename.encode('utf-8'), self.flag_bits | 0x800
else:
return self.filename, self.flag_bits