Sync to trunk

2025-07-09 03:04:10 -04:00 · 2009-01-14 07:36:51 -05:00 · 2009-01-14 07:36:51 -05:00 · a30c638e53
commit a30c638e53
parent b656e8968c ab0c2accef
29 changed files with 17823 additions and 14810 deletions
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -43,7 +43,11 @@ def update_module(mod, path):
        zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
    elif isosx:
        zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
-                            'Resources', 'lib', 'python2.5', 'site-packages.zip')
+                            'Resources', 'lib', 
                            'python'+'.'.join(map(str, sys.version_info[:2])), 
                            'site-packages.zip')
    else:
        zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
    if zp is not None:
        update_zipfile(zp, mod, path)
    else:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -335,7 +335,7 @@ class PreProcessor(object):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
--- a/src/calibre/ebooks/lit/oeb.py
+++ b/src/calibre/ebooks/lit/oeb.py
@ -267,6 +267,14 @@ class Manifest(object):
                return result
            return cmp(self.id, other.id)
        def abshref(self, href):
            """Resolve ``href`` against the directory part of this item's href.

            If this item's own ``href`` contains no ``'/'`` the given
            ``href`` is returned untouched. Otherwise ``href`` is joined onto
            the directory of ``self.href``, normalised with
            ``os.path.normpath`` and any backslashes are converted to
            forward slashes.
            """
            base = self.href
            if '/' in base:
                joined = os.path.join(os.path.dirname(base), href)
                # normpath may emit the platform separator; force '/'.
                return os.path.normpath(joined).replace('\\', '/')
            return href
    def __init__(self, oeb):
        self.oeb = oeb
        self.items = {}
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -143,17 +143,16 @@ def warn(x):
 class ReBinary(object):
    NSRMAP = {'': None, XML_NS: 'xml'}
-    def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
+    def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
-        self.path = path
+        self.item = item
        self.logger = logger
        self.dir = os.path.dirname(path)
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html  = is_html = map is HTML_MAP
-        self.stylizer = Stylizer(root, path, oeb) if is_html else None
+        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
@ -210,6 +209,8 @@ class ReBinary(object):
            if attr in ('href', 'src'):
                value = urlnormalize(value)
                path, frag = urldefrag(value)
                if self.item:
                    path = self.item.abshref(path)
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
@ -222,7 +223,7 @@ class ReBinary(object):
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
-                value = OEB_CSS_MIME
+                value = CSS_MIME
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
@ -275,7 +276,7 @@ class ReBinary(object):
    def build_ahc(self):
        if len(self.anchors) > 6:
            self.logger.log_warn("More than six anchors in file %r. " \
-                "Some links may not work properly." % self.path)
+                "Some links may not work properly." % self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
@ -479,7 +480,7 @@ class LitWriter(object):
            secnum = 0
            if not isinstance(data, basestring):
                self._add_folder(name)
-                rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
+                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
                                 logger=self._logger)
                self._add_file(name + '/ahc', rebin.ahc, 0)
                self._add_file(name + '/aht', rebin.aht, 0)
@ -559,7 +560,7 @@ class LitWriter(object):
        meta.attrib['ms--minimum_level'] = '0'
        meta.attrib['ms--attr5'] = '1'
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
-        rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
+        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
                         logger=self._logger)
        meta = rebin.content
        self._meta = meta
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
                        # Remove self closing script tags as they also mess up BeautifulSoup
                        (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
                        # BeautifulSoup treats self closing <div> tags as open <div> tags
                        (re.compile(r'(?i)<\s*div([^>]*)/\s*>'), 
                         lambda match: '<div%s></div>'%match.group(1))
                        ]
    # Fix Baen markup
    BAEN = [ 
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
        if (css.has_key('display') and css['display'].lower() == 'none') or \
           (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
            return ''
-        text = u''
+        text, alt_text = u'', u''
        for c in tag.contents:
            if limit != None and len(text) > limit:
                break
            if isinstance(c, HTMLConverter.IGNORED_TAGS):
-                return u''
+                continue
            if isinstance(c, NavigableString):
                text += unicode(c)                
            elif isinstance(c, Tag):
                if c.name.lower() == 'img' and c.has_key('alt'):
-                    text += c['alt']
+                    alt_text += c['alt']
-                    return text
+                    continue
                text += self.get_text(c)
-        return text
+        return text if text.strip() else alt_text
    def process_links(self):
        def add_toc_entry(text, target):
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@ -799,18 +799,39 @@ class Text(LRFStream):
        length = len(self.stream)
        style = self.style.as_dict()
        current_style = style.copy()
        text_tags = set(list(TextAttr.tag_map.keys()) + \
                        list(Text.text_tags.keys()) + \
                        list(ruby_tags.keys()))
        text_tags -= set([0xf500+i for i in range(10)])
        text_tags.add(0xf5cc)
        while stream.tell() < length:
-            # Is there some text beofre a tag?
+            # Is there some text before a tag?
-            pos = self.stream.find('\xf5', stream.tell()) - 1
+            def find_first_tag(start):
-            if pos > 0:
+                pos = self.stream.find('\xf5', start)
-                self.add_text(self.stream[stream.tell():pos])
+                if pos == -1:
-                stream.seek(pos)
+                    return -1
-            elif pos == -2: # No tags in this stream
+                try:
                    stream.seek(pos-1)
                    _t = Tag(stream)
                    if _t.id in text_tags:
                        return pos-1
                    return find_first_tag(pos+1)
                except:
                    return find_first_tag(pos+1)
            start_pos = stream.tell()        
            tag_pos = find_first_tag(start_pos)
            if tag_pos >= start_pos:
                if tag_pos > start_pos:
                    self.add_text(self.stream[start_pos:tag_pos])
                stream.seek(tag_pos)
            else: # No tags in this stream
                self.add_text(self.stream)
                stream.seek(0, 2)
                print repr(self.stream)
                break
            tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
            refpage = struct.unpack("<I", stream.read(4))[0]
            refobj  = struct.unpack("<I", stream.read(4))[0]
            cnt = struct.unpack("<H", stream.read(2))[0]
-            label = unicode(stream.read(cnt), "utf_16")
+            raw = stream.read(cnt)
            label = raw.decode('utf_16_le')
            self._contents.append(TocLabel(refpage, refobj, label))
            c -= 1
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -33,7 +33,6 @@ class EXTHHeader(object):
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
        self.mi = MetaInformation('Unknown', ['Unknown'])
        self.has_fake_cover = True
@ -49,9 +48,17 @@ class EXTHHeader(object):
                self.cover_offset, = struct.unpack('>L', content)
            elif id == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
            #else:
            #    print 'unknown record', id, repr(content)
        title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
        if title:
-            self.mi.title = title.group(1).decode(codec, 'ignore')
+            title = title.group(1).decode(codec, 'replace')
            if len(title) > 2:
                self.mi.title = title
            else:
                title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
                if title:
                    self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
    def process_metadata(self, id, content, codec):
@ -67,7 +74,8 @@ class EXTHHeader(object):
            if not self.mi.tags:
                self.mi.tags = []
            self.mi.tags.append(content.decode(codec, 'ignore'))
-         
+        #else:
        #    print 'unhandled metadata record', id, repr(content), codec 
 class BookHeader(object):
@ -466,6 +474,10 @@ def get_metadata(stream):
            cover =  os.path.join(tdir, mi.cover)
            if os.access(cover, os.R_OK):
                mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
        else:
            path = os.path.join(tdir, 'images', '00001.jpg')
            if os.access(path, os.R_OK):
                mi.cover_data = ('JPEG', open(path, 'rb').read())
    return mi
 def option_parser():
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -1482,7 +1482,8 @@ in which you want to store your books files. Any existing books will be automati
        return True
-    def shutdown(self):
+    def shutdown(self, write_settings=True):
        if write_settings:
            self.write_settings()
        self.job_manager.terminate_all_jobs()
        self.device_manager.keep_going = False
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati
    def closeEvent(self, e):
        self.write_settings()
        if self.system_tray_icon.isVisible():
            if not dynamic['systray_msg'] and not isosx:
                info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
        else:
            if self.confirm_quit():
                try:
-                    self.shutdown()
+                    self.shutdown(write_settings=False)
                except:
                    pass
                e.accept()
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@ -338,7 +338,7 @@ class ZipInfo (object):
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
-            except UnicodeEncodeError:
+            except:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits