Sync to trunk

2025-07-09 03:04:10 -04:00 · 2009-01-14 07:36:51 -05:00 · 2009-01-14 07:36:51 -05:00 · a30c638e53
commit a30c638e53
parent b656e8968c ab0c2accef
29 changed files with 17823 additions and 14810 deletions
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -43,7 +43,11 @@ def update_module(mod, path):
        zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
    elif isosx:
        zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
-                            'Resources', 'lib', 'python2.5', 'site-packages.zip')
+                            'Resources', 'lib', 
+                            'python'+'.'.join(map(str, sys.version_info[:2])), 
+                            'site-packages.zip')
+    else:
+        zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
    if zp is not None:
        update_zipfile(zp, mod, path)
    else:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -335,7 +335,7 @@ class PreProcessor(object):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
--- a/src/calibre/ebooks/lit/oeb.py
+++ b/src/calibre/ebooks/lit/oeb.py
@ -266,6 +266,14 @@ class Manifest(object):
            if result != 0:
                return result
            return cmp(self.id, other.id)
+        
+        def abshref(self, href):
+            if '/' not in self.href:
+                return href
+            dirname = os.path.dirname(self.href)
+            href = os.path.join(dirname, href)
+            href = os.path.normpath(href).replace('\\', '/')
+            return href
    
    def __init__(self, oeb):
        self.oeb = oeb
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -143,17 +143,16 @@ def warn(x):
 class ReBinary(object):
    NSRMAP = {'': None, XML_NS: 'xml'}
    
-    def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
-        self.path = path
+    def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
+        self.item = item
        self.logger = logger
-        self.dir = os.path.dirname(path)
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html  = is_html = map is HTML_MAP
-        self.stylizer = Stylizer(root, path, oeb) if is_html else None
+        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
@ -210,6 +209,8 @@ class ReBinary(object):
            if attr in ('href', 'src'):
                value = urlnormalize(value)
                path, frag = urldefrag(value)
+                if self.item:
+                    path = self.item.abshref(path)
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
@ -222,7 +223,7 @@ class ReBinary(object):
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
-                value = OEB_CSS_MIME
+                value = CSS_MIME
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
@ -275,7 +276,7 @@ class ReBinary(object):
    def build_ahc(self):
        if len(self.anchors) > 6:
            self.logger.log_warn("More than six anchors in file %r. " \
-                "Some links may not work properly." % self.path)
+                "Some links may not work properly." % self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
@ -479,7 +480,7 @@ class LitWriter(object):
            secnum = 0
            if not isinstance(data, basestring):
                self._add_folder(name)
-                rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
+                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
                                 logger=self._logger)
                self._add_file(name + '/ahc', rebin.ahc, 0)
                self._add_file(name + '/aht', rebin.aht, 0)
@ -559,7 +560,7 @@ class LitWriter(object):
        meta.attrib['ms--minimum_level'] = '0'
        meta.attrib['ms--attr5'] = '1'
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
-        rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
+        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
                         logger=self._logger)
        meta = rebin.content
        self._meta = meta
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
                        # Remove self closing script tags as they also mess up BeautifulSoup
                        (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
                        
+                        # BeautifulSoup treats self closing <div> tags as open <div> tags
+                        (re.compile(r'(?i)<\s*div([^>]*)/\s*>'), 
+                         lambda match: '<div%s></div>'%match.group(1))
+                        
                        ]
    # Fix Baen markup
    BAEN = [ 
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
        if (css.has_key('display') and css['display'].lower() == 'none') or \
           (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
            return ''
-        text = u''
+        text, alt_text = u'', u''
        for c in tag.contents:
            if limit != None and len(text) > limit:
                break
            if isinstance(c, HTMLConverter.IGNORED_TAGS):
-                return u''
+                continue
            if isinstance(c, NavigableString):
                text += unicode(c)                
            elif isinstance(c, Tag):
                if c.name.lower() == 'img' and c.has_key('alt'):
-                    text += c['alt']
-                    return text
+                    alt_text += c['alt']
+                    continue
                text += self.get_text(c)
-        return text
+        return text if text.strip() else alt_text
    
    def process_links(self):
        def add_toc_entry(text, target):
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@ -700,7 +700,7 @@ class Text(LRFStream):
    def add_text(self, text):
        s = unicode(text, "utf-16-le")
        if s:
-            s = s.translate(self.text_map)            
+            s = s.translate(self.text_map)
            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
    
    def end_container(self, tag, stream):
@ -799,18 +799,39 @@ class Text(LRFStream):
        length = len(self.stream)
        style = self.style.as_dict()
        current_style = style.copy()
+        text_tags = set(list(TextAttr.tag_map.keys()) + \
+                        list(Text.text_tags.keys()) + \
+                        list(ruby_tags.keys()))
+        text_tags -= set([0xf500+i for i in range(10)])
+        text_tags.add(0xf5cc)
        
        while stream.tell() < length:
        
-            # Is there some text beofre a tag?
-            pos = self.stream.find('\xf5', stream.tell()) - 1
-            if pos > 0:
-                self.add_text(self.stream[stream.tell():pos])
-                stream.seek(pos)
-            elif pos == -2: # No tags in this stream
+            # Is there some text before a tag?
+            def find_first_tag(start):
+                pos = self.stream.find('\xf5', start)
+                if pos == -1:
+                    return -1
+                try:
+                    stream.seek(pos-1)
+                    _t = Tag(stream)
+                    if _t.id in text_tags:
+                        return pos-1
+                    return find_first_tag(pos+1)
+                    
+                    
+                except:
+                    return find_first_tag(pos+1)
+                    
+            start_pos = stream.tell()        
+            tag_pos = find_first_tag(start_pos)
+            if tag_pos >= start_pos:
+                if tag_pos > start_pos:
+                    self.add_text(self.stream[start_pos:tag_pos])
+                stream.seek(tag_pos)
+            else: # No tags in this stream
                self.add_text(self.stream)
                stream.seek(0, 2)
-                print repr(self.stream)
                break
            
            tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
            refpage = struct.unpack("<I", stream.read(4))[0]
            refobj  = struct.unpack("<I", stream.read(4))[0]
            cnt = struct.unpack("<H", stream.read(2))[0]
-            label = unicode(stream.read(cnt), "utf_16")
+            raw = stream.read(cnt)
+            label = raw.decode('utf_16_le')
            self._contents.append(TocLabel(refpage, refobj, label))
            c -= 1
            
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -33,7 +33,6 @@ class EXTHHeader(object):
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
-        
        self.mi = MetaInformation('Unknown', ['Unknown'])
        self.has_fake_cover = True
        
@ -49,9 +48,17 @@ class EXTHHeader(object):
                self.cover_offset, = struct.unpack('>L', content)
            elif id == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
+            #else:
+            #    print 'unknown record', id, repr(content)
        title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
        if title:
-            self.mi.title = title.group(1).decode(codec, 'ignore')
+            title = title.group(1).decode(codec, 'replace')
+            if len(title) > 2:
+                self.mi.title = title
+            else:
+                title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
+                if title:
+                    self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
            
                
    def process_metadata(self, id, content, codec):
@ -67,7 +74,8 @@ class EXTHHeader(object):
            if not self.mi.tags:
                self.mi.tags = []
            self.mi.tags.append(content.decode(codec, 'ignore'))
-         
+        #else:
+        #    print 'unhandled metadata record', id, repr(content), codec 
            

 class BookHeader(object):
@ -466,6 +474,10 @@ def get_metadata(stream):
            cover =  os.path.join(tdir, mi.cover)
            if os.access(cover, os.R_OK):
                mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
+        else:
+            path = os.path.join(tdir, 'images', '00001.jpg')
+            if os.access(path, os.R_OK):
+                mi.cover_data = ('JPEG', open(path, 'rb').read())
    return mi
        
 def option_parser():
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -1482,8 +1482,9 @@ in which you want to store your books files. Any existing books will be automati
        return True

    
-    def shutdown(self):
-        self.write_settings()
+    def shutdown(self, write_settings=True):
+        if write_settings:
+            self.write_settings()
        self.job_manager.terminate_all_jobs()
        self.device_manager.keep_going = False
        self.cover_cache.stop()
@ -1503,6 +1504,7 @@ in which you want to store your books files. Any existing books will be automati

    
    def closeEvent(self, e):
+        self.write_settings()
        if self.system_tray_icon.isVisible():
            if not dynamic['systray_msg'] and not isosx:
                info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1512,7 +1514,7 @@ in which you want to store your books files. Any existing books will be automati
        else:
            if self.confirm_quit():
                try:
-                    self.shutdown()
+                    self.shutdown(write_settings=False)
                except:
                    pass
                e.accept()
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@ -338,7 +338,7 @@ class ZipInfo (object):
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
-            except UnicodeEncodeError:
+            except:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits