Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-09-11 19:19:47 +01:00 · 2010-09-11 19:19:47 +01:00 · 64f881f3a6
commit 64f881f3a6
parent a58aa5f0e5 1e77e6538f
4 changed files with 17 additions and 7 deletions
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -166,6 +166,17 @@ class HTMLPreProcessor(object):
                  (re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
                  (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
                  (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
                  # ` with letter before
                  (re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
                  (re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
                  (re.compile(u'e\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'è'),
                  (re.compile(u'E\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'È'),
                  (re.compile(u'i\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ì'),
                  (re.compile(u'I\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ì'),
                  (re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'),
                  (re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'),
                  (re.compile(u'u\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ù'),
                  (re.compile(u'U\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ù'),
                  # ´
                  (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -207,6 +207,7 @@ class PML_HTMLizer(object):
        while html != old:
            old = html
            html = self.cleanup_html_remove_redundant(html)
        html = re.sub(r'(?imu)^\s*', '', html)
        return html
    def cleanup_html_remove_redundant(self, html):
@@ -216,7 +217,7 @@ class PML_HTMLizer(object):
                html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
            else:
                html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
-        html = re.sub(r'<p>\s*</p>', '', html)
+        html = re.sub(r'(?imu)<p>\s*</p>', '', html)
        return html
    def start_line(self):
@@ -556,7 +557,7 @@ class PML_HTMLizer(object):
                            text = t
                        else:
                            self.toc.add_item(os.path.basename(self.file_name), id, value)
-                            text = '<span id="%s"></span>%s' % (id, t)
+                            text = '%s<span id="%s"></span>' % (t, id)
                    elif c == 'm':
                        empty = False
                        src = self.code_value(line)
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt):
 def preserve_spaces(txt):
    txt = txt.replace(' ', '&nbsp;')
-    txt = txt.replace('\t', '&#09;')
+    txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return txt
 def opf_writer(path, opf_name, manifest, spine, mi):
--- a/src/calibre/utils/filenames.py
+++ b/src/calibre/utils/filenames.py
@@ -54,10 +54,8 @@ def shorten_components_to(length, components):
            r = x[0] if x is components[-1] else ''
        else:
            if x is components[-1]:
-                b, _, e = x.rpartition('.')
+                b, e = os.path.splitext(x)
-                if not b and e:
+                if e == '.': e = ''
                    b = e
                    e = ''
                r = b[:-delta]+e
                if r.startswith('.'): r = x[0]+r
            else: