Fix #4371 (Conversion to FB2)

2025-08-11 09:13:57 -04:00 · 2010-01-04 01:22:29 -07:00 · 2010-01-04 01:22:29 -07:00 · 1661dbf0ce
commit 1661dbf0ce
parent b25dd30888 9d2998709d
3 changed files with 29 additions and 30 deletions
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -32,12 +32,9 @@ TAG_MAP = {
    'p' : 'p',
    'li' : 'p',
    'div': 'p',
+    'br' : 'p',
 }

-TAG_FORCE_P = [
-    'br',
-]
-
 TAG_SPACE = []

 TAG_IMAGES = [
@ -48,6 +45,10 @@ TAG_LINKS = [
    'a',
 ]

+BLOCK = [
+    'p',
+]
+
 STYLES = [
    ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
    ('font-style', {'italic' : 'emphasis'}),
@ -240,7 +241,8 @@ class FB2MLizer(object):
        if id_name:
            fb2_text.append(self.get_anchor(page, id_name))

-        if tag in TAG_FORCE_P:
+        fb2_tag = TAG_MAP.get(tag, None)
+        if fb2_tag == 'p':
            if 'p' in tag_stack+tags:
                # Close all up to p. Close p. Reopen all closed tags including p.
                all_tags = tag_stack+tags
@ -257,9 +259,7 @@ class FB2MLizer(object):
            else:
                fb2_text.append('<p>')
                tags.append('p')
-
-        fb2_tag = TAG_MAP.get(tag, None)
-        if fb2_tag and fb2_tag not in tag_stack+tags:
+        elif fb2_tag and fb2_tag not in tag_stack+tags:
            fb2_text.append('<%s>' % fb2_tag)
            tags.append(fb2_tag)

--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -42,6 +42,7 @@ STYLES = [

 BLOCK_TAGS = [
    'p',
+    'div',
 ]

 BLOCK_STYLES = [
@ -188,7 +189,7 @@ class PMLMLizer(object):
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text') if re.search(r'\\[XxCm]', mo.group('text')) else '    %s' % mo.group('text'), text)
        else:
-            text = re.sub('\n{4,}', '\n\n\n', text)
+            text = re.sub('\n{3,}', '\n\n', text)


        return text
@ -199,6 +200,7 @@ class PMLMLizer(object):
            return []

        text = []
+        tags = []
        style = stylizer.style(elem)

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
@ -206,13 +208,14 @@ class PMLMLizer(object):
            return []

        tag = barename(elem.tag)
-        tag_count = 0

        # Are we in a paragraph block?
-        if tag in BLOCK_TAGS: # or style['display'] in BLOCK_STYLES:
-            if 'block' not in tag_stack:
-                tag_count += 1
-                tag_stack.append('block')
+        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if 'block' not in tag_stack+tags:
+                tags.append('block')
+            else:
+                # Start new block
+                text.append('\n\n')

        # Process tags that need special processing and that do not have inner
        # text. Usually these require an argument
@ -245,14 +248,13 @@ class PMLMLizer(object):
        #    text.append('\\p')

        pml_tag = TAG_MAP.get(tag, None)
-        if pml_tag and pml_tag not in tag_stack:
-            tag_count += 1
+        if pml_tag and pml_tag not in tag_stack+tags:
            text.append('\\%s' % pml_tag)
-            tag_stack.append(pml_tag)
+            tags.append(pml_tag)

        # Special processing of tags that require an argument.
        # Anchors links
-        if tag in LINK_TAGS and 'q' not in tag_stack:
+        if tag in LINK_TAGS and 'q' not in tag_stack+tags:
            href = elem.get('href')
            if href:
                href = page.abshref(href)
@ -263,8 +265,7 @@ class PMLMLizer(object):
                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                    href = self.link_hrefs[href]
                    text.append('\\q="#%s"' % href)
-                tag_count += 1
-                tag_stack.append('q')
+                tags.append('q')

        # Anchor ids
        id_name = elem.get('id')
@ -274,10 +275,9 @@ class PMLMLizer(object):
        # Processes style information
        for s in STYLES:
            style_tag = s[1].get(style[s[0]], None)
-            if style_tag and style_tag not in tag_stack:
-                tag_count += 1
+            if style_tag and style_tag not in tag_stack+tags:
                text.append('\\%s' % style_tag)
-                tag_stack.append(style_tag)
+                tags.append(style_tag)
        # margin

        # Proccess tags that contain text.
@ -285,16 +285,15 @@ class PMLMLizer(object):
            text.append(self.remove_newlines(elem.text))

        for item in elem:
-            text += self.dump_text(item, stylizer, page, tag_stack)
+            text += self.dump_text(item, stylizer, page, tag_stack+tags)
+
+        tags.reverse()
+        text += self.close_tags(tags)

-        close_tag_list = []
-        for i in range(0, tag_count):
-            close_tag_list.insert(0, tag_stack.pop())
-        text += self.close_tags(close_tag_list)
        if tag in SEPARATE_TAGS:
            text.append('\n\n')

-        if 'block' not in tag_stack:
+        if 'block' not in tag_stack+tags:
            text.append('\n\n')

        #if style['page-break-after'] == 'always':
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -102,7 +102,7 @@ class TXTMLizer(object):
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
-            text = re.sub('\n{4,}', '\n\n\n', text)
+            text = re.sub('\n{3,}', '\n\n', text)

        # Replace spaces at the beginning and end of lines
        text = re.sub('(?imu)^[ ]+', '', text)