mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Fix #4371 (Conversion to FB2)
This commit is contained in:
		
						commit
						1661dbf0ce
					
				@ -32,12 +32,9 @@ TAG_MAP = {
 | 
				
			|||||||
    'p' : 'p',
 | 
					    'p' : 'p',
 | 
				
			||||||
    'li' : 'p',
 | 
					    'li' : 'p',
 | 
				
			||||||
    'div': 'p',
 | 
					    'div': 'p',
 | 
				
			||||||
 | 
					    'br' : 'p',
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TAG_FORCE_P = [
 | 
					 | 
				
			||||||
    'br',
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
TAG_SPACE = []
 | 
					TAG_SPACE = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TAG_IMAGES = [
 | 
					TAG_IMAGES = [
 | 
				
			||||||
@ -48,6 +45,10 @@ TAG_LINKS = [
 | 
				
			|||||||
    'a',
 | 
					    'a',
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BLOCK = [
 | 
				
			||||||
 | 
					    'p',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STYLES = [
 | 
					STYLES = [
 | 
				
			||||||
    ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
 | 
					    ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
 | 
				
			||||||
    ('font-style', {'italic' : 'emphasis'}),
 | 
					    ('font-style', {'italic' : 'emphasis'}),
 | 
				
			||||||
@ -240,7 +241,8 @@ class FB2MLizer(object):
 | 
				
			|||||||
        if id_name:
 | 
					        if id_name:
 | 
				
			||||||
            fb2_text.append(self.get_anchor(page, id_name))
 | 
					            fb2_text.append(self.get_anchor(page, id_name))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if tag in TAG_FORCE_P:
 | 
					        fb2_tag = TAG_MAP.get(tag, None)
 | 
				
			||||||
 | 
					        if fb2_tag == 'p':
 | 
				
			||||||
            if 'p' in tag_stack+tags:
 | 
					            if 'p' in tag_stack+tags:
 | 
				
			||||||
                # Close all up to p. Close p. Reopen all closed tags including p.
 | 
					                # Close all up to p. Close p. Reopen all closed tags including p.
 | 
				
			||||||
                all_tags = tag_stack+tags
 | 
					                all_tags = tag_stack+tags
 | 
				
			||||||
@ -257,9 +259,7 @@ class FB2MLizer(object):
 | 
				
			|||||||
            else:
 | 
					            else:
 | 
				
			||||||
                fb2_text.append('<p>')
 | 
					                fb2_text.append('<p>')
 | 
				
			||||||
                tags.append('p')
 | 
					                tags.append('p')
 | 
				
			||||||
 | 
					        elif fb2_tag and fb2_tag not in tag_stack+tags:
 | 
				
			||||||
        fb2_tag = TAG_MAP.get(tag, None)
 | 
					 | 
				
			||||||
        if fb2_tag and fb2_tag not in tag_stack+tags:
 | 
					 | 
				
			||||||
            fb2_text.append('<%s>' % fb2_tag)
 | 
					            fb2_text.append('<%s>' % fb2_tag)
 | 
				
			||||||
            tags.append(fb2_tag)
 | 
					            tags.append(fb2_tag)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -42,6 +42,7 @@ STYLES = [
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
BLOCK_TAGS = [
 | 
					BLOCK_TAGS = [
 | 
				
			||||||
    'p',
 | 
					    'p',
 | 
				
			||||||
 | 
					    'div',
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BLOCK_STYLES = [
 | 
					BLOCK_STYLES = [
 | 
				
			||||||
@ -188,7 +189,7 @@ class PMLMLizer(object):
 | 
				
			|||||||
            text = re.sub('\n{2,}', '\n', text)
 | 
					            text = re.sub('\n{2,}', '\n', text)
 | 
				
			||||||
            text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text') if re.search(r'\\[XxCm]', mo.group('text')) else '    %s' % mo.group('text'), text)
 | 
					            text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text') if re.search(r'\\[XxCm]', mo.group('text')) else '    %s' % mo.group('text'), text)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            text = re.sub('\n{4,}', '\n\n\n', text)
 | 
					            text = re.sub('\n{3,}', '\n\n', text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return text
 | 
					        return text
 | 
				
			||||||
@ -199,6 +200,7 @@ class PMLMLizer(object):
 | 
				
			|||||||
            return []
 | 
					            return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        text = []
 | 
					        text = []
 | 
				
			||||||
 | 
					        tags = []
 | 
				
			||||||
        style = stylizer.style(elem)
 | 
					        style = stylizer.style(elem)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
 | 
					        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
 | 
				
			||||||
@ -206,13 +208,14 @@ class PMLMLizer(object):
 | 
				
			|||||||
            return []
 | 
					            return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tag = barename(elem.tag)
 | 
					        tag = barename(elem.tag)
 | 
				
			||||||
        tag_count = 0
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Are we in a paragraph block?
 | 
					        # Are we in a paragraph block?
 | 
				
			||||||
        if tag in BLOCK_TAGS: # or style['display'] in BLOCK_STYLES:
 | 
					        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
 | 
				
			||||||
            if 'block' not in tag_stack:
 | 
					            if 'block' not in tag_stack+tags:
 | 
				
			||||||
                tag_count += 1
 | 
					                tags.append('block')
 | 
				
			||||||
                tag_stack.append('block')
 | 
					            else:
 | 
				
			||||||
 | 
					                # Start new block
 | 
				
			||||||
 | 
					                text.append('\n\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Process tags that need special processing and that do not have inner
 | 
					        # Process tags that need special processing and that do not have inner
 | 
				
			||||||
        # text. Usually these require an argument
 | 
					        # text. Usually these require an argument
 | 
				
			||||||
@ -245,14 +248,13 @@ class PMLMLizer(object):
 | 
				
			|||||||
        #    text.append('\\p')
 | 
					        #    text.append('\\p')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pml_tag = TAG_MAP.get(tag, None)
 | 
					        pml_tag = TAG_MAP.get(tag, None)
 | 
				
			||||||
        if pml_tag and pml_tag not in tag_stack:
 | 
					        if pml_tag and pml_tag not in tag_stack+tags:
 | 
				
			||||||
            tag_count += 1
 | 
					 | 
				
			||||||
            text.append('\\%s' % pml_tag)
 | 
					            text.append('\\%s' % pml_tag)
 | 
				
			||||||
            tag_stack.append(pml_tag)
 | 
					            tags.append(pml_tag)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Special processing of tags that require an argument.
 | 
					        # Special processing of tags that require an argument.
 | 
				
			||||||
        # Anchors links
 | 
					        # Anchors links
 | 
				
			||||||
        if tag in LINK_TAGS and 'q' not in tag_stack:
 | 
					        if tag in LINK_TAGS and 'q' not in tag_stack+tags:
 | 
				
			||||||
            href = elem.get('href')
 | 
					            href = elem.get('href')
 | 
				
			||||||
            if href:
 | 
					            if href:
 | 
				
			||||||
                href = page.abshref(href)
 | 
					                href = page.abshref(href)
 | 
				
			||||||
@ -263,8 +265,7 @@ class PMLMLizer(object):
 | 
				
			|||||||
                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
 | 
					                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
 | 
				
			||||||
                    href = self.link_hrefs[href]
 | 
					                    href = self.link_hrefs[href]
 | 
				
			||||||
                    text.append('\\q="#%s"' % href)
 | 
					                    text.append('\\q="#%s"' % href)
 | 
				
			||||||
                tag_count += 1
 | 
					                tags.append('q')
 | 
				
			||||||
                tag_stack.append('q')
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Anchor ids
 | 
					        # Anchor ids
 | 
				
			||||||
        id_name = elem.get('id')
 | 
					        id_name = elem.get('id')
 | 
				
			||||||
@ -274,10 +275,9 @@ class PMLMLizer(object):
 | 
				
			|||||||
        # Processes style information
 | 
					        # Processes style information
 | 
				
			||||||
        for s in STYLES:
 | 
					        for s in STYLES:
 | 
				
			||||||
            style_tag = s[1].get(style[s[0]], None)
 | 
					            style_tag = s[1].get(style[s[0]], None)
 | 
				
			||||||
            if style_tag and style_tag not in tag_stack:
 | 
					            if style_tag and style_tag not in tag_stack+tags:
 | 
				
			||||||
                tag_count += 1
 | 
					 | 
				
			||||||
                text.append('\\%s' % style_tag)
 | 
					                text.append('\\%s' % style_tag)
 | 
				
			||||||
                tag_stack.append(style_tag)
 | 
					                tags.append(style_tag)
 | 
				
			||||||
        # margin
 | 
					        # margin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Proccess tags that contain text.
 | 
					        # Proccess tags that contain text.
 | 
				
			||||||
@ -285,16 +285,15 @@ class PMLMLizer(object):
 | 
				
			|||||||
            text.append(self.remove_newlines(elem.text))
 | 
					            text.append(self.remove_newlines(elem.text))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for item in elem:
 | 
					        for item in elem:
 | 
				
			||||||
            text += self.dump_text(item, stylizer, page, tag_stack)
 | 
					            text += self.dump_text(item, stylizer, page, tag_stack+tags)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        tags.reverse()
 | 
				
			||||||
 | 
					        text += self.close_tags(tags)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        close_tag_list = []
 | 
					 | 
				
			||||||
        for i in range(0, tag_count):
 | 
					 | 
				
			||||||
            close_tag_list.insert(0, tag_stack.pop())
 | 
					 | 
				
			||||||
        text += self.close_tags(close_tag_list)
 | 
					 | 
				
			||||||
        if tag in SEPARATE_TAGS:
 | 
					        if tag in SEPARATE_TAGS:
 | 
				
			||||||
            text.append('\n\n')
 | 
					            text.append('\n\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if 'block' not in tag_stack:
 | 
					        if 'block' not in tag_stack+tags:
 | 
				
			||||||
            text.append('\n\n')
 | 
					            text.append('\n\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #if style['page-break-after'] == 'always':
 | 
					        #if style['page-break-after'] == 'always':
 | 
				
			||||||
 | 
				
			|||||||
@ -102,7 +102,7 @@ class TXTMLizer(object):
 | 
				
			|||||||
            text = re.sub('\n{2,}', '\n', text)
 | 
					            text = re.sub('\n{2,}', '\n', text)
 | 
				
			||||||
            text = re.sub('(?imu)^(?=.)', '\t', text)
 | 
					            text = re.sub('(?imu)^(?=.)', '\t', text)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            text = re.sub('\n{4,}', '\n\n\n', text)
 | 
					            text = re.sub('\n{3,}', '\n\n', text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Replace spaces at the beginning and end of lines
 | 
					        # Replace spaces at the beginning and end of lines
 | 
				
			||||||
        text = re.sub('(?imu)^[ ]+', '', text)
 | 
					        text = re.sub('(?imu)^[ ]+', '', text)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user