mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Fix #4371 (Conversion to FB2)
This commit is contained in:
commit
1661dbf0ce
@ -32,12 +32,9 @@ TAG_MAP = {
|
|||||||
'p' : 'p',
|
'p' : 'p',
|
||||||
'li' : 'p',
|
'li' : 'p',
|
||||||
'div': 'p',
|
'div': 'p',
|
||||||
|
'br' : 'p',
|
||||||
}
|
}
|
||||||
|
|
||||||
TAG_FORCE_P = [
|
|
||||||
'br',
|
|
||||||
]
|
|
||||||
|
|
||||||
TAG_SPACE = []
|
TAG_SPACE = []
|
||||||
|
|
||||||
TAG_IMAGES = [
|
TAG_IMAGES = [
|
||||||
@ -48,6 +45,10 @@ TAG_LINKS = [
|
|||||||
'a',
|
'a',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
BLOCK = [
|
||||||
|
'p',
|
||||||
|
]
|
||||||
|
|
||||||
STYLES = [
|
STYLES = [
|
||||||
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
|
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
|
||||||
('font-style', {'italic' : 'emphasis'}),
|
('font-style', {'italic' : 'emphasis'}),
|
||||||
@ -240,7 +241,8 @@ class FB2MLizer(object):
|
|||||||
if id_name:
|
if id_name:
|
||||||
fb2_text.append(self.get_anchor(page, id_name))
|
fb2_text.append(self.get_anchor(page, id_name))
|
||||||
|
|
||||||
if tag in TAG_FORCE_P:
|
fb2_tag = TAG_MAP.get(tag, None)
|
||||||
|
if fb2_tag == 'p':
|
||||||
if 'p' in tag_stack+tags:
|
if 'p' in tag_stack+tags:
|
||||||
# Close all up to p. Close p. Reopen all closed tags including p.
|
# Close all up to p. Close p. Reopen all closed tags including p.
|
||||||
all_tags = tag_stack+tags
|
all_tags = tag_stack+tags
|
||||||
@ -257,9 +259,7 @@ class FB2MLizer(object):
|
|||||||
else:
|
else:
|
||||||
fb2_text.append('<p>')
|
fb2_text.append('<p>')
|
||||||
tags.append('p')
|
tags.append('p')
|
||||||
|
elif fb2_tag and fb2_tag not in tag_stack+tags:
|
||||||
fb2_tag = TAG_MAP.get(tag, None)
|
|
||||||
if fb2_tag and fb2_tag not in tag_stack+tags:
|
|
||||||
fb2_text.append('<%s>' % fb2_tag)
|
fb2_text.append('<%s>' % fb2_tag)
|
||||||
tags.append(fb2_tag)
|
tags.append(fb2_tag)
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@ STYLES = [
|
|||||||
|
|
||||||
BLOCK_TAGS = [
|
BLOCK_TAGS = [
|
||||||
'p',
|
'p',
|
||||||
|
'div',
|
||||||
]
|
]
|
||||||
|
|
||||||
BLOCK_STYLES = [
|
BLOCK_STYLES = [
|
||||||
@ -188,7 +189,7 @@ class PMLMLizer(object):
|
|||||||
text = re.sub('\n{2,}', '\n', text)
|
text = re.sub('\n{2,}', '\n', text)
|
||||||
text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text') if re.search(r'\\[XxCm]', mo.group('text')) else ' %s' % mo.group('text'), text)
|
text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text') if re.search(r'\\[XxCm]', mo.group('text')) else ' %s' % mo.group('text'), text)
|
||||||
else:
|
else:
|
||||||
text = re.sub('\n{4,}', '\n\n\n', text)
|
text = re.sub('\n{3,}', '\n\n', text)
|
||||||
|
|
||||||
|
|
||||||
return text
|
return text
|
||||||
@ -199,6 +200,7 @@ class PMLMLizer(object):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
text = []
|
text = []
|
||||||
|
tags = []
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem)
|
||||||
|
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||||
@ -206,13 +208,14 @@ class PMLMLizer(object):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
tag = barename(elem.tag)
|
tag = barename(elem.tag)
|
||||||
tag_count = 0
|
|
||||||
|
|
||||||
# Are we in a paragraph block?
|
# Are we in a paragraph block?
|
||||||
if tag in BLOCK_TAGS: # or style['display'] in BLOCK_STYLES:
|
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack+tags:
|
||||||
tag_count += 1
|
tags.append('block')
|
||||||
tag_stack.append('block')
|
else:
|
||||||
|
# Start new block
|
||||||
|
text.append('\n\n')
|
||||||
|
|
||||||
# Process tags that need special processing and that do not have inner
|
# Process tags that need special processing and that do not have inner
|
||||||
# text. Usually these require an argument
|
# text. Usually these require an argument
|
||||||
@ -245,14 +248,13 @@ class PMLMLizer(object):
|
|||||||
# text.append('\\p')
|
# text.append('\\p')
|
||||||
|
|
||||||
pml_tag = TAG_MAP.get(tag, None)
|
pml_tag = TAG_MAP.get(tag, None)
|
||||||
if pml_tag and pml_tag not in tag_stack:
|
if pml_tag and pml_tag not in tag_stack+tags:
|
||||||
tag_count += 1
|
|
||||||
text.append('\\%s' % pml_tag)
|
text.append('\\%s' % pml_tag)
|
||||||
tag_stack.append(pml_tag)
|
tags.append(pml_tag)
|
||||||
|
|
||||||
# Special processing of tags that require an argument.
|
# Special processing of tags that require an argument.
|
||||||
# Anchors links
|
# Anchors links
|
||||||
if tag in LINK_TAGS and 'q' not in tag_stack:
|
if tag in LINK_TAGS and 'q' not in tag_stack+tags:
|
||||||
href = elem.get('href')
|
href = elem.get('href')
|
||||||
if href:
|
if href:
|
||||||
href = page.abshref(href)
|
href = page.abshref(href)
|
||||||
@ -263,8 +265,7 @@ class PMLMLizer(object):
|
|||||||
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
||||||
href = self.link_hrefs[href]
|
href = self.link_hrefs[href]
|
||||||
text.append('\\q="#%s"' % href)
|
text.append('\\q="#%s"' % href)
|
||||||
tag_count += 1
|
tags.append('q')
|
||||||
tag_stack.append('q')
|
|
||||||
|
|
||||||
# Anchor ids
|
# Anchor ids
|
||||||
id_name = elem.get('id')
|
id_name = elem.get('id')
|
||||||
@ -274,10 +275,9 @@ class PMLMLizer(object):
|
|||||||
# Processes style information
|
# Processes style information
|
||||||
for s in STYLES:
|
for s in STYLES:
|
||||||
style_tag = s[1].get(style[s[0]], None)
|
style_tag = s[1].get(style[s[0]], None)
|
||||||
if style_tag and style_tag not in tag_stack:
|
if style_tag and style_tag not in tag_stack+tags:
|
||||||
tag_count += 1
|
|
||||||
text.append('\\%s' % style_tag)
|
text.append('\\%s' % style_tag)
|
||||||
tag_stack.append(style_tag)
|
tags.append(style_tag)
|
||||||
# margin
|
# margin
|
||||||
|
|
||||||
# Process tags that contain text.
|
# Process tags that contain text.
|
||||||
@ -285,16 +285,15 @@ class PMLMLizer(object):
|
|||||||
text.append(self.remove_newlines(elem.text))
|
text.append(self.remove_newlines(elem.text))
|
||||||
|
|
||||||
for item in elem:
|
for item in elem:
|
||||||
text += self.dump_text(item, stylizer, page, tag_stack)
|
text += self.dump_text(item, stylizer, page, tag_stack+tags)
|
||||||
|
|
||||||
|
tags.reverse()
|
||||||
|
text += self.close_tags(tags)
|
||||||
|
|
||||||
close_tag_list = []
|
|
||||||
for i in range(0, tag_count):
|
|
||||||
close_tag_list.insert(0, tag_stack.pop())
|
|
||||||
text += self.close_tags(close_tag_list)
|
|
||||||
if tag in SEPARATE_TAGS:
|
if tag in SEPARATE_TAGS:
|
||||||
text.append('\n\n')
|
text.append('\n\n')
|
||||||
|
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack+tags:
|
||||||
text.append('\n\n')
|
text.append('\n\n')
|
||||||
|
|
||||||
#if style['page-break-after'] == 'always':
|
#if style['page-break-after'] == 'always':
|
||||||
|
@ -102,7 +102,7 @@ class TXTMLizer(object):
|
|||||||
text = re.sub('\n{2,}', '\n', text)
|
text = re.sub('\n{2,}', '\n', text)
|
||||||
text = re.sub('(?imu)^(?=.)', '\t', text)
|
text = re.sub('(?imu)^(?=.)', '\t', text)
|
||||||
else:
|
else:
|
||||||
text = re.sub('\n{4,}', '\n\n\n', text)
|
text = re.sub('\n{3,}', '\n\n', text)
|
||||||
|
|
||||||
# Replace spaces at the beginning and end of lines
|
# Replace spaces at the beginning and end of lines
|
||||||
text = re.sub('(?imu)^[ ]+', '', text)
|
text = re.sub('(?imu)^[ ]+', '', text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user