mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Output: Fix stylizer so it works. Fix handling soft scene breaks created by top margins. FB2 Output: Handle soft scene breaks created by empty paragraphs and top margins. Stylizer: Enable use of lineHeight property.
This commit is contained in:
parent
5b244ac857
commit
7ceeef2a3b
@ -71,19 +71,28 @@ class FB2MLizer(object):
|
||||
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
||||
|
||||
def clean_text(self, text):
|
||||
# Condense empty paragraphs into a line break.
|
||||
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<p><empty-line /></p>', text)
|
||||
# Remove empty paragraphs.
|
||||
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
|
||||
# Clean up paragraph endings.
|
||||
text = re.sub(r'(?miu)\s*</p>', '</p>', text)
|
||||
# Put paragraphs following a paragraph on a separate line.
|
||||
text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text)
|
||||
|
||||
# Remove empty title elements.
|
||||
text = re.sub(r'(?miu)<title>\s*</title>', '', text)
|
||||
text = re.sub(r'(?miu)\s+</title>', '</title>', text)
|
||||
|
||||
# Remove empty sections.
|
||||
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
|
||||
# Clean up sections start and ends.
|
||||
text = re.sub(r'(?miu)\s*</section>', '\n</section>', text)
|
||||
text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text)
|
||||
text = re.sub(r'(?miu)\s*<section>', '\n<section>', text)
|
||||
text = re.sub(r'(?miu)<section>\s*', '<section>\n', text)
|
||||
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
|
||||
# Put sections followed by sections on a separate line.
|
||||
text = re.sub(r'(?miu)</section>\s*<section>', '</section>\n\n<section>', text)
|
||||
|
||||
if self.opts.insert_blank_line:
|
||||
text = re.sub(r'(?miu)</p>', '</p><empty-line />', text)
|
||||
@ -338,6 +347,11 @@ class FB2MLizer(object):
|
||||
tags = []
|
||||
# First tag in tree
|
||||
tag = barename(elem_tree.tag)
|
||||
# Number of blank lines above tag
|
||||
try:
|
||||
ems = int(round((float(style.marginTop) / style.fontSize) - 1))
|
||||
except:
|
||||
ems = 0
|
||||
|
||||
# Convert TOC entries to <title>s and add <section>s
|
||||
if self.opts.sectionize == 'toc':
|
||||
@ -370,7 +384,9 @@ class FB2MLizer(object):
|
||||
fb2_out.append('<section>')
|
||||
self.section_level += 1
|
||||
|
||||
# Process the XHTML tag if it needs to be converted to an FB2 tag.
|
||||
# Process the XHTML tag and styles, converting them to FB2 tags.
|
||||
# Use individual if statements, not if/else. There can be
|
||||
# only one XHTML tag but it can have multiple styles.
|
||||
if tag == 'img':
|
||||
if elem_tree.attrib.get('src', None):
|
||||
# Only write the image tag if it is in the manifest.
|
||||
@ -381,7 +397,11 @@ class FB2MLizer(object):
|
||||
fb2_out += p_txt
|
||||
tags += p_tag
|
||||
fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
|
||||
elif tag == 'br':
|
||||
if tag in ('br', 'hr') or ems:
|
||||
if not ems:
|
||||
multiplier = 1
|
||||
else:
|
||||
multiplier = ems
|
||||
if self.in_p:
|
||||
closed_tags = []
|
||||
open_tags = tag_stack+tags
|
||||
@ -391,52 +411,38 @@ class FB2MLizer(object):
|
||||
closed_tags.append(t)
|
||||
if t == 'p':
|
||||
break
|
||||
fb2_out.append('<empty-line />')
|
||||
fb2_out.append('<empty-line />' * multiplier)
|
||||
closed_tags.reverse()
|
||||
for t in closed_tags:
|
||||
fb2_out.append('<%s>' % t)
|
||||
else:
|
||||
fb2_out.append('<empty-line />')
|
||||
elif tag in ('div', 'li', 'p'):
|
||||
fb2_out.append('<empty-line />' * multiplier)
|
||||
if tag in ('div', 'li', 'p'):
|
||||
p_text, added_p = self.close_open_p(tag_stack+tags)
|
||||
fb2_out += p_text
|
||||
if added_p:
|
||||
tags.append('p')
|
||||
elif tag == 'b':
|
||||
if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
|
||||
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif tag == 'i':
|
||||
if tag == 'i' or style['font-style'] == 'italic':
|
||||
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif tag in ('del', 'strike'):
|
||||
if tag in ('del', 'strike') or style['text-decoration'] == 'line-through':
|
||||
s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif tag == 'sub':
|
||||
if tag == 'sub':
|
||||
s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif tag == 'sup':
|
||||
if tag == 'sup':
|
||||
s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
|
||||
# Processes style information.
|
||||
if style['font-style'] == 'italic':
|
||||
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif style['font-weight'] in ('bold', 'bolder'):
|
||||
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
elif style['text-decoration'] == 'line-through':
|
||||
s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags)
|
||||
fb2_out += s_out
|
||||
tags += s_tags
|
||||
|
||||
# Process element text.
|
||||
if hasattr(elem_tree, 'text') and elem_tree.text:
|
||||
if not self.in_p:
|
||||
|
@ -633,7 +633,7 @@ class Style(object):
|
||||
def lineHeight(self):
|
||||
if self._lineHeight is None:
|
||||
result = None
|
||||
parent = self._getparent()
|
||||
#parent = self._getparent()
|
||||
if 'line-height' in self._style:
|
||||
lineh = self._style['line-height']
|
||||
if lineh == 'normal':
|
||||
@ -642,9 +642,9 @@ class Style(object):
|
||||
result = float(lineh) * self.fontSize
|
||||
except ValueError:
|
||||
result = self._unit_convert(lineh, base=self.fontSize)
|
||||
elif parent is not None:
|
||||
# TODO: proper inheritance
|
||||
result = parent.lineHeight
|
||||
#elif parent is not None:
|
||||
# # TODO: proper inheritance
|
||||
# result = parent.lineHeight
|
||||
else:
|
||||
result = 1.2 * self.fontSize
|
||||
self._lineHeight = result
|
||||
|
@ -67,10 +67,11 @@ class TXTMLizer(object):
|
||||
output.append(self.get_toc())
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to TXT...' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
||||
content = unicode(etree.tostring(item.data, encoding=unicode))
|
||||
content = self.remove_newlines(content)
|
||||
output += self.dump_text(etree.fromstring(content), stylizer, item)
|
||||
content = etree.fromstring(content)
|
||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
output += self.dump_text(content.find(XHTML('body')), stylizer, item)
|
||||
output += '\n\n\n\n\n\n'
|
||||
output = u''.join(output)
|
||||
output = u'\n'.join(l.rstrip() for l in output.splitlines())
|
||||
@ -219,11 +220,16 @@ class TXTMLizer(object):
|
||||
if tag in SPACE_TAGS:
|
||||
text.append(u' ')
|
||||
|
||||
# Scene breaks.
|
||||
# Hard scene breaks.
|
||||
if tag == 'hr':
|
||||
text.append('\n\n* * *\n\n')
|
||||
elif style['margin-top']:
|
||||
text.append('\n\n' + '\n' * round(style['margin-top']))
|
||||
# Soft scene breaks.
|
||||
try:
|
||||
ems = int(round((float(style.marginTop) / style.fontSize) - 1))
|
||||
if ems:
|
||||
text.append('\n' * ems)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Process tags that contain text.
|
||||
if hasattr(elem, 'text') and elem.text:
|
||||
|
Loading…
x
Reference in New Issue
Block a user