This commit is contained in:
John Schember 2011-05-01 10:24:56 -04:00
parent 05331d7f05
commit 8853f6c146
2 changed files with 24 additions and 15 deletions

View File

@ -66,6 +66,13 @@ class TXTOutput(OutputFormatPlugin):
help=_('Do not remove image references within the document. This is only ' \
'useful when paired with a txt-output-formatting option that '
'is not none because links are always removed with plain text output.')),
OptionRecommendation(name='keep_color',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not remove font color from output. This is only useful when ' \
'txt-output-formatting is set to textile. Textile is the only ' \
'formatting that supports setting font color. If this option is ' \
'not specified font color will not be set and default to the ' \
'color displayed by the reader (generally this is black).')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
@ -111,9 +118,12 @@ class TXTZOutput(TXTOutput):
from calibre.ebooks.oeb.base import OEB_IMAGES
with TemporaryDirectory('_txtz_output') as tdir:
# TXT
with TemporaryFile('index.txt') as tf:
txt_name = 'index.txt'
if opts.txt_output_formatting.lower() == 'textile':
txt_name = 'index.text'
with TemporaryFile(txt_name) as tf:
TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
shutil.copy(tf, os.path.join(tdir, 'index.txt'))
shutil.copy(tf, os.path.join(tdir, txt_name))
# Images
for item in oeb_book.manifest:

View File

@ -98,7 +98,7 @@ class TextileMLizer(OEB2HTML):
text = re.sub(u'%\n(p[<>=]{1,2}\.)', r'%\n\n\1', text)
text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
text = re.sub(r'\n(p.*\.\n)(p.*\.)', r'\n\2', text)
text = re.sub(u'\np.*\.\xa0', r'\np. ', text) # blank paragraph
text = re.sub(r'(^|\n)p\.\n', r'\1p. \n', text) # blank paragraph
text = re.sub(u'\n\xa0', r'\np. ', text) # blank paragraph
text = re.sub(r' {2,}\|', r' |', text) #sort out spaces in tables
# Now put back spaces removed earlier as they're needed here
@ -176,6 +176,11 @@ class TextileMLizer(OEB2HTML):
txt += self.check_styles(style)
return txt
def prepare_string_for_textile(self, txt):
    '''
    @txt: The text to check before it is added to the textile output.
    @return: txt wrapped as ==txt== with one padding space on each side
    when one of the characters * & _ + - = ~ @ % | or a doubled question
    mark sits directly before or after a whitespace character. Otherwise
    txt is returned unchanged.
    '''
    # NOTE(review): the == ... == wrapper looks like Textile's "notextile"
    # escape, so the symbols are presumably emitted literally - confirm.
    symbol_by_whitespace = re.search(
        r'(\s([*&_+\-=~@%|]|\?{2}))|(([*&_+\-=~@%|]|\?{2})\s)', txt)
    if symbol_by_whitespace is None:
        return txt
    return ' ==%s== ' % txt
def dump_text(self, elem, stylizer):
'''
@elem: The element in the etree that we are working on.
@ -197,7 +202,7 @@ class TextileMLizer(OEB2HTML):
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
# Ignore anything that is set to not be displayed.
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
@ -209,15 +214,9 @@ class TextileMLizer(OEB2HTML):
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div'):
if tag == 'div':
tag = 'p'
block = self.build_block(tag, style, attribs)
# Normal paragraph with no styling.
if block == '\np':
text.append('\n\n')
tags.append('\n')
else:
text.append(block)
text.append('. ')
tags.append('\n')
text.append(self.build_block(tag, style, attribs))
text.append('. ')
tags.append('\n')
#self.style_embed = []
if style['font-style'] == 'italic' or tag in ('i', 'em'):
@ -393,7 +392,7 @@ class TextileMLizer(OEB2HTML):
if hasattr(elem, 'text') and elem.text:
txt = elem.text
if not self.in_pre:
txt = self.remove_newlines(txt)
txt = self.prepare_string_for_textile(self.remove_newlines(txt))
text.append(txt)
self.id_no_text = u''
@ -439,7 +438,7 @@ class TextileMLizer(OEB2HTML):
if hasattr(elem, 'tail') and elem.tail:
tail = elem.tail
if not self.in_pre:
tail = self.remove_newlines(tail)
tail = self.prepare_string_for_textile(self.remove_newlines(tail))
text.append(tail)
return text