From 90362ab56ae0594651571117c0e934e108c7b877 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Tue, 31 Mar 2009 18:41:49 -0400 Subject: [PATCH] txt output now uses new conversion pipeline --- src/calibre/customize/builtins.py | 5 +- src/calibre/ebooks/conversion/plumber.py | 2 +- src/calibre/ebooks/metadata/txt.py | 2 +- src/calibre/ebooks/txt/from_any.py | 74 ------------- src/calibre/ebooks/txt/output.py | 62 +++++++++++ src/calibre/ebooks/txt/writer.py | 130 ++++------------------- 6 files changed, 90 insertions(+), 185 deletions(-) delete mode 100644 src/calibre/ebooks/txt/from_any.py create mode 100644 src/calibre/ebooks/txt/output.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 2cbf036c1f..acc7ba71ec 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -160,7 +160,7 @@ class ODTMetadataReader(MetadataReaderPlugin): from calibre.ebooks.metadata.odt import get_metadata return get_metadata(stream) -class TXTMetadataReader(MetaReaderPlugin): +class TXTMetadataReader(MetadataReaderPlugin): name = 'Read TXT metadata' file_types = set(['txt']) @@ -266,9 +266,10 @@ class MOBIMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.oeb.output import OEBOutput +from calibre.ebooks.txt.output import TXTOutput from calibre.customize.profiles import input_profiles, output_profiles -plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput] +plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 5393aaf034..da41423750 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ 
b/src/calibre/ebooks/conversion/plumber.py @@ -195,7 +195,7 @@ OptionRecommendation(name='language', self.input_fmt = input_fmt self.output_fmt = output_fmt - # Build set of all possible options. Two options are equal iff their + # Build set of all possible options. Two options are equal if their # names are the same. self.input_options = self.input_plugin.options.union( self.input_plugin.common_options) diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py index 5a5ab13ae9..6283c72256 100644 --- a/src/calibre/ebooks/metadata/txt.py +++ b/src/calibre/ebooks/metadata/txt.py @@ -22,7 +22,7 @@ def get_metadata(stream, extract_cover=True): else: mdata += line - mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*\n\n\n[ ]*(?P<author>.+)[ ]*\n$', mdata) + mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata) if mo != None: mi.title = mo.group('title') mi.authors = mo.group('author').split(',') diff --git a/src/calibre/ebooks/txt/from_any.py b/src/calibre/ebooks/txt/from_any.py deleted file mode 100644 index caf5364c3c..0000000000 --- a/src/calibre/ebooks/txt/from_any.py +++ /dev/null @@ -1,74 +0,0 @@ -''' -Convert any ebook format to TXT. -''' - -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \ - 'and Marshall T. 
Vandegrift <llasram@gmail.com>' \ - 'and John Schember <john@nachtimwald.com>' -__docformat__ = 'restructuredtext en' - -import sys, os, glob, logging - -from calibre.ebooks.epub.from_any import any2epub, formats, USAGE -from calibre.ebooks.epub import config as common_config -from calibre.ptempfile import TemporaryDirectory -from calibre.ebooks.txt.writer import oeb2txt, config as txt_config - -def config(defaults=None): - c = common_config(defaults=defaults, name='txt') - c.remove_opt('profile') - del c.option_set.groups['metadata'] - del c.option_set.groups['traversal'] - del c.option_set.groups['structure detection'] - del c.option_set.groups['toc'] - del c.option_set.groups['page layout'] - txtc = txt_config(defaults=defaults) - c.update(txtc) - return c - -def option_parser(usage=USAGE): - usage = usage % ('TXT', formats()) - parser = config().option_parser(usage=usage) - return parser - -def any2txt(opts, path, notification=None): - ext = os.path.splitext(path)[1] - if not ext: - raise ValueError('Unknown file type: '+path) - ext = ext.lower()[1:] - - if opts.output is None: - opts.output = os.path.splitext(os.path.basename(path))[0]+'.txt' - - opts.output = os.path.abspath(opts.output) - orig_output = opts.output - - with TemporaryDirectory('_any2txt') as tdir: - oebdir = os.path.join(tdir, 'oeb') - os.mkdir(oebdir) - opts.output = os.path.join(tdir, 'dummy.epub') - opts.profile = 'None' - opts.dont_split_on_page_breaks = True - orig_bfs = opts.base_font_size2 - opts.base_font_size2 = 0 - any2epub(opts, path, create_epub=False, oeb_cover=False, extract_to=oebdir) - opts.base_font_size2 = orig_bfs - opf = glob.glob(os.path.join(oebdir, '*.opf'))[0] - opts.output = orig_output - logging.getLogger('html2epub').info(_('Creating TXT file from EPUB...')) - oeb2txt(opts, opf) - -def main(args=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(args) - if len(args) < 2: - parser.print_help() - print 'No input file specified.' 
- return 1 - any2txt(opts, args[1]) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py new file mode 100644 index 0000000000..21498074ac --- /dev/null +++ b/src/calibre/ebooks/txt/output.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +__license__ = 'GPL 3' +__copyright__ = '2009, John Schember <john@nachtimwald.com>' +__docformat__ = 'restructuredtext en' + +import os + +from calibre.customize.conversion import OutputFormatPlugin, \ + OptionRecommendation +from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata +from calibre.ebooks.metadata import authors_to_string + +class TXTOutput(OutputFormatPlugin): + + name = 'TXT Output' + author = 'John Schember' + file_type = 'txt' + + options = set([ + OptionRecommendation(name='newline', recommended_value='system', + level=OptionRecommendation.LOW, long_switch='newline', + short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), + help=_('Type of newline to use. Options are %s. Default is \'system\'. ' + 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' + 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' + 'type used by this OS.' % sorted(TxtNewlines.NEWLINE_TYPES.keys()))), + OptionRecommendation(name='prepend_author', recommended_value='true', + level=OptionRecommendation.LOW, long_switch='prepend_author', + choices=['true', 'false'], + help=_('Write the author to the beginning of the file. ' + 'Default is \'true\'. Use \'false\' to disable.')), + OptionRecommendation(name='prepend_title', recommended_value='true', + choices=['true', 'false'], + level=OptionRecommendation.LOW, long_switch='prepend_title', + help=_('Write the title to the beginning of the file. ' + 'Default is \'true\'. 
Use \'false\' to disable.')) + ]) + + def convert(self, oeb_book, output_path, input_plugin, opts, log): + metadata = TxtMetadata() + if opts.prepend_author.lower() == 'true': + metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors) + if opts.prepend_title.lower() == 'true': + metadata.title = opts.title if opts.title else oeb_book.metadata.title + + writer = TxtWriter(TxtNewlines(opts.newline).newline, log) + txt = writer.dump(oeb_book.spine, metadata) + + close = False + if not hasattr(output_path, 'write'): + close = True + if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '': + os.makedirs(os.path.dirname(output_path)) + out_stream = open(output_path, 'wb') + else: + out_stream = output_path + + out_stream.seek(0) + out_stream.write(txt) + + if close: + out_stream.close() diff --git a/src/calibre/ebooks/txt/writer.py b/src/calibre/ebooks/txt/writer.py index 205d8423e3..eabc2d64ed 100644 --- a/src/calibre/ebooks/txt/writer.py +++ b/src/calibre/ebooks/txt/writer.py @@ -1,34 +1,26 @@ # -*- coding: utf-8 -*- +from __future__ import with_statement ''' Write content to TXT. 
''' -from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember <john@nachtimwald.com>' +__docformat__ = 'restructuredtext en' -import os, logging, re, sys +import os, re, sys + +from calibre.ebooks.htmlsymbols import HTML_SYMBOLS from BeautifulSoup import BeautifulSoup -from calibre import LoggingInterface -from calibre.ebooks.htmlsymbols import HTML_SYMBOLS -from calibre.ebooks.epub.iterator import SpineItem -from calibre.ebooks.metadata import authors_to_string -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata.opf2 import OPF -from calibre.customize.ui import run_plugins_on_postprocess -from calibre.utils.config import Config, StringConfig - -class TXTWriter(object): - def __init__(self, newline): +class TxtWriter(object): + def __init__(self, newline, log): self.newline = newline + self.log = log - def dump(self, oebpath, path, metadata): - opf = OPF(oebpath, os.path.dirname(oebpath)) - spine = [SpineItem(i.path) for i in opf.spine] - - tmpout = '' + def dump(self, spine, metadata): + out = u'' for item in spine: with open(item, 'r') as itemf: content = itemf.read().decode(item.encoding) @@ -39,25 +31,21 @@ class TXTWriter(object): content = self.replace_html_symbols(content) content = self.cleanup_text(content) content = self.specified_newlines(content) - tmpout = tmpout + content + out += content # Prepend metadata if metadata.author != None and metadata.author != '': - tmpout = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + tmpout + out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out if metadata.title != None and metadata.title != '': - tmpout = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + tmpout + out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out # Put two blank lines at end of file - - end = tmpout[-3 * 
len(self.newline):] + end = out[-3 * len(self.newline):] for i in range(3 - end.count(self.newline)): - tmpout = tmpout + self.newline + out += self.newline + + return out - if os.path.exists(path): - os.remove(path) - with open(path, 'w+b') as out: - out.write(tmpout.encode('utf-8')) - def strip_html(self, html): stripped = u'' @@ -149,14 +137,8 @@ class TXTWriter(object): if self.newline == '\n': return text - return text.replace('\n', self.newline) - -class TxtMetadata(object): - def __init__(self): - self.author = None - self.title = None - self.series = None - + return text.replace('\n', self.newline) + class TxtNewlines(object): NEWLINE_TYPES = { @@ -170,73 +152,7 @@ class TxtNewlines(object): self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep) -def config(defaults=None): - desc = _('Options to control the conversion to TXT') - if defaults is None: - c = Config('txt', desc) - else: - c = StringConfig(defaults, desc) - - txt = c.add_group('TXT', _('TXT options.')) - - txt('newline', ['--newline'], default='system', - help=_('Type of newline to use. Options are %s. Default is \'system\'. ' - 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' - 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' - 'type used by this OS.' % sorted(TxtNewlines.NEWLINE_TYPES.keys()))) - txt('prepend_author', ['--prepend-author'], default='true', - help=_('Write the author to the beginning of the file. ' - 'Default is \'true\'. Use \'false\' to disable.')) - txt('prepend_title', ['--prepend-title'], default='true', - help=_('Write the title to the beginning of the file. ' - 'Default is \'true\'. Use \'false\' to disable.')) - - return c - -def option_parser(): - c = config() - parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf') - parser.add_option( - '-o', '--output', default=None, - help=_('Output file. 
Default is derived from input filename.')) - parser.add_option( - '-v', '--verbose', default=0, action='count', - help=_('Useful for debugging.')) - return parser - -def oeb2txt(opts, inpath): - logger = LoggingInterface(logging.getLogger('oeb2txt')) - logger.setup_cli_handler(opts.verbose) - - outpath = opts.output - if outpath is None: - outpath = os.path.basename(inpath) - outpath = os.path.splitext(outpath)[0] + '.txt' - - mi = metadata_from_formats([inpath]) - metadata = TxtMetadata() - if opts.prepend_author.lower() == 'true': - metadata.author = opts.authors if opts.authors else authors_to_string(mi.authors) - if opts.prepend_title.lower() == 'true': - metadata.title = opts.title if opts.title else mi.title - - newline = TxtNewlines(opts.newline) - - writer = TXTWriter(newline.newline) - writer.dump(inpath, outpath, metadata) - run_plugins_on_postprocess(outpath, 'txt') - logger.log_info(_('Output written to ') + outpath) - -def main(argv=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(argv[1:]) - if len(args) != 1: - parser.print_help() - return 1 - inpath = args[0] - retval = oeb2txt(opts, inpath) - return retval - -if __name__ == '__main__': - sys.exit(main()) - +class TxtMetadata(object): + def __init__(self): + self.title = None + self.author = None