From 90362ab56ae0594651571117c0e934e108c7b877 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 31 Mar 2009 18:41:49 -0400
Subject: [PATCH] txt output now uses new conversion pipeline

---
 src/calibre/customize/builtins.py        |   5 +-
 src/calibre/ebooks/conversion/plumber.py |   2 +-
 src/calibre/ebooks/metadata/txt.py       |   2 +-
 src/calibre/ebooks/txt/from_any.py       |  74 -------------
 src/calibre/ebooks/txt/output.py         |  62 +++++++++++
 src/calibre/ebooks/txt/writer.py         | 130 ++++-------------------
 6 files changed, 90 insertions(+), 185 deletions(-)
 delete mode 100644 src/calibre/ebooks/txt/from_any.py
 create mode 100644 src/calibre/ebooks/txt/output.py
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 2cbf036c1f..acc7ba71ec 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -160,7 +160,7 @@ class ODTMetadataReader(MetadataReaderPlugin):
         from calibre.ebooks.metadata.odt import get_metadata
         return get_metadata(stream)
         
-class TXTMetadataReader(MetaReaderPlugin):
+class TXTMetadataReader(MetadataReaderPlugin):
     
     name        = 'Read TXT metadata'
     file_types  = set(['txt'])
@@ -266,9 +266,10 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.oeb.output import OEBOutput
+from calibre.ebooks.txt.output import TXTOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 
-plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
+plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 5393aaf034..da41423750 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -195,7 +195,7 @@ OptionRecommendation(name='language',
         self.input_fmt = input_fmt
         self.output_fmt = output_fmt
 
-        # Build set of all possible options. Two options are equal iff their
+        # Build set of all possible options. Two options are equal if their
         # names are the same.
         self.input_options  = self.input_plugin.options.union(
                                     self.input_plugin.common_options)
diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py
index 5a5ab13ae9..6283c72256 100644
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@@ -22,7 +22,7 @@ def get_metadata(stream, extract_cover=True):
         else:
             mdata += line
     
-    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*\n\n\n[ ]*(?P<author>.+)[ ]*\n$', mdata)
+    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
     if mo != None:
         mi.title = mo.group('title')
         mi.authors = mo.group('author').split(',')
diff --git a/src/calibre/ebooks/txt/from_any.py b/src/calibre/ebooks/txt/from_any.py
deleted file mode 100644
index caf5364c3c..0000000000
--- a/src/calibre/ebooks/txt/from_any.py
+++ /dev/null
@@ -1,74 +0,0 @@
-'''
-Convert any ebook format to TXT.
-'''
-
-from __future__ import with_statement
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
-    'and Marshall T. Vandegrift <llasram@gmail.com>' \
-    'and John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os, glob, logging
-
-from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
-from calibre.ebooks.epub import config as common_config
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.txt.writer import oeb2txt, config as txt_config
-
-def config(defaults=None):
-    c = common_config(defaults=defaults, name='txt')
-    c.remove_opt('profile')
-    del c.option_set.groups['metadata']
-    del c.option_set.groups['traversal']
-    del c.option_set.groups['structure detection']
-    del c.option_set.groups['toc']
-    del c.option_set.groups['page layout']
-    txtc = txt_config(defaults=defaults)
-    c.update(txtc)
-    return c 
-
-def option_parser(usage=USAGE):
-    usage = usage % ('TXT', formats())
-    parser = config().option_parser(usage=usage)
-    return parser
-
-def any2txt(opts, path, notification=None):
-    ext = os.path.splitext(path)[1]
-    if not ext:
-        raise ValueError('Unknown file type: '+path)
-    ext = ext.lower()[1:]
-    
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(path))[0]+'.txt'
-    
-    opts.output = os.path.abspath(opts.output)
-    orig_output = opts.output
-    
-    with TemporaryDirectory('_any2txt') as tdir:
-        oebdir = os.path.join(tdir, 'oeb')
-        os.mkdir(oebdir)
-        opts.output = os.path.join(tdir, 'dummy.epub')
-        opts.profile = 'None'
-        opts.dont_split_on_page_breaks = True
-        orig_bfs = opts.base_font_size2
-        opts.base_font_size2 = 0
-        any2epub(opts, path, create_epub=False, oeb_cover=False, extract_to=oebdir)
-        opts.base_font_size2 = orig_bfs
-        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
-        opts.output = orig_output
-        logging.getLogger('html2epub').info(_('Creating TXT file from EPUB...'))
-        oeb2txt(opts, opf)
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print 'No input file specified.'
-        return 1
-    any2txt(opts, args[1])
-    
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
new file mode 100644
index 0000000000..21498074ac
--- /dev/null
+++ b/src/calibre/ebooks/txt/output.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+    OptionRecommendation
+from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
+from calibre.ebooks.metadata import authors_to_string
+
+class TXTOutput(OutputFormatPlugin):
+    # Output plugin that renders the book's spine as plain text (see convert()).
+    name = 'TXT Output'
+    author = 'John Schember'
+    file_type = 'txt'
+    # The true/false options are strings, not booleans; convert() compares them as text.
+    options = set([
+                    OptionRecommendation(name='newline', recommended_value='system',
+                        level=OptionRecommendation.LOW, long_switch='newline',
+                        short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
+                        help=_('Type of newline to use. Options are %s. Default is \'system\'. '
+                            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
+                            'For Mac OS X use \'unix\'. \'system\' will default to the newline '
+                            'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
+                    OptionRecommendation(name='prepend_author', recommended_value='true',
+                        level=OptionRecommendation.LOW, long_switch='prepend_author',
+                        choices=['true', 'false'],
+                        help=_('Write the author to the beginning of the file. '
+                            'Default is \'true\'. Use \'false\' to disable.')),
+                    OptionRecommendation(name='prepend_title', recommended_value='true',
+                        choices=['true', 'false'],
+                        level=OptionRecommendation.LOW, long_switch='prepend_title',
+                        help=_('Write the title to the beginning of the file. '
+                            'Default is \'true\'. Use \'false\' to disable.'))
+                 ])
+    # Write oeb_book as text to output_path (a filesystem path or an object with write()).
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        metadata = TxtMetadata()
+        if opts.prepend_author.lower() == 'true':
+            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
+        if opts.prepend_title.lower() == 'true':
+            metadata.title = opts.title if opts.title else oeb_book.metadata.title
+        # dump() returns unicode; it is encoded as UTF-8 before being written below.
+        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
+        txt = writer.dump(oeb_book.spine, metadata)
+        # Only close the stream when it was opened here, not when the caller passed one in.
+        close = not hasattr(output_path, 'write')
+        if close:
+            out_dir = os.path.dirname(output_path)
+            if out_dir and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+        try:
+            out_stream.seek(0)
+            out_stream.write(txt.encode('utf-8'))
+        finally:
+            if close:
+                out_stream.close()
diff --git a/src/calibre/ebooks/txt/writer.py b/src/calibre/ebooks/txt/writer.py
index 205d8423e3..eabc2d64ed 100644
--- a/src/calibre/ebooks/txt/writer.py
+++ b/src/calibre/ebooks/txt/writer.py
@@ -1,34 +1,26 @@
 # -*- coding: utf-8 -*-
+from __future__ import with_statement
 '''
 Write content to TXT.
 '''
-from __future__ import with_statement
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
 
-import os, logging, re, sys
+import os, re, sys
+
+from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
 
 from BeautifulSoup import BeautifulSoup
 
-from calibre import LoggingInterface
-from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
-from calibre.ebooks.epub.iterator import SpineItem
-from calibre.ebooks.metadata import authors_to_string
-from calibre.ebooks.metadata.meta import metadata_from_formats
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre.utils.config import Config, StringConfig
-
-class TXTWriter(object):
-    def __init__(self, newline):
+class TxtWriter(object):
+    def __init__(self, newline, log):
         self.newline = newline
+        self.log = log
 
-    def dump(self, oebpath, path, metadata):
-        opf = OPF(oebpath, os.path.dirname(oebpath))
-        spine = [SpineItem(i.path) for i in opf.spine]
-
-        tmpout = ''
+    def dump(self, spine, metadata):
+        out = u''
         for item in spine:
             with open(item, 'r') as itemf:
                 content = itemf.read().decode(item.encoding)
@@ -39,25 +31,21 @@ class TXTWriter(object):
                 content = self.replace_html_symbols(content)
                 content = self.cleanup_text(content)
                 content = self.specified_newlines(content)
-                tmpout = tmpout + content
+                out += content
 
         # Prepend metadata
         if metadata.author != None and metadata.author != '':
-            tmpout = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + tmpout
+            out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
         if metadata.title != None and metadata.title != '':
-            tmpout = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + tmpout
+            out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out
 
             # Put two blank lines at end of file
-
-            end = tmpout[-3 * len(self.newline):]
+            end = out[-3 * len(self.newline):]
             for i in range(3 - end.count(self.newline)):
-                tmpout = tmpout + self.newline
+                out += self.newline
+
+        return out
 
-        if os.path.exists(path):
-            os.remove(path)
-        with open(path, 'w+b') as out:
-            out.write(tmpout.encode('utf-8'))
-            
     def strip_html(self, html):
         stripped = u''
         
@@ -149,14 +137,8 @@ class TXTWriter(object):
         if self.newline == '\n':
             return text
         
-        return text.replace('\n', self.newline)
-        
-class TxtMetadata(object):
-    def __init__(self):
-        self.author = None
-        self.title = None
-        self.series = None
-        
+        return text.replace('\n', self.newline)        
+
 
 class TxtNewlines(object):
     NEWLINE_TYPES = {
@@ -170,73 +152,7 @@ class TxtNewlines(object):
         self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep)
 
 
-def config(defaults=None):
-    desc = _('Options to control the conversion to TXT')
-    if defaults is None:
-        c = Config('txt', desc)
-    else:
-        c = StringConfig(defaults, desc)
-        
-    txt = c.add_group('TXT', _('TXT options.'))
-            
-    txt('newline', ['--newline'], default='system',
-        help=_('Type of newline to use. Options are %s. Default is \'system\'. '
-            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
-            'For Mac OS X use \'unix\'. \'system\' will default to the newline '
-            'type used by this OS.' % sorted(TxtNewlines.NEWLINE_TYPES.keys())))
-    txt('prepend_author', ['--prepend-author'], default='true',
-        help=_('Write the author to the beginning of the file. '
-            'Default is \'true\'. Use \'false\' to disable.'))
-    txt('prepend_title', ['--prepend-title'], default='true',
-        help=_('Write the title to the beginning of the file. '
-            'Default is \'true\'. Use \'false\' to disable.'))
-        
-    return c
-
-def option_parser():
-    c = config()
-    parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
-    parser.add_option(
-        '-o', '--output', default=None, 
-        help=_('Output file. Default is derived from input filename.'))
-    parser.add_option(
-        '-v', '--verbose', default=0, action='count',
-        help=_('Useful for debugging.'))        
-    return parser
-
-def oeb2txt(opts, inpath):
-    logger = LoggingInterface(logging.getLogger('oeb2txt'))
-    logger.setup_cli_handler(opts.verbose)
-    
-    outpath = opts.output
-    if outpath is None:
-        outpath = os.path.basename(inpath)
-        outpath = os.path.splitext(outpath)[0] + '.txt'
-
-    mi = metadata_from_formats([inpath])
-    metadata = TxtMetadata()
-    if opts.prepend_author.lower() == 'true':
-        metadata.author = opts.authors if opts.authors else authors_to_string(mi.authors)
-    if opts.prepend_title.lower() == 'true':
-        metadata.title = opts.title if opts.title else mi.title
-
-    newline = TxtNewlines(opts.newline)
-    
-    writer = TXTWriter(newline.newline)
-    writer.dump(inpath, outpath, metadata)
-    run_plugins_on_postprocess(outpath, 'txt')
-    logger.log_info(_('Output written to ') + outpath)
-    
-def main(argv=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(argv[1:])
-    if len(args) != 1:
-        parser.print_help()
-        return 1
-    inpath = args[0]
-    retval = oeb2txt(opts, inpath)
-    return retval
-
-if __name__ == '__main__':
-    sys.exit(main())
-
+class TxtMetadata(object):  # Title/author holder passed to TxtWriter.dump().
+    def __init__(self):
+        self.title = None  # None or '' makes dump() skip the title header.
+        self.author = None  # None or '' makes dump() skip the author header.