From c354272030b3396dbe6d749a1b1038e00ff7f6dc Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 12:47:33 -0400
Subject: [PATCH 1/8] Start of new html to text parser.

---
 src/calibre/ebooks/pml/pmlconverter.py |  2 +-
 src/calibre/ebooks/txt/output.py       |  8 ++-
 src/calibre/ebooks/txt/txtml.py        | 98 ++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 src/calibre/ebooks/txt/txtml.py
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 2ca38176d5..a96adc5772 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -38,7 +38,7 @@ PML_HTML_RULES = [
     (re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % unichr(int(match.group('num'), 16))),
     (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
     (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
-    (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
+    (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<span id="%s"></span>' % match.group('target')),
     (re.compile(r'\\-'), lambda match: ''),
     (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.+?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
     (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index 6cb854df10..f1767700e0 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -32,8 +32,12 @@ class TXTOutput(OutputFormatPlugin):
                  ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
-        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
-        txt = writer.dump(oeb_book.spine)
+#        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
+#        txt = writer.dump(oeb_book.spine)
+
+        from calibre.ebooks.txt.txtml import TXTMLizer
+        writer = TXTMLizer(log)
+        txt = writer.extract_content(oeb_book, opts)
 
         close = False
         if not hasattr(output_path, 'write'):
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
new file mode 100644
index 0000000000..5bc7ed45f8
--- /dev/null
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into plain text
+'''
+
+import os
+
+from lxml import etree
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+
+BLOCK_TAGS = [
+    'div',
+    'p',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'li',
+]
+
+BLOCK_STYLES = [
+    'block',
+]
+
+class TXTMLizer(object):
+    def __init__(self, log):
+        self.log = log
+
+    def extract_content(self, oeb_book, opts):
+        self.log.info('Converting XHTML to PML markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.mlize_spine()
+
+    def mlize_spine(self):
+        output = u''
+        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to TXT...' % item.href)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
+            content = self.remove_newlines(content)
+            output += self.dump_text(etree.fromstring(content), stylizer)
+
+        return output
+
+    def remove_newlines(self, text):
+        self.log.debug('\tRemove newlines for processing...')
+        text = text.replace('\r\n', ' ')
+        text = text.replace('\n', ' ')
+        text = text.replace('\r', ' ')
+
+        return text
+
+    def dump_text(self, elem, stylizer):
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+
+        text = u''
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+
+        tag = barename(elem.tag)
+        in_block = False
+
+        # Are we in a paragraph block?
+        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            in_block = True
+            #if not text.endswith(os.linesep) and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            #    print '"%s"' % text
+            #    text += os.linesep + os.linesep
+
+        # Proccess tags that contain text.
+        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            text += elem.text
+
+        for item in elem:
+            text += self.dump_text(item, stylizer)
+
+        if in_block:
+            text += os.linesep + os.linesep
+
+        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+            text += elem.tail
+
+        return text

From e09193a48fc1966e35113af9d3817d03071ffd38 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:22:19 -0400
Subject: [PATCH 2/8] New TXT output processor.

---
 src/calibre/ebooks/txt/newlines.py |  25 +++++
 src/calibre/ebooks/txt/output.py   |  10 +-
 src/calibre/ebooks/txt/txtml.py    |  48 ++++++++--
 src/calibre/ebooks/txt/writer.py   | 146 -----------------------------
 4 files changed, 72 insertions(+), 157 deletions(-)
 create mode 100644 src/calibre/ebooks/txt/newlines.py
 delete mode 100644 src/calibre/ebooks/txt/writer.py

diff --git a/src/calibre/ebooks/txt/newlines.py b/src/calibre/ebooks/txt/newlines.py
new file mode 100644
index 0000000000..983d356206
--- /dev/null
+++ b/src/calibre/ebooks/txt/newlines.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+class TxtNewlines(object):
+
+    NEWLINE_TYPES = {
+                        'system'  : os.linesep,
+                        'unix'    : '\n',
+                        'old_mac' : '\r',
+                        'windows' : '\r\n'
+                     }
+
+    def __init__(self, newline_type):
+        self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep)
+
+def specified_newlines(newline, text):
+    if newline == os.linesep:
+        return text
+
+    return text.replace(os.linesep, newline)
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index f1767700e0..c13949af2e 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -8,7 +8,8 @@ import os
 
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
-from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
+from calibre.ebooks.txt.txtml import TXTMLizer
+from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 
 class TXTOutput(OutputFormatPlugin):
 
@@ -32,12 +33,11 @@ class TXTOutput(OutputFormatPlugin):
                  ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
-#        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
-#        txt = writer.dump(oeb_book.spine)
-
-        from calibre.ebooks.txt.txtml import TXTMLizer
         writer = TXTMLizer(log)
         txt = writer.extract_content(oeb_book, opts)
+        
+        log.debug('\tReplacing newlines with selected type...')
+        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)
 
         close = False
         if not hasattr(output_path, 'write'):
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 5bc7ed45f8..d609426d93 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''
 
-import os
+import os, re
 
 from lxml import etree
 
@@ -32,6 +32,7 @@ BLOCK_STYLES = [
 ]
 
 class TXTMLizer(object):
+    
     def __init__(self, log):
         self.log = log
 
@@ -49,6 +50,7 @@ class TXTMLizer(object):
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
             output += self.dump_text(etree.fromstring(content), stylizer)
+        output = self.cleanup_text(output)
 
         return output
 
@@ -60,7 +62,42 @@ class TXTMLizer(object):
 
         return text
 
-    def dump_text(self, elem, stylizer):
+    def cleanup_text(self, text):
+        self.log.debug('\tClean up text...')
+        # Replace bad characters.
+        text = text.replace(u'\xc2', '')
+        text = text.replace(u'\xa0', ' ')
+
+        # Replace tabs, vertical tabs and form feeds with single space.
+        text = text.replace('\t+', ' ')
+        text = text.replace('\v+', ' ')
+        text = text.replace('\f+', ' ')
+
+        # Single line paragraph.
+        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+
+        # Remove multiple spaces.
+        text = re.sub('[  ]+', ' ', text)
+
+        # Remove excessive newlines.
+        #text = re.sub('\n[ ]+\n', '\n\n', text)
+        #text = re.sub('\n{3,}', '\n\n', text)
+
+        # Replace spaces at the beginning and end of lines
+        text = re.sub('(?imu)^[ ]+', '', text)
+        text = re.sub('(?imu)[ ]+$', '', text)
+
+        return text
+
+    def dump_text(self, elem, stylizer, end=''):
+        '''
+        @elem: The element in the etree that we are working on.
+        @stylizer: The style information attached to the element.
+        @end: The last two characters of the text from the previous element.
+              This is used to determine if a blank line is needed when starting
+              a new block element.
+        '''
+
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
             return u''
@@ -78,16 +115,15 @@ class TXTMLizer(object):
         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
             in_block = True
-            #if not text.endswith(os.linesep) and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            #    print '"%s"' % text
-            #    text += os.linesep + os.linesep
+            if not end.endswith(os.linesep + os.linesep) and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+                text += os.linesep + os.linesep
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
             text += elem.text
 
         for item in elem:
-            text += self.dump_text(item, stylizer)
+            text += self.dump_text(item, stylizer, text[-2:])
 
         if in_block:
             text += os.linesep + os.linesep
diff --git a/src/calibre/ebooks/txt/writer.py b/src/calibre/ebooks/txt/writer.py
deleted file mode 100644
index a3fbe13199..0000000000
--- a/src/calibre/ebooks/txt/writer.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
-
-'''
-Write content to TXT.
-'''
-
-import os
-import re
-
-from lxml import etree
-
-from calibre import entity_to_unicode
-from calibre.ebooks.oeb.base import XHTML
-
-class TxtWriter(object):
-    def __init__(self, newline, log):
-        self.newline = newline
-        self.log = log
-
-    def dump(self, spine):
-        out = u''
-        for item in spine:
-            self.log.debug('Processing %s...' % item.href)
-            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
-            content = self.remove_newlines(content)
-            content = self.strip_html(content)
-            content = self.replace_html_symbols(content)
-            content = self.cleanup_text(content)
-            content = self.specified_newlines(content)
-            out += content
-
-            # Put two blank lines at end of file
-            end = out[-3 * len(self.newline):]
-            for i in range(3 - end.count(self.newline)):
-                out += self.newline
-
-        return out
-
-    def strip_html(self, text):
-        self.log.debug('\tStripping html...')
-        stripped = u''
-
-        # Remove unnecessary tags
-        for tag in ['script', 'style']:
-            text = re.sub('(?imu)<[ ]*%s[ ]*.*?>(.*)</[ ]*%s[ ]*>' % (tag, tag), '', text)
-        text = re.sub('<!--.*-->', '', text)
-        text = re.sub('<\?.*?\?>', '', text)
-        text = re.sub('<@.*?@>', '', text)
-        text = re.sub('<%.*?%>', '', text)
-
-        # Headings usually indicate Chapters.
-        # We are going to use a marker to insert the proper number of
-        # newline characters at the end of cleanup_text because cleanup_text
-        # remove excessive (more than 2 newlines).
-        for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-            text = re.sub('(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
-            text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)
-
-        # Separate content with space.
-        for tag in ['td']:
-            text = re.sub('(?imu)</[ ]*%s[ ]*>', ' ', text)
-
-        # Separate content with empty line.
-        for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
-            text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)
-
-        for tag in ['hr', 'br']:
-            text = re.sub('(?imu)<[ ]*%s.*?>' % tag, '\n\n', text)
-
-        # Remove any tags that do not need special processing.
-        text = re.sub('<.*?>', '', text)
-
-        stripped = stripped + text
-
-        return stripped
-
-    def replace_html_symbols(self, content):
-        self.log.debug('\tReplacing entities with unicode...')
-        for entity in set(re.findall('&.+?;', content)):
-            mo = re.search('(%s)' % entity[1:-1], content)
-            content = content.replace(entity, entity_to_unicode(mo))
-
-        return content
-
-    def cleanup_text(self, text):
-        self.log.debug('\tClean up text...')
-        # Replace bad characters.
-        text = text.replace(u'\xc2', '')
-        text = text.replace(u'\xa0', ' ')
-
-        # Replace tabs, vertical tags and form feeds with single space.
-        text = text.replace('\t+', ' ')
-        text = text.replace('\v+', ' ')
-        text = text.replace('\f+', ' ')
-
-        # Single line paragraph.
-        text = re.sub('(?<=.)\n(?=.)', ' ', text)
-
-        # Remove multiple spaces.
-        text = re.sub('[  ]+', ' ', text)
-
-        # Remove excessive newlines.
-        text = re.sub('\n[ ]+\n', '\n\n', text)
-        text = re.sub('\n{3,}', '\n\n', text)
-
-        # Replace markers with the proper characters.
-        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
-        text = text.replace('-vlgzxey-', '\n\n\n')
-
-        # Replace spaces at the beginning and end of lines
-        text = re.sub('(?imu)^[ ]+', '', text)
-        text = re.sub('(?imu)[ ]+$', '', text)
-
-        return text
-
-    def remove_newlines(self, text):
-        self.log.debug('\tRemove newlines for processing...')
-        text = text.replace('\r\n', ' ')
-        text = text.replace('\n', ' ')
-        text = text.replace('\r', ' ')
-
-        return text
-
-    def specified_newlines(self, text):
-        self.log.debug('\tReplacing newlines with selected type...')
-        if self.newline == '\n':
-            return text
-
-        return text.replace('\n', self.newline)
-
-
-class TxtNewlines(object):
-    NEWLINE_TYPES = {
-                        'system'  : os.linesep,
-                        'unix'    : '\n',
-                        'old_mac' : '\r',
-                        'windows' : '\r\n'
-                     }
-
-    def __init__(self, newline_type):
-        self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep)
-

From bfaa45c56393b1b3f5401b439c61477c6874d21d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:42:03 -0400
Subject: [PATCH 3/8] Have palmdoc and ztxt pdb files use new txt parser.

---
 src/calibre/ebooks/pdb/palmdoc/writer.py | 11 +++++++----
 src/calibre/ebooks/pdb/ztxt/writer.py    | 12 ++++++++----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 8eca0db124..2a46308db8 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -13,8 +13,8 @@ import struct
 from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.pdb.formatwriter import FormatWriter
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
-from calibre.ebooks.txt.writer import TxtNewlines
-from calibre.ebooks.txt.writer import TxtWriter
+from calibre.ebooks.txt.txtml import TXTMLizer
+from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 
 MAX_RECORD_SIZE = 4096
 
@@ -45,8 +45,11 @@ class Writer(FormatWriter):
             out_stream.write(record)
 
     def _generate_text(self, spine):
-        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
-        txt = txt_writer.dump(spine).encode(self.opts.output_encoding, 'replace')
+        writer = TXTMLizer(log)
+        txt = writer.extract_content(oeb_book, opts)
+
+        log.debug('\tReplacing newlines with selected type...')
+        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
 
         txt_length = len(txt)
 
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index d6bdeefc59..22f7bf002c 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -11,8 +11,9 @@ __docformat__ = 'restructuredtext en'
 import struct, zlib
 
 from calibre.ebooks.pdb.formatwriter import FormatWriter
-from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
+from calibre.ebooks.txt.txtml import TXTMLizer
+from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 
 MAX_RECORD_SIZE = 8192
 
@@ -49,9 +50,12 @@ class Writer(FormatWriter):
             out_stream.write(record)
         
     def _generate_text(self, spine):
-        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
-        txt = txt_writer.dump(spine).encode(self.opts.output_encoding, 'replace')
-        
+        writer = TXTMLizer(log)
+        txt = writer.extract_content(oeb_book, opts)
+
+        log.debug('\tReplacing newlines with selected type...')
+        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
+
         txt_length = len(txt)
         
         txt_records = []

From 3e3e6a234822858c68c2cbea291463904409b2d6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:44:43 -0400
Subject: [PATCH 4/8] Fix missing self. reference.

---
 src/calibre/ebooks/pdb/palmdoc/writer.py | 2 +-
 src/calibre/ebooks/pdb/ztxt/writer.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 2a46308db8..8ca83a8270 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -48,7 +48,7 @@ class Writer(FormatWriter):
         writer = TXTMLizer(log)
         txt = writer.extract_content(oeb_book, opts)
 
-        log.debug('\tReplacing newlines with selected type...')
+        self.log.debug('\tReplacing newlines with selected type...')
         txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
 
         txt_length = len(txt)
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index 22f7bf002c..19824fce91 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -53,7 +53,7 @@ class Writer(FormatWriter):
         writer = TXTMLizer(log)
         txt = writer.extract_content(oeb_book, opts)
 
-        log.debug('\tReplacing newlines with selected type...')
+        self.log.debug('\tReplacing newlines with selected type...')
         txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
 
         txt_length = len(txt)

From 82f3409a598f13d5ee6d39b0543bee3619de2682 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:46:30 -0400
Subject: [PATCH 5/8] Fix more typos.

---
 src/calibre/ebooks/pdb/palmdoc/writer.py | 6 +++---
 src/calibre/ebooks/pdb/ztxt/writer.py    | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 8ca83a8270..f99c698b2d 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -27,7 +27,7 @@ class Writer(FormatWriter):
     def write_content(self, oeb_book, out_stream, metadata=None):
         title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
 
-        txt_records, txt_length = self._generate_text(oeb_book.spine)
+        txt_records, txt_length = self._generate_text(oeb_book)
         header_record = self._header_record(txt_length, len(txt_records))
 
         section_lengths = [len(header_record)]
@@ -44,8 +44,8 @@ class Writer(FormatWriter):
         for record in [header_record] + txt_records:
             out_stream.write(record)
 
-    def _generate_text(self, spine):
-        writer = TXTMLizer(log)
+    def _generate_text(self, oeb_book):
+        writer = TXTMLizer(self.log)
         txt = writer.extract_content(oeb_book, opts)
 
         self.log.debug('\tReplacing newlines with selected type...')
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index 19824fce91..48600714ca 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -26,7 +26,7 @@ class Writer(FormatWriter):
     def write_content(self, oeb_book, out_stream, metadata=None):
         title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
 
-        txt_records, txt_length = self._generate_text(oeb_book.spine)
+        txt_records, txt_length = self._generate_text(oeb_book)
         
         crc32 = 0
         section_lengths = []
@@ -49,8 +49,8 @@ class Writer(FormatWriter):
         for record in [header_record]+txt_records:
             out_stream.write(record)
         
-    def _generate_text(self, spine):
-        writer = TXTMLizer(log)
+    def _generate_text(self, oeb_book):
+        writer = TXTMLizer(self.log)
         txt = writer.extract_content(oeb_book, opts)
 
         self.log.debug('\tReplacing newlines with selected type...')

From edeedddeb80bc4323f3e7cbb5ac7fd74434378f2 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:52:03 -0400
Subject: [PATCH 6/8] Fix more errors when moving to new txtml output.

---
 src/calibre/ebooks/pdb/palmdoc/writer.py | 5 +++--
 src/calibre/ebooks/pdb/ztxt/writer.py    | 4 ++--
 src/calibre/ebooks/txt/txtml.py          | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index f99c698b2d..12c1c4aaa7 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -3,6 +3,7 @@
 '''
 Writer content to palmdoc pdb file.
 '''
+import os
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -46,10 +47,10 @@ class Writer(FormatWriter):
 
     def _generate_text(self, oeb_book):
         writer = TXTMLizer(self.log)
-        txt = writer.extract_content(oeb_book, opts)
+        txt = writer.extract_content(oeb_book, self.opts)
 
         self.log.debug('\tReplacing newlines with selected type...')
-        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
+        txt = specified_newlines(TxtNewlines('windows').newline, txt).encode(self.opts.output_encoding, 'replace')
 
         txt_length = len(txt)
 
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index 48600714ca..566c0def44 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -51,10 +51,10 @@ class Writer(FormatWriter):
         
     def _generate_text(self, oeb_book):
         writer = TXTMLizer(self.log)
-        txt = writer.extract_content(oeb_book, opts)
+        txt = writer.extract_content(oeb_book, self.opts)
 
         self.log.debug('\tReplacing newlines with selected type...')
-        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt).encode(self.opts.output_encoding, 'replace')
+        txt = specified_newlines(TxtNewlines('windows').newline, txt).encode(self.opts.output_encoding, 'replace')
 
         txt_length = len(txt)
         
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index d609426d93..94f2a181c5 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -37,7 +37,7 @@ class TXTMLizer(object):
         self.log = log
 
     def extract_content(self, oeb_book, opts):
-        self.log.info('Converting XHTML to PML markup...')
+        self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
         return self.mlize_spine()

From c602400a68dc3ea48ac77574ffbc90537a8dccb4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 12 Jul 2009 20:55:21 -0400
Subject: [PATCH 7/8] ztxt pdb output encoding fix.

---
 src/calibre/ebooks/pdb/ztxt/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index 566c0def44..ee4c5752c3 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -34,7 +34,7 @@ class Writer(FormatWriter):
         self.log.info('Compressing data...')
         for i in range(0, len(txt_records)):
             self.log.debug('\tCompressing record %i' % i)
-            txt_records[i] = compressor.compress(txt_records[i].encode('cp1252', 'replace'))
+            txt_records[i] = compressor.compress(txt_records[i])
             txt_records[i] = txt_records[i] + compressor.flush(zlib.Z_FULL_FLUSH)
             section_lengths.append(len(txt_records[i]))
             crc32 = zlib.crc32(txt_records[i], crc32) & 0xffffffff

From f1806c4aa2c431c17cb1f126effbb5390e8441aa Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 13 Jul 2009 12:57:55 -0400
Subject: [PATCH 8/8] Show multiple authors correctly in metadata dialogs.

---
 src/calibre/gui2/convert/metadata.py        | 11 ++++++++---
 src/calibre/gui2/dialogs/metadata_single.py | 13 +++++--------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py
index 82e7b21148..513535df1b 100644
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@@ -39,8 +39,8 @@ class MetadataWidget(Widget, Ui_Form):
 
         mi = self.db.get_metadata(self.book_id, index_is_id=True)
         self.title.setText(mi.title)
-        if mi.authors:
-            self.author.setCurrentIndex(self.author.findText(authors_to_string(mi.authors)))
+#        if mi.authors:
+#            self.author.setCurrentIndex(self.author.findText(authors_to_string(mi.authors)))
         if mi.publisher:
             self.publisher.setCurrentIndex(self.publisher.findText(mi.publisher))
         self.author_sort.setText(mi.author_sort if mi.author_sort else '')
@@ -75,7 +75,12 @@ class MetadataWidget(Widget, Ui_Form):
             id, name = i
             name = authors_to_string([name.strip().replace('|', ',') for n in name.split(',')])
             self.author.addItem(name)
-        self.author.setCurrentIndex(-1)
+            
+        au = self.db.authors(self.book_id, True)
+        if not au:
+            au = _('Unknown')
+        au = ' & '.join([a.strip().replace('|', ',') for a in au.split(',')])
+        self.author.setEditText(au)
 
     def initialize_series(self):
         all_series = self.db.all_series()
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 0c2211e5c7..13acd161ae 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -330,19 +330,16 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
     def initalize_authors(self):
         all_authors = self.db.all_authors()
         all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1]))
-        author_id = self.db.author_id(self.row)
-        idx, c = None, 0
         for i in all_authors:
             id, name = i
-            if id == author_id:
-                idx = c
             name = [name.strip().replace('|', ',') for n in name.split(',')]
             self.authors.addItem(authors_to_string(name))
-            c += 1
 
-        self.authors.setEditText('')
-        if idx is not None:
-            self.authors.setCurrentIndex(idx)
+        au = self.db.authors(self.row)
+        if not au:
+            au = _('Unknown')
+        au = ' & '.join([a.strip().replace('|', ',') for a in au.split(',')])
+        self.authors.setEditText(au)
 
     def initialize_series(self):
         self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)