From 88a60c1610d5ccd0c154d5aa3df08faebb9f873a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 1 Dec 2010 18:27:33 -0500
Subject: [PATCH 1/6] TXT Output: Turn br tags into spaces.

---
 src/calibre/ebooks/txt/txtml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 3ecb6940f8..48c94c2543 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -35,6 +35,7 @@ BLOCK_STYLES = [
 
 SPACE_TAGS = [
     'td',
+    'br',
 ]
 
 class TXTMLizer(object):

From 04e3ba0e812c0b2443d19a6eb6a331b94695ed56 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 1 Dec 2010 18:51:49 -0500
Subject: [PATCH 2/6] TXT Output: Basic Markdown formatted output.

---
 src/calibre/ebooks/txt/output.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index 15db4b1974..3c0d475460 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -8,6 +8,7 @@ import os
 
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
+from calibre.ebooks.txt.markdownml import MarkdownMLizer
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 
@@ -44,10 +45,17 @@ class TXTOutput(OutputFormatPlugin):
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Force splitting on the max-line-length value when no space '
             'is present. Also allows max-line-length to be below the minimum')),
+        OptionRecommendation(name='markdown_format',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Produce Markdown formatted text.')),
      ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
-        writer = TXTMLizer(log)
+        if opts.markdown_format:
+            writer = MarkdownMLizer(log)
+        else:
+            writer = TXTMLizer(log)
+        
         txt = writer.extract_content(oeb_book, opts)
 
         log.debug('\tReplacing newlines with selected type...')

From 98a0970f02ed7d953085377f5b5afa69563546e3 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 1 Dec 2010 20:33:52 -0500
Subject: [PATCH 3/6] Add markdownml.py. TXT Output: Remove links option to
 make markdown output cleaner.

---
 src/calibre/ebooks/txt/markdownml.py | 40 ++++++++++++++++++++++++++++
 src/calibre/ebooks/txt/output.py     |  5 ++++
 2 files changed, 45 insertions(+)
 create mode 100644 src/calibre/ebooks/txt/markdownml.py

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
new file mode 100644
index 0000000000..2ea3e7dafe
--- /dev/null
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into Markdown formatted plain text
+'''
+
+import re
+
+from lxml import etree
+
+from calibre.utils.html2text import html2text
+
+class MarkdownMLizer(object):
+    '''Convert the spine items of an OEB book to Markdown formatted text.'''
+    def __init__(self, log):
+        self.log = log
+
+    def extract_content(self, oeb_book, opts):
+        self.log.info('Converting XHTML to Markdown formatted TXT...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        # mlize_spine() reads the oeb_book and opts stored on self above.
+        return self.mlize_spine()
+
+    def mlize_spine(self):
+        output = [u'']
+        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
+            html = unicode(etree.tostring(item.data, encoding=unicode))
+            if self.opts.remove_links:
+                html = re.sub(r'<\s*a[^>]*>', '', html)
+                html = re.sub(r'<\s*/\s*a\s*>', '', html)
+            output += html2text(html)
+        output = u''.join(output)
+
+        return output
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index 3c0d475460..a6f52f92ca 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -48,6 +48,11 @@ class TXTOutput(OutputFormatPlugin):
         OptionRecommendation(name='markdown_format',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Produce Markdown formatted text.')),
+        OptionRecommendation(name='remove_links',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Remove links within the document. This is only ' \
+            'useful when paried with the markdown-format option because' \
+            'links are removed with plain text output.')),
      ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):

From f078aceb6cf5f9b1a1c589303ca6f24d5adb0d71 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 1 Dec 2010 21:03:54 -0500
Subject: [PATCH 4/6] TXT Output: Add GUI support for new markdown related
 options.

---
 src/calibre/gui2/convert/txt_output.py |  2 +-
 src/calibre/gui2/convert/txt_output.ui | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py
index 8b2e131ec6..3d35151bb8 100644
--- a/src/calibre/gui2/convert/txt_output.py
+++ b/src/calibre/gui2/convert/txt_output.py
@@ -21,7 +21,7 @@ class PluginWidget(Widget, Ui_Form):
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
         ['newline', 'max_line_length', 'force_max_line_length',
-        'inline_toc'])
+        'inline_toc', 'markdown_format', 'remove_links'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
 
diff --git a/src/calibre/gui2/convert/txt_output.ui b/src/calibre/gui2/convert/txt_output.ui
index 9eae5a8115..75eac8f5fc 100644
--- a/src/calibre/gui2/convert/txt_output.ui
+++ b/src/calibre/gui2/convert/txt_output.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>400</width>
+    <width>470</width>
     <height>300</height>
    </rect>
   </property>
@@ -27,7 +27,7 @@
    <item row="0" column="1">
     <widget class="QComboBox" name="opt_newline"/>
    </item>
-   <item row="4" column="0">
+   <item row="6" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -67,6 +67,20 @@
      </property>
     </widget>
    </item>
+   <item row="4" column="0">
+    <widget class="QCheckBox" name="opt_markdown_format">
+     <property name="text">
+      <string>Apply Markdown formatting to text</string>
+     </property>
+    </widget>
+   </item>
+   <item row="5" column="0">
+    <widget class="QCheckBox" name="opt_remove_links">
+     <property name="text">
+      <string>Remove links (&lt;a&gt; tags) before processing</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>

From d744fb698b96870249a72402cc1db39d0fe81cc2 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 2 Dec 2010 06:54:36 -0500
Subject: [PATCH 5/6] TXT Output: change remove links to keep links and remove
 links by default with Markdown format output. TXT Output: Add keep image
 references option and remove images by default with Markdown format output.

---
 src/calibre/ebooks/txt/markdownml.py   |  5 ++++-
 src/calibre/ebooks/txt/output.py       | 13 +++++++++----
 src/calibre/gui2/convert/txt_output.py |  2 +-
 src/calibre/gui2/convert/txt_output.ui | 15 +++++++++++----
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 2ea3e7dafe..2f7960e94c 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -31,9 +31,12 @@ class MarkdownMLizer(object):
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
             html = unicode(etree.tostring(item.data, encoding=unicode))
-            if self.opts.remove_links:
+            if not self.opts.keep_links:
                 html = re.sub(r'<\s*a[^>]*>', '', html)
                 html = re.sub(r'<\s*/\s*a\s*>', '', html)
+            if not self.opts.keep_image_references:
+                html = re.sub(r'<\s*img[^>]*>', '', html)
+                html = re.sub(r'<\s*img\s*>', '', html)
             output += html2text(html)
         output = u''.join(output)
 
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index a6f52f92ca..a6369b6f0b 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -48,11 +48,16 @@ class TXTOutput(OutputFormatPlugin):
         OptionRecommendation(name='markdown_format',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Produce Markdown formatted text.')),
-        OptionRecommendation(name='remove_links',
+        OptionRecommendation(name='keep_links',
             recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Remove links within the document. This is only ' \
-            'useful when paried with the markdown-format option because' \
-            'links are removed with plain text output.')),
+            help=_('Do not remove links within the document. This is only ' \
+            'useful when paired with the markdown-format option because ' \
+            'links are always removed with plain text output.')),
+        OptionRecommendation(name='keep_image_references',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Do not remove image references within the document. This is only ' \
+            'useful when paired with the markdown-format option because ' \
+            'image references are always removed with plain text output.')),
      ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py
index 3d35151bb8..2fafad4b43 100644
--- a/src/calibre/gui2/convert/txt_output.py
+++ b/src/calibre/gui2/convert/txt_output.py
@@ -21,7 +21,7 @@ class PluginWidget(Widget, Ui_Form):
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
         ['newline', 'max_line_length', 'force_max_line_length',
-        'inline_toc', 'markdown_format', 'remove_links'])
+        'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
 
diff --git a/src/calibre/gui2/convert/txt_output.ui b/src/calibre/gui2/convert/txt_output.ui
index 75eac8f5fc..19e4ec52a1 100644
--- a/src/calibre/gui2/convert/txt_output.ui
+++ b/src/calibre/gui2/convert/txt_output.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>470</width>
+    <width>477</width>
     <height>300</height>
    </rect>
   </property>
@@ -27,7 +27,7 @@
    <item row="0" column="1">
     <widget class="QComboBox" name="opt_newline"/>
    </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -75,9 +75,16 @@
     </widget>
    </item>
    <item row="5" column="0">
-    <widget class="QCheckBox" name="opt_remove_links">
+    <widget class="QCheckBox" name="opt_keep_links">
      <property name="text">
-      <string>Remove links (&lt;a&gt; tags) before processing</string>
+      <string>Do not remove links (&lt;a&gt; tags) before processing</string>
+     </property>
+    </widget>
+   </item>
+   <item row="6" column="0">
+    <widget class="QCheckBox" name="opt_keep_image_references">
+     <property name="text">
+      <string>Do not remove image references before processing</string>
      </property>
     </widget>
    </item>

From 8032890d0f352034b95bdf0482fbbf34b1d9f22d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 2 Dec 2010 07:37:38 -0500
Subject: [PATCH 6/6] TXT Output: Markdown output, ensure separation between
 sections.

---
 src/calibre/ebooks/txt/markdownml.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 2f7960e94c..116561f355 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -28,16 +28,36 @@ class MarkdownMLizer(object):
 
     def mlize_spine(self):
         output = [u'']
+        # One text chunk per spine item; the chunks are joined after the loop.
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
+            # Work on the serialized markup so the regex filters below apply.
             html = unicode(etree.tostring(item.data, encoding=unicode))
+            # Links and images are dropped unless the keep_* options are set.
             if not self.opts.keep_links:
                 html = re.sub(r'<\s*a[^>]*>', '', html)
                 html = re.sub(r'<\s*/\s*a\s*>', '', html)
             if not self.opts.keep_image_references:
                 html = re.sub(r'<\s*img[^>]*>', '', html)
                 html = re.sub(r'<\s*img\s*>', '', html)
-            output += html2text(html)
+
+            text = html2text(html)
+
+            # Ensure the section ends with at least two new line characters.
+            # This is to prevent the last paragraph from a section being
+            # combined into the first paragraph of the next.
+            end_chars = text[-4:]
+            # Convert all newlines to \n
+            end_chars = end_chars.replace('\r\n', '\n')
+            end_chars = end_chars.replace('\r', '\n')
+            # endswith() also copes with sections shorter than two characters.
+            if not end_chars.endswith('\n'):
+                text += '\n\n'
+            elif not end_chars.endswith('\n\n'):
+                text += '\n'
+
+            output.append(text)
+
         output = u''.join(output)
 
         return output