From adcf2a0cb66287623572df6a3109a4e6f7cb39a4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 12:56:01 -0400
Subject: [PATCH 1/7] Markdown Output: Fix issues with pre tags.

---
 src/calibre/ebooks/txt/markdownml.py | 53 ++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 087877e78f..8a111670c5 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -55,10 +55,15 @@ class MarkdownMLizer(OEB2HTML):
     def tidy_up(self, text):
         # Remove blank space form beginning of paragraph.
         text = re.sub('(?msu)^[ ]{1,3}', '', text)
+        # pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
+        text = re.sub('(?msu)^[ ]', '    ', text)
+        
         # Remove spaces from blank lines.
         text = re.sub('(?msu)^[ ]+$', '', text)
+        
         # Reduce blank lines
         text = re.sub('(?msu)\n{7,}', '\n' * 6, text)
+        
         # Remove blank lines at beginning and end of document.
         text = re.sub('^\s*', '', text)
         text = re.sub('\s*$', '\n\n', text)
@@ -80,6 +85,12 @@ class MarkdownMLizer(OEB2HTML):
     def prepare_string_for_markdown(self, txt):
         txt = re.sub(r'([\\`*_{}\[\]()#+!])', r'\\\1', txt)
         return txt
+    
+    def prepare_string_for_pre(self, txt):
+        new_text = []
+        for l in txt.splitlines():
+            new_text.append('    ' + l)
+        return '\n'.join(new_text)
 
     def dump_text(self, elem, stylizer):
         '''
@@ -97,7 +108,7 @@ class MarkdownMLizer(OEB2HTML):
             return ['']
 
         # Setup our variables.
-        text = ['']
+        text = []
         style = stylizer.style(elem)
         tags = []
         tag = barename(elem.tag)
@@ -143,29 +154,41 @@ class MarkdownMLizer(OEB2HTML):
             self.blockquotes += 1
             tags.append('>')
             text.append('> ' * self.blockquotes)
-        elif tag in ('code', 'pre'):
-            self.in_pre = True
-            text.append('    ')
+        elif tag == 'code':
+            if not self.in_pre:
+                text.append('`')
+                tags.append('`')
+        elif tag == 'pre':
+            if not self.in_pre:
+                text.append('\n')
+                tags.append('pre')
+                self.in_pre = True
         elif tag == 'hr':
             text.append('\n* * *')
             tags.append('\n')
         elif tag == 'a':
             # Only write links with absolute (external) urls.
-            if attribs.has_key('href') and '://' in attribs['href']:
+            if self.opts.keep_links and attribs.has_key('href') and '://' in attribs['href']:
                 title = ''
                 if attribs.has_key('title'):
-                    title = ' "' + attribs['title'] + '" '
+                    title = ' "' + attribs['title'] + '"'
+                    remove_space = self.remove_space_after_newline
+                    title = self.remove_newlines(title)
+                    self.remove_space_after_newline = remove_space
                 text.append('[')
                 tags.append('](' + attribs['href'] + title + ')')
         elif tag == 'img':
             if self.opts.keep_image_references:
                 txt = '!'
                 if attribs.has_key('alt'):
-                    txt += '[' + attribs['alt'] + ']'
+                    remove_space = self.remove_space_after_newline
+                    txt += '[' + self.remove_newlines(attribs['alt']) + ']'
+                    self.remove_space_after_newline = remove_space
                 txt += '(' + attribs['src'] + ')'
                 text.append(txt)
         elif tag in ('ol', 'ul'):
             self.list.append({'name': tag, 'num': 0})
+            tags.append(tag)
         elif tag == 'li':
             if self.list:
                 li = self.list[-1]
@@ -182,7 +205,9 @@ class MarkdownMLizer(OEB2HTML):
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
             txt = elem.text
-            if not self.in_pre:
+            if self.in_pre:
+                txt = self.prepare_string_for_pre(txt)
+            else:
                 txt = self.prepare_string_for_markdown(self.remove_newlines(txt))
             text.append(txt)
 
@@ -193,16 +218,12 @@ class MarkdownMLizer(OEB2HTML):
         # Close all open tags.
         tags.reverse()
         for t in tags:
-            if t in ('pre', 'ul', 'ol', 'li', '>', 'block'):
+            if t in ('pre', 'ul', 'ol', 'li', '>'):
                 if t == 'pre':
                     self.in_pre = False
+                    text.append('\n')
                 elif t == '>':
                     self.blockquotes -= 1
-                elif t == 'block':
-                    if self.style_bold:
-                        text.append('**')
-                    if self.style_italic:
-                        text.append('*')
                 elif t in ('ul', 'ol'):
                     if self.list:
                         self.list.pop()
@@ -224,7 +245,9 @@ class MarkdownMLizer(OEB2HTML):
         # Add the text that is outside of the tag.
         if hasattr(elem, 'tail') and elem.tail:
             tail = elem.tail
-            if not self.in_pre:
+            if self.in_pre:
+                tail = self.prepare_string_for_pre(tail)
+            else:
                 tail = self.prepare_string_for_markdown(self.remove_newlines(tail))
             text.append(tail)
 

From 8a689cf3b6172157351213e86da0d06ed06b450e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 13:03:52 -0400
Subject: [PATCH 2/7] Markdown Output: Don't escape special characters in code
 blocks.

---
 src/calibre/ebooks/txt/markdownml.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 8a111670c5..7765736024 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -22,6 +22,7 @@ class MarkdownMLizer(OEB2HTML):
     def extract_content(self, oeb_book, opts):
         self.log.info('Converting XHTML to Markdown formatted TXT...')
         self.opts = opts
+        self.in_code = False
         self.in_pre = False
         self.list = []
         self.blockquotes = 0
@@ -158,6 +159,7 @@ class MarkdownMLizer(OEB2HTML):
             if not self.in_pre:
                 text.append('`')
                 tags.append('`')
+                self.in_code = True
         elif tag == 'pre':
             if not self.in_pre:
                 text.append('\n')
@@ -207,6 +209,8 @@ class MarkdownMLizer(OEB2HTML):
             txt = elem.text
             if self.in_pre:
                 txt = self.prepare_string_for_pre(txt)
+            elif self.in_code:
+                txt = self.remove_newlines(txt)
             else:
                 txt = self.prepare_string_for_markdown(self.remove_newlines(txt))
             text.append(txt)
@@ -234,6 +238,8 @@ class MarkdownMLizer(OEB2HTML):
                     self.style_bold = False
                 elif t == '*':
                     self.style_italic = False
+                elif t == '`':
+                    self.in_code = False
                 text.append('%s' % t)
 
         # Soft scene breaks.
@@ -247,6 +253,8 @@ class MarkdownMLizer(OEB2HTML):
             tail = elem.tail
             if self.in_pre:
                 tail = self.prepare_string_for_pre(tail)
+            elif self.in_code:
+                tail = self.remove_newlines(tail)
             else:
                 tail = self.prepare_string_for_markdown(self.remove_newlines(tail))
             text.append(tail)

From 22e1a293cc19fe127156f28657727f1b5d4a83ce Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 13:16:16 -0400
Subject: [PATCH 3/7] Markdown Output: More code block handling fixes.

---
 src/calibre/ebooks/txt/markdownml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 7765736024..1f974fda60 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -156,7 +156,7 @@ class MarkdownMLizer(OEB2HTML):
             tags.append('>')
             text.append('> ' * self.blockquotes)
         elif tag == 'code':
-            if not self.in_pre:
+            if not self.in_pre and not self.in_code:
                 text.append('`')
                 tags.append('`')
                 self.in_code = True

From ea3837a3e89d6fe8dd617d0acd30535a90c99810 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 16:31:41 -0400
Subject: [PATCH 4/7] Markdown Output: List fixes.

---
 src/calibre/ebooks/txt/markdownml.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 1f974fda60..c87836bf7d 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -77,7 +77,7 @@ class MarkdownMLizer(OEB2HTML):
         text = text.replace('\r', ' ')
         # Condense redundant spaces created by replacing newlines with spaces.
         text = re.sub(r'[ ]{2,}', ' ', text)
-        text = re.sub(r'\t+', '', text)
+        #text = re.sub(r'\t+', '', text)
         if self.remove_space_after_newline == True:
             text = re.sub(r'^ +', '', text)
             self.remove_space_after_newline = False
@@ -189,20 +189,30 @@ class MarkdownMLizer(OEB2HTML):
                 txt += '(' + attribs['src'] + ')'
                 text.append(txt)
         elif tag in ('ol', 'ul'):
-            self.list.append({'name': tag, 'num': 0})
             tags.append(tag)
+            # Add the list to our lists of lists so we can track
+            # nested lists.
+            self.list.append({'name': tag, 'num': 0})
         elif tag == 'li':
+            # Get the last list from our list of lists
             if self.list:
                 li = self.list[-1]
             else:
                 li = {'name': 'ul', 'num': 0}
+            # Add a new line to start the item
             text.append('\n')
+            # Add indent if we have nested lists.
+            list_count = len(self.list)
+            # We only care about indenting nested lists.
+            if (list_count - 1) > 0:
+                text.append('\t' * (list_count - 1))
+            # Add blockquote if we have a blockquote in a list item.
             text.append(bq)
+            # Write the proper sign for ordered and unordered lists.
             if li['name'] == 'ul':
                 text.append('+ ')
             elif li['name'] == 'ol':
                 text.append(unicode(len(self.list)) + '. ')
-            tags.append('')
 
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
@@ -222,7 +232,7 @@ class MarkdownMLizer(OEB2HTML):
         # Close all open tags.
         tags.reverse()
         for t in tags:
-            if t in ('pre', 'ul', 'ol', 'li', '>'):
+            if t in ('pre', 'ul', 'ol', '>'):
                 if t == 'pre':
                     self.in_pre = False
                     text.append('\n')

From 4a6a013bef48a450344e37cdd5733d1c9f2f9c0f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 17:07:45 -0400
Subject: [PATCH 5/7] Markdown Output: Nested list fixes.

---
 src/calibre/ebooks/txt/markdownml.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index c87836bf7d..30e2d1d7be 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -59,6 +59,19 @@ class MarkdownMLizer(OEB2HTML):
         # pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
         text = re.sub('(?msu)^[ ]', '    ', text)
         
+        # Remove tabs that aren't at the beginning of a line
+        new_text = []
+        for l in text.splitlines():
+            start = re.match('\t+', l)
+            if start:
+                start = start.group()
+            else:
+                start = ''
+            l = re.sub('\t', '', l)
+            new_text.append(start + l)
+        text = '\n'.join(new_text)
+        print(text)
+        
         # Remove spaces from blank lines.
         text = re.sub('(?msu)^[ ]+$', '', text)
         
@@ -77,7 +90,7 @@ class MarkdownMLizer(OEB2HTML):
         text = text.replace('\r', ' ')
         # Condense redundant spaces created by replacing newlines with spaces.
         text = re.sub(r'[ ]{2,}', ' ', text)
-        #text = re.sub(r'\t+', '', text)
+        text = re.sub(r'\t+', '', text)
         if self.remove_space_after_newline == True:
             text = re.sub(r'^ +', '', text)
             self.remove_space_after_newline = False
@@ -212,7 +225,8 @@ class MarkdownMLizer(OEB2HTML):
             if li['name'] == 'ul':
                 text.append('+ ')
             elif li['name'] == 'ol':
-                text.append(unicode(len(self.list)) + '. ')
+                li['num'] += 1
+                text.append(unicode(li['num']) + '. ')
 
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
@@ -241,8 +255,7 @@ class MarkdownMLizer(OEB2HTML):
                 elif t in ('ul', 'ol'):
                     if self.list:
                         self.list.pop()
-                    if not self.list:
-                        text.append('\n')
+                    text.append('\n')
             else:
                 if t == '**':
                     self.style_bold = False

From cf3b7f85cec0371f1d8c1995ff9e780c7c81d63e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 4 Sep 2011 18:18:26 -0400
Subject: [PATCH 6/7] Markdown Output: Remove left over print statement from
 debugging.

---
 src/calibre/ebooks/txt/markdownml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 30e2d1d7be..878633add3 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -70,7 +70,6 @@ class MarkdownMLizer(OEB2HTML):
             l = re.sub('\t', '', l)
             new_text.append(start + l)
         text = '\n'.join(new_text)
-        print(text)
         
         # Remove spaces from blank lines.
         text = re.sub('(?msu)^[ ]+$', '', text)

From 123991aea51113193ce6af1261831bebeeaa019b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 5 Sep 2011 07:36:02 -0400
Subject: [PATCH 7/7] Fix for issue #816616: PDF Output Too many open files.

---
 src/calibre/ebooks/pdf/writer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py
index dc7f2edba9..ebe6533419 100644
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@@ -198,9 +198,10 @@ class PDFWriter(QObject): # {{{
         try:
             outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
             for item in self.combine_queue:
-                inputPDF = PdfFileReader(open(item, 'rb'))
-                for page in inputPDF.pages:
-                    outPDF.addPage(page)
+                with open(item, 'rb') as item_stream:
+                    inputPDF = PdfFileReader(item_stream)
+                    for page in inputPDF.pages:
+                        outPDF.addPage(page)
             outPDF.write(self.out_stream)
         finally:
             self._delete_tmpdir()