diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index edef712bb4..bbb43af567 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -132,6 +132,9 @@ class CHMReader(CHMFile): lpath = os.path.join(output_dir, path) self._ensure_dir(lpath) data = self.GetFile(path) + if lpath.find(';') != -1: + # fix file names with ";" at the end, see _reformat() + lpath = lpath.split(';')[0] with open(lpath, 'wb') as f: if guess_mimetype(path)[0] == ('text/html'): data = self._reformat(data) @@ -158,14 +161,26 @@ class CHMReader(CHMFile): # cos they really fuck with the flow of things and generally waste space # since we can't use [a,b] syntax to select arbitrary items from a list # we'll have to do this manually... + # only remove the tables, if they have an image with an alt attribute + # containing prev, next or team t = soup('table') if t: if (t[0].previousSibling is None or t[0].previousSibling.previousSibling is None): - t[0].extract() + try: + alt = t[0].img['alt'].lower() + if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1: + t[0].extract() + except: + pass if (t[-1].nextSibling is None or t[-1].nextSibling.nextSibling is None): - t[-1].extract() + try: + alt = t[-1].img['alt'].lower() + if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1: + t[-1].extract() + except: + pass # for some very odd reason each page's content appears to be in a table # too. and this table has sub-tables for random asides... grr. @@ -185,8 +200,24 @@ class CHMReader(CHMFile): except KeyError: # and some don't even have a src= ?! pass - # now give back some pretty html. 
-        return soup.prettify('utf-8')
+        try:
+            # if there is only a single table with a single element
+            # in the body, replace it by the contents of this single element
+            tables = soup.body.findAll('table', recursive=False)
+            if tables and len(tables) == 1:
+                trs = tables[0].findAll('tr', recursive=False)
+                if trs and len(trs) == 1:
+                    tds = trs[0].findAll('td', recursive=False)
+                    if tds and len(tds) == 1:
+                        tdContents = tds[0].contents
+                        tableIdx = soup.body.contents.index(tables[0])
+                        tables[0].extract()
+                        while tdContents:
+                            soup.body.insert(tableIdx, tdContents.pop())
+        except:
+            pass
+        # do not prettify, it would reformat the <pre> tags!
+        return str(soup)
 
     def Contents(self):
         if self._contents is not None:
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 7be1a3449a..a0570c07ae 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -8,7 +8,8 @@ import os
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
-    separate_paragraphs_single_line, separate_paragraphs_print_formatted
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
+    preserve_spaces
 
 class TXTInput(InputFormatPlugin):
 
@@ -28,6 +29,9 @@ class TXTInput(InputFormatPlugin):
                 'an indent (either a tab or 2+ spaces) represents a paragraph. '
                 'Paragraphs end when the next line that starts with an indent '
                 'is reached.')),
+        OptionRecommendation(name='preserve_spaces', recommended_value=False,
+            help=_('Normally extra spaces are condensed into a single space. '
+                'With this option all spaces will be displayed.')),
         OptionRecommendation(name='markdown', recommended_value=False,
             help=_('Run the text input through the markdown pre-processor. To '
                 'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
@@ -48,6 +52,8 @@ class TXTInput(InputFormatPlugin):
             txt = separate_paragraphs_single_line(txt)
         if options.print_formatted_paras:
             txt = separate_paragraphs_print_formatted(txt)
+        if options.preserve_spaces:
+            txt = preserve_spaces(txt)
 
         if options.markdown:
             log.debug('Running text though markdown conversion...')
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 06276a4bbc..baebf2f298 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -24,6 +24,9 @@ def convert_basic(txt, title=''):
     for line in txt.splitlines():
         lines.append(line.strip())
     txt = '\n'.join(lines)
+    
+    # Condense redundant spaces
+    txt = re.sub('[ ]{2,}', ' ', txt)
 
     # Remove blank lines from the beginning and end of the document.
     txt = re.sub('^\s+(?=.)', '', txt)
@@ -56,6 +59,11 @@ def separate_paragraphs_print_formatted(txt):
     txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
     return txt
 
+def preserve_spaces(txt):
+    txt = txt.replace(' ', '&nbsp;')
+    txt = txt.replace('\t', '&#09;')
+    return txt
+
 def opf_writer(path, opf_name, manifest, spine, mi):
     opf = OPFCreator(path, mi)
     opf.create_manifest(manifest)
diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py
index 7fe6f6274c..f108bdd7d5 100644
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@@ -14,6 +14,7 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent, 'txt_input',
-            ['single_line_paras', 'print_formatted_paras', 'markdown', 'markdown_disable_toc'])
+            ['single_line_paras', 'print_formatted_paras', 'markdown',
+                'markdown_disable_toc', 'preserve_spaces'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui
index 9fde157d33..5a9527ebc5 100644
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>400</width>
+    <width>470</width>
     <height>300</height>
    </rect>
   </property>
@@ -52,7 +52,7 @@
      
     
    
-   
+   
     
      
       Qt::Vertical
@@ -65,10 +65,17 @@
      
     
    
+   
+    
+     
+      Preserve &spaces
+     
+    
+   
   
  
  
-
+ 
   
    opt_markdown
    toggled(bool)