Pull from trunk

2025-08-30 23:00:21 -04:00 · 2010-05-30 22:51:09 -06:00 · 2010-05-30 22:51:09 -06:00 · 1af58e4f03
commit 1af58e4f03
parent f9edb21498 5f40023e1f
5 changed files with 62 additions and 9 deletions
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -132,6 +132,9 @@ class CHMReader(CHMFile):
            lpath = os.path.join(output_dir, path)
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            if lpath.find(';') != -1:
                # fix file names with ";<junk>" at the end, see _reformat()
                lpath = lpath.split(';')[0]
            with open(lpath, 'wb') as f:
                if guess_mimetype(path)[0] == ('text/html'):
                    data = self._reformat(data)
@ -158,14 +161,26 @@ class CHMReader(CHMFile):
        # cos they really fuck with the flow of things and generally waste space
        # since we can't use [a,b] syntax to select arbitrary items from a list
        # we'll have to do this manually...
        # only remove the tables, if they have an image with an alt attribute
        # containing prev, next or team
        t = soup('table')
        if t:
            if (t[0].previousSibling is None
              or t[0].previousSibling.previousSibling is None):
-                t[0].extract()
+                try:
                    alt = t[0].img['alt'].lower()
                    if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1:
                        t[0].extract()
                except:
                    pass
            if (t[-1].nextSibling is None
              or t[-1].nextSibling.nextSibling is None):
-                t[-1].extract()
+                try:
                    alt = t[-1].img['alt'].lower()
                    if alt.find('prev') != -1 or alt.find('next') != -1 or alt.find('team') != -1:
                        t[-1].extract()
                except:
                    pass
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.
@ -185,8 +200,24 @@ class CHMReader(CHMFile):
            except KeyError:
                # and some don't even have a src= ?!
                pass
-        # now give back some pretty html.
+        try:
-        return soup.prettify('utf-8')
+            # if there is only a single table with a single element
            # in the body, replace it by the contents of this single element
            tables = soup.body.findAll('table', recursive=False)
            if tables and len(tables) == 1:
                trs = tables[0].findAll('tr', recursive=False)
                if trs and len(trs) == 1:
                    tds = trs[0].findAll('td', recursive=False)
                    if tds and len(tds) == 1:
                        tdContents = tds[0].contents
                        tableIdx = soup.body.contents.index(tables[0])
                        tables[0].extract()
                        while tdContents:
                            soup.body.insert(tableIdx, tdContents.pop())
        except:
            pass
        # do not prettify, it would reformat the <pre> tags!
        return str(soup)
    def Contents(self):
        if self._contents is not None:
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -8,7 +8,8 @@ import os
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
-    separate_paragraphs_single_line, separate_paragraphs_print_formatted
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
    preserve_spaces
 class TXTInput(InputFormatPlugin):
@ -28,6 +29,9 @@ class TXTInput(InputFormatPlugin):
                'an indent (either a tab or 2+ spaces) represents a paragraph. '
                'Paragraphs end when the next line that starts with an indent '
                'is reached.')),
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name='markdown', recommended_value=False,
            help=_('Run the text input through the markdown pre-processor. To '
                'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
@ -48,6 +52,8 @@ class TXTInput(InputFormatPlugin):
            txt = separate_paragraphs_single_line(txt)
        if options.print_formatted_paras:
            txt = separate_paragraphs_print_formatted(txt)
        if options.preserve_spaces:
            txt = preserve_spaces(txt)
        if options.markdown:
            log.debug('Running text though markdown conversion...')
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -24,6 +24,9 @@ def convert_basic(txt, title=''):
    for line in txt.splitlines():
        lines.append(line.strip())
    txt = '\n'.join(lines)
    # Condense redundant spaces
    txt = re.sub('[ ]{2,}', ' ', txt)
    # Remove blank lines from the beginning and end of the document.
    txt = re.sub('^\s+(?=.)', '', txt)
@ -56,6 +59,11 @@ def separate_paragraphs_print_formatted(txt):
    txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
    return txt
 def preserve_spaces(txt):
    """Return *txt* with spaces and tabs rewritten as HTML entities.

    Each space character is replaced by ``&nbsp;`` and each tab character
    by ``&#09;``. All other characters are returned unchanged.
    """
    # Neither entity contains a space or a tab, so the two substitutions
    # cannot interfere with one another.
    substitutions = ((' ', '&nbsp;'), ('\t', '&#09;'))
    for raw_char, entity in substitutions:
        txt = txt.replace(raw_char, entity)
    return txt
 def opf_writer(path, opf_name, manifest, spine, mi):
    opf = OPFCreator(path, mi)
    opf.create_manifest(manifest)
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@ -14,6 +14,7 @@ class PluginWidget(Widget, Ui_Form):
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, 'txt_input',
-            ['single_line_paras', 'print_formatted_paras', 'markdown', 'markdown_disable_toc'])
+            ['single_line_paras', 'print_formatted_paras', 'markdown',
                'markdown_disable_toc', 'preserve_spaces'])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@ -6,7 +6,7 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>400</width>
+    <width>470</width>
    <height>300</height>
   </rect>
  </property>
@ -52,7 +52,7 @@
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -65,10 +65,17 @@
     </property>
    </spacer>
   </item>
   <item row="5" column="0">
    <widget class="QCheckBox" name="opt_preserve_spaces">
     <property name="text">
      <string>Preserve &amp;spaces</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources/>
-<connections>
+ <connections>
  <connection>
   <sender>opt_markdown</sender>
   <signal>toggled(bool)</signal>