...

2025-07-09 03:04:10 -04:00 · 2011-01-06 19:25:52 -07:00 · 2011-01-06 19:25:52 -07:00 · 0e7eab2d1e
commit 0e7eab2d1e
parent 332c80aa54 c7332d3651
8 changed files with 38 additions and 134 deletions
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -51,16 +51,16 @@ def chap_head(match):
    chap = match.group('chap')
    title = match.group('title')
    if not title:
-               return '<h1>'+chap+'</h1><br/>\n'
+        return '<h1>'+chap+'</h1><br/>\n'
    else:
-               return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
+        return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
 def wrap_lines(match):
    ital = match.group('ital')
    if not ital:
-               return ' '
+        return ' '
    else:
-               return ital+' '
+        return ital+' '
 class DocAnalysis(object):
    '''
@ -191,7 +191,7 @@ class Dehyphenator(object):
        dehyphenated = unicode(firsthalf) + unicode(secondhalf)
        lookupword = self.removesuffixes.sub('', dehyphenated)
        if self.prefixes.match(firsthalf) is None:
-           lookupword = self.removeprefix.sub('', lookupword)
+            lookupword = self.removeprefix.sub('', lookupword)
        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
        try:
            searchresult = self.html.find(lookupword.lower())
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -113,24 +113,24 @@ class PreProcessor(object):
        # some lit files don't have any <p> tags or equivalent (generally just plain text between
        # <pre> tags), check and  mark up line endings if required before proceeding
        if self.no_markup(html, 0.1):
-             self.log("not enough paragraph markers, adding now")
+            self.log("not enough paragraph markers, adding now")
-             # check if content is in pre tags, use txt processor to mark up if so
+            # check if content is in pre tags, use txt processor to mark up if so
-             pre = re.compile(r'<pre>', re.IGNORECASE)
+            pre = re.compile(r'<pre>', re.IGNORECASE)
-             if len(pre.findall(html)) == 1:
+            if len(pre.findall(html)) == 1:
-                 self.log("Running Text Processing")
+                self.log("Running Text Processing")
-                 from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+                from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
-                 separate_paragraphs_single_line
+                separate_paragraphs_single_line
-                 outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
+                outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
-                 html = outerhtml.sub('\g<text>', html)
+                html = outerhtml.sub('\g<text>', html)
-                 html = separate_paragraphs_single_line(html)
+                html = separate_paragraphs_single_line(html)
-                 html = preserve_spaces(html)
+                html = preserve_spaces(html)
-                 html = convert_basic(html, epub_split_size_kb=0)
+                html = convert_basic(html, epub_split_size_kb=0)
-             else:
+            else:
-                 # Add markup naively
+                # Add markup naively
-                 # TODO - find out if there are cases where there are more than one <pre> tag or
+                # TODO - find out if there are cases where there are more than one <pre> tag or
-                 # other types of unmarked html and handle them in some better fashion
+                # other types of unmarked html and handle them in some better fashion
-                 add_markup = re.compile('(?<!>)(\n)')
+                add_markup = re.compile('(?<!>)(\n)')
-                 html = add_markup.sub('</p>\n<p>', html)
+                html = add_markup.sub('</p>\n<p>', html)
        ###### Mark Indents/Cleanup ######
        #
@ -164,8 +164,8 @@ class PreProcessor(object):
                self.log("deleting blank lines")
                html = blankreg.sub('', html)
            elif float(len(blanklines)) / float(len(lines)) > 0.40:
-               blanks_between_paragraphs = True
+                blanks_between_paragraphs = True
-               #print "blanks between paragraphs is marked True"
+                #print "blanks between paragraphs is marked True"
            else:
                blanks_between_paragraphs = False
        #self.dump(html, 'before_chapter_markup')
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -173,7 +173,7 @@ class FB2MLizer(object):
            if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
                self.oeb_book.spine.insert(0, title_item, True)
        # Create xhtml page to reference cover image so it can be used.
-        if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
+        if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
            id = unicode(self.oeb_book.metadata.cover[0])
            cover_item = self.oeb_book.manifest.ids[id]
            if cover_item.media_type in OEB_RASTER_IMAGES:
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 import os
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
 from calibre.ebooks.conversion.utils import PreProcessor
@ -18,30 +18,6 @@ class PDBInput(InputFormatPlugin):
    description = 'Convert PDB to HTML'
    file_types  = set(['pdb'])
    options = set([
        OptionRecommendation(name='paragraph_type', recommended_value='auto',
            choices=['auto', 'block', 'single', 'print'],
            help=_('Paragraph structure.\n'
                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
                   '* auto: Try to auto detect paragraph type.\n'
                   '* block: Treat a blank line as a paragraph break.\n'
                   '* single: Assume every line is a paragraph.\n'
                   '* print:  Assume every line starting with 2+ spaces or a tab '
                   'starts a paragraph.')),
        OptionRecommendation(name='formatting_type', recommended_value='auto',
            choices=['auto', 'none', 'markdown'],
            help=_('Formatting used within the document.'
                   '* auto: Try to auto detect the document formatting.\n'
                   '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
                   '* markdown: Run the input though the markdown pre-processor. '
                   'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
            help=_('Do not insert a Table of Contents into the output text.')),
    ])
    def convert(self, stream, options, file_ext, log,
                accelerators):
        header = PdbHeaderReader(stream)
@ -60,4 +36,4 @@ class PDBInput(InputFormatPlugin):
    def preprocess_html(self, options, html):
        self.options = options
        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
+        return preprocessor(html)
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@ -19,9 +19,6 @@ class Reader(FormatReader):
        self.stream = stream
        self.log = log
        self.options = options
        setattr(self.options, 'new_pdf_engine', False)
        setattr(self.options, 'no_images', False)
        setattr(self.options, 'unwrap_factor', 0.45)
    def extract_content(self, output_dir):
        self.log.info('Extracting PDF...')
@ -31,7 +28,12 @@ class Reader(FormatReader):
            for x in xrange(self.header.section_count()):
                pdf.write(self.header.section_data(x))
-            from calibre.customize.ui import plugin_for_input_format
+        from calibre.customize.ui import plugin_for_input_format
-            pdf.seek(0)
+
-            return plugin_for_input_format('pdf').convert(pdf, self.options,
+        pdf_plugin = plugin_for_input_format('pdf')
-                'pdf', self.log, [])
+        for option in pdf_plugin.options:
            if not hasattr(self.options, option.option.name):
                setattr(self.options, option.name, option.recommended_value)
        pdf.seek(0)
        return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {})
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/tcr/input.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 from cStringIO import StringIO
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.compression.tcr import decompress
 class TCRInput(InputFormatPlugin):
@ -16,30 +16,6 @@ class TCRInput(InputFormatPlugin):
    description = 'Convert TCR files to HTML'
    file_types  = set(['tcr'])
    options = set([
        OptionRecommendation(name='paragraph_type', recommended_value='auto',
            choices=['auto', 'block', 'single', 'print'],
            help=_('Paragraph structure.\n'
                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
                   '* auto: Try to auto detect paragraph type.\n'
                   '* block: Treat a blank line as a paragraph break.\n'
                   '* single: Assume every line is a paragraph.\n'
                   '* print:  Assume every line starting with 2+ spaces or a tab '
                   'starts a paragraph.')),
        OptionRecommendation(name='formatting_type', recommended_value='auto',
            choices=['auto', 'none', 'markdown'],
            help=_('Formatting used within the document.'
                   '* auto: Try to auto detect the document formatting.\n'
                   '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
                   '* markdown: Run the input though the markdown pre-processor. '
                   'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
            help=_('Do not insert a Table of Contents into the output text.')),
    ])
    def convert(self, stream, options, file_ext, log, accelerators):
        log.info('Decompressing text...')
        raw_txt = decompress(stream)
--- a/src/calibre/gui2/convert/pdb_input.py
+++ b/src/calibre/gui2/convert/pdb_input.py
@ -1,25 +0,0 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.convert.txt_input_ui import Ui_Form
 from calibre.gui2.convert import Widget
 class PluginWidget(Widget, Ui_Form):
    TITLE = _('PDB Input')
    HELP = _('Options specific to')+' PDB '+_('input')
    COMMIT_NAME = 'pdb_input'
    ICON = I('mimetypes/txt.png')
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
        self.db, self.book_id = db, book_id
        for x in get_option('paragraph_type').option.choices:
            self.opt_paragraph_type.addItem(x)
        for x in get_option('formatting_type').option.choices:
            self.opt_formatting_type.addItem(x)
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/tcr_input.py
+++ b/src/calibre/gui2/convert/tcr_input.py
@ -1,25 +0,0 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.convert.txt_input_ui import Ui_Form
 from calibre.gui2.convert import Widget
 class PluginWidget(Widget, Ui_Form):
    TITLE = _('TCR Input')
    HELP = _('Options specific to')+' TCR '+_('input')
    COMMIT_NAME = 'tcr_input'
    ICON = I('mimetypes/txt.png')
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
        self.db, self.book_id = db, book_id
        for x in get_option('paragraph_type').option.choices:
            self.opt_paragraph_type.addItem(x)
        for x in get_option('formatting_type').option.choices:
            self.opt_formatting_type.addItem(x)
        self.initialize_options(get_option, get_help, db, book_id)