From 7343d48a37227ed8e9093e2ccf4aead31aac614d Mon Sep 17 00:00:00 2001
From: John Schember
Date: Thu, 6 Jan 2011 20:04:11 -0500
Subject: [PATCH 1/4] Remove PDB and TCR input options. TXT auto options are
default and should suffice.
---
src/calibre/ebooks/fb2/fb2ml.py | 2 +-
src/calibre/ebooks/pdb/input.py | 24 ------------------------
src/calibre/ebooks/tcr/input.py | 24 ------------------------
src/calibre/gui2/convert/pdb_input.py | 25 -------------------------
src/calibre/gui2/convert/tcr_input.py | 25 -------------------------
5 files changed, 1 insertion(+), 99 deletions(-)
delete mode 100644 src/calibre/gui2/convert/pdb_input.py
delete mode 100644 src/calibre/gui2/convert/tcr_input.py
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index f6deab677a..4dd6e7c7ae 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -173,7 +173,7 @@ class FB2MLizer(object):
if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
self.oeb_book.spine.insert(0, title_item, True)
# Create xhtml page to reference cover image so it can be used.
- if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
+ if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:
diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py
index 3688abff3f..8c754782a2 100644
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@@ -18,30 +18,6 @@ class PDBInput(InputFormatPlugin):
description = 'Convert PDB to HTML'
file_types = set(['pdb'])
- options = set([
- OptionRecommendation(name='paragraph_type', recommended_value='auto',
- choices=['auto', 'block', 'single', 'print'],
- help=_('Paragraph structure.\n'
- 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
- '* auto: Try to auto detect paragraph type.\n'
- '* block: Treat a blank line as a paragraph break.\n'
- '* single: Assume every line is a paragraph.\n'
- '* print: Assume every line starting with 2+ spaces or a tab '
- 'starts a paragraph.')),
- OptionRecommendation(name='formatting_type', recommended_value='auto',
- choices=['auto', 'none', 'markdown'],
- help=_('Formatting used within the document.'
- '* auto: Try to auto detect the document formatting.\n'
- '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
- '* markdown: Run the input though the markdown pre-processor. '
- 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
- OptionRecommendation(name='preserve_spaces', recommended_value=False,
- help=_('Normally extra spaces are condensed into a single space. '
- 'With this option all spaces will be displayed.')),
- OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
- help=_('Do not insert a Table of Contents into the output text.')),
- ])
-
def convert(self, stream, options, file_ext, log,
accelerators):
header = PdbHeaderReader(stream)
diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py
index c1dcef235d..4c759c5be2 100644
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/tcr/input.py
@@ -16,30 +16,6 @@ class TCRInput(InputFormatPlugin):
description = 'Convert TCR files to HTML'
file_types = set(['tcr'])
- options = set([
- OptionRecommendation(name='paragraph_type', recommended_value='auto',
- choices=['auto', 'block', 'single', 'print'],
- help=_('Paragraph structure.\n'
- 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
- '* auto: Try to auto detect paragraph type.\n'
- '* block: Treat a blank line as a paragraph break.\n'
- '* single: Assume every line is a paragraph.\n'
- '* print: Assume every line starting with 2+ spaces or a tab '
- 'starts a paragraph.')),
- OptionRecommendation(name='formatting_type', recommended_value='auto',
- choices=['auto', 'none', 'markdown'],
- help=_('Formatting used within the document.'
- '* auto: Try to auto detect the document formatting.\n'
- '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
- '* markdown: Run the input though the markdown pre-processor. '
- 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
- OptionRecommendation(name='preserve_spaces', recommended_value=False,
- help=_('Normally extra spaces are condensed into a single space. '
- 'With this option all spaces will be displayed.')),
- OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
- help=_('Do not insert a Table of Contents into the output text.')),
- ])
-
def convert(self, stream, options, file_ext, log, accelerators):
log.info('Decompressing text...')
raw_txt = decompress(stream)
diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py
deleted file mode 100644
index 16ff1ff236..0000000000
--- a/src/calibre/gui2/convert/pdb_input.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL 3'
-__copyright__ = '2011, John Schember '
-__docformat__ = 'restructuredtext en'
-
-from calibre.gui2.convert.txt_input_ui import Ui_Form
-from calibre.gui2.convert import Widget
-
-class PluginWidget(Widget, Ui_Form):
-
- TITLE = _('PDB Input')
- HELP = _('Options specific to')+' PDB '+_('input')
- COMMIT_NAME = 'pdb_input'
- ICON = I('mimetypes/txt.png')
-
- def __init__(self, parent, get_option, get_help, db=None, book_id=None):
- Widget.__init__(self, parent,
- ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
- self.db, self.book_id = db, book_id
- for x in get_option('paragraph_type').option.choices:
- self.opt_paragraph_type.addItem(x)
- for x in get_option('formatting_type').option.choices:
- self.opt_formatting_type.addItem(x)
- self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/tcr_input.py b/src/calibre/gui2/convert/tcr_input.py
deleted file mode 100644
index 366643ad5b..0000000000
--- a/src/calibre/gui2/convert/tcr_input.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL 3'
-__copyright__ = '2011, John Schember '
-__docformat__ = 'restructuredtext en'
-
-from calibre.gui2.convert.txt_input_ui import Ui_Form
-from calibre.gui2.convert import Widget
-
-class PluginWidget(Widget, Ui_Form):
-
- TITLE = _('TCR Input')
- HELP = _('Options specific to')+' TCR '+_('input')
- COMMIT_NAME = 'tcr_input'
- ICON = I('mimetypes/txt.png')
-
- def __init__(self, parent, get_option, get_help, db=None, book_id=None):
- Widget.__init__(self, parent,
- ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
- self.db, self.book_id = db, book_id
- for x in get_option('paragraph_type').option.choices:
- self.opt_paragraph_type.addItem(x)
- for x in get_option('formatting_type').option.choices:
- self.opt_formatting_type.addItem(x)
- self.initialize_options(get_option, get_help, db, book_id)
From 1786820728f1d69d2f5c5bf2ffd4d8f50f4b0219 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Thu, 6 Jan 2011 20:07:09 -0500
Subject: [PATCH 2/4] PDB PDF Input: Dynamically set options based on PDF
plugin.
---
src/calibre/ebooks/pdb/pdf/reader.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py
index c151551866..30b0c4c57c 100644
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@@ -19,9 +19,6 @@ class Reader(FormatReader):
self.stream = stream
self.log = log
self.options = options
- setattr(self.options, 'new_pdf_engine', False)
- setattr(self.options, 'no_images', False)
- setattr(self.options, 'unwrap_factor', 0.45)
def extract_content(self, output_dir):
self.log.info('Extracting PDF...')
@@ -31,7 +28,12 @@ class Reader(FormatReader):
for x in xrange(self.header.section_count()):
pdf.write(self.header.section_data(x))
- from calibre.customize.ui import plugin_for_input_format
- pdf.seek(0)
- return plugin_for_input_format('pdf').convert(pdf, self.options,
- 'pdf', self.log, [])
+ from calibre.customize.ui import plugin_for_input_format
+
+ pdf_plugin = plugin_for_input_format('pdf')
+ for option in pdf_plugin.options:  # give self.options a default for every PDF plugin option it lacks
+ if not hasattr(self.options, option.option.name):
+ setattr(self.options, option.option.name, option.recommended_value)
+
+ pdf.seek(0)
+ return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {})
From b5599f8ff2a9006d4312a9c88451afaf6001e41d Mon Sep 17 00:00:00 2001
From: John Schember
Date: Thu, 6 Jan 2011 20:51:28 -0500
Subject: [PATCH 3/4] Fix indents.
---
src/calibre/ebooks/conversion/preprocess.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 3ff816b3bf..9a27274dd8 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -51,16 +51,16 @@ def chap_head(match):
chap = match.group('chap')
title = match.group('title')
if not title:
- return ''+chap+'
\n'
+ return ''+chap+'
\n'
else:
- return ''+chap+'
\n'+title+'
\n'
+ return ''+chap+'
\n'+title+'
\n'
def wrap_lines(match):
ital = match.group('ital')
if not ital:
- return ' '
+ return ' '
else:
- return ital+' '
+ return ital+' '
class DocAnalysis(object):
'''
@@ -191,7 +191,7 @@ class Dehyphenator(object):
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
lookupword = self.removesuffixes.sub('', dehyphenated)
if self.prefixes.match(firsthalf) is None:
- lookupword = self.removeprefix.sub('', lookupword)
+ lookupword = self.removeprefix.sub('', lookupword)
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
try:
searchresult = self.html.find(lookupword.lower())
From c7332d3651a54bf3d9a5890f08af0c6de6776acb Mon Sep 17 00:00:00 2001
From: John Schember
Date: Thu, 6 Jan 2011 20:57:03 -0500
Subject: [PATCH 4/4] Fix indents.
---
src/calibre/ebooks/conversion/utils.py | 40 +++++++++++++-------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 11979b933c..a76ec8675d 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -113,24 +113,24 @@ class PreProcessor(object):
# some lit files don't have any tags or equivalent (generally just plain text between
#
tags), check and mark up line endings if required before proceeding
if self.no_markup(html, 0.1):
- self.log("not enough paragraph markers, adding now")
- # check if content is in pre tags, use txt processor to mark up if so
- pre = re.compile(r'', re.IGNORECASE)
- if len(pre.findall(html)) == 1:
- self.log("Running Text Processing")
- from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
- separate_paragraphs_single_line
- outerhtml = re.compile(r'.*?(?<=)(?P.*)(?=
).*', re.IGNORECASE|re.DOTALL)
- html = outerhtml.sub('\g', html)
- html = separate_paragraphs_single_line(html)
- html = preserve_spaces(html)
- html = convert_basic(html, epub_split_size_kb=0)
- else:
- # Add markup naively
- # TODO - find out if there are cases where there are more than one tag or
- # other types of unmarked html and handle them in some better fashion
- add_markup = re.compile('(?)(\n)')
- html = add_markup.sub('
\n', html)
+ self.log("not enough paragraph markers, adding now")
+ # check if content is in pre tags, use txt processor to mark up if so
+ pre = re.compile(r'
', re.IGNORECASE)
+ if len(pre.findall(html)) == 1:
+ self.log("Running Text Processing")
+ from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+ separate_paragraphs_single_line
+ outerhtml = re.compile(r'.*?(?<=)(?P.*)(?=
).*', re.IGNORECASE|re.DOTALL)
+ html = outerhtml.sub('\g', html)
+ html = separate_paragraphs_single_line(html)
+ html = preserve_spaces(html)
+ html = convert_basic(html, epub_split_size_kb=0)
+ else:
+ # Add markup naively
+ # TODO - find out if there are cases where there are more than one tag or
+ # other types of unmarked html and handle them in some better fashion
+ add_markup = re.compile('(?)(\n)')
+ html = add_markup.sub('\n', html)
###### Mark Indents/Cleanup ######
#
@@ -164,8 +164,8 @@ class PreProcessor(object):
self.log("deleting blank lines")
html = blankreg.sub('', html)
elif float(len(blanklines)) / float(len(lines)) > 0.40:
- blanks_between_paragraphs = True
- #print "blanks between paragraphs is marked True"
+ blanks_between_paragraphs = True
+ #print "blanks between paragraphs is marked True"
else:
blanks_between_paragraphs = False
#self.dump(html, 'before_chapter_markup')