From 041b8c14f2f45c070f98b50c54427d205085815d Mon Sep 17 00:00:00 2001 From: GRiker Date: Tue, 18 Jan 2011 03:17:28 -0700 Subject: [PATCH 01/28] GwR change capitalize() --- src/calibre/library/catalog.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 8edf266cfb..fe92486462 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -19,6 +19,7 @@ from calibre.ebooks.oeb.base import XHTML_NS from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.config import config_dir from calibre.utils.date import format_date, isoformat, now as nowf +from calibre.utils.icu import capitalize from calibre.utils.logging import default_log as log from calibre.utils.zipfile import ZipFile, ZipInfo from calibre.utils.magick.draw import thumbnail @@ -1459,7 +1460,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) # print # Build the unique_authors set from existing data - authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor] + authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor] # authors[] contains a list of all book authors, with multiple entries for multiple books by author # authors[]: (([0]:friendly [1]:sort)) @@ -2756,7 +2757,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_book = {} this_book['author'] = book['author'] this_book['title'] = book['title'] - this_book['author_sort'] = book['author_sort'].capitalize() + this_book['author_sort'] = capitalize(book['author_sort']) this_book['read'] = book['read'] this_book['tags'] = book['tags'] this_book['id'] = book['id'] @@ -3901,14 +3902,14 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) Sort non-series books before series books ''' if not book['series']: - key = '%s %s' % 
(book['author_sort'].capitalize(), - book['title_sort'].capitalize()) + key = '%s %s' % (capitalize(book['author_sort']), + capitalize(book['title_sort'])) else: index = book['series_index'] integer = int(index) fraction = index-integer series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0')) - key = '%s ~%s %s' % (book['author_sort'].capitalize(), + key = '%s ~%s %s' % (capitalize(book['author_sort']), self.generateSortTitle(book['series']), series_index) return key @@ -3919,7 +3920,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) ''' if not book['series']: key = '%s %s' % (self.author_to_author_sort(book['author']), - book['title_sort'].capitalize()) + capitalize(book['title_sort'])) else: index = book['series_index'] integer = int(index) @@ -4570,7 +4571,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) if self.letter_or_symbol(word[0]) != word[0]: if word[0] > 'A' or (ord('9') < ord(word[0]) < ord('A')) : translated.append('/') - translated.append(word.capitalize()) + translated.append(capitalize(word)) else: if re.search('[0-9]+',word[0]): From fc2ae0d4b54dfaa4c81bddfd080a3989a3aa63fd Mon Sep 17 00:00:00 2001 From: GRiker Date: Tue, 18 Jan 2011 06:21:17 -0700 Subject: [PATCH 02/28] GwR revisions to catalog generator --- resources/catalog/section_list_templates.py | 3 ++ src/calibre/library/catalog.py | 40 +++++++++------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/resources/catalog/section_list_templates.py b/resources/catalog/section_list_templates.py index de73147fcf..7f92fad6ac 100644 --- a/resources/catalog/section_list_templates.py +++ b/resources/catalog/section_list_templates.py @@ -6,6 +6,8 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' ''' + These templates control the content of titles displayed in the various sections + Available fields: {title} Title of the book {series} Series name @@ -14,6 +16,7 @@ 
__docformat__ = 'restructuredtext en' {rating_parens} Rating, in parentheses {pubyear} Year the book was published {pubyear_parens} Year the book was published, in parentheses + ''' # Books by Author by_authors_normal_title_template = '{title} {pubyear_parens}' diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index fe92486462..ea02c29fa7 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1027,17 +1027,12 @@ class EPUB_MOBI(CatalogPlugin): self.__totalSteps += 3 # Load section list templates - templates = ['by_authors_normal_title_template', - 'by_authors_series_title_template', - 'by_titles_normal_title_template', - 'by_titles_series_title_template', - 'by_series_title_template', - 'by_genres_normal_title_template', - 'by_genres_series_title_template', - 'by_recently_added_normal_title_template', - 'by_recently_added_series_title_template', - 'by_month_added_normal_title_template', - 'by_month_added_series_title_template'] + templates = [] + with open(P('catalog/section_list_templates.py'), 'r') as f: + for line in f: + t = re.match("(by_.+_template)",line) + if t: + templates.append(t.group(1)) execfile(P('catalog/section_list_templates.py'), locals()) for t in templates: setattr(self,t,eval(t)) @@ -1441,7 +1436,9 @@ class EPUB_MOBI(CatalogPlugin): # Exit if author matches previous, but author_sort doesn't match if author[0] == current_author[0]: error_msg = _(''' -Inconsistent Author Sort values for Author '{0}' ('{1}' <> '{2}'), unable to build catalog.\n +Inconsistent Author Sort values for Author '{0}': +'{1}' <> '{2}', +unable to build catalog.\n Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) self.opts.log.warn('\n*** Metadata error ***') @@ -1450,15 +1447,11 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) self.error.append('Metadata error') 
self.error.append(error_msg) return False + current_author = author self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort) -# for book in self.booksByAuthor: -# print '{0:<10} {1:<5} {2:<20} {3:<20} {4:<20} {5:<20}'.format(book['series'], book['series_index'], book['title'], -# book['author'], book['authors'],book['author_sort']) -# print - # Build the unique_authors set from existing data authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor] @@ -1566,7 +1559,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_title['rating'] = record['rating'] if record['rating'] else 0 - if re.match('0100-01-01',str(record['pubdate'].date())): + if re.match('0101-01-01',str(record['pubdate'].date())): this_title['date'] = None else: this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) @@ -2683,7 +2676,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) #aTag.insert(0,'%d. 
%s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) # Reassert 'date' since this is the result of a new search - if re.match('0100-01-01',str(book['pubdate'].date())): + if re.match('0101-01-01',str(book['pubdate'].date())): book['date'] = None else: book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple()) @@ -4314,10 +4307,11 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) formats = ' · '.join(formats) # Date of publication - pubdate = book['date'] - pubmonth, pubyear = pubdate.split() - if pubyear == '101': - pubdate = pubmonth = pubyear = '' + if book['date']: + pubdate = book['date'] + pubmonth, pubyear = pubdate.split() + else: + pubdate = pubyear = pubmonth = '' # Thumb _soup = BeautifulSoup('',selfClosingTags=['img']) From 4e93f9d761d865f8e8bc431cb52e0164e598c921 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 08:24:27 -0700 Subject: [PATCH 03/28] Fix #8444 (Calibre doesn't detect Acer LumiRead 600 on Win7, USB 2.0) --- src/calibre/devices/misc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index aaf948f25e..9f8dbcb379 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -193,6 +193,9 @@ class LUMIREAD(USBMS): THUMBNAIL_HEIGHT = 200 + VENDOR_NAME = 'ACER' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'LUMIREAD_600' + def upload_cover(self, path, filename, metadata, filepath): if metadata.thumbnail and metadata.thumbnail[-1]: cfilepath = filepath.replace('/', os.sep) From 492fb4c5266b2af61c44e6d97707c597db57d6f0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 08:48:21 -0700 Subject: [PATCH 04/28] Email: Fix bug when connecting to SMTP relays that use MD5 auth --- src/calibre/utils/smtplib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/utils/smtplib.py b/src/calibre/utils/smtplib.py index d6f3fb0b69..9992039d00 100755 --- 
a/src/calibre/utils/smtplib.py +++ b/src/calibre/utils/smtplib.py @@ -554,6 +554,8 @@ class SMTP: def encode_cram_md5(challenge, user, password): challenge = base64.decodestring(challenge) + if isinstance(password, unicode): # Added by Kovid, see http://bugs.python.org/issue5285 + password = password.encode('utf-8') response = user + " " + hmac.HMAC(password, challenge).hexdigest() return encode_base64(response, eol="") From 74d48de82c914883670370de59326305ddfc5686 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 09:06:28 -0700 Subject: [PATCH 05/28] Support for the SmartQ T7 --- src/calibre/devices/android/driver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 5a82882dfa..277070020b 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -36,7 +36,7 @@ class ANDROID(USBMS): # Google 0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226, - 0x227], 0x4e21: [0x0100, 0x226, 0x227]}, + 0x227], 0x4e21: [0x0100, 0x226, 0x227], 0xb058: [0x0222]}, # Samsung 0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400], @@ -64,12 +64,13 @@ class ANDROID(USBMS): EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN) VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', - 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS'] + 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', + 'TELECHIP'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', - 'SGH-T849', '_MB300', 'A70S'] + 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S'] From 
fca9ccc67d87647363d436742a7c02795f15183d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 10:17:56 -0700 Subject: [PATCH 06/28] Fix for shortcuts and combobox delegates --- src/calibre/gui2/library/delegates.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py index ea614aa817..e9b9255c64 100644 --- a/src/calibre/gui2/library/delegates.py +++ b/src/calibre/gui2/library/delegates.py @@ -353,6 +353,17 @@ class CcCommentsDelegate(QStyledItemDelegate): # {{{ model.setData(index, QVariant(editor.textbox.html), Qt.EditRole) # }}} +class DelegateCB(QComboBox): # {{{ + + def __init__(self, parent): + QComboBox.__init__(self, parent) + + def event(self, e): + if e.type() == e.ShortcutOverride: + e.accept() + return QComboBox.event(self, e) +# }}} + class CcBoolDelegate(QStyledItemDelegate): # {{{ def __init__(self, parent): ''' @@ -361,7 +372,7 @@ class CcBoolDelegate(QStyledItemDelegate): # {{{ QStyledItemDelegate.__init__(self, parent) def createEditor(self, parent, option, index): - editor = QComboBox(parent) + editor = DelegateCB(parent) items = [_('Y'), _('N'), ' '] icons = [I('ok.png'), I('list_remove.png'), I('blank.png')] if tweaks['bool_custom_columns_are_tristate'] == 'no': From 2a8ebdb76680f484b9fd2140f9b74c434e35f126 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 10:21:57 -0700 Subject: [PATCH 07/28] ... 
--- resources/recipes/ihned.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/ihned.recipe b/resources/recipes/ihned.recipe index daf63e19ed..a74f9e5649 100644 --- a/resources/recipes/ihned.recipe +++ b/resources/recipes/ihned.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class IHNed(BasicNewsRecipe): - stahnout_vsechny = False + stahnout_vsechny = True #True = stahuje vsechny z homepage #False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten) From 5e9fbd8f66fdecd3b7d41dbfe5af3df6d172ac46 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 10:26:35 -0700 Subject: [PATCH 08/28] ... --- src/calibre/gui2/device_drivers/configwidget.ui | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/gui2/device_drivers/configwidget.ui b/src/calibre/gui2/device_drivers/configwidget.ui index f4902a7387..619d7052e8 100644 --- a/src/calibre/gui2/device_drivers/configwidget.ui +++ b/src/calibre/gui2/device_drivers/configwidget.ui @@ -85,6 +85,9 @@ + + If checked, books are placed into sub directories based on their metadata on the device. If unchecked, books are all put into the top level directory. 
+ Use sub directories From 25fa78ed6e1a9469cb64c4f7a3fcfae3779a64ce Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 18 Jan 2011 17:31:03 +0000 Subject: [PATCH 09/28] Use DelegateCB with enum columns --- src/calibre/gui2/library/delegates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py index e9b9255c64..ae9d6e2f71 100644 --- a/src/calibre/gui2/library/delegates.py +++ b/src/calibre/gui2/library/delegates.py @@ -292,7 +292,7 @@ class CcEnumDelegate(QStyledItemDelegate): # {{{ def createEditor(self, parent, option, index): m = index.model() col = m.column_map[index.column()] - editor = QComboBox(parent) + editor = DelegateCB(parent) editor.addItem('') for v in m.custom_columns[col]['display']['enum_values']: editor.addItem(v) From f8182c38043e663ba3c53d334529f8e55fe89608 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 10:47:24 -0700 Subject: [PATCH 10/28] Add a is_undefined_date method to utils.date --- src/calibre/utils/date.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index 2551b90788..d970ed228d 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -46,6 +46,14 @@ local_tz = _local_tz = SafeLocalTimeZone() UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz) +def is_date_undefined(qt_or_dt): + d = qt_or_dt + if hasattr(d, 'toString'): + d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz) + return d.year == UNDEFINED_DATE.year and \ + d.month == UNDEFINED_DATE.month and \ + d.day == UNDEFINED_DATE.day + def parse_date(date_string, assume_utc=False, as_utc=True, default=None): ''' Parse a date/time string into a timezone aware datetime object. 
The timezone From 6c92177944f2a49e3d50d6b5d7ddcd84119016e9 Mon Sep 17 00:00:00 2001 From: GRiker Date: Tue, 18 Jan 2011 10:51:56 -0700 Subject: [PATCH 11/28] GwR revisions to catalog generator --- src/calibre/library/catalog.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index ea02c29fa7..16e90aaf0c 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -18,7 +18,7 @@ from calibre.ebooks.chardet import substitute_entites from calibre.ebooks.oeb.base import XHTML_NS from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.config import config_dir -from calibre.utils.date import format_date, isoformat, now as nowf +from calibre.utils.date import format_date, isoformat, now as nowf, UNDEFINED_DATE, utc_tz from calibre.utils.icu import capitalize from calibre.utils.logging import default_log as log from calibre.utils.zipfile import ZipFile, ZipInfo @@ -1559,6 +1559,8 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_title['rating'] = record['rating'] if record['rating'] else 0 + #pubdate = record['pubdate'].astimezone(utc_tz) + #if pubdate == UNDEFINED_DATE: if re.match('0101-01-01',str(record['pubdate'].date())): this_title['date'] = None else: @@ -2676,6 +2678,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) #aTag.insert(0,'%d. 
%s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) # Reassert 'date' since this is the result of a new search + #if book['pubdate'] == UNDEFINED_DATE: # tz doesn't match if re.match('0101-01-01',str(book['pubdate'].date())): book['date'] = None else: From 8c8583b7298dccb243a4c15aaa467ff87af08949 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 10:58:29 -0700 Subject: [PATCH 12/28] Update El Pais --- resources/recipes/el_pais.recipe | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe index 2e358060b8..4da3384093 100644 --- a/resources/recipes/el_pais.recipe +++ b/resources/recipes/el_pais.recipe @@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en' elpais.es ''' +from time import strftime + from calibre.web.feeds.news import BasicNewsRecipe class ElPais(BasicNewsRecipe): __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells' description = 'Main daily newspaper from Spain' - cover_url = 'http://www.elpais.com/im/tit_logo_global.gif' title = u'El Pais' publisher = u'Ediciones El Pa\xeds SL' category = 'News, politics, culture, economy, general interest' @@ -62,6 +63,6 @@ class ElPais(BasicNewsRecipe): (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058') ] -def print_version(self, url): - url = url+'?print=1' - return url + def get_cover_url(self): + return 'http://img5.kiosko.net/' + strftime("%Y/%m/%d") + '/es/elpais.750.jpg' + From f178ce16f19c0aa8f149447cf9d287691b66fa52 Mon Sep 17 00:00:00 2001 From: GRiker Date: Tue, 18 Jan 2011 10:58:57 -0700 Subject: [PATCH 13/28] GwR revisions to catalog generator --- src/calibre/library/catalog.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 16e90aaf0c..ae600a29f9 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -18,7 +18,7 
@@ from calibre.ebooks.chardet import substitute_entites from calibre.ebooks.oeb.base import XHTML_NS from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.config import config_dir -from calibre.utils.date import format_date, isoformat, now as nowf, UNDEFINED_DATE, utc_tz +from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf from calibre.utils.icu import capitalize from calibre.utils.logging import default_log as log from calibre.utils.zipfile import ZipFile, ZipInfo @@ -1559,9 +1559,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_title['rating'] = record['rating'] if record['rating'] else 0 - #pubdate = record['pubdate'].astimezone(utc_tz) - #if pubdate == UNDEFINED_DATE: - if re.match('0101-01-01',str(record['pubdate'].date())): + if is_date_undefined(record['pubdate']): this_title['date'] = None else: this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) @@ -2677,9 +2675,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) # Use series, series index if avail else just title #aTag.insert(0,'%d. %s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) - # Reassert 'date' since this is the result of a new search - #if book['pubdate'] == UNDEFINED_DATE: # tz doesn't match - if re.match('0101-01-01',str(book['pubdate'].date())): + if is_date_undefined(book['pubdate']): book['date'] = None else: book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple()) From 54fb874621bb7c56c35f930633fb226e58f244fb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 13:00:20 -0700 Subject: [PATCH 14/28] ... 
--- resources/recipes/nytimes_sub.recipe | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 8f92852237..cdacc42d92 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' @@ -23,6 +24,10 @@ class NYTimes(BasicNewsRecipe): webEdition = False oldest_article = 7 + # replace paid Kindle Version: the name will be changed to "The New York Times" to cause + # previous paid versions of the new york times to best sent to the back issues folder on the kindle + replaceKindleVersion = False + # includeSections: List of sections to include. If empty, all sections found will be included. # Otherwise, only the sections named will be included. For example, # @@ -94,6 +99,10 @@ class NYTimes(BasicNewsRecipe): title='New York Times (Web)' description = 'New York Times on the Web' needs_subscription = True + elif replaceKindleVersion: + title='The New York Times' + description = 'Today\'s New York Times' + needs_subscription = True else: title='New York Times' description = 'Today\'s New York Times' @@ -623,7 +632,7 @@ class NYTimes(BasicNewsRecipe): self.log(">>> No class:'columnGroup first' found <<<") except: self.log("ERROR: One picture per article in postprocess_html") - + try: # Change captions to italic for caption in soup.findAll(True, {'class':'caption'}) : @@ -637,7 +646,7 @@ class NYTimes(BasicNewsRecipe): caption.replaceWith(cTag) except: self.log("ERROR: Problem in change captions to italic") - + try: # Change to

h1 = soup.find('h1') @@ -675,7 +684,7 @@ class NYTimes(BasicNewsRecipe): except: self.log("ERROR: Problem in Change

to

- used in editorial blogs") - try: + try: # Change to for subhead in soup.findAll(True, {'class':'bold'}) : if subhead.contents: @@ -684,15 +693,15 @@ class NYTimes(BasicNewsRecipe): subhead.replaceWith(bTag) except: self.log("ERROR: Problem in Change

to

- used in editorial blogs") - - try: + + try: divTag = soup.find('div',attrs={'id':'articleBody'}) if divTag: divTag['class'] = divTag['id'] except: self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") - - try: + + try: # Add class="authorId" to
so we can format with CSS divTag = soup.find('div',attrs={'id':'authorId'}) if divTag and divTag.contents[0]: @@ -700,10 +709,10 @@ class NYTimes(BasicNewsRecipe): tag['class'] = "authorId" tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], use_alt=False))) - divTag.replaceWith(tag) + divTag.replaceWith(tag) except: self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") - + return soup def populate_article_metadata(self, article, soup, first): shortparagraph = "" From c7c563e0bbaee6820e986d49f987dd3a720ef808 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 18 Jan 2011 20:22:15 +0000 Subject: [PATCH 15/28] Fix #8441: Custom Meta Data tab requiring capitalization where main GUI does not. --- src/calibre/gui2/widgets.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index 0bb5ee7634..8d3af55bd9 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -479,10 +479,10 @@ class CompleteLineEdit(EnLineEdit): def update_items_cache(self, complete_items): self.completer.update_items_cache(complete_items) - + def set_separator(self, sep): self.separator = sep - + def set_space_before_sep(self, space_before): self.space_before_sep = space_before @@ -527,7 +527,7 @@ class EnComboBox(QComboBox): def __init__(self, *args): QComboBox.__init__(self, *args) self.setLineEdit(EnLineEdit(self)) - self.setAutoCompletionCaseSensitivity(Qt.CaseSensitive) + self.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive) self.setMinimumContentsLength(20) def text(self): @@ -541,17 +541,17 @@ class EnComboBox(QComboBox): self.setCurrentIndex(idx) class CompleteComboBox(EnComboBox): - + def __init__(self, *args): EnComboBox.__init__(self, *args) self.setLineEdit(CompleteLineEdit(self)) def update_items_cache(self, complete_items): self.lineEdit().update_items_cache(complete_items) - + def set_separator(self, sep): self.lineEdit().set_separator(sep) - + def set_space_before_sep(self, space_before): self.lineEdit().set_space_before_sep(space_before) From ebda738c8136455df0dd71cb857e8ca620386982 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 13:49:12 -0700 Subject: [PATCH 16/28] Updated NY Times --- resources/recipes/nytimes_sub.recipe | 275 ++++++++++++++++----------- 1 file changed, 161 
insertions(+), 114 deletions(-) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index cdacc42d92..2424113e31 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -159,6 +159,11 @@ class NYTimes(BasicNewsRecipe): 'relatedSearchesModule', 'side_tool', 'singleAd', + 'entry entry-utility', #added for DealBook + 'entry-tags', #added for DealBook + 'footer promos clearfix', #added for DealBook + 'footer links clearfix', #added for DealBook + 'inlineImage module', #added for DealBook re.compile('^subNavigation'), re.compile('^leaderboard'), re.compile('^module'), @@ -192,6 +197,9 @@ class NYTimes(BasicNewsRecipe): 'side_index', 'side_tool', 'toolsRight', + 'skybox', #added for DealBook + 'TopAd', #added for DealBook + 'related-content', #added for DealBook ]), dict(name=['script', 'noscript', 'style','form','hr'])] no_stylesheets = True @@ -246,7 +254,7 @@ class NYTimes(BasicNewsRecipe): def exclude_url(self,url): if not url.startswith("http"): return True - if not url.endswith(".html"): + if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook return True if 'nytimes.com' not in url: return True @@ -569,7 +577,6 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - if self.webEdition & (self.oldest_article>0): date_tag = soup.find(True,attrs={'class': ['dateline','date']}) if date_tag: @@ -592,128 +599,168 @@ class NYTimes(BasicNewsRecipe): img_div = soup.find('div','inlineImage module') if img_div: img_div.extract() + + return self.strip_anchors(soup) def postprocess_html(self,soup, True): - try: - if self.one_picture_per_article: - # Remove all images after first - largeImg = soup.find(True, {'class':'articleSpanImage'}) - inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) - if largeImg: - for inlineImg in inlineImgs: - inlineImg.extract() - else: - if inlineImgs: - firstImg = inlineImgs[0] - for inlineImg in inlineImgs[1:]: - 
inlineImg.extract() - # Move firstImg before article body - cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) - if cgFirst: - # Strip all sibling NavigableStrings: noise - navstrings = cgFirst.findAll(text=True, recursive=False) - [ns.extract() for ns in navstrings] - headline_found = False - tag = cgFirst.find(True) - insertLoc = 0 - while True: - insertLoc += 1 - if hasattr(tag,'class') and tag['class'] == 'articleHeadline': - headline_found = True - break - tag = tag.nextSibling - if not tag: - headline_found = False - break - if headline_found: - cgFirst.insert(insertLoc,firstImg) - else: - self.log(">>> No class:'columnGroup first' found <<<") - except: - self.log("ERROR: One picture per article in postprocess_html") - try: - # Change captions to italic - for caption in soup.findAll(True, {'class':'caption'}) : - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) - c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - caption.replaceWith(cTag) - except: - self.log("ERROR: Problem in change captions to italic") + try: + if self.one_picture_per_article: + # Remove all images after first + largeImg = soup.find(True, {'class':'articleSpanImage'}) + inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) + if largeImg: + for inlineImg in inlineImgs: + inlineImg.extract() + else: + if inlineImgs: + firstImg = inlineImgs[0] + for inlineImg in inlineImgs[1:]: + inlineImg.extract() + # Move firstImg before article body + cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) + if cgFirst: + # Strip all sibling NavigableStrings: noise + navstrings = cgFirst.findAll(text=True, recursive=False) + [ns.extract() for ns in navstrings] + headline_found = False + tag = cgFirst.find(True) + insertLoc = 0 + while True: + insertLoc += 1 + if hasattr(tag,'class') and tag['class'] == 'articleHeadline': + 
headline_found = True + break + tag = tag.nextSibling + if not tag: + headline_found = False + break + if headline_found: + cgFirst.insert(insertLoc,firstImg) + else: + self.log(">>> No class:'columnGroup first' found <<<") + except: + self.log("ERROR: One picture per article in postprocess_html") - try: - # Change to

- h1 = soup.find('h1') - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - else: - # Blog entry - replace headline, remove
tags - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() - except: - self.log("ERROR: Problem in Change to

") + try: + # Change captions to italic + for caption in soup.findAll(True, {'class':'caption'}) : + if caption and len(caption) > 0: + cTag = Tag(soup, "p", [("class", "caption")]) + c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() + mp_off = c.find("More Photos") + if mp_off >= 0: + c = c[:mp_off] + cTag.insert(0, c) + caption.replaceWith(cTag) + except: + self.log("ERROR: Problem in change captions to italic") - try: - # Change

to

- used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) - except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + try: + # Change to

+ h1 = soup.find('h1') + blogheadline = str(h1) #added for dealbook + if h1: + headline = h1.find("nyt_headline") + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.contents[0])) + h1.replaceWith(tag) + elif blogheadline.find('entry-title'):#added for dealbook + tag = Tag(soup, "h2")#added for dealbook + tag['class'] = "headline"#added for dealbook + tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook + h1.replaceWith(tag)#added for dealbook - try: - # Change to - for subhead in soup.findAll(True, {'class':'bold'}) : - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) - except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + else: + # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 + headline = soup.find('title') + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.renderContents())) + soup.insert(0, tag) + hrs = soup.findAll('hr') + for hr in hrs: + hr.extract() + except: + self.log("ERROR: Problem in Change to

") - try: - divTag = soup.find('div',attrs={'id':'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] - except: - self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") + try: + #if this is from a blog (dealbook, fix the byline format + bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) + if bylineauthor: + tag = Tag(soup, "h6") + tag['class'] = "byline" + tag.insert(0, self.fixChars(bylineauthor.renderContents())) + bylineauthor.replaceWith(tag) + except: + self.log("ERROR: fixing byline author format") - try: - # Add class="authorId" to
so we can format with CSS - divTag = soup.find('div',attrs={'id':'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) - except: - self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") + try: + #if this is a blog (dealbook) fix the credit style for the pictures + blogcredit = soup.find('div',attrs={'class':'credit'}) + if blogcredit: + tag = Tag(soup, "h6") + tag['class'] = "credit" + tag.insert(0, self.fixChars(blogcredit.renderContents())) + blogcredit.replaceWith(tag) + except: + self.log("ERROR: fixing credit format") - return soup + + try: + # Change

to

- used in editorial blogs + masthead = soup.find("h1") + if masthead: + # Nuke the href + if masthead.a: + del(masthead.a['href']) + tag = Tag(soup, "h3") + tag.insert(0, self.fixChars(masthead.contents[0])) + masthead.replaceWith(tag) + except: + self.log("ERROR: Problem in Change

to

- used in editorial blogs") + + try: + # Change to + for subhead in soup.findAll(True, {'class':'bold'}) : + if subhead.contents: + bTag = Tag(soup, "b") + bTag.insert(0, subhead.contents[0]) + subhead.replaceWith(bTag) + except: + self.log("ERROR: Problem in Change

to

- used in editorial blogs") + try: + #remove the update tag + blogupdated = soup.find('span', {'class':'update'}) + if blogupdated: + blogupdated.replaceWith("") + except: + self.log("ERROR: Removing strong tag") + + try: + divTag = soup.find('div',attrs={'id':'articleBody'}) + if divTag: + divTag['class'] = divTag['id'] + except: + self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") + + try: + # Add class="authorId" to
so we can format with CSS + divTag = soup.find('div',attrs={'id':'authorId'}) + if divTag and divTag.contents[0]: + tag = Tag(soup, "p") + tag['class'] = "authorId" + tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], + use_alt=False))) + divTag.replaceWith(tag) + except: + self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") + + return soup def populate_article_metadata(self, article, soup, first): shortparagraph = "" try: From 19d7f6d39fcc235105896203ca37b750ca9350cb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 14:02:27 -0700 Subject: [PATCH 17/28] ... --- src/calibre/ebooks/html/input.py | 4 ++-- src/calibre/utils/date.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 1f07f4ca41..ac16e459e8 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -21,7 +21,7 @@ from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.chardet import xml_to_unicode from calibre.customize.conversion import OptionRecommendation from calibre.constants import islinux, isfreebsd, iswindows -from calibre import unicode_path +from calibre import unicode_path, as_unicode from calibre.utils.localization import get_lang from calibre.utils.filenames import ascii_filename from calibre.ebooks.conversion.utils import PreProcessor @@ -112,7 +112,7 @@ class HTMLFile(object): with open(self.path, 'rb') as f: src = f.read() except IOError, err: - msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err)) + msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err)) if level == 0: raise IOError(msg) raise IgnoreFile(msg, err.errno) diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index bc16ebb0b6..f67f51ffc6 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -52,7 +52,7 @@ def is_date_undefined(qt_or_dt): return True if hasattr(d, 'toString'): d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz) - return d.year == UNDEFINED_DATE.year and \ + return d.year <= UNDEFINED_DATE.year and \ d.month == UNDEFINED_DATE.month and \ d.day == UNDEFINED_DATE.day From 78f599d5993c98e7f7ff71b98cf2c74db78fa0bd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: 
Tue, 18 Jan 2011 14:24:52 -0700 Subject: [PATCH 18/28] Updated calibre portable bat file --- resources/calibre-portable.bat | 85 +++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/resources/calibre-portable.bat b/resources/calibre-portable.bat index fb3444e34e..473cdc4236 100644 --- a/resources/calibre-portable.bat +++ b/resources/calibre-portable.bat @@ -1,6 +1,4 @@ @echo OFF -REM CalibreRun.bat -REM ~~~~~~~~~~~~~~ REM Batch File to start a Calibre configuration on Windows REM giving explicit control of the location of: REM - Calibe Program Files @@ -24,7 +22,10 @@ REM ------------------------------------- REM Set up Calibre Config folder REM ------------------------------------- -If EXIST CalibreConfig SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig +IF EXIST CalibreConfig ( + SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig + ECHO CONFIG=%cd%\CalibreConfig +) REM -------------------------------------------------------------- @@ -38,24 +39,53 @@ REM drive letter of the USB stick. REM Comment out any of the following that are not to be used REM -------------------------------------------------------------- -SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary -IF EXIST CalibreLibrary SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary -IF EXIST CalibreBooks SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks +IF EXIST U:\eBooks\CalibreLibrary ( + SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary + ECHO LIBRARY=U:\eBOOKS\CalibreLibrary +) +IF EXIST CalibreLibrary ( + SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary + ECHO LIBRARY=%cd%\CalibreLibrary +) +IF EXIST CalibreBooks ( + SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks + ECHO LIBRARY=%cd%\CalibreBooks +) REM -------------------------------------------------------------- -REM Specify Location of metadata database (optional) +REM Specify Location of metadata database (optional) REM REM Location where the metadata.db file is located. 
If not set REM the same location as Books files will be assumed. This. REM options is used to get better performance when the Library is REM on a (slow) network drive. Putting the metadata.db file -REM locally gives a big performance improvement. +REM locally makes gives a big performance improvement. +REM +REM NOTE. If you use this option, then the ability to switch +REM libraries within Calibre will be disabled. Therefore +REM you do not want to set it if the metadata.db file +REM is at the same location as the book files. REM -------------------------------------------------------------- -IF EXIST CalibreBooks SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db -IF EXIST CalibreMetadata SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db - +IF EXIST CalibreBooks ( + IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreBooks" ( + SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db + ECHO DATABASE=%cd%\CalibreBooks\metadata.db + ECHO ' + ECHO ***CAUTION*** Library Switching will be disabled + ECHO ' + ) +) +IF EXIST CalibreMetadata ( + IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" ( + SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db + ECHO DATABASE=%cd%\CalibreMetadata\metadata.db + ECHO ' + ECHO ***CAUTION*** Library Switching will be disabled + ECHO ' + ) +) REM -------------------------------------------------------------- REM Specify Location of source (optional) @@ -63,13 +93,20 @@ REM REM It is easy to run Calibre from source REM Just set the environment variable to where the source is located REM When running from source the GUI will have a '*' after the version. +REM number that is displayed at the bottom of the Calibre main screen. 
REM -------------------------------------------------------------- -IF EXIST Calibre\src SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src - +IF EXIST Calibre\src ( + SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src + ECHO SOURCE=%cd%\Calibre\src +) +IF EXIST D:\Calibre\Calibre\src ( + SET CALIBRE_DEVELOP_FROM=D:\Calibre\Calibre\src + ECHO SOURCE=D:\Calibre\Calibre\src +) REM -------------------------------------------------------------- -REM Specify Location of calibre binaries (optinal) +REM Specify Location of calibre binaries (optional) REM REM To avoid needing Calibre to be set in the search path, ensure REM that Calibre Program Files is current directory when starting. @@ -78,21 +115,15 @@ REM This folder can be populated by cpying the Calibre2 folder from REM an existing isntallation or by isntalling direct to here. REM -------------------------------------------------------------- -IF EXIST Calibre2 CD Calibre2 - - -REM -------------------------------------------- -REM Display settings that will be used -REM -------------------------------------------- - -echo PROGRAMS=%cd% -echo SOURCE=%CALIBRE_DEVELOP_FROM% -echo CONFIG=%CALIBRE_CONFIG_DIRECTORY% -echo LIBRARY=%CALIBRE_LIBRARY_DIRECTORY% -echo DATABASE=%CALIBRE_OVERRIDE_DATABASE_PATH% +IF EXIST Calibre2 ( + Calibre2 CD Calibre2 + ECHO PROGRAMS=%cd% +) +REM ---------------------------------------------------------- REM The following gives a chance to check the settings before REM starting Calibre. It can be commented out if not wanted. 
+REM ---------------------------------------------------------- echo "Press CTRL-C if you do not want to continue" pause @@ -111,4 +142,4 @@ REM Use with /WAIT to wait until Calibre completes to run a task on exit REM -------------------------------------------------------- echo "Starting up Calibre" -START /belownormal Calibre --with-library %CALIBRE_LIBRARY_DIRECTORY% +START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%" From f56b7453b169cc4a8299bb6fc445c168e3dfdb5c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 14:41:49 -0700 Subject: [PATCH 19/28] Fix call to create_oebbok in oeb.iterator --- src/calibre/ebooks/conversion/plumber.py | 32 ++++++++++++------------ src/calibre/ebooks/oeb/iterator.py | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 6fdf7ddc68..04ee892c19 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -483,29 +483,29 @@ OptionRecommendation(name='pubdate', OptionRecommendation(name='timestamp', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the book timestamp (used by the date column in calibre).')), - + OptionRecommendation(name='enable_heuristics', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Enable heurisic processing. This option must be set for any ' + help=_('Enable heuristic processing. This option must be set for any ' 'heuristic processing to take place.')), OptionRecommendation(name='markup_chapter_headings', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Detect unformatted chapter headings and sub headings. Change ' + help=_('Detect unformatted chapter headings and sub headings. Change ' 'them to h2 and h3 tags. 
This setting will not create a TOC, ' 'but can be used in conjunction with structure detection to create ' 'one.')), - + OptionRecommendation(name='italicize_common_cases', recommended_value=False, level=OptionRecommendation.LOW, help=_('Look for common words and patterns that denote ' 'italics and italicize them.')), - + OptionRecommendation(name='fix_indents', recommended_value=False, level=OptionRecommendation.LOW, help=_('Turn indentation created from multiple non-breaking space entities ' 'into CSS indents.')), - + OptionRecommendation(name='html_unwrap_factor', recommended_value=0.40, level=OptionRecommendation.LOW, help=_('Scale used to determine the length at which a line should ' @@ -513,31 +513,31 @@ OptionRecommendation(name='html_unwrap_factor', 'default is 0.4, just below the median line length. If only a ' 'few lines in the document require unwrapping this value should ' 'be reduced')), - + OptionRecommendation(name='unwrap_lines', recommended_value=False, level=OptionRecommendation.LOW, help=_('Unwrap lines using punctuation and other formatting clues.')), - + OptionRecommendation(name='delete_blank_paragraphs', recommended_value=False, level=OptionRecommendation.LOW, help=_('Remove empty paragraphs from the document when they exist between ' 'every other paragraph')), - + OptionRecommendation(name='format_scene_breaks', recommended_value=False, level=OptionRecommendation.LOW, - help=_('left aligned scene break markers are center aligned. ' + help=_('Left aligned scene break markers are center aligned. ' 'Replace soft scene breaks that use multiple blank lines with' 'horizontal rules.')), OptionRecommendation(name='dehyphenate', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Analyses hyphenated words throughout the document. The ' + help=_('Analyze hyphenated words throughout the document. 
The ' 'document itself is used as a dictionary to determine whether hyphens ' 'should be retained or removed.')), OptionRecommendation(name='renumber_headings', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Looks for occurences of sequential

or

tags. ' + help=_('Looks for occurrences of sequential

or

tags. ' 'The tags are renumbered to prevent splitting in the middle ' 'of chapter headings.')), @@ -545,10 +545,10 @@ OptionRecommendation(name='sr1_search', recommended_value='', level=OptionRecommendation.LOW, help=_('Search pattern (regular expression) to be replaced with ' 'sr1-replace.')), - + OptionRecommendation(name='sr1_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr1-search.')), + help=_('Replacement to replace the text found with sr1-search.')), OptionRecommendation(name='sr2_search', recommended_value='', level=OptionRecommendation.LOW, @@ -557,7 +557,7 @@ OptionRecommendation(name='sr2_search', OptionRecommendation(name='sr2_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr2-search.')), + help=_('Replacement to replace the text found with sr2-search.')), OptionRecommendation(name='sr3_search', recommended_value='', level=OptionRecommendation.LOW, @@ -566,7 +566,7 @@ OptionRecommendation(name='sr3_search', OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr3-search.')), + help=_('Replacement to replace the text found with sr3-search.')), ] # }}} diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 08b4369078..299c77af10 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -199,8 +199,8 @@ class EbookIterator(object): not hasattr(self.pathtoopf, 'manifest'): if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) - self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts, - plumber.input_plugin) + self.pathtoopf = create_oebbook(self.log, self.pathtoopf, + plumber.opts) if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) 
From 37ee06549e90d05fa5d0a92ae5a1c4abb89bf2a3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 14:52:35 -0700 Subject: [PATCH 20/28] cleanup --- src/calibre/ebooks/conversion/utils.py | 16 ++++++++-------- src/calibre/ebooks/lit/input.py | 2 -- src/calibre/ebooks/mobi/input.py | 1 - 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 4663eeccdf..6cdb688dff 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -54,7 +54,7 @@ class HeuristicProcessor(object): return '<'+styles+' style="page-break-before:always">'+chap def analyze_title_matches(self, match): - chap = match.group('chap') + #chap = match.group('chap') title = match.group('title') if not title: self.chapters_no_title = self.chapters_no_title + 1 @@ -136,7 +136,7 @@ class HeuristicProcessor(object): 'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.', 'Mlle.', 'Mons.', 'PS.', 'PPS.', ] - + ITALICIZE_STYLE_PATS = [ r'(?msu)(?<=\s)_(?P\S[^_]{0,40}?\S)?_(?=\s)', r'(?msu)(?<=\s)/(?P\S[^/]{0,40}?\S)?/(?=\s)', @@ -150,7 +150,7 @@ class HeuristicProcessor(object): r'(?msu)(?<=\s)/:(?P\S[^:/]{0,40}?\S)?:/(?=\s)', r'(?msu)(?<=\s)\|:(?P\S[^:\|]{0,40}?\S)?:\|(?=\s)', ] - + for word in ITALICIZE_WORDS: html = html.replace(word, '%s' % word) @@ -242,7 +242,7 @@ class HeuristicProcessor(object): lp_title = default_title else: lp_title = simple_title - + if ignorecase: arg_ignorecase = r'(?i)' else: @@ -250,7 +250,7 @@ class HeuristicProcessor(object): if title_req: lp_opt_title_open = '' - lp_opt_title_close = '' + lp_opt_title_close = '' else: lp_opt_title_open = opt_title_open lp_opt_title_close = opt_title_close @@ -399,7 +399,7 @@ class HeuristicProcessor(object): if len(lines) > 1: self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. 
" + unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") - + if float(len(blanklines)) / float(len(lines)) > 0.40: return True else: @@ -460,7 +460,7 @@ class HeuristicProcessor(object): if getattr(self.extra_opts, 'markup_chapter_headings', False): html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs) - if getattr(self.extra_opts, 'italicize_common_cases', False): + if getattr(self.extra_opts, 'italicize_common_cases', False): html = self.markup_italicis(html) # If more than 40% of the lines are empty paragraphs and the user has enabled delete @@ -487,7 +487,7 @@ class HeuristicProcessor(object): unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") - + ###### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index ff8955939e..ff901c3715 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.utils import HeuristicProcessor - class LITInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 8188027e01..4ce3618441 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,7 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): From eb98c55bb7302454b9041555ff92a110aaecfa2a Mon Sep 17 
00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 15:10:14 -0700 Subject: [PATCH 21/28] ... --- src/calibre/ebooks/conversion/utils.py | 3 +-- src/calibre/ebooks/txt/input.py | 3 ++- src/calibre/gui2/convert/heuristics.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 6cdb688dff..aabb1b8bc4 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -102,8 +102,7 @@ class HeuristicProcessor(object): min_lns = tot_ln_fds * percent #self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") - if min_lns > tot_htm_ends: - return True + return min_lns > tot_htm_ends def dump(self, raw, where): import os diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index dd14de2d20..5b99b19e74 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + self.log = log log.debug('Reading text from file...') txt = stream.read() @@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin): log.debug('Auto detected paragraph type as %s' % options.paragraph_type) # Dehyphenate - dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None)) + dehyphenator = Dehyphenator(options.verbose, log=self.log) txt = dehyphenator(txt,'txt', length) # We don't check for block because the processor assumes block. 
diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py index 6739c199b7..0655d7400f 100644 --- a/src/calibre/gui2/convert/heuristics.py +++ b/src/calibre/gui2/convert/heuristics.py @@ -25,21 +25,21 @@ class HeuristicsWidget(Widget, Ui_Form): ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) - + self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics) self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap) - + self.enable_heuristics(self.opt_enable_heuristics.checkState()) def break_cycles(self): Widget.break_cycles(self) - + try: self.opt_enable_heuristics.stateChanged.disconnect() self.opt_unwrap_lines.stateChanged.disconnect() except: pass - + def set_value_handler(self, g, val): if val is None and g is self.opt_html_unwrap_factor: g.setValue(0.0) @@ -57,7 +57,7 @@ class HeuristicsWidget(Widget, Ui_Form): self.opt_format_scene_breaks.setEnabled(state) self.opt_dehyphenate.setEnabled(state) self.opt_renumber_headings.setEnabled(state) - + self.opt_unwrap_lines.setEnabled(state) if state and self.opt_unwrap_lines.checkState() == Qt.Checked: self.opt_html_unwrap_factor.setEnabled(True) From d9cdcca712943946059cf3fe0c0d26051ae2734d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 15:12:07 -0700 Subject: [PATCH 22/28] ... 
--- src/calibre/gui2/device.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 734d8cd56c..28b5e178ac 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \ warning_dialog, info_dialog, choose_dir from calibre.ebooks.metadata import authors_to_string -from calibre import preferred_encoding, prints, force_unicode +from calibre import preferred_encoding, prints, force_unicode, as_unicode from calibre.utils.filenames import ascii_filename from calibre.devices.errors import FreeSpaceError from calibre.devices.apple.driver import ITUNES_ASYNC @@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{ if self._aborted: return self.failed = True - try: - ex = unicode(err) - except: - try: - ex = str(err).decode(preferred_encoding, 'replace') - except: - ex = repr(err) + ex = as_unicode(err) self._details = ex + '\n\n' + \ traceback.format_exc() self.exception = err From a5c35c3b930cc5de02f8145b6dc51217c1dcbe58 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 15:48:58 -0700 Subject: [PATCH 23/28] SONY driver: Don't abort when failing to upload a cover --- src/calibre/devices/prs505/driver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index 874fbe4b10..e9329c39c6 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -229,7 +229,11 @@ class PRS505(USBMS): debug_print('PRS505: not uploading cover') return debug_print('PRS505: uploading cover') - self._upload_cover(path, filename, metadata, filepath) + try: + self._upload_cover(path, filename, metadata, filepath) + except: + import traceback + traceback.print_exc() def _upload_cover(self, path, filename, metadata, filepath): if 
metadata.thumbnail and metadata.thumbnail[-1]: From 4b25d9ef0d47ae7efcd839f072f1f16dd87805fc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 16:02:42 -0700 Subject: [PATCH 24/28] Fix Heuristics preference widget --- resources/images/heuristics.png | Bin 0 -> 9561 bytes src/calibre/gui2/convert/heuristics.py | 22 +-- src/calibre/gui2/convert/heuristics.ui | 150 ++++++++++++------ .../gui2/convert/structure_detection.py | 2 +- 4 files changed, 103 insertions(+), 71 deletions(-) create mode 100644 resources/images/heuristics.png diff --git a/resources/images/heuristics.png b/resources/images/heuristics.png new file mode 100644 index 0000000000000000000000000000000000000000..92c53ae8ff9c70a394afffd4a644bd34e77c70ea GIT binary patch literal 9561 zcmb7KRZtvEklsZWcL=VFlVFPlT_m`>OOW6Y+*u?9NN^I|gKG%x76|U{5FqH{ve@xI z-P1i>P0e)G)YQy3)zjTm-CvZtsvHg`1ttIhz)_Hw(R`7x|2jJ8W$YEV7=96;_eydy zfam{OZhLVO06+&&kdf5($vh79%_N?!f4EPxo8ILR(MOAvFy{O6@Gzf!ev0g8-3^J}2+g-ec?ZRpfm*?A8cfSh#NcsD z&N_NG9yi!ume$9Mi_6QMSB*T!n%CXO+8ga*4AHFlk^hfj>kDA$CcIE7MOUfxF+43U zQFD$L0)V1pIAqWl){lN9`o{f%l6h4m#)yYk=i!hTS;cDvFPX~HbDoF z?lvJZfM~#dJJ6hj&XM{%a4#qLX*a;SK?0lz#~4BA0CN7ZJ`DpH$^fo)Mepu;FXAHq zVYy!z&c6%Yu=IQ46&EDqby;ic4-6y_i$cPN4gEK#$GSdGI+65e85Kpt$Y@nxM`uxO zns4+BQzq8LKsDW9b?+E&DW`~jV;)1&44zqSM=>a}cPjJFK^~>0DlAQU z91Rv;Y9aUeH#@&|_z)omn{|q!UZL8o*>eP|S=U$tJ?QXQ+}w z0Df(z1;@lu!J)7pYV&jF3Fi{_3k3$OcOM%E?HS5W^T{v}ibG80RGZNSo8DlYWOG*yixUQksq=-5)l-4&qx9Q2=wQnVBcP-Q5s0k zvaT`$z{ZGcJleYs)R-b@b8lFsPvZWag~Vq6yOK%Xz3B~w42BB|Adox5?6+BCkg&^_ zjgtNVlGg#bM6T-XU48UfwiU|Fz^NrBN4d0%GUS9VpjZieYn2Fy;-9Fs34Ikl5+J>SQnhX z42pN*f=82w^5w8e1iORj?P`{d4u^0q%L&bWfK;U}`Ap&0 zu|*NY`WX11P*4~&w94B)>J|Ll3SsKs|D^PW`j?qvtod$xw%^~?4TtJC{*#7fOx+`V z@B3;npyRPuBKMG(2`3j8(Yq=)8XyavowQT4pWKjZ&94l3F>JVAZbkcQHe=f_{^&|4 zf?zSu+nkDvnc+AtqZ}_wU-PkPV*)Or^b(zGsmT zGd=9nA-TUwn;n{2P?hHC?RLbBwL@kIBCDXI_S&XebEAnKq)L_!{X|V$a;#Y7BA-{8 z-pGYuZsRwupDg11G(W1o3zukPKQqYCThGJ6pyC%QT>&meU;Wq~zSptl5HsdyReL-o 
z4{NPNUn`wA>v!1uC18GrKsOZD4sxR>zg3b^dfZAW)(QO1Tmh)cSoQ+QF?b(@f1$e8 zFaW|}{l%*Rx_1{Qvm_BT{4iry2{UQ^4h$cgPagYqGAXHiWh8|JjhzU#Md8Wyf^QPt zYlq*}Q4h;|%im@K+h{o_ z>qP4NWFh`y;o}Tau}vkedK zH_rIQGC>vuVvF}Z%u%Ur!kMDotzur&D&?^rV0UAUY2a6QToRe!M2oDTdL3_sVYet) zGtKzpFeJ*wgK2x2Sx2=0wHk5ad(~)LKXU1uAF}Q(WngT7Myx;l?T7a3`SnmEsg^L# zdTZu%3T45>n|JVTAT1PQyIqAv$7)B8j{I^~Wv!~tUw0hWjct)!x)hfRqP^7vW{Q|u z9=a)xRbk%Vw|iE8-E>DgDBWvyv~hjGzRqWm&3vTX&5r^^4}nw#7;j^RniRiXW9 z{QQBVvpwtd;B(KsLoE3M#;6VjThb$T^>$Lqd4dsCL)3pUbID()AU}7@K%*vjKW(`x zFkcEu@m=^5=EhOAVx~V?K|^MsT|t&`c2A#+phQ+Gza?*Sxv zS^z2g#vKvD#RAS;^)ft>>k&+%h$$`5JR?u8EIXoM;LcdZS1eIMr=`;L6GDs$EwwKS zBU$6!xdSnv_r}AgE3)lSYhl3oGXT5F@?pbvBbnL3V^T1Zk-)m2B+AErHCWFp+~cbRDHgVH%>jGEzcI?E~wH|SF*rBt z0zfmgZeIx@nDy3Z>-X$F#OD(zxH^EfhG`cysE{D)Co*C7k$j%xK$eK2W=jpxpwZ_C z)n&DdirawmX0n?AmfD*#*X5fB@w?bN(a(f*Es|yLVKF*gsM|1z9MGCz_6;{9MB$W4 z0erUd_w7>@-85e?3*(kOmARrVZ{gk%2$L69>6DPjkxj{Kb2c#h2-w*DP^tYU>3tTw z3Of%x_+rPdDLx7qR3bG*Ou}v1+h4Y*B~GqXtBactuH^nX7JqaO*lof(C$4S^u$D~l z2A+uXcbI>Bz43TO8TTR8^wL%l17ztDY(9jtk(xKBVV3(xsN)27tF42F_grG%%I0f9 zhYPbf&xXK$Q~J-EAjZ4ps+M)OWXs?#)}oJ`AG95-zMhU?hFmkYVsZJUU{OsC)S&K7 zRuZWjwEV#zlqT>49q2(owI2K{XXB2Jp~Z=($8BkwXz-+rMe(l4!xKL z>ETwt2WDvf`xnZOT=!4}H)D1kFF;p9uF!##%9k8^m%P9)$aNsn(mOG|`@)Sa=Sj=r?F+V4XoNgqq=#q|ZS z-;zVu%>oP;56>6OJh#Yy5ZGCnR18%dHb7{MnB6t~TxkKK4I{8p^|kKyBr7J|m< za?zZV;-K+;cI;gCk?C}$i#f@-L@`tp+51qj^v7s-l79j}nh56UDjkwErD`v&v3@*! 
zoK{5=I5~Y)xTG{8fa*DYX}2$|l`n>m1}$PO=0`T6fIfbE1O95Ym_ZQJNjX`uqB9Th zG@1v*z{tnRBPnB*-5@9cpL@%Y8v%cZbN=_rqTDB#D=NK z2Bl{xq$jT5{9BJgzRctesQ^vv4VmoOM!ufqr>Xj&-qEr;HRf|mlAs0mj^_=LODUpT zys(hN`&!VV&3<`E$_g;^*uCODTqbaH)n6D#nZxrYZ>D&Y=vEXsL9M{pX%TN&I^{p8 z(SkqMM}92J|CIpIyA}7 zLpR0Nc*D617N|dr7&zJ-U*USyL7vWHH!0x~`?;zz{H-o6Ces&-6_-|V=Wiz*NIDPw zqA!2vIxhfE*?D^LUSxv*q&DrRc?b>FW>v2=8dP;Mkw|o9bAEiw~RXG~u zhJ#ZSqL{TZgt_P#%|qS%YGywGq^s8gi5<37OnUpcC304Ad}Pli2$7eUg)FPRPZxMO zY+PRlLcc|61Zq|hCO_>e&1*{rrHQvo@m9-1TQwsMEhS-UhcR*)U6_>{f_0@0; zFdNr=u{-lnNSx>47x%GBT4P4*J^Br6&$j7QT2CQFk$j7P$AG5(fI1BGieauhLI0Kw^>qe!_r#p|0V?PYk;Of0UsdbiXRct9j zI93Y^M_Z);AH>Gx7EBqdZv$_J`6)zhQH})TdBWpQ2J(H@_p1`ccK`f_FI%Uc-nx{R1*8am!WGk})%$>XawE`qmoBZCX{8s7Ccq`RKAUZRKZ0AinUR_&^-qq=p z;_qQcm_lV1+I_We0fL;KawwqhCo-i|Y?W}ufUWWQrgHZw_-bj%em;ucq1e}t)a-(c zf3uBj43RYO4z}RcR`f1a_YtBr@S-J}?~9roCF5P2mIJ?)w$WsLLVS}bTW7)l?i{~R z(|cn$gYxs~qJjd&9Fx;~>Fg*o$s`L6U?}IgT#z1t^{ddQB_g$Iv$3T=1M(ZteZhp> zX+j)?^F=p9zi}1AjZ9b}Gr*jZ43>;d&xGA8GsK;sTokWN&aDZ;mhQr54#&sWlv=7T zN%3MFuL)JSSp`hRf>SM2*lRPK^zF!vJTWkO`7F@wo^9m6pSm^VKI3Z9t%RCr21l;P zWqf7%uC&fA1R3?aw@eW7L!1o6rmy*V$|A@A+cP5tp!YW@?*xK!9Uj7Is^nww-%MeU zUY}H)box16k+jHONiNYYn#pP72}b+66rO5)8PA_gdoOyP{`?7B4(AA}QaUn`QE|a| zBuPwffr&9q%>BulwTRxFjQIsQ-|>18#x$QrDpCYKei6)$V_9*W%yDmLZrc`!${rn- z-((K=MC-%M4|4H;!jye|f)j?-Kld$Tl$!465dNi;)>Z?!sQ!spIZDbcc>4JW5I)nm zYXxvw*S=_sj9?Kfobod%Z~z-xZA9D666vXy`(D=jvx*})9+TGHJ)kZ@Qf^g=-2wz%N+(!{J8^;uMp9o6ksrL-s`fx|LG7>gA` zyD^(aLo`7tNlx*X&yj$0Szb8R^vck)b5#c=Kvo4##Taf-8VNvwfq9)4gs=$!K35Sf zF@UjQU3HeXL!bj8K>HKO)hk!G$mTSI^t+_#p|rvKeDMEx|vNMm|}&L)x=d9;|I!#LA*zr6mN}3R#az)^TrOI%FS*R3=&Z< zIs7j78bf^N1HNt3S$yXW=-{8g6LKd~8A(wMdsQrv5jctzoN;ho|1RVp65jTe~C2q7A(3B(g zd8a6#zK{W<>jqT$rQb-|bsbAVXt;%rt(RA^1RnlN&OY- zNw9d-m;RJoZ!R1>RyGT}@Q;X^>rm7(_D=PK@xM2Z?b%aH!z-$=z))v)S5yDcvxSVQ zc{cfiet_u_?^f{~R=Cg+sJGhDk?-=S?D(Gbx5A99PVY#|r9>WKh%u9#V z+KnWP!a1ygq8-NXy^XW{yCNE*qbC+SV!ZV_g*Wzo`U1uVe<&A=wSuXS1#Q)`b;Cfvl=Y&YM=SLD5JYk#Y%MG!h)-A&ye$rR(ds 
zD_&DogChK{rYFi*FTplqzwIAVJRz5uNWHzYfPr(GPP?IekhaN#T^TsId$Fr)1@^eo zwK6p|Ge4)+n2h%qQDts31wWoxxdSZY{d;=%(Jr-_4k+!@9&AO6Y)O$s?yr@HbT^l#PkX*ZJUM47@*!D|-n%cTsM$aax0J&G@SJj0#Fj@Hc*1C{4#ATUoqc~?p z=ia<#ER)Q9YWryoK7wYIactDq-lOu*wdaj;uEAr6o9|}77IbJH zr-+x)Voa$hIFIF20;}raO7+!QYvgFvvMu@u&$M3~B9!gW$RgBf>aSMM3w&EZT#gz; zzJ~tSkWBu3qHh{jq^$948N?sHlV05)sE;3lu8MiIBf22ST4E3BA=f`@VMzL` z2710`%)TF_OaHbKQkaZE#gb=MoZqTaa2}?v-X|Cz3}pd50=CRppbJjPh3#_BTuNHD zYJSg%Kxlnb5?(ZE<~fPt<60Gdry1*u!&0mSQ)&gCmiGOEr_Twad{9BUO>g960t@cn z|2*toYwhL(|DBMZpnO6gatrjBde~^j6pM-`yr|;vHq#ppE3A>?RV9R*>Wj-0?G+V!8#&(C!~?d$BSg zrz!Y!S9V<@pIb$1nC*BtS^`%Ef6@a0xNth_Uz%Q~sc;@_OwYstZ0xkiR-nE5@^nx1 zywuy>&VOU$?Pc%d8%E813B_G?I?y{nkNJz0`{c2EdLWCiVdjA#*JWVuvf~xKgOHo#80d81i+>=+PXj15mXCiyyu!3m^5a#tY+xK@^+{sG*KtorhcH;sI&VI_v$kb2~6&L`YmJs32TY@(bx3zB4g_{CYoaHhmfTT zaHI)+^>Z`{a5RY`PDc+tgK$ZDJm_+>OL!U z%>fGHt|AKb`zVQY#ZmabhHF*k)XMa9Sw=tw@7%B7P0zaf6QBmhBL&D5EUouu11s~OZobVH9F^^9TeznACWCpy%meluyx2l51_t7P4+Mhs+gbd#;25KKP4IQSHL zEgQcrib-SAD}O6)8k>!wfy9NiT@M0i=+krA{;c;-b+%Ys8#w?FbmtR50K-@t$-Qqk zVBtB5y?9Z+=-z}4=i=(C45WNJK-mn|1iDc3U|;eX+l;R3N%Tc?oG}iQqgfFRHD72I z`Cm93edYE?qiO_9HECq?Fi0zBntioBCKP+)z;}>Sp&+}dIl-utLL7A*@s-K0;!5@GOYaB1hE9UV{ zTT4rO0!SR%%pPEc#m30s+!``gervRAm`YK6A|8j~jYLM(mJdc#GUlJ`lDq<@kIOz9 ztbCp}y0DHnURtb4`q*EgO4ieg%FuFJuxwBBM!Ww7G9>`OC$an%zvx;R_H+Vr11h2pY6D0}w{H9s5hSFLyKx24 z=MF(Kx!e?qj$a6rTQi2n^*p1ODu1JB78PftnN=>q$w5w?Tt5yH4RI*^cC>ki5;8k` z-|2IfejHxIbe^4 z5W(i)m9E&LBp~kpoBT4C>%ZrQMP1|M)E(vH5#=l?3znq8ASOG}9#ZoAcRmQ$6 z4KD#mAI#9E)0o4-h>mrH=n65PT3WeFco0( zitZQ1>XP>^4kRr=0|;gTzusfMMDMi6AOY~Ld3j%-h)BRQE8$o7_rDIGvKw=+>90$G zfVtk2U)gHeNE18QnHaL#JmFSGbwBhj`b0?~%Dc92e{-U3&I1CT%|4?GZL4qeL-?>O zj@hBu%PDNo@Nu6G{#K$d^XOcbOi$}`_x?G$HM!RW*G<5ZreOMc2ze5tD=vfCazA4Z)j`ZvL~;amdsWsWw_v8e z>YeQ_4&{9d86xdyq5#;(cLAyt7OR%=EDpu2pzyZkWTgTA}*#b zE8qa@KmF`e@b1xemW& zBAYDvtSNykbaf)McKhe>A9i|UBGx^{0 zjk_Dv-Hv^%hlZ5zd29l|I2-0UJ@B@aoL+9DV63@T_odAo=E4jDc7y`tl5$a+kjg>|+OGmr0l zSi3ho32RxJl>!P%HeTbEN?9UfmNB=jW(+dBpX}%;`ad~GJ*UBud$MvC7W7;%Tq%IU 
MJ5`w~Df7_(0eS^uMgRZ+ literal 0 HcmV?d00001 diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py index 0655d7400f..e788888257 100644 --- a/src/calibre/gui2/convert/heuristics.py +++ b/src/calibre/gui2/convert/heuristics.py @@ -11,9 +11,10 @@ from calibre.gui2.convert import Widget class HeuristicsWidget(Widget, Ui_Form): - TITLE = _('Heuristic Processing') + TITLE = _('Heuristic\nProcessing') HELP = _('Modify the document text and structure using common patterns.') COMMIT_NAME = 'heuristics' + ICON = I('heuristics.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -46,23 +47,8 @@ class HeuristicsWidget(Widget, Ui_Form): return True def enable_heuristics(self, state): - if state == Qt.Checked: - state = True - else: - state = False - self.opt_markup_chapter_headings.setEnabled(state) - self.opt_italicize_common_cases.setEnabled(state) - self.opt_fix_indents.setEnabled(state) - self.opt_delete_blank_paragraphs.setEnabled(state) - self.opt_format_scene_breaks.setEnabled(state) - self.opt_dehyphenate.setEnabled(state) - self.opt_renumber_headings.setEnabled(state) - - self.opt_unwrap_lines.setEnabled(state) - if state and self.opt_unwrap_lines.checkState() == Qt.Checked: - self.opt_html_unwrap_factor.setEnabled(True) - else: - self.opt_html_unwrap_factor.setEnabled(False) + state = state == Qt.Checked + self.heuristic_options.setEnabled(state) def enable_unwrap(self, state): if state == Qt.Checked: diff --git a/src/calibre/gui2/convert/heuristics.ui b/src/calibre/gui2/convert/heuristics.ui index 8048bef204..4358512996 100644 --- a/src/calibre/gui2/convert/heuristics.ui +++ b/src/calibre/gui2/convert/heuristics.ui @@ -6,7 +6,7 @@ 0 0 - 938 + 724 470 @@ -15,114 +15,160 @@ - + - &Preprocess input file to possibly improve structure detection + <b>Heuristic processing</b> means that calibre will scan your book for common patterns and fix them. 
As the name implies, this involves guesswork, which means that it could end up worsening the result of a conversion, if calibre guesses wrong. Therefore, it is disabled by default. Often, if a conversion does not turn out as you expect, turning on heuristics can improve matters. + + + true - + + + Qt::Vertical + + + QSizePolicy::Fixed + + + + 20 + 15 + + + + + + + + Enable &heuristic processing + + + + + Heuristic Processing - - + + Unwrap lines - - - - Line &un-wrap factor during preprocess: - - - opt_html_unwrap_factor - - + + + + + + Qt::Horizontal + + + QSizePolicy::Fixed + + + + 40 + 20 + + + + + + + + Line &un-wrap factor : + + + opt_html_unwrap_factor + + + + + + + + + + 1.000000000000000 + + + 0.050000000000000 + + + 0.400000000000000 + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + - - - - - - - 1.000000000000000 - - - 0.050000000000000 - - - 0.400000000000000 - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - + Detect and markup unformatted chapter headings and sub headings - + Renumber sequences of <h1> or <h2> tags to prevent splitting - + Delete blank lines between paragraphs - + Ensure scene breaks are consistently formatted - + Remove unnecessary hyphens - + Italicize common words and patterns - + Replace entity indents with CSS indents - + Qt::Vertical diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py index 2c64303ee7..d8e2f4f122 100644 --- a/src/calibre/gui2/convert/structure_detection.py +++ b/src/calibre/gui2/convert/structure_detection.py @@ -31,7 +31,7 @@ class StructureDetectionWidget(Widget, Ui_Form): self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):')) self.opt_page_breaks_before.set_msg(_('Insert page breaks before ' '(XPath expression):')) - + def break_cycles(self): Widget.break_cycles(self) From 383b15e254c80b596d8455983d2a51db7c367511 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 16:40:29 -0700 Subject: [PATCH 25/28] Cleanup S&R 
preferences widget --- src/calibre/gui2/convert/bulk.py | 2 +- .../gui2/convert/search_and_replace.py | 18 ++++---- .../gui2/convert/search_and_replace.ui | 44 ++++++++++++++----- src/calibre/gui2/convert/single.py | 2 +- src/calibre/gui2/preferences/conversion.py | 4 +- src/calibre/manual/regexp.rst | 2 +- 6 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/calibre/gui2/convert/bulk.py b/src/calibre/gui2/convert/bulk.py index b97ab1a2dc..591ac92b2b 100644 --- a/src/calibre/gui2/convert/bulk.py +++ b/src/calibre/gui2/convert/bulk.py @@ -94,7 +94,7 @@ class BulkConfig(Config): if not c: break self.stack.removeWidget(c) - widgets = [lf, hw, sr, ps, sd, toc] + widgets = [lf, hw, ps, sd, toc, sr] if output_widget is not None: widgets.append(output_widget) for w in widgets: diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index c85e4fe414..88f7a2d4a0 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -15,6 +15,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form): TITLE = _('Search &\nReplace') HELP = _('Modify the document text and structure using user defined patterns.') COMMIT_NAME = 'search_and_replace' + ICON = I('search.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -24,19 +25,19 @@ class SearchAndReplaceWidget(Widget, Ui_Form): ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) - self.opt_sr1_search.set_msg(_('Search Regular Expression')) + self.opt_sr1_search.set_msg(_('&Search Regular Expression')) self.opt_sr1_search.set_book_id(book_id) self.opt_sr1_search.set_db(db) - self.opt_sr2_search.set_msg(_('Search Regular Expression')) + self.opt_sr2_search.set_msg(_('&Search Regular Expression')) self.opt_sr2_search.set_book_id(book_id) self.opt_sr2_search.set_db(db) - self.opt_sr3_search.set_msg(_('Search Regular 
Expression')) + self.opt_sr3_search.set_msg(_('&Search Regular Expression')) self.opt_sr3_search.set_book_id(book_id) self.opt_sr3_search.set_db(db) - + def break_cycles(self): Widget.break_cycles(self) - + self.opt_sr1_search.break_cycles() self.opt_sr2_search.break_cycles() self.opt_sr3_search.break_cycles() @@ -45,10 +46,11 @@ class SearchAndReplaceWidget(Widget, Ui_Form): for x in ('sr1_search', 'sr2_search', 'sr3_search'): x = getattr(self, 'opt_'+x) try: - pat = unicode(x.regex) - re.compile(pat) + pat = unicode(x.regex).strip() + if pat: + re.compile(pat) except Exception, err: error_dialog(self, _('Invalid regular expression'), - _('Invalid regular expression: %s')%err).exec_() + _('Invalid regular expression: %s')%err, show=True) return False return True diff --git a/src/calibre/gui2/convert/search_and_replace.ui b/src/calibre/gui2/convert/search_and_replace.ui index e0e9570f8c..b7447f8feb 100644 --- a/src/calibre/gui2/convert/search_and_replace.ui +++ b/src/calibre/gui2/convert/search_and_replace.ui @@ -6,8 +6,8 @@ 0 0 - 198 - 350 + 468 + 451 @@ -23,7 +23,7 @@ QLayout::SetDefaultConstraint - + @@ -32,7 +32,7 @@ - 1. + First expression @@ -57,7 +57,10 @@ - Replacement Text + &Replacement Text + + + opt_sr1_replace @@ -74,7 +77,7 @@ - + @@ -83,7 +86,7 @@ - 2. + Second Expression @@ -108,7 +111,10 @@ - Replacement Text + &Replacement Text + + + opt_sr2_replace @@ -125,7 +131,7 @@ - + @@ -134,7 +140,7 @@ - 3. + Third expression @@ -159,7 +165,10 @@ - Replacement Text + &Replacement Text + + + opt_sr3_replace @@ -176,6 +185,19 @@ + + + + <p>Search and replace uses <i>regular expressions</i>. See the <a href="http://calibre-ebook.com/user_manual/regexp.html">regular expressions tutorial</a> to get started with regular expressions. Also clicking the wizard buttons below will allow you to test your regular expression against the current input document. 
+ + + true + + + true + + + diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py index 8826d398f5..da58de545b 100644 --- a/src/calibre/gui2/convert/single.py +++ b/src/calibre/gui2/convert/single.py @@ -207,7 +207,7 @@ class Config(ResizableDialog, Ui_Dialog): if not c: break self.stack.removeWidget(c) - widgets = [self.mw, lf, hw, sr, ps, sd, toc] + widgets = [self.mw, lf, hw, ps, sd, toc, sr] if input_widget is not None: widgets.append(input_widget) if output_widget is not None: diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py index 0a8fc375ea..8de9ee1661 100644 --- a/src/calibre/gui2/preferences/conversion.py +++ b/src/calibre/gui2/preferences/conversion.py @@ -85,8 +85,8 @@ class CommonOptions(Base): def load_conversion_widgets(self): self.conversion_widgets = [LookAndFeelWidget, HeuristicsWidget, - SearchAndReplaceWidget, PageSetupWidget, - StructureDetectionWidget, TOCWidget] + PageSetupWidget, + StructureDetectionWidget, TOCWidget, SearchAndReplaceWidget,] class InputOptions(Base): diff --git a/src/calibre/manual/regexp.rst b/src/calibre/manual/regexp.rst index 5cd9a8b097..c8661cd427 100644 --- a/src/calibre/manual/regexp.rst +++ b/src/calibre/manual/regexp.rst @@ -21,7 +21,7 @@ This is, inevitably, going to be somewhat technical- after all, regular expressi Where in |app| can you use regular expressions? --------------------------------------------------- -There are a few places |app| uses regular expressions. There's the header/footer removal in conversion options, metadata detection from filenames in the import settings and, since last version, there's the option to use regular expressions to search and replace in metadata of multiple books. +There are a few places |app| uses regular expressions. There's the Search & Replace in conversion options, metadata detection from filenames in the import settings and Search & Replace when editing the metadata of books in bulk. 
What on earth *is* a regular expression? ------------------------------------------------ From 10f4b48d681d05c98697e5c50bf10e5bea93774c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 16:42:07 -0700 Subject: [PATCH 26/28] ... --- src/calibre/gui2/convert/search_and_replace.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index 88f7a2d4a0..0c7ae56ea0 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -46,9 +46,8 @@ class SearchAndReplaceWidget(Widget, Ui_Form): for x in ('sr1_search', 'sr2_search', 'sr3_search'): x = getattr(self, 'opt_'+x) try: - pat = unicode(x.regex).strip() - if pat: - re.compile(pat) + pat = unicode(x.regex) + re.compile(pat) except Exception, err: error_dialog(self, _('Invalid regular expression'), _('Invalid regular expression: %s')%err, show=True) From 4ae546bb60bc0d6d2fa2a777f166b3b799b6863e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 16:54:41 -0700 Subject: [PATCH 27/28] Complete review of Heuristics Processing and S&R conversion options --- src/calibre/ebooks/txt/output.py | 4 ++-- src/calibre/gui2/convert/txt_output.py | 15 ++++++--------- src/calibre/gui2/convert/xexp_edit.ui | 4 ++-- src/calibre/manual/conversion.rst | 22 +++++++++++----------- src/calibre/manual/faq.rst | 4 ++-- 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index 4d0d176fe4..29b3d899bc 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -51,12 +51,12 @@ class TXTOutput(OutputFormatPlugin): recommended_value=False, level=OptionRecommendation.LOW, help=_('Do not remove links within the document. 
This is only ' \ 'useful when paired with the markdown-format option because' \ - 'links are always removed with plain text output.')), + ' links are always removed with plain text output.')), OptionRecommendation(name='keep_image_references', recommended_value=False, level=OptionRecommendation.LOW, help=_('Do not remove image references within the document. This is only ' \ 'useful when paired with the markdown-format option because' \ - 'image references are always removed with plain text output.')), + ' image references are always removed with plain text output.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py index a16dd68014..0e6a6b9574 100644 --- a/src/calibre/gui2/convert/txt_output.py +++ b/src/calibre/gui2/convert/txt_output.py @@ -23,9 +23,9 @@ class PluginWidget(Widget, Ui_Form): ['newline', 'max_line_length', 'force_max_line_length', 'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references', 'txt_output_encoding']) - self.db, self.book_id = db, book_id + self.db, self.book_id = db, book_id for x in get_option('newline').option.choices: - self.opt_newline.addItem(x) + self.opt_newline.addItem(x) self.initialize_options(get_option, get_help, db, book_id) self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format) @@ -33,17 +33,14 @@ class PluginWidget(Widget, Ui_Form): def break_cycles(self): Widget.break_cycles(self) - + try: self.opt_markdown_format.stateChanged.disconnect() except: pass - + def enable_markdown_format(self, state): - if state == Qt.Checked: - state = True - else: - state = False + state = state == Qt.Checked self.opt_keep_links.setEnabled(state) self.opt_keep_image_references.setEnabled(state) - \ No newline at end of file + diff --git a/src/calibre/gui2/convert/xexp_edit.ui b/src/calibre/gui2/convert/xexp_edit.ui index 4b26eb8dcf..18b7c39b52 100644 --- a/src/calibre/gui2/convert/xexp_edit.ui 
+++ b/src/calibre/gui2/convert/xexp_edit.ui @@ -6,7 +6,7 @@ 0 0 - 434 + 430 74 @@ -59,7 +59,7 @@ ... - + :/images/wizard.png:/images/wizard.png diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 2bc5687262..de27a5f5bb 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -266,14 +266,14 @@ from bad formatting. Because these functions rely on common patterns, be aware t option may lead to worse results, so use with care. As an example, several of these options will remove all non-breaking-space entities. -:guilabel:`Preprocess input` - This option activates various activates |app|'s Heuristic Processing stage of the conversion pipeline. +:guilabel:`Enable heuristic processing` + This option activates |app|'s Heuristic Processing stage of the conversion pipeline. This must be enabled in order for various sub-functions to be applied :guilabel:`Unwrap lines` Enabling this option will cause |app| to attempt to detect and correct hard line breaks that exist - within a document using punctuation clues and line length. |app| will first attempt to detect whether - hard line breaks exist, if they do not appear to exist |app| will not attempt to unwrap lines. The + within a document using punctuation clues and line length. |app| will first attempt to detect whether + hard line breaks exist, if they do not appear to exist |app| will not attempt to unwrap lines. The line-unwrap factor can be reduced if you want to 'force' |app| to unwrap lines. :guilabel:`Line-unwrap factor` @@ -284,21 +284,21 @@ remove all non-breaking-space entities. :guilabel:`Detect and markup unformatted chapter headings and sub headings` If your document does not have Chapter Markers and titles formatted differently from the rest of the text, - |app| can use this option to attempt detection them and surround them with heading tags. <h2> tags are used - for chapter headings; <h3> tags are used for any titles that are detected. 
+ |app| can use this option to attempt detection them and surround them with heading tags. <h2> tags are used + for chapter headings; <h3> tags are used for any titles that are detected. This function will not create a TOC, but in many cases it will cause |app|'s default chapter detection settings - to correctly detect chapters and build a TOC. Adjust the Xpath under Structure Detection if a TOC is not automatically + to correctly detect chapters and build a TOC. Adjust the XPath under Structure Detection if a TOC is not automatically created. If there are no other headings used in the document then setting "//h:h2" under Structure Detection would be the easiest way to create a TOC for the document. - The inserted headings are not formatted, to apply formatting use the 'extra_css' option under + The inserted headings are not formatted, to apply formatting use the :guilabel:`Extra CSS` option under the Look and Feel conversion settings. For example, to center heading tags, use the following:: h2, h3 { text-align: center } -:guilabel:`Renumber sequences of <h1> or <h2> tags` - Some publishers format chapter headings using multiple <h1> or <h2> tags sequentially. +:guilabel:`Renumber sequences of <h1> or <h2> tags` + Some publishers format chapter headings using multiple <h1> or <h2> tags sequentially. |app|'s default conversion settings will cause such titles to be split into two pieces. This option will re-number the heading tags to prevent splitting. @@ -345,7 +345,7 @@ specifying a replacement expression. The search works by using a python regular expression. All matched text is simply removed from the document or replaced using the replacement pattern. You can learn more about regular expressions and -their syntax at http://docs.python.org/library/re.html. +their syntax at :ref:`regexptutorial`. .. 
_structure-detection: diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index b473893673..37d18ea329 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -107,10 +107,10 @@ My device is not being detected by |app|? Follow these steps to find the problem: * Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device like an iPhone/iPad etc. at the same time. - * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `http://calibre-ebook.com/download`_. + * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_. * Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows explorer or whatever the file management program on your computer is * In calibre, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled. - * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `http://bugs.calibre-ebook.com`_. + * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
How does |app| manage collections on my SONY reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 2c8218bc36fbd1941834d3e94446127d09106507 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 18 Jan 2011 17:34:45 -0700 Subject: [PATCH 28/28] ... --- src/calibre/ebooks/conversion/preprocess.py | 5 +++-- src/calibre/gui2/convert/search_and_replace.py | 2 +- src/calibre/gui2/convert/single.ui | 6 +++--- src/calibre/manual/regexp.rst | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index bbd71ede3a..087d8ed486 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' import functools, re -from calibre import entity_to_unicode +from calibre import entity_to_unicode, as_unicode XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') SVG_NS = 'http://www.w3.org/2000/svg' @@ -463,7 +463,8 @@ class HTMLPreProcessor(object): replace_txt = '' rules.insert(0, (search_re, replace_txt)) except Exception as e: - self.log.error('Failed to parse %s regexp because %s' % (search, e)) + self.log.error('Failed to parse %r regexp because %s' % + (search, as_unicode(e))) end_rules = [] # delete soft hyphens - moved here so it's executed after header/footer removal diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index 0c7ae56ea0..04a337a4fc 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -12,7 +12,7 @@ from calibre.gui2 import error_dialog class SearchAndReplaceWidget(Widget, Ui_Form): - TITLE = _('Search &\nReplace') + TITLE = _(u'Search\u00a0&\nReplace') HELP = _('Modify the document text and structure using user defined patterns.') COMMIT_NAME = 'search_and_replace' ICON = I('search.png') diff --git a/src/calibre/gui2/convert/single.ui 
b/src/calibre/gui2/convert/single.ui index ede548d8d7..bb447104d8 100644 --- a/src/calibre/gui2/convert/single.ui +++ b/src/calibre/gui2/convert/single.ui @@ -100,7 +100,7 @@ - 20 + 10 true @@ -129,8 +129,8 @@ 0 0 - 805 - 484 + 810 + 494 diff --git a/src/calibre/manual/regexp.rst b/src/calibre/manual/regexp.rst index c8661cd427..776141b113 100644 --- a/src/calibre/manual/regexp.rst +++ b/src/calibre/manual/regexp.rst @@ -94,7 +94,7 @@ I think I'm beginning to understand these regular expressions now... how do I us Conversions ^^^^^^^^^^^^^^ -Let's begin with the conversion settings, which is really neat. In the structure detection part, you can input a regexp (short for regular expression) that describes the header or footer string that will be removed during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the header or footer you want to remove, select and copy it, paste it into the regexp field on top of the window. If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would remove were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example:: +Let's begin with the conversion settings, which is really neat. In the Search and Replace part, you can input a regexp (short for regular expression) that describes the string that will be replaced during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the string you want to remove, select and copy it, paste it into the regexp field on top of the window. 
If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would replace were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example:: Maybe, but the cops feel like you do, Anita. What's one more dead vampire? New laws don't change that.

@@ -104,7 +104,7 @@ Let's begin with the conversion settings, which is really neat. In the structure

It had only been two years since Addison v. Clark. The court case gave us a revised version of what life was -(shamelessly ripped out of `this thread `_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ````, now you have to end with the corresponding closing tag (opening tags are ````, closing tags are ````), which is simply the next ```` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.) The opening tag can be described using ````, the closing tag using ````, thus we could remove everything between those tags using ``.*?``. But using this expression would be a bad idea, because it removes everything enclosed by - tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?`` The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the header/footer removal. +(shamelessly ripped out of `this thread `_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ````, now you have to end with the corresponding closing tag (opening tags are ````, closing tags are ````), which is simply the next ```` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.) 
The opening tag can be described using ````, the closing tag using ````, thus we could remove everything between those tags using ``.*?``. But using this expression would be a bad idea, because it removes everything enclosed by - tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?`` The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the removal. Adding books ^^^^^^^^^^^^^^^^