From 4cc1d11fdef95b7a64e68032ea58578c2fe20b2c Mon Sep 17 00:00:00 2001 From: Starson17 Date: Wed, 17 Nov 2010 11:38:26 -0500 Subject: [PATCH 01/30] Merge prior to trunk merge. --- src/calibre/gui2/dialogs/user_profiles.py | 18 +++++++++++++++++- src/calibre/web/feeds/news.py | 1 + src/calibre/web/feeds/recipes/collection.py | 5 +++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py index 6901e13968..51910b4996 100644 --- a/src/calibre/gui2/dialogs/user_profiles.py +++ b/src/calibre/gui2/dialogs/user_profiles.py @@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal ' import time, os from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \ - QVariant, QInputDialog + QVariant, QInputDialog, QSortFilterProxyModel from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.news import AutomaticNewsRecipe @@ -19,11 +19,20 @@ class CustomRecipeModel(QAbstractListModel): def __init__(self, recipe_model): QAbstractListModel.__init__(self) self.recipe_model = recipe_model + self.proxy_model = QSortFilterProxyModel() + self.proxy_model.setSourceModel(recipe_model) + self.proxy_model.sort(0, Qt.AscendingOrder) + self.proxy_model.setDynamicSortFilter(True) def title(self, index): row = index.row() if row > -1 and row < self.rowCount(): + print 'index is: ', index + print 'row is: ', row + #print 'recipe_model title return is: ', self.recipe_model.custom_recipe_collection[row].get('title', '') + #print 'proxy_model title return is: ', self.proxy_model.custom_recipe_collection[row].get('title', '') return self.recipe_model.custom_recipe_collection[row].get('title', '') + #return self.proxy_model.custom_recipe_collection[row].get('title', '') def script(self, index): row = index.row() @@ -80,7 +89,14 @@ class UserProfiles(ResizableDialog, Ui_Dialog): ResizableDialog.__init__(self, parent) self._model = self.model = CustomRecipeModel(recipe_model) + #self._model = 
self.model = CustomRecipeModel(proxy_model) self.available_profiles.setModel(self._model) + #proxy = QSortFilterProxyModel() + #proxy.setSourceModel(self._model) + #proxy.sort(0, Qt.AscendingOrder) + #proxy.setDynamicSortFilter(True) + #self.available_profiles.setModel(proxy) + self.available_profiles.currentChanged = self.current_changed self.connect(self.remove_feed_button, SIGNAL('clicked(bool)'), diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index ad2991d620..4dbbdceff8 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -667,6 +667,7 @@ class BasicNewsRecipe(Recipe): def _postprocess_html(self, soup, first_fetch, job_info): + print 'soup in _postprocess_html is: ', soup if self.no_stylesheets: for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')): link.extract() diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index cc96131c4b..1ee93e4440 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -61,9 +61,14 @@ def serialize_recipe(urn, recipe_class): def serialize_collection(mapping_of_recipe_classes): collection = E.recipe_collection() + ''' for urn in sorted(mapping_of_recipe_classes.keys(), key = lambda key: mapping_of_recipe_classes[key].title): recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn]) collection.append(recipe) + ''' + for urn, recipe_class in mapping_of_recipe_classes.items(): + recipe = serialize_recipe(urn, recipe_class) + collection.append(recipe) collection.set('count', str(len(collection))) return etree.tostring(collection, encoding='utf-8', xml_declaration=True, pretty_print=True) From ce89a41b1f5406c13afbe5504347f39d2253aaa5 Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 19 Nov 2010 11:25:52 -0500 Subject: [PATCH 02/30] Merge prior to trunk merge. 
--- src/calibre/web/feeds/news.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4dbbdceff8..4e41dcea0a 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -667,7 +667,7 @@ class BasicNewsRecipe(Recipe): def _postprocess_html(self, soup, first_fetch, job_info): - print 'soup in _postprocess_html is: ', soup + #print 'soup in _postprocess_html is: ', soup if self.no_stylesheets: for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')): link.extract() From 8c952c2c328a06c1519d8de9506f1bdda5cc80e9 Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 4 Feb 2011 08:47:03 -0500 Subject: [PATCH 03/30] Sorted user recipes in serialize_collection --- src/calibre/gui2/__init__.py | 1 + src/calibre/gui2/add.py | 16 +++++++++++++--- src/calibre/gui2/preferences/adding.py | 5 +++++ src/calibre/gui2/preferences/adding.ui | 13 +++++++++---- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index c94b99f141..b6d704f31c 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = ( gprefs.defaults['show_splash_screen'] = True gprefs.defaults['toolbar_icon_size'] = 'medium' +gprefs.defaults['automerge'] = 'ignore' gprefs.defaults['toolbar_text'] = 'auto' gprefs.defaults['show_child_bar'] = False gprefs.defaults['font'] = None diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 026fabea07..91c050a58a 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -8,7 +8,7 @@ from functools import partial from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer from calibre.gui2.dialogs.progress import ProgressDialog -from calibre.gui2 import question_dialog, error_dialog, info_dialog +from calibre.gui2 import question_dialog, 
error_dialog, info_dialog, gprefs from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG @@ -181,11 +181,21 @@ class DBAdder(QObject): # {{{ formats = [f for f in formats if not f.lower().endswith('.opf')] if prefs['add_formats_to_existing']: identical_book_list = self.db.find_identical_books(mi) - + if identical_book_list: # books with same author and nearly same title exist in db self.merged_books.add(mi.title) for identical_book in identical_book_list: - self.add_formats(identical_book, formats, replace=False) + if gprefs['automerge'] == 'ignore': + self.add_formats(identical_book, formats, replace=False) + print 'do something for ignore' + if gprefs['automerge'] == 'overwrite': + self.add_formats(identical_book, formats, replace=True) + print 'do something for overwrite' + if gprefs['automerge'] == 'new record': + id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + self.number_of_books_added += 1 + self.add_formats(id, formats) + print 'do something for new record' else: id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) self.number_of_books_added += 1 diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py index 7a27ed8f2d..2c6eecdbd0 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -11,6 +11,7 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget from calibre.gui2.preferences.adding_ui import Ui_Form from calibre.utils.config import prefs from calibre.gui2.widgets import FilenamePattern +from calibre.gui2 import gprefs class ConfigWidget(ConfigWidgetBase, Ui_Form): @@ -22,6 +23,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('read_file_metadata', prefs) r('swap_author_names', prefs) r('add_formats_to_existing', prefs) + choices = [(_('Ignore'), 'ignore'), (_('Overwrite'), 'overwrite'), + (_('New 
Record'), 'new record')] + r('automerge', gprefs, choices=choices) + #print 'The automerge setting is: ', gprefs['automerge'] self.filename_pattern = FilenamePattern(self) self.metadata_box.layout().insertWidget(0, self.filename_pattern) diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index 062c45e1ad..414eb204b4 100644 --- a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -31,19 +31,24 @@ - + #MOD was colspan="2" - If an existing book with a similar title and author is found that does not have the format being added, the format is added -to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored. + If an existing book with a similar title and author is found, the incoming format will be added to the existing book record where possible. +If the existing book already has the incoming format, then the setting to the right controls and the new format will be ignored, it will overwrite the old format +or a new record will be created. Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. 
- If books with similar titles and authors found, &merge the new files automatically + If books with similar titles and authors found, &try to merge the new formats automatically +and do this for duplicate formats: + #MOD added as new item + + From 1a5c956188322d1d43bce5e1a87a7e247e9a1be9 Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 4 Feb 2011 12:17:33 -0500 Subject: [PATCH 04/30] Sorted user recipes in serialize_collection --- src/calibre/gui2/add.py | 40 +++++++++++++++++--------- src/calibre/gui2/preferences/adding.py | 4 +-- src/calibre/gui2/preferences/adding.ui | 14 ++++----- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 91c050a58a..38260aedc4 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -179,28 +179,42 @@ class DBAdder(QObject): # {{{ cover = f.read() orig_formats = formats formats = [f for f in formats if not f.lower().endswith('.opf')] - if prefs['add_formats_to_existing']: + if prefs['add_formats_to_existing']: #automerge is on identical_book_list = self.db.find_identical_books(mi) - - if identical_book_list: # books with same author and nearly same title exist in db + print 'identical_book_list is: ', identical_book_list #We are dealing with only one file of a specific format, and this is a list of matching db book records to the one file/format being processed + if identical_book_list: # books with same author and nearly same title exist in db for the one format being handled self.merged_books.add(mi.title) - for identical_book in identical_book_list: + for identical_book in identical_book_list: #this will add the new format to *each* matching entry in the db - Do we need to do this? 
if gprefs['automerge'] == 'ignore': self.add_formats(identical_book, formats, replace=False) - print 'do something for ignore' if gprefs['automerge'] == 'overwrite': self.add_formats(identical_book, formats, replace=True) - print 'do something for overwrite' + print 'inside overwrite' if gprefs['automerge'] == 'new record': - id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) - self.number_of_books_added += 1 - self.add_formats(id, formats) - print 'do something for new record' - else: + print 'We are in new record' + ''' + We are here because we have at least one book record in the db that matches the one file/format being processed + We need to check if the file/format being processed matches a format in the matching book record. + If so, create new record (as below), else, add to existing record, as above. + Test if format exists in matching record. identical_book is an id, formats is a FQPN path in a list + ''' + for path in formats: #I think there's always only one path in formats - Check + fmt = os.path.splitext(path)[-1].replace('.', '').upper() #this is the format extension of the incoming file + ib_fmts = self.db.formats(identical_book, index_is_id=True) #These are the formats in the record + if fmt in ib_fmts: #Create a new record if the incoming format already exists in the identical book (ib) record + id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + self.number_of_books_added += 1 + self.add_formats(id, formats) + #If we created a new record, are we done - or should we go on and add to other existing records that don't have this format? 
+ else: #a new record is not required - the incoming format does not exist in the ib record + self.add_formats(identical_book, formats, replace=False) + + else: # books with same author and nearly same title do not exist in db id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) self.number_of_books_added += 1 self.add_formats(id, formats) - else: + + else: #automerge is off -use legacy duplicates code id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) if id is None: self.duplicates.append((mi, cover, orig_formats)) @@ -214,7 +228,7 @@ class DBAdder(QObject): # {{{ return mi.title def add_formats(self, id, formats, replace=True): - for path in formats: + for path in formats: #path and formats will be the same fully qualified path and book filename when used by automerge fmt = os.path.splitext(path)[-1].replace('.', '').upper() with open(path, 'rb') as f: self.db.add_format(id, fmt, f, index_is_id=True, diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py index 2c6eecdbd0..8a7c181b56 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -23,10 +23,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('read_file_metadata', prefs) r('swap_author_names', prefs) r('add_formats_to_existing', prefs) - choices = [(_('Ignore'), 'ignore'), (_('Overwrite'), 'overwrite'), - (_('New Record'), 'new record')] + choices = [(_('Ignore the incoming format'), 'ignore'), (_('Overwrite the existing format with the incoming format'), 'overwrite'), (_('Create a new book record for the incoming format'), 'new record')] r('automerge', gprefs, choices=choices) - #print 'The automerge setting is: ', gprefs['automerge'] self.filename_pattern = FilenamePattern(self) self.metadata_box.layout().insertWidget(0, self.filename_pattern) diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index 414eb204b4..92b72ede6e 100644 --- 
a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -31,22 +31,22 @@ - #MOD was colspan="2" + - If an existing book with a similar title and author is found, the incoming format will be added to the existing book record where possible. -If the existing book already has the incoming format, then the setting to the right controls and the new format will be ignored, it will overwrite the old format -or a new record will be created. + If an existing book with a similar title and author is found, the incoming format will be added to the existing book record, where possible. +If the existing book already has the incoming format, then the setting to the right controls and the new format will be ignored, it will overwrite the existing format +or a new book record will be created for the incoming format. Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. - If books with similar titles and authors found, &try to merge the new formats automatically -and do this for duplicate formats: + Automerge: If books with similar titles and authors found, try to &merge the incoming formats automatically + into existing book records. The option to the right controls what happens when the existing record already has the incoming format: - #MOD added as new item + From d80f86e0979ea726c728d5c638e65b2ea4b5050d Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 4 Feb 2011 15:26:34 -0500 Subject: [PATCH 05/30] Merge prior to trunk merge. 
--- src/calibre/gui2/add.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 38260aedc4..57e03645cf 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -181,17 +181,15 @@ class DBAdder(QObject): # {{{ formats = [f for f in formats if not f.lower().endswith('.opf')] if prefs['add_formats_to_existing']: #automerge is on identical_book_list = self.db.find_identical_books(mi) - print 'identical_book_list is: ', identical_book_list #We are dealing with only one file of a specific format, and this is a list of matching db book records to the one file/format being processed if identical_book_list: # books with same author and nearly same title exist in db for the one format being handled self.merged_books.add(mi.title) + a_new_record_has_been_created = False for identical_book in identical_book_list: #this will add the new format to *each* matching entry in the db - Do we need to do this? if gprefs['automerge'] == 'ignore': self.add_formats(identical_book, formats, replace=False) if gprefs['automerge'] == 'overwrite': self.add_formats(identical_book, formats, replace=True) - print 'inside overwrite' - if gprefs['automerge'] == 'new record': - print 'We are in new record' + if gprefs['automerge'] == 'new record' and not a_new_record_has_been_created: ''' We are here because we have at least one book record in the db that matches the one file/format being processed We need to check if the file/format being processed matches a format in the matching book record. @@ -205,6 +203,7 @@ class DBAdder(QObject): # {{{ id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) self.number_of_books_added += 1 self.add_formats(id, formats) + a_new_record_has_been_created = True #If we created a new record, are we done - or should we go on and add to other existing records that don't have this format? 
else: #a new record is not required - the incoming format does not exist in the ib record self.add_formats(identical_book, formats, replace=False) From ac613b1723800c068ac4de54d5da1c28850031be Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 4 Feb 2011 16:15:43 -0500 Subject: [PATCH 06/30] Sorted user recipes in serialize_collection --- src/calibre/gui2/add.py | 19 +++++++++---------- src/calibre/gui2/preferences/adding.py | 2 +- src/calibre/gui2/preferences/adding.ui | 7 +++---- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 57e03645cf..871a61145f 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -181,10 +181,10 @@ class DBAdder(QObject): # {{{ formats = [f for f in formats if not f.lower().endswith('.opf')] if prefs['add_formats_to_existing']: #automerge is on identical_book_list = self.db.find_identical_books(mi) - if identical_book_list: # books with same author and nearly same title exist in db for the one format being handled + if identical_book_list: # books with same author and nearly same title exist in db self.merged_books.add(mi.title) a_new_record_has_been_created = False - for identical_book in identical_book_list: #this will add the new format to *each* matching entry in the db - Do we need to do this? + for identical_book in identical_book_list: if gprefs['automerge'] == 'ignore': self.add_formats(identical_book, formats, replace=False) if gprefs['automerge'] == 'overwrite': @@ -196,16 +196,15 @@ class DBAdder(QObject): # {{{ If so, create new record (as below), else, add to existing record, as above. Test if format exists in matching record. 
identical_book is an id, formats is a FQPN path in a list ''' - for path in formats: #I think there's always only one path in formats - Check - fmt = os.path.splitext(path)[-1].replace('.', '').upper() #this is the format extension of the incoming file - ib_fmts = self.db.formats(identical_book, index_is_id=True) #These are the formats in the record - if fmt in ib_fmts: #Create a new record if the incoming format already exists in the identical book (ib) record + for path in formats: + fmt = os.path.splitext(path)[-1].replace('.', '').upper() + ib_fmts = self.db.formats(identical_book, index_is_id=True) + if fmt in ib_fmts: #Create a new record id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) self.number_of_books_added += 1 self.add_formats(id, formats) a_new_record_has_been_created = True - #If we created a new record, are we done - or should we go on and add to other existing records that don't have this format? - else: #a new record is not required - the incoming format does not exist in the ib record + else: #new record not required self.add_formats(identical_book, formats, replace=False) else: # books with same author and nearly same title do not exist in db @@ -213,7 +212,7 @@ class DBAdder(QObject): # {{{ self.number_of_books_added += 1 self.add_formats(id, formats) - else: #automerge is off -use legacy duplicates code + else: #automerge is off id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) if id is None: self.duplicates.append((mi, cover, orig_formats)) @@ -227,7 +226,7 @@ class DBAdder(QObject): # {{{ return mi.title def add_formats(self, id, formats, replace=True): - for path in formats: #path and formats will be the same fully qualified path and book filename when used by automerge + for path in formats: fmt = os.path.splitext(path)[-1].replace('.', '').upper() with open(path, 'rb') as f: self.db.add_format(id, fmt, f, index_is_id=True, diff --git a/src/calibre/gui2/preferences/adding.py 
b/src/calibre/gui2/preferences/adding.py index e2a80bfb8e..50540ddd7d 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -24,7 +24,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('read_file_metadata', prefs) r('swap_author_names', prefs) r('add_formats_to_existing', prefs) - choices = [(_('Ignore the incoming format'), 'ignore'), (_('Overwrite the existing format with the incoming format'), 'overwrite'), (_('Create a new book record for the incoming format'), 'new record')] + choices = [(_('Ignore incoming format'), 'ignore'), (_('Overwrite existing format'), 'overwrite'), (_('Create new record'), 'new record')] r('automerge', gprefs, choices=choices) r('new_book_tags', prefs, setting=CommaSeparatedList) diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index 8835391895..1cee6d8b9e 100644 --- a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -62,14 +62,13 @@ If an existing book with a similar title and author is found, the incoming format will be added to the existing book record, where possible. -If the existing book already has the incoming format, then the setting to the right controls and the new format will be ignored, it will overwrite the existing format -or a new book record will be created for the incoming format. +If the existing book already has the incoming format, then the setting to the right controls whether the incoming format will be ignored, overwrite the existing format or a new book record will be created. Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. - Automerge: If books with similar titles and authors found, try to &merge the incoming formats automatically - into existing book records. 
The option to the right controls what happens when the existing record already has the incoming format: + Automerge: If books with similar titles and authors found, &merge the incoming formats automatically into +existing book records. The ComboBox to the right controls what happens when an existing record already has the incoming format: From db907d8ccba27936eecdefc8291864a6d2e35250 Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 4 Feb 2011 16:29:04 -0500 Subject: [PATCH 07/30] Sorted user recipes in serialize_collection --- src/calibre/gui2/actions/add.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index 4236a63340..25127d3635 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -244,8 +244,8 @@ class AddAction(InterfaceAction): x.decode(preferred_encoding, 'replace') for x in self._adder.merged_books]) info_dialog(self.gui, _('Merged some books'), - _('Some duplicates were found and merged into the ' - 'following existing books:'), det_msg=books, show=True) + _('The following duplicate books were found and incoming book formats were ' + 'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True) if getattr(self._adder, 'critical', None): det_msg = [] for name, log in self._adder.critical.items(): From 52c0a1899bbe52f0bbe9fa253c7f9503095781c1 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 6 Feb 2011 13:49:50 -0500 Subject: [PATCH 08/30] TXT Input: Enhance formatting detection regexes. Add basic TXTZ input support. 
--- src/calibre/ebooks/txt/input.py | 27 +++++++++++++++++++++++---- src/calibre/ebooks/txt/processor.py | 8 ++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index e240205222..ed1597111d 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -4,23 +4,29 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +import glob +import mimetypes import os +import shutil +from calibre import _ent_pat, xml_entity_to_unicode from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - normalize_line_endings, convert_textile, remove_indents, block_to_single_line -from calibre import _ent_pat, xml_entity_to_unicode + normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ + image_list +from calibre.ptempfile import TemporaryDirectory +from calibre.utils.zipfile import ZipFile class TXTInput(InputFormatPlugin): name = 'TXT Input' author = 'John Schember' description = 'Convert TXT files to HTML' - file_types = set(['txt']) + file_types = set(['txt', 'txtz']) options = set([ OptionRecommendation(name='paragraph_type', recommended_value='auto', @@ -57,10 +63,23 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): self.log = log + txt = '' log.debug('Reading text from file...') length = 0 - txt = stream.read() + # Extract content from zip archive. 
+ if file_ext == 'txtz': + log.debug('De-compressing content to temporary directory...') + with TemporaryDirectory('_untxtz') as tdir: + zf = ZipFile(stream) + zf.extractall(tdir) + + txts = glob.glob(os.path.join(tdir, '*.txt')) + for t in txts: + with open(t, 'rb') as tf: + txt += tf.read() + else: + txt = stream.read() # Get the encoding of the document. if options.input_encoding: diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index f7b6cce234..e4ff2763e5 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -221,9 +221,9 @@ def detect_formatting_type(txt): markdown_count += len(re.findall('(?mu)^=+$', txt)) markdown_count += len(re.findall('(?mu)^-+$', txt)) # Images - markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) + markdown_count += len(re.findall('(?u)!\[.*?\](\[|\()', txt)) # Links - markdown_count += len(re.findall('(?u)(^|(?P
[^!]))\[.*?\]\([^)]+\)', txt))
+    markdown_count += len(re.findall('(?u)(?:^|[^!])\[.*?\](\[|\()', txt))
 
     # Check for textile
     # Headings
@@ -231,9 +231,9 @@ def detect_formatting_type(txt):
     # Block quote.
     textile_count += len(re.findall(r'(?mu)^bq\.', txt))
     # Images
-    textile_count += len(re.findall(r'\![^\s]+(?=.*?/)(:[^\s]+)*', txt))
+    textile_count += len(re.findall(r'(?mu)(?<=\!)\S+(?=\!)', txt))
     # Links
-    textile_count += len(re.findall(r'"(?=".*?\()(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
+    textile_count += len(re.findall(r'"[^"]*":\S+', txt))
 
     # Decide if either markdown or textile is used in the text
     # based on the number of unique formatting elements found.

From b6ec16463ee3083530b7aa76975ac60c73ff3386 Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 13:52:00 -0500
Subject: [PATCH 09/30] TXT Input: Remove unused imports.

---
 src/calibre/ebooks/txt/input.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index ed1597111d..55d37da026 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -5,9 +5,7 @@ __copyright__ = '2009, John Schember '
 __docformat__ = 'restructuredtext en'
 
 import glob
-import mimetypes
 import os
-import shutil
 
 from calibre import _ent_pat, xml_entity_to_unicode
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
@@ -16,8 +14,7 @@ from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
     preserve_spaces, detect_paragraph_type, detect_formatting_type, \
-    normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
-    image_list
+    normalize_line_endings, convert_textile, remove_indents, block_to_single_line
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 

From 0257fe4a0202ee94cd56030f338c5baca74da74d Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 14:09:14 -0500
Subject: [PATCH 10/30] TXT Output: Simplify remove image and link regexes

---
 src/calibre/ebooks/txt/markdownml.py | 6 ++----
 src/calibre/ebooks/txt/textileml.py  | 4 +---
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index 116561f355..c179378049 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -35,11 +35,9 @@ class MarkdownMLizer(object):
             html = unicode(etree.tostring(item.data, encoding=unicode))
             
             if not self.opts.keep_links:
-                html = re.sub(r'<\s*a[^>]*>', '', html)
-                html = re.sub(r'<\s*/\s*a\s*>', '', html)
+                html = re.sub(r'<\s*/*\s*a\b[^>]*>', '', html)
             if not self.opts.keep_image_references:
-                html = re.sub(r'<\s*img[^>]*>', '', html)
-                html = re.sub(r'<\s*img\s*>', '', html)
+                html = re.sub(r'<\s*img[^>]*>', '', html)
             
             text = html2text(html)
         
diff --git a/src/calibre/ebooks/txt/textileml.py b/src/calibre/ebooks/txt/textileml.py
index 94834d8e79..d7e11695c5 100644
--- a/src/calibre/ebooks/txt/textileml.py
+++ b/src/calibre/ebooks/txt/textileml.py
@@ -36,11 +36,9 @@ class TextileMLizer(object):
             html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
 
             if not self.opts.keep_links:
-                html = re.sub(r'<\s*a[^>]*>', '', html)
-                html = re.sub(r'<\s*/\s*a\s*>', '', html)
+                html = re.sub(r'<\s*/*\s*a\b[^>]*>', '', html)
             if not self.opts.keep_image_references:
                 html = re.sub(r'<\s*img[^>]*>', '', html)
-                html = re.sub(r'<\s*img\s*>', '', html)
 
             text = html2textile(html)
 

From 837bd4e548e000480eb4d104b66decefc41281c1 Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 14:37:02 -0500
Subject: [PATCH 11/30] TXTZ metadata reader.

---
 src/calibre/customize/builtins.py   | 11 ++++++++++
 src/calibre/ebooks/__init__.py      |  2 +-
 src/calibre/ebooks/metadata/txt.py  | 12 ++++++----
 src/calibre/ebooks/metadata/txtz.py | 34 +++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 src/calibre/ebooks/metadata/txtz.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index e0367515bc..04e880b714 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -325,6 +325,17 @@ class TXTMetadataReader(MetadataReaderPlugin):
         from calibre.ebooks.metadata.txt import get_metadata
         return get_metadata(stream)
 
+class TXTZMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read TXTZ metadata'
+    file_types  = set(['txtz'])
+    description = _('Read metadata from %s files') % 'TXTZ'
+    author      = 'John Schember'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.txtz import get_metadata
+        return get_metadata(stream)
+
 class ZipMetadataReader(MetadataReaderPlugin):
 
     name = 'Read ZIP metadata'
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 4dc97f43ed..49604ae682 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -25,7 +25,7 @@ class DRMError(ValueError):
 class ParserError(ValueError):
     pass
 
-BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
+BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm',
                    'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
                    'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                    'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py
index 79713774e3..70d3c72ae0 100644
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@@ -1,16 +1,20 @@
-'''Read meta information from TXT files'''
-
-from __future__ import with_statement
+# -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember '
 
+'''
+Read meta information from TXT files
+'''
+
 import re
 
 from calibre.ebooks.metadata import MetaInformation
 
 def get_metadata(stream, extract_cover=True):
-    """ Return metadata as a L{MetaInfo} object """
+    '''
+    Return metadata as a L{MetaInfo} object
+    '''
     mi = MetaInformation(_('Unknown'), [_('Unknown')])
     stream.seek(0)
 
diff --git a/src/calibre/ebooks/metadata/txtz.py b/src/calibre/ebooks/metadata/txtz.py
new file mode 100644
index 0000000000..b9d607c63b
--- /dev/null
+++ b/src/calibre/ebooks/metadata/txtz.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, John Schember '
+
+'''
+Read meta information from TXT files
+'''
+
+import os
+
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+def get_metadata(stream, extract_cover=True):
+    '''
+    Return metadata as a L{MetaInfo} object
+    '''
+    mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    stream.seek(0)
+
+    with TemporaryDirectory('_untxtz_mdata') as tdir:
+        try:
+            zf = ZipFile(stream)
+            zf.extract('metadata.opf', tdir)
+            
+            from calibre.ebooks.metadata.opf2 import OPF
+            with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff:
+                mi = OPF(opff).to_book_metadata()
+        except:
+            return mi
+
+    return mi

From 0f86858c3ba06357a47dc88943ffe32818cd5329 Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 15:01:31 -0500
Subject: [PATCH 12/30] TXTZ metadata writer.

---
 src/calibre/customize/builtins.py   | 11 +++++++++++
 src/calibre/ebooks/metadata/txtz.py | 13 +++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 04e880b714..b56d015e54 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -423,6 +423,17 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
         from calibre.ebooks.metadata.topaz import set_metadata
         set_metadata(stream, mi)
 
+class TXTZMetadataWriter(MetadataWriterPlugin):
+
+    name        = 'Set TXTZ metadata'
+    file_types  = set(['txtz'])
+    description = _('Set metadata from %s files') % 'TXTZ'
+    author      = 'John Schember'
+
+    def set_metadata(self, stream, mi, type):
+        from calibre.ebooks.metadata.txtz import set_metadata
+        set_metadata(stream, mi)
+
 # }}}
 
 from calibre.ebooks.comic.input import ComicInput
diff --git a/src/calibre/ebooks/metadata/txtz.py b/src/calibre/ebooks/metadata/txtz.py
index b9d607c63b..ba0078328e 100644
--- a/src/calibre/ebooks/metadata/txtz.py
+++ b/src/calibre/ebooks/metadata/txtz.py
@@ -9,9 +9,12 @@ Read meta information from TXT files
 
 import os
 
+from cStringIO import StringIO
+
 from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import TemporaryDirectory
-from calibre.utils.zipfile import ZipFile
+from calibre.utils.zipfile import ZipFile, safe_replace
 
 def get_metadata(stream, extract_cover=True):
     '''
@@ -24,11 +27,13 @@ def get_metadata(stream, extract_cover=True):
         try:
             zf = ZipFile(stream)
             zf.extract('metadata.opf', tdir)
-            
-            from calibre.ebooks.metadata.opf2 import OPF
             with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff:
                 mi = OPF(opff).to_book_metadata()
         except:
             return mi
-
     return mi
+
+def set_metadata(stream, mi):
+    stream.seek(0)
+    opf = StringIO(metadata_to_opf(mi))
+    safe_replace(stream, 'metadata.opf', opf)

From c481be879df7a438639b337ff0232b9abf4dc0fc Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 15:46:07 -0500
Subject: [PATCH 13/30] TXTZ Output support.

---
 src/calibre/customize/builtins.py   |  2 ++
 src/calibre/ebooks/metadata/txtz.py |  1 -
 src/calibre/ebooks/txt/output.py    | 35 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index b56d015e54..32c512fe39 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -468,6 +468,7 @@ from calibre.ebooks.rb.output import RBOutput
 from calibre.ebooks.rtf.output import RTFOutput
 from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
+from calibre.ebooks.txt.output import TXTZOutput
 from calibre.ebooks.html.output import HTMLOutput
 from calibre.ebooks.snb.output import SNBOutput
 
@@ -553,6 +554,7 @@ plugins += [
     RTFOutput,
     TCROutput,
     TXTOutput,
+    TXTZOutput,
     HTMLOutput,
     SNBOutput,
 ]
diff --git a/src/calibre/ebooks/metadata/txtz.py b/src/calibre/ebooks/metadata/txtz.py
index ba0078328e..ae6efb4838 100644
--- a/src/calibre/ebooks/metadata/txtz.py
+++ b/src/calibre/ebooks/metadata/txtz.py
@@ -34,6 +34,5 @@ def get_metadata(stream, extract_cover=True):
     return mi
 
 def set_metadata(stream, mi):
-    stream.seek(0)
     opf = StringIO(metadata_to_opf(mi))
     safe_replace(stream, 'metadata.opf', opf)
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index b73a6e8908..3905081a84 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -5,11 +5,17 @@ __copyright__ = '2009, John Schember '
 __docformat__ = 'restructuredtext en'
 
 import os
+import shutil
+
+from lxml import etree
 
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
+from calibre.ebooks.oeb.base import OEB_IMAGES 
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
+from calibre.ptempfile import TemporaryDirectory, TemporaryFile
+from calibre.utils.zipfile import ZipFile
 
 class TXTOutput(OutputFormatPlugin):
 
@@ -93,3 +99,32 @@ class TXTOutput(OutputFormatPlugin):
         if close:
             out_stream.close()
 
+
+class TXTZOutput(TXTOutput):
+    
+    name = 'TXTZ Output'
+    author = 'John Schember'
+    file_type = 'txtz'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        with TemporaryDirectory('_txtz_output') as tdir:
+            # TXT
+            with TemporaryFile('index.txt') as tf:
+                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
+                shutil.copy(tf, os.path.join(tdir, 'index.txt'))
+
+            # Images
+            for item in oeb_book.manifest:
+                if item.media_type in OEB_IMAGES:
+                    path = os.path.join(tdir, os.path.dirname(item.href))
+                    if not os.path.exists(path):
+                        os.makedirs(path)
+                    with open(os.path.join(tdir, item.href), 'wb') as imgf:
+                        imgf.write(item.data)
+            
+            # Metadata
+            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: 
+                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
+            
+            txtz = ZipFile(output_path, 'w')
+            txtz.add_dir(tdir)

From 6548dbd33cf2c917aa72bc73b54ba139c4094d2d Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 16:18:25 -0500
Subject: [PATCH 14/30] TXT Input: Read and set metadata.

---
 src/calibre/ebooks/txt/input.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 55d37da026..12f780913c 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -191,4 +191,11 @@ class TXTInput(InputFormatPlugin):
                 {})
         options.debug_pipeline = odi
         os.remove(htmlfile.name)
+        
+        # Set metadata from file.
+        from calibre.customize.ui import get_file_type_metadata
+        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
+        mi = get_file_type_metadata(stream, file_ext)
+        meta_info_to_oeb_metadata(mi, oeb.metadata, log)
+        
         return oeb

From 1aa66f42fe809583a5fa462e26a9514042864db2 Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 19:45:39 -0500
Subject: [PATCH 15/30] TXT Output: clean ascii characters. Textile output
 remove span attributes.

---
 src/calibre/ebooks/txt/output.py    | 2 ++
 src/calibre/ebooks/txt/textileml.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index 3905081a84..d021cbbba6 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -15,6 +15,7 @@ from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 from calibre.ptempfile import TemporaryDirectory, TemporaryFile
+from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.zipfile import ZipFile
 
 class TXTOutput(OutputFormatPlugin):
@@ -79,6 +80,7 @@ class TXTOutput(OutputFormatPlugin):
             writer = TXTMLizer(log)
 
         txt = writer.extract_content(oeb_book, opts)
+        txt = clean_ascii_chars(txt)
 
         log.debug('\tReplacing newlines with selected type...')
         txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)
diff --git a/src/calibre/ebooks/txt/textileml.py b/src/calibre/ebooks/txt/textileml.py
index d7e11695c5..284e4846d9 100644
--- a/src/calibre/ebooks/txt/textileml.py
+++ b/src/calibre/ebooks/txt/textileml.py
@@ -41,6 +41,7 @@ class TextileMLizer(object):
                 html = re.sub(r'<\s*img[^>]*>', '', html)
 
             text = html2textile(html)
+            text = text.replace('%', '')
 
             # Ensure the section ends with at least two new line characters.
             # This is to prevent the last paragraph from a section being

From ad32cd1d726c2704de10ad22b8bae7b75effdfca Mon Sep 17 00:00:00 2001
From: John Schember 
Date: Sun, 6 Feb 2011 20:41:12 -0500
Subject: [PATCH 16/30] TXT Output: simplify retain indent regex. Force 4
 indents to ensure we don't get situations where an entire line is
 whitespace.

---
 src/calibre/ebooks/txt/processor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index e4ff2763e5..685a7504b9 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -29,8 +29,7 @@ def clean_txt(txt):
     txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
     
     # Replace whitespace at the beginning of the line with &nbsp;
-    txt = re.sub('(?m)(?P<space>^[ ]+)(?=.)', lambda mo: '&nbsp;' * mo.groups('space').count(' '), txt)
-    txt = re.sub('(?m)(?P<space>^[\t]+)(?=.)', lambda mo: '&nbsp;' * 4 * mo.groups('space').count('\t'), txt)
+    txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', '&nbsp;' * 4, txt)
 
     # Condense redundant spaces
     txt = re.sub('[ ]{2,}', ' ', txt)

From b8b6c83a1d2096dcbcf3c1d42c09905a76cdaf10 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Mon, 7 Feb 2011 09:29:09 +0000
Subject: [PATCH 17/30] fix #8807: Renaming using the Category Editor, along
 with several other bugs found at the same time

---
 src/calibre/gui2/dialogs/tag_list_editor.py | 54 +++++++++++----------
 src/calibre/gui2/tag_view.py                |  5 +-
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/src/calibre/gui2/dialogs/tag_list_editor.py b/src/calibre/gui2/dialogs/tag_list_editor.py
index ced0e9a505..5e35a236e4 100644
--- a/src/calibre/gui2/dialogs/tag_list_editor.py
+++ b/src/calibre/gui2/dialogs/tag_list_editor.py
@@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal '
 
 from PyQt4.QtCore import SIGNAL, Qt
-from PyQt4.QtGui import QDialog, QListWidgetItem
+from PyQt4.QtGui import QDialog, QListWidgetItem, QListWidget
 
 from calibre.gui2.dialogs.tag_list_editor_ui import Ui_TagListEditor
 from calibre.gui2 import question_dialog, error_dialog
@@ -11,30 +11,38 @@ class ListWidgetItem(QListWidgetItem):
 
     def __init__(self, txt):
         QListWidgetItem.__init__(self, txt)
-        self.old_value = txt
-        self.cur_value = txt
+        self.initial_value = txt
+        self.current_value = txt
+        self.previous_value = txt
 
     def data(self, role):
         if role == Qt.DisplayRole:
-            if self.old_value != self.cur_value:
-                return _('%s (was %s)')%(self.cur_value, self.old_value)
+            if self.initial_value != self.current_value:
+                return _('%s (was %s)')%(self.current_value, self.initial_value)
             else:
-                return self.cur_value
+                return self.current_value
         elif role == Qt.EditRole:
-            return self.cur_value
+            return self.current_value
         else:
             return QListWidgetItem.data(self, role)
 
     def setData(self, role, data):
         if role == Qt.EditRole:
-            self.cur_value = data.toString()
+            self.previous_value = self.current_value
+            self.current_value = data.toString()
         QListWidgetItem.setData(self, role, data)
 
     def text(self):
-        return self.cur_value
+        return self.current_value
+
+    def initial_text(self):
+        return self.initial_value
+
+    def previous_text(self):
+        return self.previous_value
 
     def setText(self, txt):
-        self.cur_value = txt
+        self.current_value = txt
         QListWidgetItem.setText(txt)
 
 class TagListEditor(QDialog, Ui_TagListEditor):
@@ -49,7 +57,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         self.setWindowIcon(icon)
 
         self.to_rename = {}
-        self.to_delete = []
+        self.to_delete = set()
         self.all_tags = {}
 
         for k,v in data:
@@ -57,6 +65,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         for tag in sorted(self.all_tags.keys(), key=key):
             item = ListWidgetItem(tag)
             item.setData(Qt.UserRole, self.all_tags[tag])
+            item.setFlags (item.flags() | Qt.ItemIsEditable);
             self.available_tags.addItem(item)
 
         if tag_to_match is not None:
@@ -64,23 +73,20 @@ class TagListEditor(QDialog, Ui_TagListEditor):
             if len(items) == 1:
                 self.available_tags.setCurrentItem(items[0])
 
-        self.connect(self.delete_button,  SIGNAL('clicked()'), self.delete_tags)
-        self.connect(self.rename_button,  SIGNAL('clicked()'), self.rename_tag)
-        self.connect(self.available_tags, SIGNAL('itemDoubleClicked(QListWidgetItem *)'), self._rename_tag)
-        self.connect(self.available_tags, SIGNAL('itemChanged(QListWidgetItem *)'), self.finish_editing)
+        self.delete_button.clicked.connect(self.delete_tags)
+        self.rename_button.clicked.connect(self.rename_tag)
+        self.available_tags.itemDoubleClicked.connect(self._rename_tag)
+        self.available_tags.itemChanged.connect(self.finish_editing)
 
     def finish_editing(self, item):
         if not item.text():
                 error_dialog(self, _('Item is blank'),
                              _('An item cannot be set to nothing. Delete it instead.')).exec_()
-                item.setText(self.item_before_editing.text())
+                item.setText(item.previous_text())
                 return
-        if item.text() != self.item_before_editing.text():
-            (id,ign) = self.item_before_editing.data(Qt.UserRole).toInt()
-            if item.text() not in self.to_rename:
-                self.to_rename[item.text()] = [id]
-            else:
-                self.to_rename[item.text()].append(id)
+        if item.text() != item.initial_text():
+            id_ = item.data(Qt.UserRole).toInt()[0]
+            self.to_rename[id_] = item.text()
 
     def rename_tag(self):
         item = self.available_tags.currentItem()
@@ -91,8 +97,6 @@ class TagListEditor(QDialog, Ui_TagListEditor):
             error_dialog(self, _('No item selected'),
                          _('You must select one item from the list of Available items.')).exec_()
             return
-        self.item_before_editing = item.clone()
-        item.setFlags (item.flags() | Qt.ItemIsEditable);
         self.available_tags.editItem(item)
 
     def delete_tags(self, item=None):
@@ -108,7 +112,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         row = self.available_tags.row(deletes[0])
         for item in deletes:
             (id,ign) = item.data(Qt.UserRole).toInt()
-            self.to_delete.append(id)
+            self.to_delete.add(id)
             self.available_tags.takeItem(self.available_tags.row(item))
 
         if row >= self.available_tags.count():
diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 041f0a715e..fd3530d333 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -1259,9 +1259,8 @@ class TagBrowserMixin(object): # {{{
             if rename_func:
                 for item in to_delete:
                     delete_func(item)
-                for text in to_rename:
-                        for old_id in to_rename[text]:
-                            rename_func(old_id, new_name=unicode(text))
+                for old_id in to_rename:
+                    rename_func(old_id, new_name=unicode(to_rename[old_id]))
 
             # Clean up the library view
             self.do_tag_item_renamed()

From 90878e844b82c12b629a6f81210272c261357d99 Mon Sep 17 00:00:00 2001
From: ldolse 
Date: Mon, 7 Feb 2011 18:46:30 +0800
Subject: [PATCH 18/30] fix for text based horizontal rules in dehyphenate and
 scene break markup

---
 src/calibre/ebooks/conversion/preprocess.py | 12 ++++++------
 src/calibre/ebooks/conversion/utils.py      |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 691aa307d7..6fafbb992e 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -245,17 +245,17 @@ class Dehyphenator(object):
         self.html = html
         self.format = format
         if format == 'html':
-            intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) elif format == 'pdf': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?P

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?P

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) elif format == 'txt': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) elif format == 'individual_words': - intextmatch = re.compile(u'(?!<)(?P\w+)(-|‐)\s*(?P\w+)(?![^<]*?>)') + intextmatch = re.compile(u'(?!<)(?P[^\W\-]+)(-|‐)\s*(?P\w+)(?![^<]*?>)') elif format == 'html_cleanup': - intextmatch = re.compile(u'(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') + intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') elif format == 'txt_cleanup': - intextmatch = re.compile(u'(?P\w+)(-|‐)(?P\s+)(?P[\w\d]+)') + intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)(?P\s+)(?P[\w\d]+)') html = intextmatch.sub(self.dehyphenate, html) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index c0c2ee8978..6583c258bf 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -34,6 +34,7 @@ class HeuristicProcessor(object): self.line_close = "()?\s*()?\s*()?\s*" self.single_blank = re.compile(r'(\s*]*>\s*

)', re.IGNORECASE) self.scene_break_open = '

' + self.common_in_text_endings = u'[\"\'—’”,\.!\?\…)\w]' def is_pdftohtml(self, src): return '' in src[:1000] @@ -638,7 +639,7 @@ class HeuristicProcessor(object): blanks_count = len(self.any_multi_blank.findall(html)) if blanks_count >= 1: html = self.merge_blanks(html, blanks_count) - scene_break_regex = self.line_open+'(?![\w\'\"])(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close + scene_break_regex = self.line_open+'(?!([\w\'\"]|.*?'+self.common_in_text_endings+'<))(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE) # If the user has enabled scene break replacement, then either softbreaks # or 'hard' scene breaks are replaced, depending on which is in use From cdcfde662562105ebb8948828d9ddf37e3dccbf1 Mon Sep 17 00:00:00 2001 From: ldolse Date: Mon, 7 Feb 2011 19:11:47 +0800 Subject: [PATCH 19/30] added more line beginnings --- src/calibre/ebooks/conversion/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 6583c258bf..d075390e8e 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -34,7 +34,8 @@ class HeuristicProcessor(object): self.line_close = "()?\s*()?\s*()?\s*" self.single_blank = re.compile(r'(\s*]*>\s*

)', re.IGNORECASE) self.scene_break_open = '

' - self.common_in_text_endings = u'[\"\'—’”,\.!\?\…)\w]' + self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]' + self.common_in_text_beginnings = u'[\w\'\"“‘‛]' def is_pdftohtml(self, src): return '' in src[:1000] @@ -639,7 +640,7 @@ class HeuristicProcessor(object): blanks_count = len(self.any_multi_blank.findall(html)) if blanks_count >= 1: html = self.merge_blanks(html, blanks_count) - scene_break_regex = self.line_open+'(?!([\w\'\"]|.*?'+self.common_in_text_endings+'<))(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close + scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+'<))(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE) # If the user has enabled scene break replacement, then either softbreaks # or 'hard' scene breaks are replaced, depending on which is in use From 34d32fe464c953b54f26fd552c118bd380073576 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 09:48:57 -0700 Subject: [PATCH 20/30] Fix #8838 (Plugin configuration dialogs not parented) --- src/calibre/gui2/preferences/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index 8f77a03c24..4b83df71c7 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -329,7 +329,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): return error_dialog(self, _('Must restart'), _('You must restart calibre before you can' ' configure the %s plugin')%plugin.name, show=True) - if plugin.do_user_config(): + if plugin.do_user_config(self.gui): self._plugin_model.refresh_plugin(plugin) elif op == 'remove': msg = _('Plugin {0} successfully removed').format(plugin.name) From d9a5b3c3c0dda2a0612ea20650eda9440fa66717 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 09:57:39 -0700 Subject: 
[PATCH 21/30] Fix #8820 (Accessing Content Server from iPad gives mobile view of ebook catalog) --- src/calibre/library/server/content.py | 2 +- src/calibre/library/server/mobile.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 8af70d5675..62d08aa2c3 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -125,7 +125,7 @@ class ContentServer(object): ua.startswith('Stanza') # A better search would be great - want_mobile = self.MOBILE_UA.search(ua) is not None + want_mobile = self.is_mobile_browser(ua) if self.opts.develop and not want_mobile: cherrypy.log('User agent: '+ua) diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index 0992e6c30b..1bf9f549bc 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -169,6 +169,10 @@ class MobileServer(object): MOBILE_UA = re.compile('(?i)(?:iPhone|Opera Mini|NetFront|webOS|Mobile|Android|imode|DoCoMo|Minimo|Blackberry|MIDP|Symbian|HD2|Kindle)') + def is_mobile_browser(self, ua): + match = self.MOBILE_UA.search(ua) + return match is not None and 'iPad' not in ua + def add_routes(self, connect): connect('mobile', '/mobile', self.mobile) connect('mobile_css', '/mobile/style.css', self.mobile_css) From 88f56d528dc412f5dcc527ef6d36654aa0922cd5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 11:47:37 -0700 Subject: [PATCH 22/30] ... --- src/calibre/manual/faq.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 7248f76436..9c02ace0e8 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -324,15 +324,15 @@ Author names are complex, especially across cultures. 
|app| has a very flexible Now coming to author name sorting: * When a new author is added to |app| (this happens whenever a book by a new author is added), |app| automatically computes a sort string for both the book and the author. - * By default, this sort string assumes that the author name is in ``First name Last name`` format and generates a ``Last name, First name`` sort string. * Authors in the Tag Browser are sorted by the sort value for the **authors**. Remember that this is different from the Author sort field for a book. + * By default, this sort algorithm assumes that the author name is in ``First name Last name`` format and generates a ``Last name, First name`` sort value. * You can change this algorithm by going to Preferences->Tweaks and setting the :guilabel:`author_sort_copy_method` tweak. * You can force |app| to recalculate the author sort values for every author by right clicking on any author and selecting :guilabel:`Manage authors` * You can force |app| to recalculate the author sort values for all books by using the bulk metadata edit dialog (select all books and click edit metadata) * When recalculating the author sort values for books, |app| uses the author sort values for each individual author. * You can control whether the Tag Browser display authors using their names or their sort values by setting the :guilabel:`categories_use_field_for_author_name` tweak in Preferences->Tweaks - With all this flexibility, it is possible to have |app| manage your author names however you like. For example, one common request is to have |app| display author names LN, FN. To do this first set the ``author_sort_copy_method`` to ``copy``. Then change all author names to LN, FN via the Manage authors dialog. Then have |app| recalculate author sort values as described above. +With all this flexibility, it is possible to have |app| manage your author names however you like. For example, one common request is to have |app| display author names LN, FN. 
To do this first set the ``author_sort_copy_method`` to ``copy``. Then change all author names to LN, FN via the Manage authors dialog. Then have |app| recalculate author sort values as described above. Why doesn't |app| let me store books in my own directory structure? From bd61ee0bf3ca6c26d936a62bb8e34853ccefb8d5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 12:16:04 -0700 Subject: [PATCH 23/30] Fix #8759 (Automerge_Option_Updates) --- src/calibre/gui2/__init__.py | 1 + src/calibre/gui2/actions/add.py | 4 +-- src/calibre/gui2/add.py | 47 +++++++++++++++++++------- src/calibre/gui2/preferences/adding.py | 8 ++++- src/calibre/gui2/preferences/adding.ui | 46 +++++++++++++++++++++---- src/calibre/library/server/content.py | 1 - 6 files changed, 85 insertions(+), 22 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 9150172fc1..92a68fa840 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = ( gprefs.defaults['show_splash_screen'] = True gprefs.defaults['toolbar_icon_size'] = 'medium' +gprefs.defaults['automerge'] = 'ignore' gprefs.defaults['toolbar_text'] = 'auto' gprefs.defaults['show_child_bar'] = False gprefs.defaults['font'] = None diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index 4236a63340..25127d3635 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -244,8 +244,8 @@ class AddAction(InterfaceAction): x.decode(preferred_encoding, 'replace') for x in self._adder.merged_books]) info_dialog(self.gui, _('Merged some books'), - _('Some duplicates were found and merged into the ' - 'following existing books:'), det_msg=books, show=True) + _('The following duplicate books were found and incoming book formats were ' + 'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True) if 
getattr(self._adder, 'critical', None): det_msg = [] for name, log in self._adder.critical.items(): diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 026fabea07..086f40feee 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -8,7 +8,7 @@ from functools import partial from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer from calibre.gui2.dialogs.progress import ProgressDialog -from calibre.gui2 import question_dialog, error_dialog, info_dialog +from calibre.gui2 import question_dialog, error_dialog, info_dialog, gprefs from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG @@ -179,23 +179,46 @@ class DBAdder(QObject): # {{{ cover = f.read() orig_formats = formats formats = [f for f in formats if not f.lower().endswith('.opf')] - if prefs['add_formats_to_existing']: + if prefs['add_formats_to_existing']: #automerge is on identical_book_list = self.db.find_identical_books(mi) - - if identical_book_list: # books with same author and nearly same title exist in db + if identical_book_list: # books with same author and nearly same title exist in db self.merged_books.add(mi.title) + a_new_record_has_been_created = False for identical_book in identical_book_list: - self.add_formats(identical_book, formats, replace=False) - else: - id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + if gprefs['automerge'] == 'ignore': + self.add_formats(identical_book, formats, replace=False) + if gprefs['automerge'] == 'overwrite': + self.add_formats(identical_book, formats, replace=True) + if gprefs['automerge'] == 'new record' and not a_new_record_has_been_created: + ''' + We are here because we have at least one book record in the db that matches the one file/format being processed + We need to check if the file/format being processed matches a format in the matching book record. 
+ If so, create new record (as below), else, add to existing record, as above. + Test if format exists in matching record. identical_book is an id, formats is a FQPN path in a list + ''' + for path in formats: + fmt = os.path.splitext(path)[-1].replace('.', '').upper() + ib_fmts = self.db.formats(identical_book, index_is_id=True) + if ib_fmts and fmt in ib_fmts: # Create a new record + if not a_new_record_has_been_created: + id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + self.number_of_books_added += 1 + self.add_formats(id_, formats) + a_new_record_has_been_created = True + else: #new record not required + self.add_formats(identical_book, formats, replace=False) + + else: # books with same author and nearly same title do not exist in db + id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) self.number_of_books_added += 1 - self.add_formats(id, formats) - else: - id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) - if id is None: + self.add_formats(id_, formats) + + else: #automerge is off + id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) + if id_ is None: self.duplicates.append((mi, cover, orig_formats)) else: - self.add_formats(id, formats) + self.add_formats(id_, formats) self.number_of_books_added += 1 else: self.names.append(name) diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py index e919d53b64..b4c4ce846a 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -12,6 +12,7 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \ from calibre.gui2.preferences.adding_ui import Ui_Form from calibre.utils.config import prefs from calibre.gui2.widgets import FilenamePattern +from calibre.gui2 import gprefs class ConfigWidget(ConfigWidgetBase, Ui_Form): @@ -23,18 +24,23 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('read_file_metadata', prefs) r('swap_author_names', prefs) 
r('add_formats_to_existing', prefs) + choices = [ + (_('Ignore duplicate incoming formats'), 'ignore'), + (_('Overwrite existing duplicate formats'), 'overwrite'), + (_('Create new record for each duplicate format'), 'new record')] + r('automerge', gprefs, choices=choices) r('new_book_tags', prefs, setting=CommaSeparatedList) self.filename_pattern = FilenamePattern(self) self.metadata_box.layout().insertWidget(0, self.filename_pattern) self.filename_pattern.changed_signal.connect(self.changed_signal.emit) - def initialize(self): ConfigWidgetBase.initialize(self) self.filename_pattern.blockSignals(True) self.filename_pattern.initialize() self.filename_pattern.blockSignals(False) + self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked()) def restore_defaults(self): ConfigWidgetBase.restore_defaults(self) diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index 75e6c466f0..f9a2c74444 100644 --- a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -6,7 +6,7 @@ 0 0 - 750 + 753 339 @@ -58,16 +58,33 @@ - + - If an existing book with a similar title and author is found that does not have the format being added, the format is added -to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored. + Automerge: If books with similar titles and authors found, merge the incoming formats automatically into +existing book records. The box to the right controls what happens when an existing record already has +the incoming format. Note that this option also affects the Copy to library action. Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. 
- If books with similar titles and authors found, &merge the new files automatically + &Automerge added books if they already exist in the calibre library: + + + + + + + Automerge: If books with similar titles and authors found, merge the incoming formats automatically into +existing book records. This box controls what happens when an existing record already has +the incoming format: + +Ignore duplicate incoming files - means that existing files in your calibre library will not be replaced +Overwrite existing duplicate files - means that existing files in your calibre library will be replaced +Create new record for each duplicate file - means that a new book entry will be created for each duplicate file + +Title matching ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. +Author matching is exact. @@ -113,5 +130,22 @@ Title match ignores leading indefinite articles ("the", "a", - + + + opt_add_formats_to_existing + toggled(bool) + opt_automerge + setEnabled(bool) + + + 406 + 83 + + + 457 + 83 + + + + diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 62d08aa2c3..11ea2b951e 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -124,7 +124,6 @@ class ContentServer(object): cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919 or \ ua.startswith('Stanza') - # A better search would be great want_mobile = self.is_mobile_browser(ua) if self.opts.develop and not want_mobile: cherrypy.log('User agent: '+ua) From 0cf432315be56ff8b75e98a167411fc3308c4b2b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 12:31:04 -0700 Subject: [PATCH 24/30] ... 
--- src/calibre/manual/faq.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 9c02ace0e8..cdae20ea3b 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -327,12 +327,14 @@ Now coming to author name sorting: * Authors in the Tag Browser are sorted by the sort value for the **authors**. Remember that this is different from the Author sort field for a book. * By default, this sort algorithm assumes that the author name is in ``First name Last name`` format and generates a ``Last name, First name`` sort value. * You can change this algorithm by going to Preferences->Tweaks and setting the :guilabel:`author_sort_copy_method` tweak. - * You can force |app| to recalculate the author sort values for every author by right clicking on any author and selecting :guilabel:`Manage authors` - * You can force |app| to recalculate the author sort values for all books by using the bulk metadata edit dialog (select all books and click edit metadata) - * When recalculating the author sort values for books, |app| uses the author sort values for each individual author. + * You can force |app| to recalculate the author sort values for every author by right clicking on any author and selecting :guilabel:`Manage authors`, then pushing the `Recalculate all author sort values` button. Do this after you have set the author_sort_copy_method tweak to what you want. + * You can force |app| to recalculate the author sort values for all books by using the bulk metadata edit dialog (select all books and click edit metadata, check the `Automatically set author sort` checkbox, then press OK.) + * When recalculating the author sort values for books, |app| uses the author sort values for each individual author. Therefore, ensure that the individual author sort values are correct before recalculating the books' author sort values. 
* You can control whether the Tag Browser display authors using their names or their sort values by setting the :guilabel:`categories_use_field_for_author_name` tweak in Preferences->Tweaks -With all this flexibility, it is possible to have |app| manage your author names however you like. For example, one common request is to have |app| display author names LN, FN. To do this first set the ``author_sort_copy_method`` to ``copy``. Then change all author names to LN, FN via the Manage authors dialog. Then have |app| recalculate author sort values as described above. +With all this flexibility, it is possible to have |app| manage your author names however you like. For example, one common request is to have |app| display author names LN, FN. To do this first set the ``author_sort_copy_method`` to ``copy``. Then change all author names to LN, FN via the Manage authors dialog. Then have |app| recalculate author sort values for both authors and books as described above. + +Note that you can set an individual author's sort value to whatever you want using :guilabel:`Manage authors`. This is useful when dealing with names that |app| will not get right, such as complex multi-part names like Miguel de Cervantes Saavedra or when dealing with Asian names like Sun Tzu. Why doesn't |app| let me store books in my own directory structure? 
From ae93283d778057a8e54597867d97a50e82d35bcf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 13:21:04 -0700 Subject: [PATCH 25/30] Fix #8854 (updated recipe for Newyorker) --- resources/recipes/new_yorker.recipe | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe index d69a4df24f..9eeb8b31ee 100644 --- a/resources/recipes/new_yorker.recipe +++ b/resources/recipes/new_yorker.recipe @@ -54,10 +54,10 @@ class NewYorker(BasicNewsRecipe): ,dict(attrs={'id':['show-header','show-footer'] }) ] remove_attributes = ['lang'] - feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/rss/feeds/everything.xml')] + feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')] def print_version(self, url): - return 'http://www.newyorker.com' + url + '?printable=true' + return url + '?printable=true' def image_url_processor(self, baseurl, url): return url.strip() From 01dc213d7030cad9d014bfb037601421c3289ac1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 13:32:12 -0700 Subject: [PATCH 26/30] Fix #8857 (Using calibre with HTC Desire (android)) --- src/calibre/devices/android/driver.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 5912e40a69..11d636791b 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -19,10 +19,15 @@ class ANDROID(USBMS): VENDOR_ID = { # HTC - 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, - 0x0227, 0x0226], 0x0ff9 - : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226], - 0xc92 : [0x100], 0xc97: [0x226], 0xc99 : [0x0100]}, + 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], + 0x0c01 : [0x100, 0x0227, 0x0226], + 0x0ff9 : [0x0100, 0x0227, 0x0226], + 0x0c87 : [0x0100, 0x0227, 0x0226], + 0xc92 : [0x100], + 0xc97 : [0x226], 
+ 0xc99 : [0x0100], + 0xca3 : [0x100], + }, # Eken 0x040d : { 0x8510 : [0x0001], 0x0851 : [0x1] }, From 8a93808ccc246be23706c8a0ccc291ad2f32d8be Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 13:43:36 -0700 Subject: [PATCH 27/30] Fix #8856 (add new device - inves wibook 600) --- src/calibre/devices/eb600/driver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index e38f72aea5..5374c6c4e2 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -172,10 +172,10 @@ class INVESBOOK(EB600): gui_name = 'Inves Book 600' FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'pdf', 'rtf', 'txt'] + BCD = [0x110, 0x323] - VENDOR_NAME = 'INVES_E6' - WINDOWS_MAIN_MEM = '00INVES_E600' - WINDOWS_CARD_A_MEM = '00INVES_E600' + VENDOR_NAME = ['INVES_E6', 'INVES-WI'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['00INVES_E600', 'INVES-WIBOOK'] class BOOQ(EB600): name = 'Booq Device Interface' From 8eddfd7e780ad459cb3681718a875149c06be1ae Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 7 Feb 2011 17:52:15 -0500 Subject: [PATCH 28/30] TXT Input: Heuristic processing enables smarten punctuation. --- src/calibre/ebooks/txt/input.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 12f780913c..c7f9dbefd4 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -114,6 +114,7 @@ class TXTInput(InputFormatPlugin): if options.formatting_type == 'heuristic': setattr(options, 'enable_heuristics', True) setattr(options, 'unwrap_lines', False) + setattr(options, 'smarten_punctuation', True) # Reformat paragraphs to block formatting based on the detected type. # We don't check for block because the processor assumes block. 
From dccee37b02d09924b18847a3bc0433587539c919 Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 7 Feb 2011 18:06:38 -0500 Subject: [PATCH 29/30] TXT Input: separate hard scene breaks from text so it does not get wrapped into a paragraph. --- src/calibre/ebooks/txt/input.py | 5 ++++- src/calibre/ebooks/txt/processor.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index c7f9dbefd4..8ab1524b02 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -14,7 +14,8 @@ from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - normalize_line_endings, convert_textile, remove_indents, block_to_single_line + normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ + separate_hard_scene_breaks from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile @@ -122,6 +123,7 @@ class TXTInput(InputFormatPlugin): if options.paragraph_type == 'single': txt = separate_paragraphs_single_line(txt) elif options.paragraph_type == 'print': + txt = separate_hard_scene_breaks(txt) txt = separate_paragraphs_print_formatted(txt) txt = block_to_single_line(txt) elif options.paragraph_type == 'unformatted': @@ -133,6 +135,7 @@ class TXTInput(InputFormatPlugin): txt = preprocessor.punctuation_unwrap(length, txt, 'txt') txt = separate_paragraphs_single_line(txt) else: + txt = separate_hard_scene_breaks(txt) txt = block_to_single_line(txt) if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False): diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 685a7504b9..55213381c9 100644 --- a/src/calibre/ebooks/txt/processor.py +++ 
b/src/calibre/ebooks/txt/processor.py @@ -120,6 +120,15 @@ def separate_paragraphs_print_formatted(txt): txt = re.sub(u'(?miu)^(?P\t+|[ ]{2,})(?=.)', lambda mo: '\n%s' % mo.group('indent'), txt) return txt +def separate_hard_scene_breaks(txt): + def sep_break(line): + if len(line.strip()) > 0: + return '\n%s\n' % line + else: + return line + txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt) + return txt + def block_to_single_line(txt): txt = re.sub(r'(?<=.)\n(?=.)', ' ', txt) return txt From 72f6e440e81aa4037c5c00026c7bc1fc9ba34395 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Feb 2011 18:45:48 -0700 Subject: [PATCH 30/30] ... --- src/calibre/gui2/dialogs/metadata_single.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 3e711edd2d..153015f50b 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -780,8 +780,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): _('You have changed the tags. In order to use the tags' ' editor, you must either discard or apply these ' 'changes. Apply changes?'), show_copy_button=False): - self.books_to_refresh |= self.apply_tags(commit=True, notify=True, - allow_case_change=True) + self.books_to_refresh |= self.apply_tags(commit=True, + notify=True) self.original_tags = unicode(self.tags.text()) else: self.tags.setText(self.original_tags)