From 7ec70e67e12942c03844debeb771ef19ca6a0561 Mon Sep 17 00:00:00 2001 From: GRiker Date: Fri, 6 Jul 2012 11:18:55 -0600 Subject: [PATCH 01/18] GwR updates to catalog code using icu.sort_key, resolving some epubcheck and flightcrew errors --- src/calibre/gui2/convert/gui_conversion.py | 3 +- src/calibre/gui2/tools.py | 4 +- src/calibre/library/catalogs/epub_mobi.py | 8 +- .../library/catalogs/epub_mobi_builder.py | 213 ++++++++++-------- 4 files changed, 123 insertions(+), 105 deletions(-) diff --git a/src/calibre/gui2/convert/gui_conversion.py b/src/calibre/gui2/convert/gui_conversion.py index ce51d4ca77..1ebf211c57 100644 --- a/src/calibre/gui2/convert/gui_conversion.py +++ b/src/calibre/gui2/convert/gui_conversion.py @@ -31,7 +31,7 @@ def gui_convert_override(input, output, recommendations, notification=DummyRepor override_input_metadata=True) def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, connected_device, - notification=DummyReporter(), log=None): + debug_mode, notification=DummyReporter(), log=None): if log is None: log = Log() from calibre.library import db @@ -50,6 +50,7 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne # opts.gui_search_text = something opts.catalog_title = title opts.connected_device = connected_device + opts.debug_mode = debug_mode opts.ids = ids opts.search_text = None opts.sort_by = None diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 36c1a6f30f..e4c8eeeddb 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -11,6 +11,7 @@ import cPickle, os from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer +from calibre.constants import DEBUG from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2 import warning_dialog, question_dialog from calibre.gui2.convert.single import NoSupportedInputFormats @@ -319,7 +320,8 @@ def generate_catalog(parent, dbspec, ids, device_manager, db): # {{{ out.name, d.catalog_sync, 
d.fmt_options, - connected_device + connected_device, + DEBUG ] out.close() diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py index cdc27b5e60..dc750e4b92 100644 --- a/src/calibre/library/catalogs/epub_mobi.py +++ b/src/calibre/library/catalogs/epub_mobi.py @@ -11,7 +11,6 @@ import os from collections import namedtuple from calibre import strftime -from calibre.constants import DEBUG from calibre.customize import CatalogPlugin from calibre.customize.conversion import OptionRecommendation, DummyReporter @@ -277,13 +276,16 @@ class EPUB_MOBI(CatalogPlugin): log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width,self.THUMB_SMALLEST)) opts.thumb_width = "1.0" + if opts.debug_mode: + setattr(opts, 'debug_pipeline', os.path.expanduser("~/Desktop/Catalog debug")) + # Display opts keys = opts_dict.keys() keys.sort() build_log.append(" opts:") for key in keys: if key in ['catalog_title','authorClip','connected_kindle','descriptionClip', - 'exclude_book_marker','exclude_genre','exclude_tags', + 'debug_pipeline','exclude_book_marker','exclude_genre','exclude_tags', 'header_note_source_field','merge_comments', 'output_profile','read_book_marker', 'search_text','sort_by','sort_descriptions_by_author','sync', @@ -315,7 +317,7 @@ class EPUB_MOBI(CatalogPlugin): recommendations = [] recommendations.append(('remove_fake_margins', False, OptionRecommendation.HIGH)) - if DEBUG: + if opts.debug_mode: recommendations.append(('comments', '\n'.join(line for line in build_log), OptionRecommendation.HIGH)) else: diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index 6267651aff..e160886676 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -3,7 +3,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Greg Riker' -import datetime, htmlentitydefs, os, re, shutil, zlib +import datetime, 
htmlentitydefs, os, re, shutil, unicodedata, zlib from copy import deepcopy from xml.sax.saxutils import escape @@ -15,12 +15,11 @@ from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.config import config_dir from calibre.utils.date import format_date, is_date_undefined, now as nowf from calibre.utils.filenames import ascii_text -from calibre.utils.icu import capitalize +from calibre.utils.icu import capitalize, sort_key from calibre.utils.magick.draw import thumbnail from calibre.utils.zipfile import ZipFile - class CatalogBuilder(object): ''' Generates catalog source files from calibre database @@ -42,6 +41,9 @@ class CatalogBuilder(object): # [] = No date ranges added DATE_RANGE=[30] + # Text used in generated catalog for title section with other-than-ASCII leading letter + SYMBOLS = 'Symbols' + # basename output file basename # creator dc:creator in OPF metadata # descriptionClip limits size of NCX descriptions (Kindle only) @@ -565,10 +567,9 @@ class CatalogBuilder(object): self.updateProgressFullStep("Sorting database") self.booksByAuthor = list(self.booksByTitle) - - # Test for author_sort mismatches self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author) - # Build the unique_authors set from existing data + + # Build the unique_authors set from existing data, test for author_sort mismatches authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor] current_author = authors[0] for (i,author) in enumerate(authors): @@ -603,7 +604,8 @@ Author '{0}': current_author = author - self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort) + self.booksByAuthor = sorted(self.booksByAuthor, + key=lambda x: sort_key(self.booksByAuthorSorter_author_sort(x))) # Build the unique_authors set from existing data authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor] @@ -690,7 +692,7 @@ Author '{0}': this_title['series'] 
= None this_title['series_index'] = 0.0 - this_title['title_sort'] = self.generateSortTitle(ascii_text(this_title['title'])) + this_title['title_sort'] = self.generateSortTitle(this_title['title']) if 'authors' in record: # from calibre.ebooks.metadata import authors_to_string # return authors_to_string(self.authors) @@ -705,7 +707,6 @@ Author '{0}': this_title['author_sort'] = record['author_sort'] else: this_title['author_sort'] = self.author_to_author_sort(this_title['author']) - this_title['author_sort'] = ascii_text(this_title['author_sort']) if record['publisher']: this_title['publisher'] = re.sub('&', '&', record['publisher']) @@ -780,8 +781,11 @@ Author '{0}': # Re-sort based on title_sort if len(titles): - self.booksByTitle = sorted(titles, - key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper())) + #self.booksByTitle = sorted(titles, + # key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper())) + + self.booksByTitle = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper())) + if False and self.verbose: self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle)) self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort')) @@ -923,29 +927,24 @@ Author '{0}': body = soup.find('body') btc = 0 - # Insert section tag + pTag = Tag(soup, "p") + pTag['class'] = 'title' + ptc = 0 aTag = Tag(soup,'a') - aTag['name'] = 'section_start' - body.insert(btc, aTag) - btc += 1 - - # Insert the anchor - aTag = Tag(soup, "a") - aTag['name'] = "bytitle" - body.insert(btc, aTag) - btc += 1 + aTag['id'] = 'section_start' + pTag.insert(ptc, aTag) + ptc += 1 if not self.__generateForKindle: - # We don't need this because the Kindle shows section titles - #

By Title

- pTag = Tag(soup, "p") - pTag['class'] = 'title' + # Kindle don't need this because it shows section titles in Periodical format aTag = Tag(soup, "a") - aTag['name'] = "bytitle" - pTag.insert(0,aTag) - pTag.insert(1,NavigableString('Titles')) - body.insert(btc,pTag) - btc += 1 + aTag['id'] = "bytitle" + pTag.insert(ptc,aTag) + ptc += 1 + pTag.insert(ptc,NavigableString('Titles')) + + body.insert(btc,pTag) + btc += 1 divTag = Tag(soup, "div") dtc = 0 @@ -955,7 +954,7 @@ Author '{0}': # Incoming title : if not self.useSeriesPrefixInTitlesSection: nspt = deepcopy(self.booksByTitle) - nspt = sorted(nspt, key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper())) + nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper())) self.booksByTitle_noSeriesPrefix = nspt # Loop through the books by title @@ -977,11 +976,14 @@ Author '{0}': if dtc > 0: divRunningTag['class'] = "initial_letter" drtc = 0 - current_letter = self.letter_or_symbol(book['title_sort'][0]) pIndexTag = Tag(soup, "p") pIndexTag['class'] = "author_title_letter_index" aTag = Tag(soup, "a") - aTag['name'] = "%s" % self.letter_or_symbol(book['title_sort'][0]) + current_letter = self.letter_or_symbol(book['title_sort'][0]) + if current_letter == self.SYMBOLS: + aTag['id'] = self.SYMBOLS + else: + aTag['id'] = "%s" % self.generateUnicodeName(current_letter) pIndexTag.insert(0,aTag) pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['title_sort'][0]))) divRunningTag.insert(dtc,pIndexTag) @@ -1074,19 +1076,6 @@ Author '{0}': btc = 0 - # Insert section tag - aTag = Tag(soup,'a') - aTag['name'] = 'section_start' - body.insert(btc, aTag) - btc += 1 - - # Insert the anchor - aTag = Tag(soup, "a") - anchor_name = friendly_name.lower() - aTag['name'] = anchor_name.replace(" ","") - body.insert(btc, aTag) - btc += 1 - divTag = Tag(soup, "div") dtc = 0 divOpeningTag = None @@ -1117,7 +1106,6 @@ Author '{0}': drtc = 0 divRunningTag = None - current_letter = 
self.letter_or_symbol(book['author_sort'][0].upper()) author_count = 0 divOpeningTag = Tag(soup, 'div') if dtc > 0: @@ -1126,7 +1114,11 @@ Author '{0}': pIndexTag = Tag(soup, "p") pIndexTag['class'] = "author_title_letter_index" aTag = Tag(soup, "a") - aTag['name'] = "%sauthors" % self.letter_or_symbol(current_letter) + current_letter = self.letter_or_symbol(book['author_sort'][0].upper()) + if current_letter == self.SYMBOLS: + aTag['id'] = self.SYMBOLS + else: + aTag['id'] = "%s_authors" % self.generateUnicodeName(current_letter) pIndexTag.insert(0,aTag) pIndexTag.insert(1,NavigableString(self.letter_or_symbol(book['author_sort'][0].upper()))) divOpeningTag.insert(dotc,pIndexTag) @@ -1158,7 +1150,7 @@ Author '{0}': pAuthorTag = Tag(soup, "p") pAuthorTag['class'] = "author_index" aTag = Tag(soup, "a") - aTag['name'] = "%s" % self.generateAuthorAnchor(current_author) + aTag['id'] = "%s" % self.generateAuthorAnchor(current_author) aTag.insert(0,NavigableString(current_author)) pAuthorTag.insert(0,aTag) if author_count == 1: @@ -1247,19 +1239,25 @@ Author '{0}': # Loop ends here + pTag = Tag(soup, "p") + pTag['class'] = 'title' + ptc = 0 + aTag = Tag(soup,'a') + aTag['id'] = 'section_start' + pTag.insert(ptc, aTag) + ptc += 1 + if not self.__generateForKindle: - # Insert the <h2> tag with book_count at the head - #<h2><a name="byalphaauthor" id="byalphaauthor"></a>By Author</h2> - pTag = Tag(soup, "p") - pTag['class'] = 'title' + # Kindle don't need this because it shows section titles in Periodical format aTag = Tag(soup, "a") anchor_name = friendly_name.lower() - aTag['name'] = anchor_name.replace(" ","") - pTag.insert(0,aTag) - #h2Tag.insert(1,NavigableString('%s (%d)' % (friendly_name, book_count))) - pTag.insert(1,NavigableString('%s' % (friendly_name))) - body.insert(btc,pTag) - btc += 1 + aTag['id'] = anchor_name.replace(" ","") + pTag.insert(ptc,aTag) + ptc += 1 + pTag.insert(ptc,NavigableString('%s' % (friendly_name))) + + body.insert(btc,pTag) + btc += 1 if 
author_count == 1: divTag.insert(dtc, divOpeningTag) @@ -1294,7 +1292,7 @@ Author '{0}': pIndexTag = Tag(soup, "p") pIndexTag['class'] = "date_index" aTag = Tag(soup, "a") - aTag['name'] = "bda_%s-%s" % (current_date.year, current_date.month) + aTag['id'] = "bda_%s-%s" % (current_date.year, current_date.month) pIndexTag.insert(0,aTag) pIndexTag.insert(1,NavigableString(date_string)) divTag.insert(dtc,pIndexTag) @@ -1312,7 +1310,7 @@ Author '{0}': pAuthorTag['class'] = "author_index" aTag = Tag(soup, "a") if self.opts.generate_authors: - aTag['name'] = "%s" % self.generateAuthorAnchor(current_author) + aTag['id'] = "%s" % self.generateAuthorAnchor(current_author) aTag.insert(0,NavigableString(current_author)) pAuthorTag.insert(0,aTag) divTag.insert(dtc,pAuthorTag) @@ -1386,7 +1384,7 @@ Author '{0}': pIndexTag = Tag(soup, "p") pIndexTag['class'] = "date_index" aTag = Tag(soup, "a") - aTag['name'] = "bda_%s" % date_range.replace(' ','') + aTag['id'] = "bda_%s" % date_range.replace(' ','') pIndexTag.insert(0,aTag) pIndexTag.insert(1,NavigableString(date_range)) divTag.insert(dtc,pIndexTag) @@ -1457,30 +1455,27 @@ Author '{0}': btc = 0 - # Insert section tag - aTag = Tag(soup,'a') - aTag['name'] = 'section_start' - body.insert(btc, aTag) - btc += 1 + pTag = Tag(soup, "p") + pTag['class'] = 'title' + ptc = 0 - # Insert the anchor - aTag = Tag(soup, "a") - anchor_name = friendly_name.lower() - aTag['name'] = anchor_name.replace(" ","") - body.insert(btc, aTag) - btc += 1 + aTag = Tag(soup,'a') + aTag['id'] = 'section_start' + pTag.insert(ptc, aTag) + ptc += 1 if not self.__generateForKindle: - #<h2><a name="byalphaauthor" id="byalphaauthor"></a>By Author</h2> - pTag = Tag(soup, "p") - pTag['class'] = 'title' + # Kindle don't need this because it shows section titles in Periodical format aTag = Tag(soup, "a") anchor_name = friendly_name.lower() - aTag['name'] = anchor_name.replace(" ","") - pTag.insert(0,aTag) - pTag.insert(1,NavigableString('%s' % friendly_name)) - 
body.insert(btc,pTag) - btc += 1 + aTag['id'] = anchor_name.replace(" ","") + + pTag.insert(ptc,aTag) + ptc += 1 + pTag.insert(ptc, NavigableString('%s' % friendly_name)) + + body.insert(btc,pTag) + btc += 1 divTag = Tag(soup, "div") dtc = 0 @@ -1895,11 +1890,10 @@ Author '{0}': self.updateProgressFullStep("'Genres'") self.genre_tags_dict = self.filterDbTags(self.db.all_tags()) - # Extract books matching filtered_tags genre_list = [] - for friendly_tag in sorted(self.genre_tags_dict): - #print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag + for friendly_tag in sorted(self.genre_tags_dict, key=sort_key): + #print("\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag) # tag_list => { normalized_genre_tag : [{book},{},{}], # normalized_genre_tag : [{book},{},{}] } @@ -2268,7 +2262,7 @@ Author '{0}': navPointTag.insert(1, contentTag) cmiTag = Tag(soup, '%s' % 'calibre:meta-img') - cmiTag['name'] = "mastheadImage" + cmiTag['id'] = "mastheadImage" cmiTag['src'] = "images/mastheadImage.gif" navPointTag.insert(2,cmiTag) navMapTag.insert(0,navPointTag) @@ -2552,7 +2546,10 @@ Author '{0}': navLabelTag.insert(0, textTag) navPointByLetterTag.insert(0,navLabelTag) contentTag = Tag(soup, 'content') - contentTag['src'] = "content/%s.html#%s" % (output, title_letters[i]) + if title_letters[i] == self.SYMBOLS: + contentTag['src'] = "content/%s.html#%s" % (output, title_letters[i]) + else: + contentTag['src'] = "content/%s.html#%s" % (output, self.generateUnicodeName(title_letters[i])) navPointByLetterTag.insert(1,contentTag) if self.generateForKindle: @@ -2640,7 +2637,7 @@ Author '{0}': navLabelTag.insert(0, textTag) navPointByLetterTag.insert(0,navLabelTag) contentTag = Tag(soup, 'content') - contentTag['src'] = "%s#%sauthors" % (HTML_file, authors_by_letter[1]) + contentTag['src'] = "%s#%s_authors" % (HTML_file, self.generateUnicodeName(authors_by_letter[1])) navPointByLetterTag.insert(1,contentTag) @@ -3213,7 
+3210,7 @@ Author '{0}': ans = '%s%d %s:\n' % (' ' * indent, len(tags), header) ans += ' ' * (indent + 1) out_str = '' - sorted_tags = sorted(tags) + sorted_tags = sorted(tags, key=sort_key) for tag in next_tag(sorted_tags): out_str += tag if len(out_str) >= line_break: @@ -3234,7 +3231,7 @@ Author '{0}': if tag == ' ': continue - normalized_tags.append(re.sub('\W','',tag).lower()) + normalized_tags.append(re.sub('\W','',ascii_text(tag)).lower()) friendly_tags.append(tag) genre_tags_dict = dict(zip(friendly_tags,normalized_tags)) @@ -3293,18 +3290,24 @@ Author '{0}': body = soup.find('body') btc = 0 + divTag = Tag(soup, 'div') + dtc = 0 + # Insert section tag if this is the section start - first article only if section_head: aTag = Tag(soup,'a') - aTag['name'] = 'section_start' - body.insert(btc, aTag) - btc += 1 + aTag['id'] = 'section_start' + divTag.insert(dtc, aTag) + dtc += 1 + #body.insert(btc, aTag) + #btc += 1 # Create an anchor from the tag aTag = Tag(soup, 'a') - aTag['name'] = "Genre_%s" % genre - body.insert(btc,aTag) + aTag['id'] = "Genre_%s" % genre + divTag.insert(dtc, aTag) + body.insert(btc,divTag) btc += 1 titleTag = body.find(attrs={'class':'title'}) @@ -3477,7 +3480,7 @@ Author '{0}': for (i, tag) in enumerate(sorted(book.get('tags', []))): aTag = Tag(_soup,'a') if self.opts.generate_genres: - aTag['href'] = "Genre_%s.html" % re.sub("\W","",tag.lower()) + aTag['href'] = "Genre_%s.html" % re.sub("\W","",ascii_text(tag).lower()) aTag.insert(0,escape(NavigableString(tag))) genresTag.insert(gtc, aTag) gtc += 1 @@ -3544,8 +3547,10 @@ Author '{0}': btc = 0 # Insert the title anchor for inbound links aTag = Tag(soup, "a") - aTag['name'] = "book%d" % int(book['id']) - body.insert(btc, aTag) + aTag['id'] = "book%d" % int(book['id']) + divTag = Tag(soup, 'div') + divTag.insert(0, aTag) + body.insert(btc, divTag) btc += 1 # Insert the link to the series or remove <a class="series"> @@ -3770,7 +3775,7 @@ Author '{0}': else: word = '%10.0f' % (float(word)) 
translated.append(word) - return ascii_text(' '.join(translated)) + return ' '.join(translated) def generateThumbnail(self, title, image_dir, thumb_file): ''' @@ -3824,6 +3829,14 @@ Author '{0}': with zf: zf.writestr(title['uuid']+cover_crc, thumb_data) + def generateUnicodeName(self, c): + ''' + Generate an anchor name string + ''' + fullname = unicodedata.name(unicode(c)) + terms = fullname.split() + return "_".join(terms) + def getFriendlyGenreTag(self, genre): # Find the first instance of friendly_tag matching genre for friendly_tag in self.genre_tags_dict: @@ -3837,8 +3850,8 @@ Author '{0}': return markerTags def letter_or_symbol(self,char): - if not re.search('[a-zA-Z]',char): - return 'Symbols' + if not re.search('[a-zA-Z]', ascii_text(char)): + return self.SYMBOLS else: return char From c17c2f96ec5728f6df63da3a6454b1c8fdfa40c9 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Fri, 6 Jul 2012 11:24:50 -0600 Subject: [PATCH 02/18] Added translation hook for 'Symbols' --- src/calibre/library/catalogs/epub_mobi_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index e160886676..7cbd639fd7 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -42,7 +42,7 @@ class CatalogBuilder(object): DATE_RANGE=[30] # Text used in generated catalog for title section with other-than-ASCII leading letter - SYMBOLS = 'Symbols' + SYMBOLS = _('Symbols') # basename output file basename # creator dc:creator in OPF metadata From fe23e51f5534d7b24de2cc99aa71b07d3e868123 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 7 Jul 2012 11:22:22 +0530 Subject: [PATCH 03/18] ... 
--- src/calibre/utils/config_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/config_base.py b/src/calibre/utils/config_base.py index a50d0fd153..198c09bfcd 100644 --- a/src/calibre/utils/config_base.py +++ b/src/calibre/utils/config_base.py @@ -427,7 +427,9 @@ def _prefs(): 'accented versions, based on the language you have chosen ' 'for the calibre interface. For example, in ' u' English, searching for n will match ñ and n, but if ' - 'your language is Spanish it will only match n.')) + 'your language is Spanish it will only match n. Note that ' + 'this is much slower than a simple search on very large ' + 'libraries.')) c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.') return c From c9f64d8deb15c2b1a2670a73e5bb7579cbe1ce28 Mon Sep 17 00:00:00 2001 From: GRiker <griker@hotmail.com> Date: Sat, 7 Jul 2012 04:15:07 -0600 Subject: [PATCH 04/18] Backed out changes passing DEBUG to spawned catalog building task. Added debug_pipeline directive (commented out) for manual inspection of generated catalog code pre-conversion. 
--- src/calibre/gui2/convert/gui_conversion.py | 3 +-- src/calibre/gui2/tools.py | 3 +-- src/calibre/library/catalogs/epub_mobi.py | 13 +++++-------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/calibre/gui2/convert/gui_conversion.py b/src/calibre/gui2/convert/gui_conversion.py index 1ebf211c57..ce51d4ca77 100644 --- a/src/calibre/gui2/convert/gui_conversion.py +++ b/src/calibre/gui2/convert/gui_conversion.py @@ -31,7 +31,7 @@ def gui_convert_override(input, output, recommendations, notification=DummyRepor override_input_metadata=True) def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, connected_device, - debug_mode, notification=DummyReporter(), log=None): + notification=DummyReporter(), log=None): if log is None: log = Log() from calibre.library import db @@ -50,7 +50,6 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne # opts.gui_search_text = something opts.catalog_title = title opts.connected_device = connected_device - opts.debug_mode = debug_mode opts.ids = ids opts.search_text = None opts.sort_by = None diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index e4c8eeeddb..e915545d1a 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -320,8 +320,7 @@ def generate_catalog(parent, dbspec, ids, device_manager, db): # {{{ out.name, d.catalog_sync, d.fmt_options, - connected_device, - DEBUG + connected_device ] out.close() diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py index dc750e4b92..81cb247236 100644 --- a/src/calibre/library/catalogs/epub_mobi.py +++ b/src/calibre/library/catalogs/epub_mobi.py @@ -276,8 +276,6 @@ class EPUB_MOBI(CatalogPlugin): log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width,self.THUMB_SMALLEST)) opts.thumb_width = "1.0" - if opts.debug_mode: - setattr(opts, 'debug_pipeline', os.path.expanduser("~/Desktop/Catalog debug")) # Display opts keys = 
opts_dict.keys() @@ -285,7 +283,7 @@ class EPUB_MOBI(CatalogPlugin): build_log.append(" opts:") for key in keys: if key in ['catalog_title','authorClip','connected_kindle','descriptionClip', - 'debug_pipeline','exclude_book_marker','exclude_genre','exclude_tags', + 'exclude_book_marker','exclude_genre','exclude_tags', 'header_note_source_field','merge_comments', 'output_profile','read_book_marker', 'search_text','sort_by','sort_descriptions_by_author','sync', @@ -317,11 +315,10 @@ class EPUB_MOBI(CatalogPlugin): recommendations = [] recommendations.append(('remove_fake_margins', False, OptionRecommendation.HIGH)) - if opts.debug_mode: - recommendations.append(('comments', '\n'.join(line for line in build_log), - OptionRecommendation.HIGH)) - else: - recommendations.append(('comments', '', OptionRecommendation.HIGH)) + recommendations.append(('comments', '', OptionRecommendation.HIGH)) + + # Use to debug generated catalog code before conversion + #setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug")) dp = getattr(opts, 'debug_pipeline', None) if dp is not None: From 8ba0e4bebe5a0d4576463f679e3573cda527f1b5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 7 Jul 2012 18:50:24 +0530 Subject: [PATCH 05/18] Update Cosmopolitan UK --- recipes/cosmopolitan_uk.recipe | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/recipes/cosmopolitan_uk.recipe b/recipes/cosmopolitan_uk.recipe index 7a34d56865..ae23be224d 100644 --- a/recipes/cosmopolitan_uk.recipe +++ b/recipes/cosmopolitan_uk.recipe @@ -1,13 +1,13 @@ -import re from calibre.web.feeds.news import BasicNewsRecipe -#from calibre import __appname__ -from calibre.utils.magick import Image +import re +from calibre import browser + class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Cosmopolitan UK' - description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK' + description = 'Author : D.Asbury : 
Womens Fashion, beauty and Gossip for women from COSMOPOLITAN -UK' __author__ = 'Dave Asbury' - #last update 21/12/11 + #last update 7/7/12 hopefully get current cover from itunes # greyscale code by Starson cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg' no_stylesheets = True @@ -39,14 +39,19 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): feeds = [ (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')] - def postprocess_html(self, soup, first): - #process all the images - for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): - iurl = tag['src'] - img = Image() - img.open(iurl) - if img < 0: - raise RuntimeError('Out of memory') - img.type = "GrayscaleType" - img.save(iurl) - return soup + def get_cover_url(self): + soup = self.index_to_soup('http://itunes.apple.com/gb/app/cosmopolitan-uk/id461363572?mt=8') + # look for the block containing the sun button and url + cov = soup.find(attrs={'alt' : 'iPhone Screenshot 1'}) + cov2 = str(cov['src']) + br = browser() + br.set_handle_redirect(False) + try: + br.open_novisit(cov2) + cover_url = cov2 + except: + cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg' + + return cover_url + + From 95bdc4fe160c7713a72a369c299b1d0f0b20abc5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 7 Jul 2012 18:56:41 +0530 Subject: [PATCH 06/18] NZZ Folio by Bernd Leinfelder --- recipes/nzz_folio.recipe | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 
recipes/nzz_folio.recipe diff --git a/recipes/nzz_folio.recipe b/recipes/nzz_folio.recipe new file mode 100644 index 0000000000..d3743644d3 --- /dev/null +++ b/recipes/nzz_folio.recipe @@ -0,0 +1,61 @@ + +__license__ = 'GPL v3' +__copyright__ = '2012 Bernd Leinfelder <skoll1975@gmail.com>' + +''' +www.nzzfolio.ch +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class Nzzfolio(BasicNewsRecipe): + title = 'NZZ Folio' + __author__ = 'Bernd Leinfelder' + description = 'Aktuelle Artikel des NZZ Folio' + publisher = 'NZZ AG' + category = 'news, politics, nachrichten, Switzerland' + oldest_article = 35 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + language = 'de' + extra_css = """ + body{font-family: Georgia,"Times New Roman",Times,serif } + .artikel h3,.artikel h4,.bildLegende,.question,.autor{font-family: Arial,Verdana,Helvetica,sans-serif} + .bildLegende{font-size: small} + .autor{font-size: 0.9375em; color: #666666} + .quote{font-size: large !important; + font-style: italic; + font-weight: normal !important; + border-bottom: 1px dotted #BFBFBF; + border-top: 1px dotted #BFBFBF; + line-height: 1.25em} + .quelle{color: #666666; font-style: italic; white-space: nowrap} + """ + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables' : True + } + + + remove_attributes=['width','height','lang'] + remove_tags_before = dict(id='content') + remove_tags_after = dict(id='content') + remove_tags = [ + dict(name=['h2','object','link','base','meta','iframe']) + ,dict(id='artikelBar') + ,dict(id='foot') + ,dict(id='bildLegende') + ,dict(name='div',attrs={'class':['box']}) + + ] + + feeds = [ + (u'NZZ Folio' , u'http://rss.nzzfolio.ch/') + ] + From ef1dd71e70feab9703477f0a2f42888801832807 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 7 Jul 2012 22:00:11 +0530 Subject: [PATCH 07/18] ... 
--- src/calibre/devices/android/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index da787f0b81..3a3138633d 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -10,7 +10,7 @@ import cStringIO from calibre.devices.usbms.driver import USBMS -HTC_BCDS = [0x100, 0x0222, 0x0226, 0x227, 0x228] +HTC_BCDS = [0x100, 0x0222, 0x0226, 0x227, 0x228, 0x229] class ANDROID(USBMS): From 1fda8e97eeca2b75ebe1fb27f681257644bccb49 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 7 Jul 2012 22:02:01 +0530 Subject: [PATCH 08/18] Empire Magazine by Dave Asbury --- recipes/empire_magazine.recipe | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 recipes/empire_magazine.recipe diff --git a/recipes/empire_magazine.recipe b/recipes/empire_magazine.recipe new file mode 100644 index 0000000000..138b7bffd1 --- /dev/null +++ b/recipes/empire_magazine.recipe @@ -0,0 +1,51 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1341650280(BasicNewsRecipe): + + title = u'Empire Magazine' + description = 'Author D.Asbury. Film articles from Empire Mag. 
' + __author__ = 'Dave Asbury' + # last updated 7/7/12 + remove_empty_feeds = True + remove_javascript = True + no_stylesheets = True + #oldest_article = 7 + max_articles_per_feed = 20 + cover_url = 'http://www.empireonline.com/images/magazine/cover.jpg' + conversion_options = { + 'linearize_tables' : True, + } + #auto_cleanup = True + preprocess_regexps = [ + (re.compile(r'<a href="http://twitter.com/share.*?</a>', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'<head>.*?<!-- CONTENT: START -->', re.IGNORECASE | re.DOTALL), lambda match: '<head></head><!-- CONTENT: START -->'), + (re.compile(r'<!-- LATEST NEWS HEADLINES: START -->.*?<!-- LATEST NEWS HEADLINES: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- LATEST NEWS HEADLINES: START --><!-- LATEST NEWS HEADLINES: END -->'), + (re.compile(r'<!-- RELATED FUTURE FILMS: START -->.*?<!-- RELATED FUTURE FILMS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- RELATED FUTURE FILMS: START --><!-- RELATED FUTURE FILMS: END -->'), + (re.compile(r'<!-- CURRENT HIGHLIGHTS: START-->.*?<!-- CURRENT HIGHLIGHTS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- CURRENT HIGHLIGHTS: START--><!-- CURRENT HIGHLIGHTS: END -->'), + (re.compile(r'<!-- RELATED REVIEWS: START -->.*?<!-- RELATED REVIEWS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- RELATED REVIEWS: START --><!-- RELATED REVIEWS: END -->'), + (re.compile(r'<!-- RELATED INTERVIEWS -->.*?<!-- RELATED REVIEWS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- RELATED INTERVIEWS --><!-- RELATED REVIEWS: END -->'), + (re.compile(r'<!-- CONTENT: END -->.*?</body>', re.IGNORECASE | re.DOTALL), lambda match: '<!-- CONTENT: END --></body>'), + (re.compile(r'<!-- STORY: END -->.*?</body>', re.IGNORECASE | re.DOTALL), lambda match: '<!-- STORY: END --></body>'), + (re.compile(r'<!-- RATINGS GUIDE: START-->.*?<!-- RATINGS GUIDE: END-->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- RATINGS GUIDE: START--><!-- RATINGS 
GUIDE: END-->'), + (re.compile(r'<strong>SUBSCRIBE TO EMPIRE</strong>.*?</tbody>', re.IGNORECASE | re.DOTALL), lambda match: '</tbody>'), + (re.compile(r'<!-- USER REVIEWS: START -->.*?<!-- USER REVIEWS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- USER REVIEWS: START --><!-- USER REVIEWS: END -->'), + (re.compile(r'Advertisement', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'<a name="haveyoursay".*?now to have your say.', re.IGNORECASE | re.DOTALL), lambda match: ''), + ] + keep_only_tags = [ + # dict(name='h1'), + # dict(attrs={'class' : 'mediumblack'}), + ] + remove_tags = [dict(name='td', attrs={'width':'200', 'valign' : 'top'}), + dict(name='b'), + dict(name='a',attrs={'name' : 'haveyoursay'}), + dict(attrs={'class' : 'newslink'}), + ] + + + feeds = [(u'News', u'http://feed43.com/7338478755673147.xml'), + (u'Recent Features',u'http://feed43.com/4346347750304760.xml'), + (u'Interviews',u'http://feed43.com/3418350077724081.xml'), + (u'Film Reviews',u'http://feed43.com/2643703076510627.xml'), + ] From 436965bd253b6cea2f551489b92b0bf7ffb74baf Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 00:40:06 +0530 Subject: [PATCH 09/18] ... 
--- src/calibre/gui2/convert/metadata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py index 6d43abdf63..a2c1427b91 100644 --- a/src/calibre/gui2/convert/metadata.py +++ b/src/calibre/gui2/convert/metadata.py @@ -97,6 +97,9 @@ class MetadataWidget(Widget, Ui_Form): else: self.cover.setPixmap(QPixmap(I('default_cover.png'))) self.cover.setToolTip(_('This book has no cover')) + for x in ('author', 'series', 'publisher'): + x = getattr(self, x) + x.lineEdit().deselect() def set_cover_tooltip(self, pm): tt = _('Cover size: %(width)d x %(height)d pixels') % dict( From 8bf3ede5c13a6905915e965505a8e1aaefa6cd2a Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 7 Jul 2012 20:59:19 -0400 Subject: [PATCH 10/18] Store: B&N Remove affiliate portion of B&N plugin as B&N has dropped/barred calibre from participating in the program. --- src/calibre/customize/builtins.py | 1 - src/calibre/gui2/store/stores/bn_plugin.py | 22 ++++------------------ 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index d5e911ef1b..9b9da83b85 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1262,7 +1262,6 @@ class StoreBNStore(StoreBase): headquarters = 'US' formats = ['NOOK'] - affiliate = True class StoreBeamEBooksDEStore(StoreBase): name = 'Beam EBooks DE' diff --git a/src/calibre/gui2/store/stores/bn_plugin.py b/src/calibre/gui2/store/stores/bn_plugin.py index ded20e8823..eac1a6d5a4 100644 --- a/src/calibre/gui2/store/stores/bn_plugin.py +++ b/src/calibre/gui2/store/stores/bn_plugin.py @@ -24,26 +24,12 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog class BNStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): - pub_id = 'sHa5EXvYOwA' - # Use Kovid's affiliate id 30% of the time. 
- if random.randint(1, 10) in (1, 2, 3): - pub_id = '0dsO3kDu/AU' - - murl = 'http://click.linksynergy.com/fs-bin/click?id=%s&offerid=239662.13&type=3&subid=0' % pub_id - - if detail_item: - purl = 'http://click.linksynergy.com/fs-bin/click?id=%s&subid=&offerid=239662.%s&type=2&subid=0' % (pub_id, detail_item) - url = purl - else: - purl = None - url = murl - - #print(url) + url = "http://bn.com" if external or self.config.get('open_external', False): - open_url(QUrl(url_slash_cleaner(url))) + open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url))) else: - d = WebStoreDialog(self.gui, murl, parent, purl) + d = WebStoreDialog(self.gui, url, parent, detail_item) d.setWindowTitle(self.name) d.set_tags(self.config.get('tags', '')) d.exec_() @@ -60,7 +46,7 @@ class BNStore(BasicStoreConfig, StorePlugin): if counter <= 0: break - id = ''.join(data.xpath('.//div[contains(@class, "display-tile-item")]/@data-bn-ean')) + id = ''.join(data.xpath('.//div[contains(@class, "image-bounding-box")]/a/@href')) if not id: continue From 2a551d882bbc19b86d000cf6327cb5d953973745 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 09:26:27 +0530 Subject: [PATCH 11/18] Fix #1022163 (Updated recipe for Adventure Gamers) --- recipes/adventuregamers.recipe | 59 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/recipes/adventuregamers.recipe b/recipes/adventuregamers.recipe index d08eca1723..b82bb7d02d 100644 --- a/recipes/adventuregamers.recipe +++ b/recipes/adventuregamers.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>' +__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>' ''' www.adventuregamers.com ''' @@ -14,24 +14,24 @@ class AdventureGamers(BasicNewsRecipe): publisher = 'Adventure Gamers' category = 'news, games, adventure, technology' oldest_article = 10 - delay = 10 + #delay = 10 
max_articles_per_feed = 100 no_stylesheets = True - encoding = 'cp1252' + encoding = 'utf8' remove_javascript = True use_embedded_content = False INDEX = u'http://www.adventuregamers.com' extra_css = """ .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74} - .pageheader_title{font-size: xx-large; color: #394128} + .pageheader_title,.page_title{font-size: xx-large; color: #394128} .pageheader_byline{font-size: small; font-weight: bold; color: #394128} .score_bg {display: inline; width: 100%; margin-bottom: 2em} .score_column_1{ padding-left: 10px; font-size: small; width: 50%} .score_column_2{ padding-left: 10px; font-size: small; width: 50%} .score_column_3{ padding-left: 10px; font-size: small; width: 50%} - .score_header{font-size: large; color: #50544A} - .bodytext{display: block} - body{font-family: Helvetica,Arial,sans-serif} + .score_header{font-size: large; color: #50544A} + img{margin-bottom: 1em;} + body{font-family: 'Open Sans',Helvetica,Arial,sans-serif} """ conversion_options = { @@ -41,35 +41,38 @@ class AdventureGamers(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [ - dict(name='div', attrs={'class':'content_middle'}) - ] - + keep_only_tags = [dict(name='div', attrs={'class':'cleft_inn'})] remove_tags = [ - dict(name=['object','link','embed','form']) - ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']}) + dict(name=['object','link','embed','form','iframe','meta']) + ,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'}) + ,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'}) ] - - remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})] + remove_tags_after = [dict(name='div', attrs={'class':'bodytext'})] remove_attributes = ['width','height'] - feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')] + feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')] def get_article_url(self, 
article): - return article.get('guid', None) + url = BasicNewsRecipe.get_article_url(self, article) + if '/videos/' in url or '/hypeometer/' in url: + return None + return url def append_page(self, soup, appendtag, position): - pager = soup.find('div',attrs={'class':'toolbar_fat_next'}) + pager = soup.find('div', attrs={'class':'pagination_big'}) if pager: - nexturl = self.INDEX + pager.a['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'class':'bodytext'}) - for it in texttag.findAll(style=True): - del it['style'] - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) - texttag.extract() - appendtag.insert(position,texttag) + nextpage = soup.find('a', attrs={'class':'next-page'}) + if nextpage: + nexturl = nextpage['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'class':'bodytext'}) + for it in texttag.findAll(style=True): + del it['style'] + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + texttag.extract() + pager.extract() + appendtag.insert(position,texttag) def preprocess_html(self, soup): @@ -78,7 +81,7 @@ class AdventureGamers(BasicNewsRecipe): for item in soup.findAll('div', attrs={'class':'floatright'}): item.extract() self.append_page(soup, soup.body, 3) - pager = soup.find('div',attrs={'class':'toolbar_fat'}) + pager = soup.find('div',attrs={'class':'pagination_big'}) if pager: pager.extract() return self.adeify_images(soup) From 0c2a1df518a638c7c7ed8834784f753a49891f5d Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 09:52:37 +0530 Subject: [PATCH 12/18] Support for retina displays in OS X (I hope) --- setup/installer/osx/app/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 96edbc2c8f..7a600287e3 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -364,6 +364,7 @@ class Py2App(object): 
'application. Visit http://calibre-ebook.com for details.'), CFBundleIconFile='library.icns', LSMultipleInstancesProhibited=True, + NSHighResolutionCapable=True, LSEnvironment=env ) plistlib.writePlist(pl, join(self.contents_dir, 'Info.plist')) From da120c620d1f88174270607dfe426151e26b6479 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 10:38:15 +0530 Subject: [PATCH 13/18] ... --- src/calibre/devices/kindle/driver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index a220a68871..a12ad5ebce 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -413,7 +413,8 @@ class KINDLE2(KINDLE): if not opts.extra_customization[self.OPT_APNX]: return - if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'): + if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', + '.prc', '.azw3'): return # Create the sidecar folder if necessary From 3bca088525f93470ee6d885352429291359a7cac Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 10:46:27 +0530 Subject: [PATCH 14/18] Speedup completion for authors and tags for libraries on slow storage --- src/calibre/gui2/library/views.py | 2 +- src/calibre/gui2/metadata/basic_widgets.py | 2 +- src/calibre/library/database2.py | 38 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index cb32358d3b..59d3b89770 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -125,7 +125,7 @@ class BooksView(QTableView): # {{{ self.last_modified_delegate = DateDelegate(self, tweak_name='gui_last_modified_display_format') self.languages_delegate = LanguagesDelegate(self) - self.tags_delegate = CompleteDelegate(self, ',', 'all_tags') + self.tags_delegate = CompleteDelegate(self, 
',', 'all_tag_names') self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True) self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True) self.series_delegate = TextDelegate(self) diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index d2a983415f..0d9f4e6aa8 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -1114,7 +1114,7 @@ class TagsEdit(MultiCompleteLineEdit): # {{{ tags = db.tags(id_, index_is_id=True) tags = tags.split(',') if tags else [] self.current_val = tags - self.all_items = db.all_tags() + self.all_items = db.all_tag_names() self.original_val = self.current_val @property diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index e60350b307..905eb84fa4 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -3737,4 +3737,42 @@ books_series_link feeds 'SELECT {0}, count(*) FROM books_{1}_link GROUP BY {0}'.format( fm['link_column'], fm['table'])) + def all_author_names(self): + ai = self.FIELD_MAP['authors'] + ans = set() + for rec in self.data.iterall(): + auts = rec[ai] + if auts: + for x in auts.split(','): + ans.add(x.replace('|', ',')) + return ans + + def all_tag_names(self): + ai = self.FIELD_MAP['tags'] + ans = set() + for rec in self.data.iterall(): + auts = rec[ai] + if auts: + for x in auts.split(','): + ans.add(x) + return ans + + def all_publisher_names(self): + ai = self.FIELD_MAP['publisher'] + ans = set() + for rec in self.data.iterall(): + auts = rec[ai] + if auts: + ans.add(auts) + return ans + + def all_series_names(self): + ai = self.FIELD_MAP['series'] + ans = set() + for rec in self.data.iterall(): + auts = rec[ai] + if auts: + ans.add(auts) + return ans + From 152af5efce21111dd5754568c2e1b1044d995617 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 10:50:30 +0530 Subject: [PATCH 15/18] ... 
--- src/calibre/gui2/complete.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py index 5aa3d5af9b..a0c0307425 100644 --- a/src/calibre/gui2/complete.py +++ b/src/calibre/gui2/complete.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt, QApplication, QCompleter) -from calibre.utils.icu import sort_key, lower +from calibre.utils.icu import sort_key from calibre.gui2 import NONE from calibre.gui2.widgets import EnComboBox, LineEditECM from calibre.utils.config_base import tweaks @@ -24,12 +24,11 @@ class CompleteModel(QAbstractListModel): def set_items(self, items): items = [unicode(x.strip()) for x in items] if len(items) < tweaks['completion_change_to_ascii_sorting']: - self.items = sorted(items, key=lambda x: sort_key(x)) + self.items = sorted(items, key=sort_key) self.sorting = QCompleter.UnsortedModel else: self.items = sorted(items, key=lambda x:x.lower()) self.sorting = QCompleter.CaseInsensitivelySortedModel - self.lowered_items = [lower(x) for x in self.items] self.reset() def rowCount(self, *args): From 6cab70465c8c4c54a1b3ee7e4183c8e8550d39de Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 14:46:10 +0530 Subject: [PATCH 16/18] Add primary_sort_key() --- src/calibre/utils/icu.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 76a374d085..7842a34e82 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -129,7 +129,7 @@ sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator) strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ - icu_case_sensitive_sort_key + partial(icu_case_sensitive_sort_key, _collator) case_sensitive_strcmp = cmp if _icu_not_ok else 
icu_case_sensitive_strcmp @@ -164,6 +164,13 @@ def primary_find(pat, src): return py_find(ascii_text(pat), ascii_text(src)) return icu_find(primary_collator(), pat, src) +def primary_sort_key(val): + 'A sort key that ignores case and diacritics' + if _icu_not_ok: + from calibre.utils.filenames import ascii_text + return ascii_text(val).lower() + return primary_collator().sort_key(val) + ################################################################################ def test(): # {{{ From 5395cda9c78eb8ff4d9aad56386676e009343ebe Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 18:22:53 +0530 Subject: [PATCH 17/18] ... --- src/calibre/utils/icu.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 7842a34e82..50e7274b30 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -155,21 +155,30 @@ def primary_strcmp(a, b): if _icu_not_ok: from calibre.utils.filenames import ascii_text return py_strcmp(ascii_text(a), ascii_text(b)) - return primary_collator().strcmp(a, b) + try: + return _primary_collator.strcmp(a, b) + except AttributeError: + return primary_collator().strcmp(a, b) def primary_find(pat, src): 'find that ignores case and accents on letters' if _icu_not_ok: from calibre.utils.filenames import ascii_text return py_find(ascii_text(pat), ascii_text(src)) - return icu_find(primary_collator(), pat, src) + try: + return icu_find(_primary_collator, pat, src) + except AttributeError: + return icu_find(primary_collator(), pat, src) def primary_sort_key(val): 'A sort key that ignores case and diacritics' if _icu_not_ok: from calibre.utils.filenames import ascii_text return ascii_text(val).lower() - return primary_collator().sort_key(val) + try: + return _primary_collator.sort_key(val) + except AttributeError: + return primary_collator().sort_key(val) 
################################################################################ From 4bed21a52f4c35c99e3667cabe37834f0873385d Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 8 Jul 2012 19:39:56 +0530 Subject: [PATCH 18/18] Use a secondary collator for sort_key and implement primary_startswith --- src/calibre/utils/icu.c | 42 ++++++++++++++++++++++++++++++++++++++++ src/calibre/utils/icu.py | 42 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 8e8a8e9ec8..c451e9cdac 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -272,6 +272,44 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) return Py_BuildValue("O", ans); } // }}} +// Collator.startswith {{{ +static PyObject * +icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) { + PyObject *a_, *b_; + size_t asz, bsz; + int32_t actual_a, actual_b; + UChar *a, *b; + wchar_t *aw, *bw; + UErrorCode status = U_ZERO_ERROR; + int ans = 0; + + if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL; + asz = PyUnicode_GetSize(a_); bsz = PyUnicode_GetSize(b_); + if (asz < bsz) Py_RETURN_FALSE; + if (bsz == 0) Py_RETURN_TRUE; + + a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); + b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar)); + aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); + bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t)); + + if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory(); + + actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); + actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1); + if (actual_a > -1 && actual_b > -1) { + u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status); + u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status); + + if (U_SUCCESS(status) && ucol_equal(self->collator, a, actual_b, b, actual_b)) + ans = 1; + } + + 
free(a); free(b); free(aw); free(bw); + if (ans) Py_RETURN_TRUE; + Py_RETURN_FALSE; +} // }}} + static PyObject* icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs); @@ -296,6 +334,10 @@ static PyMethodDef icu_Collator_methods[] = { "clone() -> returns a clone of this collator." }, + {"startswith", (PyCFunction)icu_Collator_startswith, METH_VARARGS, + "startswith(a, b) -> returns True iff a startswith b, following the current collation rules." + }, + {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 50e7274b30..0dab76cd30 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -12,7 +12,7 @@ from functools import partial from calibre.constants import plugins from calibre.utils.config_base import tweaks -_icu = _collator = _primary_collator = None +_icu = _collator = _primary_collator = _secondary_collator = None _locale = None _none = u'' @@ -55,6 +55,13 @@ def primary_collator(): _primary_collator.strength = _icu.UCOL_PRIMARY return _primary_collator +def secondary_collator(): + global _secondary_collator + if _secondary_collator is None: + _secondary_collator = _collator.clone() + _secondary_collator.strength = _icu.UCOL_SECONDARY + return _secondary_collator + def py_sort_key(obj): if not obj: return _none @@ -63,7 +70,10 @@ def py_sort_key(obj): def icu_sort_key(collator, obj): if not obj: return _none2 - return collator.sort_key(lower(obj)) + try: + return _secondary_collator.sort_key(obj) + except AttributeError: + return secondary_collator().sort_key(obj) def py_find(pattern, source): pos = source.find(pattern) @@ -77,6 +87,12 @@ def icu_find(collator, pattern, source): except TypeError: return collator.find(unicode(pattern), unicode(source)) +def icu_startswith(collator, a, b): + try: + return collator.startswith(a, b) + except TypeError: + return collator.startswith(unicode(a), unicode(b)) + def py_case_sensitive_sort_key(obj): if not obj: return _none @@ -180,6 +196,15 @@ 
def primary_sort_key(val): except AttributeError: return primary_collator().sort_key(val) +def primary_startswith(a, b): + if _icu_not_ok: + from calibre.utils.filenames import ascii_text + return ascii_text(a).lower().startswith(ascii_text(b).lower()) + try: + return icu_startswith(_primary_collator, a, b) + except AttributeError: + return icu_startswith(primary_collator(), a, b) + ################################################################################ def test(): # {{{ @@ -299,8 +324,8 @@ pêché''' print print '\nTesting primary collation' - for k, v in {u'pèché': u'peche', u'flüße':u'flusse', - u'Štepánek':u'Štepanek'}.iteritems(): + for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', + u'Štepánek':u'ŠtepaneK'}.iteritems(): if primary_strcmp(k, v) != 0: prints('primary_strcmp() failed with %s != %s'%(k, v)) return @@ -309,10 +334,12 @@ pêché''' return global _primary_collator + orig = _primary_collator _primary_collator = _icu.Collator('es') if primary_strcmp(u'peña', u'pena') == 0: print 'Primary collation in Spanish locale failed' return + _primary_collator = orig print '\nTesting contractions' c = _icu.Collator('cs') @@ -322,6 +349,13 @@ pêché''' print 'Contractions for the Czech language failed' return + print '\nTesting startswith' + p = primary_startswith + if (not p('asd', 'asd') or not p('asd', 'A') or + not p('x', '')): + print 'startswith() failed' + return + # }}} if __name__ == '__main__':