From d134be5b1b89c87fafa01c5fa3a45202b9732848 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 07:58:02 +0530 Subject: [PATCH 1/7] ... --- recipes/independent.recipe | 45 +++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/recipes/independent.recipe b/recipes/independent.recipe index 7403163e6a..ebe0a30fd2 100644 --- a/recipes/independent.recipe +++ b/recipes/independent.recipe @@ -39,7 +39,9 @@ class TheIndependentNew(BasicNewsRecipe): encoding = 'utf-8' remove_tags =[ dict(attrs={'id' : ['RelatedArtTag','renderBiography']}), - dict(attrs={'class' : ['autoplay','openBiogPopup']}) + dict(attrs={'class' : ['autoplay','openBiogPopup']}), + dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}), + dict(attrs={'style' : re.compile('.*')}), ] keep_only_tags =[dict(attrs={'id':'main'})] @@ -113,6 +115,7 @@ class TheIndependentNew(BasicNewsRecipe): return None items_to_extract = [] + slideshow_elements = [] for item in soup.findAll(attrs={'class' : re.compile("widget.*")}): remove = True @@ -131,6 +134,7 @@ class TheIndependentNew(BasicNewsRecipe): if (pattern.search(item['class'])) is not None: if self._FETCH_IMAGES: remove = False + slideshow_elements.append(item) else: remove = True @@ -148,28 +152,29 @@ class TheIndependentNew(BasicNewsRecipe): items_to_extract = [] if self._FETCH_IMAGES: - for item in soup.findAll('a',attrs={'href' : re.compile('.*')}): - if item.img is not None: - #use full size image - img = item.findNext('img') + for element in slideshow_elements: + for item in element.findAll('a',attrs={'href' : re.compile('.*')}): + if item.img is not None: + #use full size image + img = item.findNext('img') - img['src'] = item['href'] + img['src'] = item['href'] - #insert caption if available - if img['title'] is not None and (len(img['title']) > 1): - tag = Tag(soup,'h3') - text = NavigableString(img['title']) - tag.insert(0,text) + #insert caption if available + if img.get('title') and (len(img['title']) > 1): + tag = Tag(soup,'h3') + text = NavigableString(img['title']) + tag.insert(0,text) - #picture before text - img.extract() - item.insert(0,img) - item.insert(1,tag) + #picture before text + img.extract() + item.insert(0,img) + item.insert(1,tag) - # remove link - item.name = "div" - item["class"]='image' - del item["href"] + # remove link + item.name = "div" + item["class"]='image' + del item["href"] #remove empty subtitles @@ -283,7 +288,7 @@ class TheIndependentNew(BasicNewsRecipe): items_to_extract = [] for item in soup.findAll('div', attrs={'class' : 'image'}): img = item.findNext('img') - if img is not None and img['src'] is not None: + if img and img.get('src'): # broken images still point to remote url pattern = re.compile('http://www.independent.co.uk.*') if pattern.match(img["src"]) is not None: From 187ca2a518959a96627a55b0331cd77fc4274570 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 10:28:54 +0530 Subject: [PATCH 2/7] Fix #896412 (Improvement for #886763 (or a little bug in its implementation)) --- src/calibre/ebooks/metadata/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index d9399e9a3e..8dc14d52df 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -109,12 +109,16 @@ def get_title_sort_pat(lang=None): q = canonicalize_lang(q) if q else q data = tweaks['per_language_title_sort_articles'] ans = data.get(q, None) - if ans is None: - ans = data['eng'] - ans = frozenset(ans + data['eng']) + try: + ans = frozenset(ans) if ans else frozenset(data['eng']) + except: + ans = frozenset((r'A\s+', r'The\s+', r'An\s+')) ans = '|'.join(ans) ans = '^(%s)'%ans - ans = re.compile(ans, re.IGNORECASE) + try: + ans = re.compile(ans, re.IGNORECASE) + except: + ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE) _title_pats[lang] = ans return ans From 209344e319a96d5cec13ab8b724cb437152abc1b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 10:46:07 +0530 Subject: [PATCH 3/7] ... --- resources/default_tweaks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 8c82f1d4e6..ecd3888bf7 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -242,6 +242,8 @@ per_language_title_sort_articles = { # Greek 'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+', r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ), + # Hungarian + 'hun' : (r'A\s+', 'Az\s+', 'Egy\s+',), } default_language_for_title_sort = None title_sort_articles=r'^(A|The|An)\s+' From 9c7775e43d9043ec2b307f5231238502cffd222d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 12:17:28 +0530 Subject: [PATCH 4/7] Fix #896508 (unused variable warnings) --- src/calibre/gui2/pictureflow/pictureflow.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp index b82747841c..4e9d8a402d 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.cpp +++ b/src/calibre/gui2/pictureflow/pictureflow.cpp @@ -1409,8 +1409,8 @@ void PictureFlow::dataChanged() { d->dataChanged(); } void PictureFlow::emitcurrentChanged(int index) { emit currentChanged(index); } int FlowImages::count() { return 0; } -QImage FlowImages::image(int index) { index=0; return QImage(); } -QString FlowImages::caption(int index) {index=0; return QString(); } -QString FlowImages::subtitle(int index) {index=0; return QString(); } +QImage FlowImages::image(int index) { Q_UNUSED(index); return QImage(); } +QString FlowImages::caption(int index) { Q_UNUSED(index); return QString(); } +QString FlowImages::subtitle(int index) { Q_UNUSED(index); return QString(); } // }}} From ee53cb611db7562c539df662dccc156002397093 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 17:59:23 +0530 Subject: [PATCH 5/7] ... --- src/calibre/library/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 43d4e8276b..86f8a070f3 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -342,7 +342,8 @@ def remove_option_parser(): Remove the books identified by ids from the database. ids should be a comma separated \ list of id numbers (you can get id numbers by using the list command). For example, \ -23,34,57-85 +23,34,57-85 (when specifying a range, the last number in the range is not +included). ''')) def command_remove(args, dbpath): From 98de6945b397e4f18b8a73b7e8b2215b32b2df0d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 18:38:49 +0530 Subject: [PATCH 6/7] =?UTF-8?q?When=20searching=20for=20author=20names=20w?= =?UTF-8?q?ith=20accented=20characters,=20allow=20the=20non=20accented=20v?= =?UTF-8?q?ersion=20to=20match.=20For=20example,=20searching=20for=20Nino?= =?UTF-8?q?=20will=20now=20match=20Ni=C3=B1o.=20Fixes=20#879729=20([Enhanc?= =?UTF-8?q?ement]=20Searching=20for=20accented=20and=20non-standard=20lett?= =?UTF-8?q?ers)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/calibre/library/caches.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 81db8830b7..15d9123b81 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException -from calibre.utils.localization import canonicalize_lang, lang_map +from calibre.utils.localization import canonicalize_lang, lang_map, get_udc from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre import prints @@ -217,6 +217,7 @@ class ResultCache(SearchQueryParser): # {{{ self.FIELD_MAP = FIELD_MAP self.db_prefs = db_prefs self.composites = {} + self.udc = get_udc() for key in field_metadata: if field_metadata[key]['datatype'] == 'composite': self.composites[field_metadata[key]['rec_index']] = key @@ -261,6 +262,15 @@ class ResultCache(SearchQueryParser): # {{{ # Search functions {{{ + def ascii_name(self, name): + try: + ans = self.udc.decode(name) + if ans == name: + ans = False + except: + ans = False + return ans + def universal_set(self): return set([i[0] for i in self._data if i is not None]) @@ -734,6 +744,8 @@ class ResultCache(SearchQueryParser): # {{{ else: q = query + au_loc = self.FIELD_MAP['authors'] + for id_ in candidates: item = self._data[id_] if item is None: continue @@ -776,6 +788,9 @@ class ResultCache(SearchQueryParser): # {{{ if loc not in exclude_fields: # time for text matching if is_multiple_cols[loc] is not None: vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])] + if loc == au_loc: + vals += filter(None, map(self.ascii_name, + vals)) else: vals = [item[loc]] ### make into list to make _match happy if _match(q, vals, matchkind): From 4fcbaa8919921d426bc4ad9f02eafb1a35835b6c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Nov 2011 20:13:13 +0530 Subject: [PATCH 7/7] Fix #896585 (extra articles for dutch language) --- resources/default_tweaks.py | 4 +++- src/calibre/gui2/duplicates.py | 11 +++++++++++ src/calibre/library/database2.py | 17 +++++++++++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 src/calibre/gui2/duplicates.py diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ecd3888bf7..224038b0f9 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -232,7 +232,9 @@ per_language_title_sort_articles = { 'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+', r'Eine\s+', r'Einen\s+', ), # Dutch - 'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ), + 'nld' : (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+', + r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+', + r"'t\s+"), # Swedish 'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ), # Turkish diff --git a/src/calibre/gui2/duplicates.py b/src/calibre/gui2/duplicates.py new file mode 100644 index 0000000000..cc6da1e995 --- /dev/null +++ b/src/calibre/gui2/duplicates.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index a917aa09b2..3782149512 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -13,7 +13,7 @@ import threading, random from itertools import repeat from math import ceil -from calibre import prints +from calibre import prints, force_unicode from calibre.ebooks.metadata import (title_sort, author_to_author_sort, string_to_authors, authors_to_string, get_title_sort_pat) from calibre.ebooks.metadata.opf2 import metadata_to_opf @@ -33,7 +33,7 @@ from calibre import isbytestring from calibre.utils.filenames import ascii_filename from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp from calibre.utils.config import prefs, tweaks, from_json, to_json -from calibre.utils.icu import sort_key, strcmp +from calibre.utils.icu import sort_key, strcmp, lower from calibre.utils.search_query_parser import saved_searches, set_saved_searches from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format from calibre.utils.magick.draw import save_cover_data_to @@ -1003,6 +1003,19 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False)) return False + def books_with_same_title(self, mi, all_matches=True): + title = mi.title + ans = set() + if title: + title = lower(force_unicode(title)) + for book_id in self.all_ids(): + x = self.title(book_id, index_is_id=True) + if lower(x) == title: + ans.add(book_id) + if not all_matches: + break + return ans + def find_identical_books(self, mi): fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if isinstance(pat, basestring) else pat, repl) for pat, repl in