From d134be5b1b89c87fafa01c5fa3a45202b9732848 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 07:58:02 +0530
Subject: [PATCH 1/7] ...

---
 recipes/independent.recipe | 45 +++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/recipes/independent.recipe b/recipes/independent.recipe
index 7403163e6a..ebe0a30fd2 100644
--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@@ -39,7 +39,9 @@ class TheIndependentNew(BasicNewsRecipe):
     encoding                = 'utf-8'
     remove_tags             =[
                                dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
-                               dict(attrs={'class' : ['autoplay','openBiogPopup']})
+                               dict(attrs={'class' : ['autoplay','openBiogPopup']}),
+                               dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
+                               dict(attrs={'style' : re.compile('.*')}),
                              ]
 
     keep_only_tags          =[dict(attrs={'id':'main'})]
@@ -113,6 +115,7 @@ class TheIndependentNew(BasicNewsRecipe):
                     return None
 
         items_to_extract = []
+        slideshow_elements = []
 
         for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
             remove = True
@@ -131,6 +134,7 @@ class TheIndependentNew(BasicNewsRecipe):
             if (pattern.search(item['class'])) is not None:
                 if self._FETCH_IMAGES:
                     remove = False
+                    slideshow_elements.append(item)
                 else:
                     remove = True
 
@@ -148,28 +152,29 @@ class TheIndependentNew(BasicNewsRecipe):
         items_to_extract = []
 
         if self._FETCH_IMAGES:
-            for item in soup.findAll('a',attrs={'href' : re.compile('.*')}):
-                if item.img is not None:
-                    #use full size image
-                    img = item.findNext('img')
+            for element in slideshow_elements:
+                for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
+                    if item.img is not None:
+                        #use full size image
+                        img = item.findNext('img')
 
-                    img['src'] = item['href']
+                        img['src'] = item['href']
 
-                    #insert caption if available
-                    if img['title'] is not None and (len(img['title']) > 1):
-                        tag = Tag(soup,'h3')
-                        text = NavigableString(img['title'])
-                        tag.insert(0,text)
+                        #insert caption if available
+                        if img.get('title') and (len(img['title']) > 1):
+                            tag = Tag(soup,'h3')
+                            text = NavigableString(img['title'])
+                            tag.insert(0,text)
 
-                        #picture before text
-                        img.extract()
-                        item.insert(0,img)
-                        item.insert(1,tag)
+                            #picture before text
+                            img.extract()
+                            item.insert(0,img)
+                            item.insert(1,tag)
 
-                    # remove link
-                    item.name = "div"
-                    item["class"]='image'
-                    del item["href"]
+                        # remove link
+                        item.name = "div"
+                        item["class"]='image'
+                        del item["href"]
 
 
         #remove empty subtitles
@@ -283,7 +288,7 @@ class TheIndependentNew(BasicNewsRecipe):
         items_to_extract = []
         for item in soup.findAll('div', attrs={'class' : 'image'}):
             img = item.findNext('img')
-            if img is not None and img['src'] is not None:
+            if img and img.get('src'):
                 # broken images still point to remote url
                 pattern = re.compile('http://www.independent.co.uk.*')
                 if pattern.match(img["src"]) is not None:

From 187ca2a518959a96627a55b0331cd77fc4274570 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 10:28:54 +0530
Subject: [PATCH 2/7] Fix #896412 (Improvement for #886763 (or a little bug in
 its implementation))

---
 src/calibre/ebooks/metadata/__init__.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index d9399e9a3e..8dc14d52df 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -109,12 +109,16 @@ def get_title_sort_pat(lang=None):
     q = canonicalize_lang(q) if q else q
     data = tweaks['per_language_title_sort_articles']
     ans = data.get(q, None)
-    if ans is None:
-        ans = data['eng']
-    ans = frozenset(ans + data['eng'])
+    try:
+        ans = frozenset(ans) if ans else frozenset(data['eng'])
+    except:
+        ans = frozenset((r'A\s+', r'The\s+', r'An\s+'))
     ans = '|'.join(ans)
     ans = '^(%s)'%ans
-    ans = re.compile(ans, re.IGNORECASE)
+    try:
+        ans = re.compile(ans, re.IGNORECASE)
+    except:
+        ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
     _title_pats[lang] = ans
     return ans
 

From 209344e319a96d5cec13ab8b724cb437152abc1b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 10:46:07 +0530
Subject: [PATCH 3/7] ...

---
 resources/default_tweaks.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index 8c82f1d4e6..ecd3888bf7 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -242,6 +242,8 @@ per_language_title_sort_articles = {
         # Greek
         'ell'  : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
                   r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
+        # Hungarian
+        'hun'  : (r'A\s+', r'Az\s+', r'Egy\s+',),
 }
 default_language_for_title_sort = None
 title_sort_articles=r'^(A|The|An)\s+'

From 9c7775e43d9043ec2b307f5231238502cffd222d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 12:17:28 +0530
Subject: [PATCH 4/7] Fix #896508 (unused variable warnings)

---
 src/calibre/gui2/pictureflow/pictureflow.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp
index b82747841c..4e9d8a402d 100644
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@@ -1409,8 +1409,8 @@ void PictureFlow::dataChanged() { d->dataChanged(); }
 void PictureFlow::emitcurrentChanged(int index) { emit currentChanged(index); }
 
 int FlowImages::count() { return 0; }
-QImage FlowImages::image(int index) { index=0; return QImage(); }
-QString FlowImages::caption(int index) {index=0; return QString(); }
-QString FlowImages::subtitle(int index) {index=0; return QString(); }
+QImage FlowImages::image(int /*index*/) { return QImage(); } // index ignored: always a default-constructed QImage
+QString FlowImages::caption(int /*index*/) { return QString(); } // index ignored: always a default-constructed QString
+QString FlowImages::subtitle(int /*index*/) { return QString(); } // index ignored: always a default-constructed QString
 
 // }}}

From ee53cb611db7562c539df662dccc156002397093 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 17:59:23 +0530
Subject: [PATCH 5/7] ...

---
 src/calibre/library/cli.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py
index 43d4e8276b..86f8a070f3 100644
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@@ -342,7 +342,8 @@ def remove_option_parser():
 
 Remove the books identified by ids from the database. ids should be a comma separated \
 list of id numbers (you can get id numbers by using the list command). For example, \
-23,34,57-85
+23,34,57-85 (when specifying a range, the last number in the range is not
+included).
 '''))
 
 def command_remove(args, dbpath):

From 98de6945b397e4f18b8a73b7e8b2215b32b2df0d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 18:38:49 +0530
Subject: [PATCH 6/7] =?UTF-8?q?When=20searching=20for=20author=20names=20w?=
 =?UTF-8?q?ith=20accented=20characters,=20allow=20the=20non=20accented=20v?=
 =?UTF-8?q?ersion=20to=20match.=20For=20example,=20searching=20for=20Nino?=
 =?UTF-8?q?=20will=20now=20match=20Ni=C3=B1o.=20Fixes=20#879729=20([Enhanc?=
 =?UTF-8?q?ement]=20Searching=20for=20accented=20and=20non-standard=20lett?=
 =?UTF-8?q?ers)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/calibre/library/caches.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 81db8830b7..15d9123b81 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs
 from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.utils.pyparsing import ParseException
-from calibre.utils.localization import canonicalize_lang, lang_map
+from calibre.utils.localization import canonicalize_lang, lang_map, get_udc
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre import prints
@@ -217,6 +217,7 @@ class ResultCache(SearchQueryParser): # {{{
         self.FIELD_MAP = FIELD_MAP
         self.db_prefs = db_prefs
         self.composites = {}
+        self.udc = get_udc()
         for key in field_metadata:
             if field_metadata[key]['datatype'] == 'composite':
                 self.composites[field_metadata[key]['rec_index']] = key
@@ -261,6 +262,15 @@ class ResultCache(SearchQueryParser): # {{{
 
     # Search functions {{{
 
+    def ascii_name(self, name):
+        # Return self.udc.decode(name), or False when the result equals name
+        # or decoding fails (the author search drops False via filter(None, ...))
+        try:
+            ans = self.udc.decode(name)
+            return False if ans == name else ans
+        except Exception:  # best effort; let KeyboardInterrupt/SystemExit through
+            return False
+
     def universal_set(self):
         return set([i[0] for i in self._data if i is not None])
 
@@ -734,6 +744,8 @@ class ResultCache(SearchQueryParser): # {{{
                 else:
                     q = query
 
+                au_loc = self.FIELD_MAP['authors']
+
                 for id_ in candidates:
                     item = self._data[id_]
                     if item is None: continue
@@ -776,6 +788,9 @@ class ResultCache(SearchQueryParser): # {{{
                     if loc not in exclude_fields: # time for text matching
                         if is_multiple_cols[loc] is not None:
                             vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
+                            if loc == au_loc:
+                                vals += filter(None, map(self.ascii_name,
+                                    vals))
                         else:
                             vals = [item[loc]] ### make into list to make _match happy
                         if _match(q, vals, matchkind):

From 4fcbaa8919921d426bc4ad9f02eafb1a35835b6c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Nov 2011 20:13:13 +0530
Subject: [PATCH 7/7] Fix #896585 (extra articles for dutch language)

---
 resources/default_tweaks.py      |  4 +++-
 src/calibre/gui2/duplicates.py   | 11 +++++++++++
 src/calibre/library/database2.py | 17 +++++++++++++++--
 3 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 src/calibre/gui2/duplicates.py

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index ecd3888bf7..224038b0f9 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -232,7 +232,9 @@ per_language_title_sort_articles = {
         'deu'  : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
                   r'Eine\s+', r'Einen\s+', ),
         # Dutch
-        'nld'  : (r'De\s+', r'Het\s+', r'Een\s+', ),
+        'nld'  : (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+',
+                  r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+',
+                  r"'t\s+"),
         # Swedish
         'swe'  : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
         # Turkish
diff --git a/src/calibre/gui2/duplicates.py b/src/calibre/gui2/duplicates.py
new file mode 100644
index 0000000000..cc6da1e995
--- /dev/null
+++ b/src/calibre/gui2/duplicates.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index a917aa09b2..3782149512 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -13,7 +13,7 @@ import threading, random
 from itertools import repeat
 from math import ceil
 
-from calibre import prints
+from calibre import prints, force_unicode
 from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
         string_to_authors, authors_to_string, get_title_sort_pat)
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
@@ -33,7 +33,7 @@ from calibre import isbytestring
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
 from calibre.utils.config import prefs, tweaks, from_json, to_json
-from calibre.utils.icu import sort_key, strcmp
+from calibre.utils.icu import sort_key, strcmp, lower
 from calibre.utils.search_query_parser import saved_searches, set_saved_searches
 from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format
 from calibre.utils.magick.draw import save_cover_data_to
@@ -1003,6 +1003,19 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
         return False
 
+    def books_with_same_title(self, mi, all_matches=True):
+        # Set of book ids whose title equals mi.title once both are lower()-ed
+        wanted, matches = mi.title, set()
+        if not wanted:
+            return matches  # no title to compare against: empty set
+        wanted = lower(force_unicode(wanted))
+        for candidate in self.all_ids():
+            if lower(self.title(candidate, index_is_id=True)) == wanted:
+                matches.add(candidate)
+                if not all_matches:
+                    break  # stop at the first hit when all_matches is False
+        return matches
+
     def find_identical_books(self, mi):
         fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
             isinstance(pat, basestring) else pat, repl) for pat, repl in