diff --git a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe index c953a4dd95..1e2164b2af 100644 --- a/resources/recipes/el_pais.recipe +++ b/resources/recipes/el_pais.recipe @@ -1,8 +1,8 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal' +__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)' +description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)' __docformat__ = 'restructuredtext en' ''' @@ -12,12 +12,12 @@ elpais.es from calibre.web.feeds.news import BasicNewsRecipe class ElPais(BasicNewsRecipe): - __author__ = 'Kovid Goyal & Lorenzo Vigentini' + __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells' description = 'Main daily newspaper from Spain' cover_url = 'http://www.elpais.com/im/tit_logo_global.gif' title = u'El Pais' - publisher = 'Ediciones El Pais SL' + publisher = u'Ediciones El Pa\xeds SL' category = 'News, politics, culture, economy, general interest' language = 'es' @@ -32,7 +32,8 @@ class ElPais(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})] + keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})] + extra_css = ''' p{style:normal size:12 serif} @@ -40,25 +41,29 @@ class ElPais(BasicNewsRecipe): remove_tags = [ dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}), - dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}), - dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']}) + dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}), + dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}), + dict(name='p', attrs={'class':'nav_meses'}), + dict(attrs={'class':['enlaces_m','miniaturas_m']}) ] feeds = [ (u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'), (u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'), - (u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'), + (u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'), (u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'), - (u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'), - (u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'), - (u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'), + (u'Econom\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1006'), + (u'Pol\xedtica', u'http://www.elpais.com/rss/feed.html?feedId=17073'), + (u'Tecnolog\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1005'), (u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'), (u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'), (u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'), - (u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'), + (u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'), (u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'), (u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'), - ] + (u'Medio ambiente', u'http://www.elpais.com/rss/feed.html?feedId=17071'), + (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058') + ] def print_version(self, url): url = url+'?print=1' diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index c6abe595b6..e318d368ff 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -2488,7 +2488,8 @@ class ITUNES(DriverBase): zf_opf.close() # If 'News' in tags, tweak the title/author for friendlier display in iBooks - if _('News') or _('Catalog') in metadata.tags: + if _('News') in metadata.tags or \ + _('Catalog') in metadata.tags: if metadata.title.find('[') > 0: metadata.title = metadata.title[:metadata.title.find('[')-1] date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y')) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 65a4b62cf2..e002fe916b 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -1308,35 +1308,44 @@ class DeviceMixin(object): # {{{ def book_on_device(self, id, format=None, reset=False): ''' Return an indication of whether the given book represented by its db id - is on the currently connected device. It returns a 4 element list. The + is on the currently connected device. It returns a 6 element list. The first three elements represent memory locations main, carda, and cardb, and are true if the book is identifiably in that memory. The fourth - is the a count of how many instances of the book were found across all - the memory locations. + is a count of how many instances of the book were found across all + the memory locations. The fifth is the type of match. The type can be + one of: None, 'uuid', 'db_id', 'metadata'. The sixth is a set of paths to the + matching books on the device. ''' - loc = [None, None, None, 0] + loc = [None, None, None, 0, None, set([])] if reset: self.book_db_title_cache = None self.book_db_uuid_cache = None self.book_db_id_counts = None + self.book_db_uuid_path_map = None return + string_pat = re.compile('(?u)\W|[_]') + def clean_string(x): + x = x.lower() if x else '' + return string_pat.sub('', x) + if self.book_db_title_cache is None: self.book_db_title_cache = [] self.book_db_uuid_cache = [] + self.book_db_uuid_path_map = {} self.book_db_id_counts = {} for i, l in enumerate(self.booklists()): self.book_db_title_cache.append({}) self.book_db_uuid_cache.append(set()) for book in l: - book_title = book.title.lower() if book.title else '' - book_title = re.sub('(?u)\W|[_]', '', book_title) + book_title = clean_string(book.title) if book_title not in self.book_db_title_cache[i]: self.book_db_title_cache[i][book_title] = \ - {'authors':set(), 'db_ids':set(), 'uuids':set()} - book_authors = authors_to_string(book.authors).lower() - book_authors = re.sub('(?u)\W|[_]', '', book_authors) + {'authors':set(), 'db_ids':set(), + 'uuids':set(), 'paths':set(), + 'uuid_in_library':False} + book_authors = clean_string(authors_to_string(book.authors)) self.book_db_title_cache[i][book_title]['authors'].add(book_authors) db_id = getattr(book, 'application_id', None) if db_id is None: @@ -1350,32 +1359,39 @@ class DeviceMixin(object): # {{{ uuid = getattr(book, 'uuid', None) if uuid is not None: self.book_db_uuid_cache[i].add(uuid) + self.book_db_uuid_path_map[uuid] = book.path + if uuid in self.db_book_uuid_cache: + self.book_db_title_cache[i][book_title]\ + ['uuid_in_library'] = True + self.book_db_title_cache[i][book_title]['paths'].add(book.path) mi = self.library_view.model().db.get_metadata(id, index_is_id=True) for i, l in enumerate(self.booklists()): if mi.uuid in self.book_db_uuid_cache[i]: loc[i] = True + loc[4] = 'uuid' + loc[5].add(self.book_db_uuid_path_map[mi.uuid]) continue - db_title = re.sub('(?u)\W|[_]', '', mi.title.lower()) + db_title = clean_string(mi.title) cache = self.book_db_title_cache[i].get(db_title, None) - if cache: + if cache and not cache['uuid_in_library']: if id in cache['db_ids']: loc[i] = True - continue - if mi.authors and \ - re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \ - in cache['authors']: - # If we get here, then two library books have the same title - # and author. This can happen, especially in the case of - # news. Mark a match and go on. - loc[i] = True + loc[4] = 'db_id' + loc[5] = cache['paths'] continue # Also check author sort, because it can be used as author in # some formats - if mi.author_sort and \ - re.sub('(?u)\W|[_]', '', mi.author_sort.lower()) \ - in cache['authors']: + if (mi.authors and clean_string(authors_to_string(mi.authors)) + in cache['authors']) or (mi.author_sort and + clean_string(mi.author_sort) in cache['authors']): + # We really shouldn't get here, because set_books_in_library + # should have set the db_ids for the books, and therefore + # the if just above should have found them. Mark the book + # anyway, and print a message about the situation loc[i] = True + loc[4] = 'metadata' + loc[5] = cache['paths'] continue loc[3] = self.book_db_id_counts.get(id, 0) return loc @@ -1387,31 +1403,34 @@ class DeviceMixin(object): # {{{ it sets the application_id for matched books. Book_on_device uses that to both speed up matching and to count matches. ''' + + string_pat = re.compile('(?u)\W|[_]') + def clean_string(x): + x = x.lower() if x else '' + return string_pat.sub('', x) + # Force a reset if the caches are not initialized if reset or not hasattr(self, 'db_book_title_cache'): # It might be possible to get here without having initialized the # library view. In this case, simply give up - if not hasattr(self, 'library_view') or self.library_view is None: - return - db = getattr(self.library_view.model(), 'db', None) - if db is None: + try: + db = self.library_view.model().db + except: return # Build a cache (map) of the library, so the search isn't On**2 self.db_book_title_cache = {} self.db_book_uuid_cache = {} for id in db.data.iterallids(): mi = db.get_metadata(id, index_is_id=True) - title = re.sub('(?u)\W|[_]', '', mi.title.lower()) + title = clean_string(mi.title) if title not in self.db_book_title_cache: self.db_book_title_cache[title] = \ {'authors':{}, 'author_sort':{}, 'db_ids':{}} if mi.authors: - authors = authors_to_string(mi.authors).lower() - authors = re.sub('(?u)\W|[_]', '', authors) + authors = clean_string(authors_to_string(mi.authors)) self.db_book_title_cache[title]['authors'][authors] = mi if mi.author_sort: - aus = mi.author_sort.lower() - aus = re.sub('(?u)\W|[_]', '', aus) + aus = clean_string(mi.author_sort) self.db_book_title_cache[title]['author_sort'][aus] = mi self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi self.db_book_uuid_cache[mi.uuid] = mi @@ -1437,8 +1456,7 @@ class DeviceMixin(object): # {{{ self.db_book_uuid_cache[book.uuid].application_id continue - book_title = book.title.lower() if book.title else '' - book_title = re.sub('(?u)\W|[_]', '', book_title) + book_title = clean_string(book.title) book.in_library = None d = self.db_book_title_cache.get(book_title, None) if d is not None: @@ -1460,8 +1478,7 @@ class DeviceMixin(object): # {{{ if book.authors: # Compare against both author and author sort, because # either can appear as the author - book_authors = authors_to_string(book.authors).lower() - book_authors = re.sub('(?u)\W|[_]', '', book_authors) + book_authors = clean_string(authors_to_string(book.authors)) if book_authors in d['authors']: book.in_library = True book.application_id = \ diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index cc4ddb1c17..4106f8c965 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -641,7 +641,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): count = 0 on = self.book_on_device(id) if on is not None: - m, a, b, count = on + m, a, b, count = on[:4] if m is not None: loc.append(_('Main')) if a is not None: