Merge from trunk

This commit is contained in:
Charles Haley 2010-09-10 08:42:19 +01:00
commit 6fa4ce8603
4 changed files with 73 additions and 50 deletions

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal' __author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)' description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
''' '''
@ -12,12 +12,12 @@ elpais.es
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElPais(BasicNewsRecipe): class ElPais(BasicNewsRecipe):
__author__ = 'Kovid Goyal & Lorenzo Vigentini' __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
description = 'Main daily newspaper from Spain' description = 'Main daily newspaper from Spain'
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif' cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
title = u'El Pais' title = u'El Pais'
publisher = 'Ediciones El Pais SL' publisher = u'Ediciones El Pa\xeds SL'
category = 'News, politics, culture, economy, general interest' category = 'News, politics, culture, economy, general interest'
language = 'es' language = 'es'
@ -32,7 +32,8 @@ class ElPais(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})] keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
extra_css = ''' extra_css = '''
p{style:normal size:12 serif} p{style:normal size:12 serif}
@ -40,24 +41,28 @@ class ElPais(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}), dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}), dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']}) dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
dict(name='p', attrs={'class':'nav_meses'}),
dict(attrs={'class':['enlaces_m','miniaturas_m']})
] ]
feeds = [ feeds = [
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'), (u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'), (u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
(u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'), (u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'), (u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
(u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'), (u'Econom\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
(u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'), (u'Pol\xedtica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
(u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'), (u'Tecnolog\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'), (u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'), (u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'), (u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
(u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'), (u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'), (u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'), (u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
(u'Medio ambiente', u'http://www.elpais.com/rss/feed.html?feedId=17071'),
(u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
] ]
def print_version(self, url): def print_version(self, url):

View File

@ -2488,7 +2488,8 @@ class ITUNES(DriverBase):
zf_opf.close() zf_opf.close()
# If 'News' in tags, tweak the title/author for friendlier display in iBooks # If 'News' in tags, tweak the title/author for friendlier display in iBooks
if _('News') or _('Catalog') in metadata.tags: if _('News') in metadata.tags or \
_('Catalog') in metadata.tags:
if metadata.title.find('[') > 0: if metadata.title.find('[') > 0:
metadata.title = metadata.title[:metadata.title.find('[')-1] metadata.title = metadata.title[:metadata.title.find('[')-1]
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y')) date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))

View File

@ -1308,35 +1308,44 @@ class DeviceMixin(object): # {{{
def book_on_device(self, id, format=None, reset=False): def book_on_device(self, id, format=None, reset=False):
''' '''
Return an indication of whether the given book represented by its db id Return an indication of whether the given book represented by its db id
is on the currently connected device. It returns a 4 element list. The is on the currently connected device. It returns a 6 element list. The
first three elements represent memory locations main, carda, and cardb, first three elements represent memory locations main, carda, and cardb,
and are true if the book is identifiably in that memory. The fourth and are true if the book is identifiably in that memory. The fourth
is the a count of how many instances of the book were found across all is a count of how many instances of the book were found across all
the memory locations. the memory locations. The fifth is the type of match. The type can be
one of: None, 'uuid', 'db_id', 'metadata'. The sixth is a set of paths to the
matching books on the device.
''' '''
loc = [None, None, None, 0] loc = [None, None, None, 0, None, set([])]
if reset: if reset:
self.book_db_title_cache = None self.book_db_title_cache = None
self.book_db_uuid_cache = None self.book_db_uuid_cache = None
self.book_db_id_counts = None self.book_db_id_counts = None
self.book_db_uuid_path_map = None
return return
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
return string_pat.sub('', x)
if self.book_db_title_cache is None: if self.book_db_title_cache is None:
self.book_db_title_cache = [] self.book_db_title_cache = []
self.book_db_uuid_cache = [] self.book_db_uuid_cache = []
self.book_db_uuid_path_map = {}
self.book_db_id_counts = {} self.book_db_id_counts = {}
for i, l in enumerate(self.booklists()): for i, l in enumerate(self.booklists()):
self.book_db_title_cache.append({}) self.book_db_title_cache.append({})
self.book_db_uuid_cache.append(set()) self.book_db_uuid_cache.append(set())
for book in l: for book in l:
book_title = book.title.lower() if book.title else '' book_title = clean_string(book.title)
book_title = re.sub('(?u)\W|[_]', '', book_title)
if book_title not in self.book_db_title_cache[i]: if book_title not in self.book_db_title_cache[i]:
self.book_db_title_cache[i][book_title] = \ self.book_db_title_cache[i][book_title] = \
{'authors':set(), 'db_ids':set(), 'uuids':set()} {'authors':set(), 'db_ids':set(),
book_authors = authors_to_string(book.authors).lower() 'uuids':set(), 'paths':set(),
book_authors = re.sub('(?u)\W|[_]', '', book_authors) 'uuid_in_library':False}
book_authors = clean_string(authors_to_string(book.authors))
self.book_db_title_cache[i][book_title]['authors'].add(book_authors) self.book_db_title_cache[i][book_title]['authors'].add(book_authors)
db_id = getattr(book, 'application_id', None) db_id = getattr(book, 'application_id', None)
if db_id is None: if db_id is None:
@ -1350,32 +1359,39 @@ class DeviceMixin(object): # {{{
uuid = getattr(book, 'uuid', None) uuid = getattr(book, 'uuid', None)
if uuid is not None: if uuid is not None:
self.book_db_uuid_cache[i].add(uuid) self.book_db_uuid_cache[i].add(uuid)
self.book_db_uuid_path_map[uuid] = book.path
if uuid in self.db_book_uuid_cache:
self.book_db_title_cache[i][book_title]\
['uuid_in_library'] = True
self.book_db_title_cache[i][book_title]['paths'].add(book.path)
mi = self.library_view.model().db.get_metadata(id, index_is_id=True) mi = self.library_view.model().db.get_metadata(id, index_is_id=True)
for i, l in enumerate(self.booklists()): for i, l in enumerate(self.booklists()):
if mi.uuid in self.book_db_uuid_cache[i]: if mi.uuid in self.book_db_uuid_cache[i]:
loc[i] = True loc[i] = True
loc[4] = 'uuid'
loc[5].add(self.book_db_uuid_path_map[mi.uuid])
continue continue
db_title = re.sub('(?u)\W|[_]', '', mi.title.lower()) db_title = clean_string(mi.title)
cache = self.book_db_title_cache[i].get(db_title, None) cache = self.book_db_title_cache[i].get(db_title, None)
if cache: if cache and not cache['uuid_in_library']:
if id in cache['db_ids']: if id in cache['db_ids']:
loc[i] = True loc[i] = True
continue loc[4] = 'db_id'
if mi.authors and \ loc[5] = cache['paths']
re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \
in cache['authors']:
# If we get here, then two library books have the same title
# and author. This can happen, especially in the case of
# news. Mark a match and go on.
loc[i] = True
continue continue
# Also check author sort, because it can be used as author in # Also check author sort, because it can be used as author in
# some formats # some formats
if mi.author_sort and \ if (mi.authors and clean_string(authors_to_string(mi.authors))
re.sub('(?u)\W|[_]', '', mi.author_sort.lower()) \ in cache['authors']) or (mi.author_sort and
in cache['authors']: clean_string(mi.author_sort) in cache['authors']):
# We really shouldn't get here, because set_books_in_library
# should have set the db_ids for the books, and therefore
# the if just above should have found them. Mark the book
# anyway, and print a message about the situation
loc[i] = True loc[i] = True
loc[4] = 'metadata'
loc[5] = cache['paths']
continue continue
loc[3] = self.book_db_id_counts.get(id, 0) loc[3] = self.book_db_id_counts.get(id, 0)
return loc return loc
@ -1387,31 +1403,34 @@ class DeviceMixin(object): # {{{
it sets the application_id for matched books. Book_on_device uses that it sets the application_id for matched books. Book_on_device uses that
to both speed up matching and to count matches. to both speed up matching and to count matches.
''' '''
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
return string_pat.sub('', x)
# Force a reset if the caches are not initialized # Force a reset if the caches are not initialized
if reset or not hasattr(self, 'db_book_title_cache'): if reset or not hasattr(self, 'db_book_title_cache'):
# It might be possible to get here without having initialized the # It might be possible to get here without having initialized the
# library view. In this case, simply give up # library view. In this case, simply give up
if not hasattr(self, 'library_view') or self.library_view is None: try:
return db = self.library_view.model().db
db = getattr(self.library_view.model(), 'db', None) except:
if db is None:
return return
# Build a cache (map) of the library, so the search isn't On**2 # Build a cache (map) of the library, so the search isn't On**2
self.db_book_title_cache = {} self.db_book_title_cache = {}
self.db_book_uuid_cache = {} self.db_book_uuid_cache = {}
for id in db.data.iterallids(): for id in db.data.iterallids():
mi = db.get_metadata(id, index_is_id=True) mi = db.get_metadata(id, index_is_id=True)
title = re.sub('(?u)\W|[_]', '', mi.title.lower()) title = clean_string(mi.title)
if title not in self.db_book_title_cache: if title not in self.db_book_title_cache:
self.db_book_title_cache[title] = \ self.db_book_title_cache[title] = \
{'authors':{}, 'author_sort':{}, 'db_ids':{}} {'authors':{}, 'author_sort':{}, 'db_ids':{}}
if mi.authors: if mi.authors:
authors = authors_to_string(mi.authors).lower() authors = clean_string(authors_to_string(mi.authors))
authors = re.sub('(?u)\W|[_]', '', authors)
self.db_book_title_cache[title]['authors'][authors] = mi self.db_book_title_cache[title]['authors'][authors] = mi
if mi.author_sort: if mi.author_sort:
aus = mi.author_sort.lower() aus = clean_string(mi.author_sort)
aus = re.sub('(?u)\W|[_]', '', aus)
self.db_book_title_cache[title]['author_sort'][aus] = mi self.db_book_title_cache[title]['author_sort'][aus] = mi
self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
self.db_book_uuid_cache[mi.uuid] = mi self.db_book_uuid_cache[mi.uuid] = mi
@ -1437,8 +1456,7 @@ class DeviceMixin(object): # {{{
self.db_book_uuid_cache[book.uuid].application_id self.db_book_uuid_cache[book.uuid].application_id
continue continue
book_title = book.title.lower() if book.title else '' book_title = clean_string(book.title)
book_title = re.sub('(?u)\W|[_]', '', book_title)
book.in_library = None book.in_library = None
d = self.db_book_title_cache.get(book_title, None) d = self.db_book_title_cache.get(book_title, None)
if d is not None: if d is not None:
@ -1460,8 +1478,7 @@ class DeviceMixin(object): # {{{
if book.authors: if book.authors:
# Compare against both author and author sort, because # Compare against both author and author sort, because
# either can appear as the author # either can appear as the author
book_authors = authors_to_string(book.authors).lower() book_authors = clean_string(authors_to_string(book.authors))
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
if book_authors in d['authors']: if book_authors in d['authors']:
book.in_library = True book.in_library = True
book.application_id = \ book.application_id = \

View File

@ -641,7 +641,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
count = 0 count = 0
on = self.book_on_device(id) on = self.book_on_device(id)
if on is not None: if on is not None:
m, a, b, count = on m, a, b, count = on[:4]
if m is not None: if m is not None:
loc.append(_('Main')) loc.append(_('Main'))
if a is not None: if a is not None: