Merge from trunk

This commit is contained in:
Charles Haley 2010-09-10 08:42:19 +01:00
commit 6fa4ce8603
4 changed files with 73 additions and 50 deletions

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
__docformat__ = 'restructuredtext en'
'''
@ -12,12 +12,12 @@ elpais.es
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais(BasicNewsRecipe):
__author__ = 'Kovid Goyal & Lorenzo Vigentini'
__author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
description = 'Main daily newspaper from Spain'
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
title = u'El Pais'
publisher = 'Ediciones El Pais SL'
publisher = u'Ediciones El Pa\xeds SL'
category = 'News, politics, culture, economy, general interest'
language = 'es'
@ -32,7 +32,8 @@ class ElPais(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
extra_css = '''
p{style:normal size:12 serif}
@ -40,25 +41,29 @@ class ElPais(BasicNewsRecipe):
remove_tags = [
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
dict(name='p', attrs={'class':'nav_meses'}),
dict(attrs={'class':['enlaces_m','miniaturas_m']})
]
feeds = [
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
(u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
(u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
(u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
(u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
(u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
(u'Econom\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
(u'Pol\xedtica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
(u'Tecnolog\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
(u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
(u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
]
(u'Medio ambiente', u'http://www.elpais.com/rss/feed.html?feedId=17071'),
(u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
]
def print_version(self, url):
url = url+'?print=1'

View File

@ -2488,7 +2488,8 @@ class ITUNES(DriverBase):
zf_opf.close()
# If 'News' or 'Catalog' in tags, tweak the title/author for friendlier display in iBooks
if _('News') or _('Catalog') in metadata.tags:
if _('News') in metadata.tags or \
_('Catalog') in metadata.tags:
if metadata.title.find('[') > 0:
metadata.title = metadata.title[:metadata.title.find('[')-1]
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))

View File

@ -1308,35 +1308,44 @@ class DeviceMixin(object): # {{{
def book_on_device(self, id, format=None, reset=False):
'''
Return an indication of whether the given book represented by its db id
is on the currently connected device. It returns a 4 element list. The
is on the currently connected device. It returns a 6 element list. The
first three elements represent memory locations main, carda, and cardb,
and are true if the book is identifiably in that memory. The fourth
is the a count of how many instances of the book were found across all
the memory locations.
is a count of how many instances of the book were found across all
the memory locations. The fifth is the type of match. The type can be
one of: None, 'uuid', 'db_id', 'metadata'. The sixth is a set of paths to the
matching books on the device.
'''
loc = [None, None, None, 0]
loc = [None, None, None, 0, None, set([])]
if reset:
self.book_db_title_cache = None
self.book_db_uuid_cache = None
self.book_db_id_counts = None
self.book_db_uuid_path_map = None
return
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
return string_pat.sub('', x)
if self.book_db_title_cache is None:
self.book_db_title_cache = []
self.book_db_uuid_cache = []
self.book_db_uuid_path_map = {}
self.book_db_id_counts = {}
for i, l in enumerate(self.booklists()):
self.book_db_title_cache.append({})
self.book_db_uuid_cache.append(set())
for book in l:
book_title = book.title.lower() if book.title else ''
book_title = re.sub('(?u)\W|[_]', '', book_title)
book_title = clean_string(book.title)
if book_title not in self.book_db_title_cache[i]:
self.book_db_title_cache[i][book_title] = \
{'authors':set(), 'db_ids':set(), 'uuids':set()}
book_authors = authors_to_string(book.authors).lower()
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
{'authors':set(), 'db_ids':set(),
'uuids':set(), 'paths':set(),
'uuid_in_library':False}
book_authors = clean_string(authors_to_string(book.authors))
self.book_db_title_cache[i][book_title]['authors'].add(book_authors)
db_id = getattr(book, 'application_id', None)
if db_id is None:
@ -1350,32 +1359,39 @@ class DeviceMixin(object): # {{{
uuid = getattr(book, 'uuid', None)
if uuid is not None:
self.book_db_uuid_cache[i].add(uuid)
self.book_db_uuid_path_map[uuid] = book.path
if uuid in self.db_book_uuid_cache:
self.book_db_title_cache[i][book_title]\
['uuid_in_library'] = True
self.book_db_title_cache[i][book_title]['paths'].add(book.path)
mi = self.library_view.model().db.get_metadata(id, index_is_id=True)
for i, l in enumerate(self.booklists()):
if mi.uuid in self.book_db_uuid_cache[i]:
loc[i] = True
loc[4] = 'uuid'
loc[5].add(self.book_db_uuid_path_map[mi.uuid])
continue
db_title = re.sub('(?u)\W|[_]', '', mi.title.lower())
db_title = clean_string(mi.title)
cache = self.book_db_title_cache[i].get(db_title, None)
if cache:
if cache and not cache['uuid_in_library']:
if id in cache['db_ids']:
loc[i] = True
continue
if mi.authors and \
re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \
in cache['authors']:
# If we get here, then two library books have the same title
# and author. This can happen, especially in the case of
# news. Mark a match and go on.
loc[i] = True
loc[4] = 'db_id'
loc[5] = cache['paths']
continue
# Also check author sort, because it can be used as author in
# some formats
if mi.author_sort and \
re.sub('(?u)\W|[_]', '', mi.author_sort.lower()) \
in cache['authors']:
if (mi.authors and clean_string(authors_to_string(mi.authors))
in cache['authors']) or (mi.author_sort and
clean_string(mi.author_sort) in cache['authors']):
# We really shouldn't get here, because set_books_in_library
# should have set the db_ids for the books, and therefore
# the if just above should have found them. Mark the book
# anyway, and print a message about the situation
loc[i] = True
loc[4] = 'metadata'
loc[5] = cache['paths']
continue
loc[3] = self.book_db_id_counts.get(id, 0)
return loc
@ -1387,31 +1403,34 @@ class DeviceMixin(object): # {{{
it sets the application_id for matched books. Book_on_device uses that
to both speed up matching and to count matches.
'''
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
return string_pat.sub('', x)
# Force a reset if the caches are not initialized
if reset or not hasattr(self, 'db_book_title_cache'):
# It might be possible to get here without having initialized the
# library view. In this case, simply give up
if not hasattr(self, 'library_view') or self.library_view is None:
return
db = getattr(self.library_view.model(), 'db', None)
if db is None:
try:
db = self.library_view.model().db
except:
return
# Build a cache (map) of the library, so the search isn't O(n**2)
self.db_book_title_cache = {}
self.db_book_uuid_cache = {}
for id in db.data.iterallids():
mi = db.get_metadata(id, index_is_id=True)
title = re.sub('(?u)\W|[_]', '', mi.title.lower())
title = clean_string(mi.title)
if title not in self.db_book_title_cache:
self.db_book_title_cache[title] = \
{'authors':{}, 'author_sort':{}, 'db_ids':{}}
if mi.authors:
authors = authors_to_string(mi.authors).lower()
authors = re.sub('(?u)\W|[_]', '', authors)
authors = clean_string(authors_to_string(mi.authors))
self.db_book_title_cache[title]['authors'][authors] = mi
if mi.author_sort:
aus = mi.author_sort.lower()
aus = re.sub('(?u)\W|[_]', '', aus)
aus = clean_string(mi.author_sort)
self.db_book_title_cache[title]['author_sort'][aus] = mi
self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
self.db_book_uuid_cache[mi.uuid] = mi
@ -1437,8 +1456,7 @@ class DeviceMixin(object): # {{{
self.db_book_uuid_cache[book.uuid].application_id
continue
book_title = book.title.lower() if book.title else ''
book_title = re.sub('(?u)\W|[_]', '', book_title)
book_title = clean_string(book.title)
book.in_library = None
d = self.db_book_title_cache.get(book_title, None)
if d is not None:
@ -1460,8 +1478,7 @@ class DeviceMixin(object): # {{{
if book.authors:
# Compare against both author and author sort, because
# either can appear as the author
book_authors = authors_to_string(book.authors).lower()
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
book_authors = clean_string(authors_to_string(book.authors))
if book_authors in d['authors']:
book.in_library = True
book.application_id = \

View File

@ -641,7 +641,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
count = 0
on = self.book_on_device(id)
if on is not None:
m, a, b, count = on
m, a, b, count = on[:4]
if m is not None:
loc.append(_('Main'))
if a is not None: