mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
6fa4ce8603
@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
|
__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
|
description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -12,12 +12,12 @@ elpais.es
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElPais(BasicNewsRecipe):
|
class ElPais(BasicNewsRecipe):
|
||||||
__author__ = 'Kovid Goyal & Lorenzo Vigentini'
|
__author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
|
||||||
description = 'Main daily newspaper from Spain'
|
description = 'Main daily newspaper from Spain'
|
||||||
|
|
||||||
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
|
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
|
||||||
title = u'El Pais'
|
title = u'El Pais'
|
||||||
publisher = 'Ediciones El Pais SL'
|
publisher = u'Ediciones El Pa\xeds SL'
|
||||||
category = 'News, politics, culture, economy, general interest'
|
category = 'News, politics, culture, economy, general interest'
|
||||||
|
|
||||||
language = 'es'
|
language = 'es'
|
||||||
@ -32,7 +32,8 @@ class ElPais(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
|
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
p{style:normal size:12 serif}
|
p{style:normal size:12 serif}
|
||||||
|
|
||||||
@ -40,24 +41,28 @@ class ElPais(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
|
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
|
||||||
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
|
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
|
||||||
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
|
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
|
||||||
|
dict(name='p', attrs={'class':'nav_meses'}),
|
||||||
|
dict(attrs={'class':['enlaces_m','miniaturas_m']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
|
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
|
||||||
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
|
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
|
||||||
(u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
|
(u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
|
||||||
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
|
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
|
||||||
(u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
|
(u'Econom\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
|
||||||
(u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
|
(u'Pol\xedtica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
|
||||||
(u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
|
(u'Tecnolog\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
|
||||||
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
|
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
|
||||||
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
|
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
|
||||||
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
|
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
|
||||||
(u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
|
(u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
|
||||||
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
|
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
|
||||||
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
|
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
|
||||||
|
(u'Medio ambiente', u'http://www.elpais.com/rss/feed.html?feedId=17071'),
|
||||||
|
(u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
@ -2488,7 +2488,8 @@ class ITUNES(DriverBase):
|
|||||||
zf_opf.close()
|
zf_opf.close()
|
||||||
|
|
||||||
# If 'News' in tags, tweak the title/author for friendlier display in iBooks
|
# If 'News' in tags, tweak the title/author for friendlier display in iBooks
|
||||||
if _('News') or _('Catalog') in metadata.tags:
|
if _('News') in metadata.tags or \
|
||||||
|
_('Catalog') in metadata.tags:
|
||||||
if metadata.title.find('[') > 0:
|
if metadata.title.find('[') > 0:
|
||||||
metadata.title = metadata.title[:metadata.title.find('[')-1]
|
metadata.title = metadata.title[:metadata.title.find('[')-1]
|
||||||
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
|
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
|
||||||
|
@ -1308,35 +1308,44 @@ class DeviceMixin(object): # {{{
|
|||||||
def book_on_device(self, id, format=None, reset=False):
|
def book_on_device(self, id, format=None, reset=False):
|
||||||
'''
|
'''
|
||||||
Return an indication of whether the given book represented by its db id
|
Return an indication of whether the given book represented by its db id
|
||||||
is on the currently connected device. It returns a 4 element list. The
|
is on the currently connected device. It returns a 6 element list. The
|
||||||
first three elements represent memory locations main, carda, and cardb,
|
first three elements represent memory locations main, carda, and cardb,
|
||||||
and are true if the book is identifiably in that memory. The fourth
|
and are true if the book is identifiably in that memory. The fourth
|
||||||
is the a count of how many instances of the book were found across all
|
is a count of how many instances of the book were found across all
|
||||||
the memory locations.
|
the memory locations. The fifth is the type of match. The type can be
|
||||||
|
one of: None, 'uuid', 'db_id', 'metadata'. The sixth is a set of paths to the
|
||||||
|
matching books on the device.
|
||||||
'''
|
'''
|
||||||
loc = [None, None, None, 0]
|
loc = [None, None, None, 0, None, set([])]
|
||||||
|
|
||||||
if reset:
|
if reset:
|
||||||
self.book_db_title_cache = None
|
self.book_db_title_cache = None
|
||||||
self.book_db_uuid_cache = None
|
self.book_db_uuid_cache = None
|
||||||
self.book_db_id_counts = None
|
self.book_db_id_counts = None
|
||||||
|
self.book_db_uuid_path_map = None
|
||||||
return
|
return
|
||||||
|
|
||||||
|
string_pat = re.compile('(?u)\W|[_]')
|
||||||
|
def clean_string(x):
|
||||||
|
x = x.lower() if x else ''
|
||||||
|
return string_pat.sub('', x)
|
||||||
|
|
||||||
if self.book_db_title_cache is None:
|
if self.book_db_title_cache is None:
|
||||||
self.book_db_title_cache = []
|
self.book_db_title_cache = []
|
||||||
self.book_db_uuid_cache = []
|
self.book_db_uuid_cache = []
|
||||||
|
self.book_db_uuid_path_map = {}
|
||||||
self.book_db_id_counts = {}
|
self.book_db_id_counts = {}
|
||||||
for i, l in enumerate(self.booklists()):
|
for i, l in enumerate(self.booklists()):
|
||||||
self.book_db_title_cache.append({})
|
self.book_db_title_cache.append({})
|
||||||
self.book_db_uuid_cache.append(set())
|
self.book_db_uuid_cache.append(set())
|
||||||
for book in l:
|
for book in l:
|
||||||
book_title = book.title.lower() if book.title else ''
|
book_title = clean_string(book.title)
|
||||||
book_title = re.sub('(?u)\W|[_]', '', book_title)
|
|
||||||
if book_title not in self.book_db_title_cache[i]:
|
if book_title not in self.book_db_title_cache[i]:
|
||||||
self.book_db_title_cache[i][book_title] = \
|
self.book_db_title_cache[i][book_title] = \
|
||||||
{'authors':set(), 'db_ids':set(), 'uuids':set()}
|
{'authors':set(), 'db_ids':set(),
|
||||||
book_authors = authors_to_string(book.authors).lower()
|
'uuids':set(), 'paths':set(),
|
||||||
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
|
'uuid_in_library':False}
|
||||||
|
book_authors = clean_string(authors_to_string(book.authors))
|
||||||
self.book_db_title_cache[i][book_title]['authors'].add(book_authors)
|
self.book_db_title_cache[i][book_title]['authors'].add(book_authors)
|
||||||
db_id = getattr(book, 'application_id', None)
|
db_id = getattr(book, 'application_id', None)
|
||||||
if db_id is None:
|
if db_id is None:
|
||||||
@ -1350,32 +1359,39 @@ class DeviceMixin(object): # {{{
|
|||||||
uuid = getattr(book, 'uuid', None)
|
uuid = getattr(book, 'uuid', None)
|
||||||
if uuid is not None:
|
if uuid is not None:
|
||||||
self.book_db_uuid_cache[i].add(uuid)
|
self.book_db_uuid_cache[i].add(uuid)
|
||||||
|
self.book_db_uuid_path_map[uuid] = book.path
|
||||||
|
if uuid in self.db_book_uuid_cache:
|
||||||
|
self.book_db_title_cache[i][book_title]\
|
||||||
|
['uuid_in_library'] = True
|
||||||
|
self.book_db_title_cache[i][book_title]['paths'].add(book.path)
|
||||||
|
|
||||||
mi = self.library_view.model().db.get_metadata(id, index_is_id=True)
|
mi = self.library_view.model().db.get_metadata(id, index_is_id=True)
|
||||||
for i, l in enumerate(self.booklists()):
|
for i, l in enumerate(self.booklists()):
|
||||||
if mi.uuid in self.book_db_uuid_cache[i]:
|
if mi.uuid in self.book_db_uuid_cache[i]:
|
||||||
loc[i] = True
|
loc[i] = True
|
||||||
|
loc[4] = 'uuid'
|
||||||
|
loc[5].add(self.book_db_uuid_path_map[mi.uuid])
|
||||||
continue
|
continue
|
||||||
db_title = re.sub('(?u)\W|[_]', '', mi.title.lower())
|
db_title = clean_string(mi.title)
|
||||||
cache = self.book_db_title_cache[i].get(db_title, None)
|
cache = self.book_db_title_cache[i].get(db_title, None)
|
||||||
if cache:
|
if cache and not cache['uuid_in_library']:
|
||||||
if id in cache['db_ids']:
|
if id in cache['db_ids']:
|
||||||
loc[i] = True
|
loc[i] = True
|
||||||
continue
|
loc[4] = 'db_id'
|
||||||
if mi.authors and \
|
loc[5] = cache['paths']
|
||||||
re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \
|
|
||||||
in cache['authors']:
|
|
||||||
# If we get here, then two library books have the same title
|
|
||||||
# and author. This can happen, especially in the case of
|
|
||||||
# news. Mark a match and go on.
|
|
||||||
loc[i] = True
|
|
||||||
continue
|
continue
|
||||||
# Also check author sort, because it can be used as author in
|
# Also check author sort, because it can be used as author in
|
||||||
# some formats
|
# some formats
|
||||||
if mi.author_sort and \
|
if (mi.authors and clean_string(authors_to_string(mi.authors))
|
||||||
re.sub('(?u)\W|[_]', '', mi.author_sort.lower()) \
|
in cache['authors']) or (mi.author_sort and
|
||||||
in cache['authors']:
|
clean_string(mi.author_sort) in cache['authors']):
|
||||||
|
# We really shouldn't get here, because set_books_in_library
|
||||||
|
# should have set the db_ids for the books, and therefore
|
||||||
|
# the if just above should have found them. Mark the book
|
||||||
|
# anyway, and print a message about the situation
|
||||||
loc[i] = True
|
loc[i] = True
|
||||||
|
loc[4] = 'metadata'
|
||||||
|
loc[5] = cache['paths']
|
||||||
continue
|
continue
|
||||||
loc[3] = self.book_db_id_counts.get(id, 0)
|
loc[3] = self.book_db_id_counts.get(id, 0)
|
||||||
return loc
|
return loc
|
||||||
@ -1387,31 +1403,34 @@ class DeviceMixin(object): # {{{
|
|||||||
it sets the application_id for matched books. Book_on_device uses that
|
it sets the application_id for matched books. Book_on_device uses that
|
||||||
to both speed up matching and to count matches.
|
to both speed up matching and to count matches.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
string_pat = re.compile('(?u)\W|[_]')
|
||||||
|
def clean_string(x):
|
||||||
|
x = x.lower() if x else ''
|
||||||
|
return string_pat.sub('', x)
|
||||||
|
|
||||||
# Force a reset if the caches are not initialized
|
# Force a reset if the caches are not initialized
|
||||||
if reset or not hasattr(self, 'db_book_title_cache'):
|
if reset or not hasattr(self, 'db_book_title_cache'):
|
||||||
# It might be possible to get here without having initialized the
|
# It might be possible to get here without having initialized the
|
||||||
# library view. In this case, simply give up
|
# library view. In this case, simply give up
|
||||||
if not hasattr(self, 'library_view') or self.library_view is None:
|
try:
|
||||||
return
|
db = self.library_view.model().db
|
||||||
db = getattr(self.library_view.model(), 'db', None)
|
except:
|
||||||
if db is None:
|
|
||||||
return
|
return
|
||||||
# Build a cache (map) of the library, so the search isn't On**2
|
# Build a cache (map) of the library, so the search isn't On**2
|
||||||
self.db_book_title_cache = {}
|
self.db_book_title_cache = {}
|
||||||
self.db_book_uuid_cache = {}
|
self.db_book_uuid_cache = {}
|
||||||
for id in db.data.iterallids():
|
for id in db.data.iterallids():
|
||||||
mi = db.get_metadata(id, index_is_id=True)
|
mi = db.get_metadata(id, index_is_id=True)
|
||||||
title = re.sub('(?u)\W|[_]', '', mi.title.lower())
|
title = clean_string(mi.title)
|
||||||
if title not in self.db_book_title_cache:
|
if title not in self.db_book_title_cache:
|
||||||
self.db_book_title_cache[title] = \
|
self.db_book_title_cache[title] = \
|
||||||
{'authors':{}, 'author_sort':{}, 'db_ids':{}}
|
{'authors':{}, 'author_sort':{}, 'db_ids':{}}
|
||||||
if mi.authors:
|
if mi.authors:
|
||||||
authors = authors_to_string(mi.authors).lower()
|
authors = clean_string(authors_to_string(mi.authors))
|
||||||
authors = re.sub('(?u)\W|[_]', '', authors)
|
|
||||||
self.db_book_title_cache[title]['authors'][authors] = mi
|
self.db_book_title_cache[title]['authors'][authors] = mi
|
||||||
if mi.author_sort:
|
if mi.author_sort:
|
||||||
aus = mi.author_sort.lower()
|
aus = clean_string(mi.author_sort)
|
||||||
aus = re.sub('(?u)\W|[_]', '', aus)
|
|
||||||
self.db_book_title_cache[title]['author_sort'][aus] = mi
|
self.db_book_title_cache[title]['author_sort'][aus] = mi
|
||||||
self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
|
self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
|
||||||
self.db_book_uuid_cache[mi.uuid] = mi
|
self.db_book_uuid_cache[mi.uuid] = mi
|
||||||
@ -1437,8 +1456,7 @@ class DeviceMixin(object): # {{{
|
|||||||
self.db_book_uuid_cache[book.uuid].application_id
|
self.db_book_uuid_cache[book.uuid].application_id
|
||||||
continue
|
continue
|
||||||
|
|
||||||
book_title = book.title.lower() if book.title else ''
|
book_title = clean_string(book.title)
|
||||||
book_title = re.sub('(?u)\W|[_]', '', book_title)
|
|
||||||
book.in_library = None
|
book.in_library = None
|
||||||
d = self.db_book_title_cache.get(book_title, None)
|
d = self.db_book_title_cache.get(book_title, None)
|
||||||
if d is not None:
|
if d is not None:
|
||||||
@ -1460,8 +1478,7 @@ class DeviceMixin(object): # {{{
|
|||||||
if book.authors:
|
if book.authors:
|
||||||
# Compare against both author and author sort, because
|
# Compare against both author and author sort, because
|
||||||
# either can appear as the author
|
# either can appear as the author
|
||||||
book_authors = authors_to_string(book.authors).lower()
|
book_authors = clean_string(authors_to_string(book.authors))
|
||||||
book_authors = re.sub('(?u)\W|[_]', '', book_authors)
|
|
||||||
if book_authors in d['authors']:
|
if book_authors in d['authors']:
|
||||||
book.in_library = True
|
book.in_library = True
|
||||||
book.application_id = \
|
book.application_id = \
|
||||||
|
@ -641,7 +641,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
count = 0
|
count = 0
|
||||||
on = self.book_on_device(id)
|
on = self.book_on_device(id)
|
||||||
if on is not None:
|
if on is not None:
|
||||||
m, a, b, count = on
|
m, a, b, count = on[:4]
|
||||||
if m is not None:
|
if m is not None:
|
||||||
loc.append(_('Main'))
|
loc.append(_('Main'))
|
||||||
if a is not None:
|
if a is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user