mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
5b9ec88d3a
@ -1,35 +1,43 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Gerardo Diez'
|
||||
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
|
||||
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
|
||||
__author__ = 'desUBIKado, based on an earlier version by Gerardo Diez'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '13, November 2011'
|
||||
|
||||
'''
|
||||
expansion.es
|
||||
[url]http://www.expansion.com/[/url]
|
||||
'''
|
||||
|
||||
import time
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class Publico(BasicNewsRecipe):
|
||||
title =u'Expansion.com'
|
||||
__author__ ='Gerardo Diez'
|
||||
publisher =u'Unidad Editorial Información Económica, S.L.'
|
||||
category ='finances, catalunya'
|
||||
oldest_article =1
|
||||
|
||||
class expansion_spanish(BasicNewsRecipe):
|
||||
__author__ ='Gerardo Diez & desUBIKado'
|
||||
description ='Financial news from Spain'
|
||||
title =u'Expansion'
|
||||
publisher =u'Unidad Editorial Internet, S.L.'
|
||||
category ='news, finances, Spain'
|
||||
oldest_article = 2
|
||||
simultaneous_downloads = 10
|
||||
max_articles_per_feed =100
|
||||
simultaneous_downloads =10
|
||||
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
|
||||
timefmt ='[%A, %d %B, %Y]'
|
||||
encoding ='latin'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
encoding ='iso-8859-15'
|
||||
language ='es'
|
||||
remove_javascript =True
|
||||
no_stylesheets =True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
|
||||
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
|
||||
|
||||
remove_tags =[
|
||||
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
|
||||
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
|
||||
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
|
||||
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
|
||||
dict(name='span', attrs={'class':['comentarios']}),
|
||||
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
|
||||
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
|
||||
dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
|
||||
]
|
||||
feeds =[
|
||||
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
|
||||
@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
|
||||
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
|
||||
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
|
||||
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
|
||||
|
||||
(u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
|
||||
(u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
|
||||
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
|
||||
(u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
|
||||
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
|
||||
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
|
||||
(u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
|
||||
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
|
||||
|
||||
(u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
|
||||
(u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
|
||||
(u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
|
||||
(u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
|
||||
(u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
|
||||
(u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
|
||||
(u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
|
||||
(u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
|
||||
(u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
|
||||
(u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
|
||||
(u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
|
||||
(u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
|
||||
(u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
|
||||
(u'Deporte y Negocio', u' [url]http://estaticos.expansion.com/rss/empresasdeporte.xml[/url]'),
|
||||
(u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
|
||||
(u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
|
||||
(u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
|
||||
|
||||
(u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
|
||||
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
|
||||
(u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
|
||||
(u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
|
||||
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
|
||||
|
||||
(u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
|
||||
(u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
|
||||
(u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
|
||||
(u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
|
||||
|
||||
(u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
|
||||
(u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
|
||||
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
|
||||
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
|
||||
(u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
|
||||
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
|
||||
|
||||
(u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
|
||||
(u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'),
|
||||
(u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
|
||||
(u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
|
||||
(u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
|
||||
]
|
||||
|
||||
# Obtener la imagen de portada
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the cover image for today's edition.

    Builds the kiosko.net front-page URL from the local date and tries to
    open it. If opening fails, the failure is logged and a static logo URL
    is returned instead.
    """
    # Example: http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg
    today = time.strftime('%Y/%m/%d', time.localtime())
    cover = 'http://img5.kiosko.net/' + today + '/es/expansion.750.jpg'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: any download problem falls back to the static logo.
        # A bare "except:" would also swallow KeyboardInterrupt/SystemExit.
        self.log("\nPortada no disponible")
        cover = 'http://www.aproahp.org/enlaces/images/diario_expansion.gif'
    return cover
|
||||
|
||||
|
||||
|
||||
# Para que no salte la publicidad al recuperar la noticia, y que siempre se recupere
|
||||
# la página web, mando la variable "t" con la hora "linux" o "epoch" actual
|
||||
# haciendole creer al sitio web que justo se acaba de ver la publicidad
|
||||
|
||||
def print_version(self, url):
    """Return ``url`` with the current epoch time appended as ``?t=<seconds>``.

    Every occurrence of ``.html`` in the URL is replaced by
    ``.html?t=<seconds>``; a URL without ``.html`` is returned unchanged.
    """
    epoch_seconds = int(time.time())
    return url.replace('.html', '.html?t=' + str(epoch_seconds))
|
||||
|
||||
|
||||
|
||||
_processed_links = []
|
||||
|
||||
def get_article_url(self, article):
    """Return the article's real URL, or None if it must be skipped.

    Links ending in ``story01.htm`` (the "feedsportal" form) carry the
    original address in the previous path segment with some characters
    replaced by two-to-four character codes; those codes are decoded here.
    A link that was already returned for an earlier article yields None so
    the same article is not reported again from another feed.

    :param article: feed entry offering ``get('link', default)``.
    :return: the URL string, or None when there is no link or it is a
             duplicate.
    """
    link = article.get('link', None)
    if link is None:
        # No link available: report "no URL" instead of handing the whole
        # entry back as if it were a URL.
        return None

    segments = link.split('/')
    if segments[-1] == "story01.htm":
        link = segments[-2]
        # Order matters: the codes are substituted strictly in this sequence.
        encoded = ['0B', '0C', '0D', '0E', '0F', '0G', '0N', '0L0S', '0A']
        decoded = ['.', '/', '?', '-', '=', '&', '.com', 'www.', '0']
        for token, plain in zip(encoded, decoded):
            link = link.replace(token, plain)
        link = "http://" + link

    # Drop articles already seen in other feeds.
    if link in self._processed_links:
        return None
    self._processed_links.append(link)
    return link
|
||||
|
||||
|
||||
|
||||
# Un poco de css para mejorar la presentación de las noticias
|
||||
|
||||
extra_css = '''
|
||||
.entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
|
||||
.fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
|
||||
'''
|
||||
|
||||
|
||||
|
||||
# Show the image of embedded videos: rewrite the player's script text so
# that it becomes an <img> tag (presumably "var imagen" holds the still
# image URL -- confirm against the live page markup).
preprocess_regexps = [
    (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
    # The dot is escaped so that only a literal ".jpg" extension is rewritten.
    (re.compile(r'\.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
    (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
]
|
||||
|
@ -11,17 +11,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Salon_com(BasicNewsRecipe):
|
||||
title = 'Salon.com'
|
||||
__author__ = 'cix3'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
|
||||
timefmt = ' [%b %d, %Y]'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':['ad_content', 'clearfix']}), dict(name='hr'), dict(name='img')]
|
||||
|
||||
remove_tags_before = dict(name='h2')
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@class="art"]'
|
||||
remove_empty_feeds = True
|
||||
|
||||
feeds = [
|
||||
('News & Politics', 'http://feeds.salon.com/salon/news'),
|
||||
@ -40,5 +39,5 @@ class Salon_com(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('/index.html', '/print.html')
|
||||
return url + '/print/'
|
||||
|
||||
|
@ -38,9 +38,12 @@
|
||||
<hr class="cbj_kindle_banner_hr" />
|
||||
<!--
|
||||
In addition you can add code to show the values of custom columns here.
|
||||
The value is available as _column_name and the title as _column_name_label.
|
||||
For example, if you have a custom column with label #genre, you can add it to
|
||||
this template with _genre_label and _genre. Note that the # is replaced by an underscore.
|
||||
The value is available as _column_name and the title as
|
||||
_column_name_label. For example, if you have a custom column with
|
||||
label #genre, you can add it to this template with _genre_label and
|
||||
_genre. Note that the # is replaced by an underscore. For example
|
||||
|
||||
<div><b>{_genre_label}:</b> {_genre}</div>
|
||||
-->
|
||||
|
||||
<div class="cbj_comments">{comments}</div>
|
||||
|
@ -217,6 +217,7 @@ class ITUNES(DriverBase):
|
||||
# 0x1297 iPhone 4
|
||||
# 0x129a iPad
|
||||
# 0x129f iPad2 (WiFi)
|
||||
# 0x12a0 iPhone 4S
|
||||
# 0x12a2 iPad2 (GSM)
|
||||
# 0x12a3 iPad2 (CDMA)
|
||||
VENDOR_ID = [0x05ac]
|
||||
@ -1305,6 +1306,8 @@ class ITUNES(DriverBase):
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._add_new_copy()")
|
||||
|
||||
self._update_epub_metadata(fpath, metadata)
|
||||
|
||||
db_added = None
|
||||
lb_added = None
|
||||
|
||||
@ -1409,10 +1412,16 @@ class ITUNES(DriverBase):
|
||||
tmp_cover.write(cover_data)
|
||||
|
||||
if lb_added:
|
||||
if lb_added.Artwork.Count:
|
||||
lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
|
||||
else:
|
||||
lb_added.AddArtworkFromFile(tc)
|
||||
try:
|
||||
if lb_added.Artwork.Count:
|
||||
lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
|
||||
else:
|
||||
lb_added.AddArtworkFromFile(tc)
|
||||
except:
|
||||
if DEBUG:
|
||||
self.log.warning(" iTunes automation interface reported an error"
|
||||
" when adding artwork to '%s' in the iTunes Library" % metadata.title)
|
||||
pass
|
||||
|
||||
if db_added:
|
||||
if db_added.Artwork.Count:
|
||||
@ -2663,6 +2672,7 @@ class ITUNES(DriverBase):
|
||||
metadata.timestamp = now()
|
||||
if DEBUG:
|
||||
self.log.info(" add timestamp: %s" % metadata.timestamp)
|
||||
|
||||
else:
|
||||
metadata.timestamp = now()
|
||||
if DEBUG:
|
||||
@ -2699,7 +2709,7 @@ class ITUNES(DriverBase):
|
||||
if iswindows and metadata.series:
|
||||
metadata.tags = None
|
||||
|
||||
set_metadata(zfo, metadata, update_timestamp=True)
|
||||
set_metadata(zfo, metadata, apply_null=True, update_timestamp=True)
|
||||
|
||||
def _update_device(self, msg='', wait=True):
|
||||
'''
|
||||
@ -2771,6 +2781,8 @@ class ITUNES(DriverBase):
|
||||
lb_added.sort_name.set(metadata_x.title_sort)
|
||||
|
||||
if db_added:
|
||||
self.log.warning(" waiting for db_added to become writeable ")
|
||||
time.sleep(1.0)
|
||||
db_added.name.set(metadata_x.title)
|
||||
db_added.album.set(metadata_x.title)
|
||||
db_added.artist.set(authors_to_string(metadata_x.authors))
|
||||
|
@ -33,7 +33,7 @@ class IREXDR1000(USBMS):
|
||||
|
||||
MAIN_MEMORY_VOLUME_LABEL = 'IRex Digital Reader 1000 Main Memory'
|
||||
|
||||
EBOOK_DIR_MAIN = 'ebooks'
|
||||
EBOOK_DIR_MAIN = ''
|
||||
DELETE_EXTS = ['.mbp']
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
@ -44,7 +44,7 @@ class IREXDR800(IREXDR1000):
|
||||
WINDOWS_MAIN_MEM = 'DR800'
|
||||
FORMATS = ['epub', 'pdb', 'html', 'pdf', 'txt']
|
||||
|
||||
EBOOK_DIR_MAIN = 'Books'
|
||||
EBOOK_DIR_MAIN = ''
|
||||
DELETE_EXTS = []
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
|
@ -710,7 +710,8 @@ class Metadata(object):
|
||||
fmt('Title sort', self.title_sort)
|
||||
if self.authors:
|
||||
fmt('Author(s)', authors_to_string(self.authors) + \
|
||||
((' [' + self.author_sort + ']') if self.author_sort else ''))
|
||||
((' [' + self.author_sort + ']')
|
||||
if self.author_sort and self.author_sort != _('Unknown') else ''))
|
||||
if self.publisher:
|
||||
fmt('Publisher', self.publisher)
|
||||
if getattr(self, 'book_producer', False):
|
||||
|
@ -55,6 +55,7 @@ class Ozon(Source):
|
||||
# div_book -> search only books, ebooks and audio books
|
||||
search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
|
||||
|
||||
# for ozon.ru search we have to format ISBN with '-'
|
||||
isbn = _format_isbn(log, identifiers.get('isbn', None))
|
||||
# TODO: format isbn!
|
||||
qItems = set([isbn, title])
|
||||
@ -64,7 +65,7 @@ class Ozon(Source):
|
||||
qItems.discard('')
|
||||
qItems = map(_quoteString, qItems)
|
||||
|
||||
q = ' '.join(qItems).strip()
|
||||
q = u' '.join(qItems).strip()
|
||||
log.info(u'search string: ' + q)
|
||||
|
||||
if isinstance(q, unicode):
|
||||
@ -78,13 +79,13 @@ class Ozon(Source):
|
||||
return search_url
|
||||
# }}}
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||
identifiers={}, timeout=30): # {{{
|
||||
if not self.is_configured():
|
||||
return
|
||||
query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
|
||||
if not query:
|
||||
err = 'Insufficient metadata to construct query'
|
||||
err = u'Insufficient metadata to construct query'
|
||||
log.error(err)
|
||||
return err
|
||||
|
||||
@ -109,15 +110,15 @@ class Ozon(Source):
|
||||
# }}}
|
||||
|
||||
def get_metadata(self, log, entries, title, authors, identifiers): # {{{
|
||||
# some book titles have extra charactes like this
|
||||
# some book titles have extra characters like this
|
||||
# TODO: make a tweak
|
||||
reRemoveFromTitle = None
|
||||
reRemoveFromTitle = None
|
||||
#reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
|
||||
|
||||
|
||||
title = unicode(title).upper() if title else ''
|
||||
if reRemoveFromTitle:
|
||||
title = reRemoveFromTitle.sub('', title)
|
||||
authors = map(_normalizeAuthorNameWithInitials,
|
||||
title = reRemoveFromTitle.sub('', title)
|
||||
authors = map(_normalizeAuthorNameWithInitials,
|
||||
map(unicode.upper, map(unicode, authors))) if authors else None
|
||||
ozon_id = identifiers.get('ozon', None)
|
||||
|
||||
@ -160,7 +161,7 @@ class Ozon(Source):
|
||||
mi.source_relevance = i
|
||||
if ensure_metadata_match(mi):
|
||||
metadata.append(mi)
|
||||
# log.debug(u'added metadata %s %s. '%(mi.title, mi.authors))
|
||||
#log.debug(u'added metadata %s %s.'%(mi.title, mi.authors))
|
||||
else:
|
||||
log.debug(u'skipped metadata %s %s. (does not match the query)'%(mi.title, mi.authors))
|
||||
return metadata
|
||||
@ -285,12 +286,12 @@ class Ozon(Source):
|
||||
url = self.get_book_url(metadata.get_identifiers())[2]
|
||||
|
||||
raw = self.browser.open_novisit(url, timeout=timeout).read()
|
||||
doc = html.fromstring(raw)
|
||||
doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
|
||||
|
||||
xpt_prod_det_at = u'string(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "%s")]/a[1]/@title)'
|
||||
xpt_prod_det_tx = u'substring-after(//div[contains(@class, "product-detail")]//text()[contains(., "%s")], ":")'
|
||||
|
||||
# series
|
||||
# series Серия/Серии
|
||||
xpt = xpt_prod_det_at % u'Сери'
|
||||
# % u'Серия:'
|
||||
series = doc.xpath(xpt)
|
||||
@ -300,7 +301,7 @@ class Ozon(Source):
|
||||
xpt = u'normalize-space(substring-after(//meta[@name="description"]/@content, "ISBN"))'
|
||||
isbn_str = doc.xpath(xpt)
|
||||
if isbn_str:
|
||||
all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)]
|
||||
all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if _verifyISBNIntegrity(log, isbn)]
|
||||
if all_isbns:
|
||||
metadata.all_isbns = all_isbns
|
||||
metadata.isbn = all_isbns[0]
|
||||
@ -319,7 +320,7 @@ class Ozon(Source):
|
||||
displ_lang = lng_splt[0].strip()
|
||||
metadata.language = _translageLanguageToCode(displ_lang)
|
||||
#log.debug(u'language: %s'%displ_lang)
|
||||
|
||||
|
||||
# may already have been set from the XML search response
|
||||
if not metadata.pubdate:
|
||||
xpt = u'normalize-space(//div[@class="product-misc"]//text()[contains(., "г.")])'
|
||||
@ -333,10 +334,10 @@ class Ozon(Source):
|
||||
xpt = u'//table[@id="detail_description"]//tr/td'
|
||||
comment_elem = doc.xpath(xpt)
|
||||
if comment_elem:
|
||||
comments = unicode(etree.tostring(comment_elem[0]))
|
||||
comments = unicode(etree.tostring(comment_elem[0], encoding=unicode))
|
||||
if comments:
|
||||
# cleanup root tag, TODO: remove tags like object/embeded
|
||||
comments = re.sub(r'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
|
||||
comments = re.sub(ur'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
|
||||
if comments and (not metadata.comments or len(comments) > len(metadata.comments)):
|
||||
metadata.comments = comments
|
||||
else:
|
||||
@ -345,8 +346,16 @@ class Ozon(Source):
|
||||
log.debug('No book description found in HTML')
|
||||
# }}}
|
||||
|
||||
def _quoteString(str): # {{{
|
||||
return '"' + str + '"' if str and str.find(' ') != -1 else str
|
||||
def _quoteString(strToQuote): # {{{
|
||||
return '"' + strToQuote + '"' if strToQuote and strToQuote.find(' ') != -1 else strToQuote
|
||||
# }}}
|
||||
|
||||
def _verifyISBNIntegrity(log, isbn): # {{{
    """Check ``isbn`` with ``check_isbn`` and log an error if it fails.

    :param log: logger offering ``error(message)``.
    :param isbn: ISBN candidate passed to ``check_isbn``.
    :return: True unless ``check_isbn`` returned None.
    """
    # Online ISBN-Check http://www.isbn-check.de/
    checked = check_isbn(isbn)
    if checked:
        return True
    log.error(u'ISBN integrity check failed for "%s"'%isbn)
    return checked is not None
|
||||
# }}}
|
||||
|
||||
# TODO: make customizable
|
||||
@ -425,20 +434,20 @@ def _translageLanguageToCode(displayLang): # {{{
|
||||
# [В.П. Колесников | Колесников В.П.]-> В. П. Колесников
|
||||
def _normalizeAuthorNameWithInitials(name): # {{{
|
||||
res = name
|
||||
if name:
|
||||
re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
||||
if name:
|
||||
re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
||||
re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
||||
matcher = re.match(re1, unicode(name), re.UNICODE)
|
||||
if not matcher:
|
||||
matcher = re.match(re2, unicode(name), re.UNICODE)
|
||||
|
||||
|
||||
if matcher:
|
||||
d = matcher.groupdict()
|
||||
res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
|
||||
return res
|
||||
# }}}
|
||||
|
||||
def toPubdate(log, yearAsString):
|
||||
def toPubdate(log, yearAsString): # {{{
|
||||
res = None
|
||||
if yearAsString:
|
||||
try:
|
||||
@ -448,7 +457,11 @@ def toPubdate(log, yearAsString):
|
||||
except:
|
||||
log.error('cannot parse to date %s'%yearAsString)
|
||||
return res
|
||||
# }}}
|
||||
|
||||
def _listToUnicodePrintStr(lst): # {{{
|
||||
return u'[' + u', '.join(unicode(x) for x in lst) + u']'
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__': # tests {{{
|
||||
# To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/ozon.py
|
||||
|
@ -302,7 +302,19 @@ class MobiWriter(object):
|
||||
|
||||
def generate_record0(self): # MOBI header {{{
|
||||
metadata = self.oeb.metadata
|
||||
exth = self.build_exth()
|
||||
bt = 0x002
|
||||
if self.primary_index_record_idx is not None:
|
||||
if False and self.indexer.is_flat_periodical:
|
||||
# Disabled as setting this to 0x102 causes the Kindle to not
|
||||
# auto archive the issues
|
||||
bt = 0x102
|
||||
elif self.indexer.is_periodical:
|
||||
# If you change this, remember to change the cdetype in the EXTH
|
||||
# header as well
|
||||
bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
|
||||
|
||||
|
||||
exth = self.build_exth(bt)
|
||||
first_image_record = None
|
||||
if self.image_records:
|
||||
first_image_record = len(self.records)
|
||||
@ -351,17 +363,6 @@ class MobiWriter(object):
|
||||
# 0x10 - 0x13 : UID
|
||||
# 0x14 - 0x17 : Generator version
|
||||
|
||||
bt = 0x002
|
||||
if self.primary_index_record_idx is not None:
|
||||
if False and self.indexer.is_flat_periodical:
|
||||
# Disabled as setting this to 0x102 causes the Kindle to not
|
||||
# auto archive the issues
|
||||
bt = 0x102
|
||||
elif self.indexer.is_periodical:
|
||||
# If you change this, remember to change the cdetype in the EXTH
|
||||
# header as well
|
||||
bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
|
||||
|
||||
record0.write(pack(b'>IIIII',
|
||||
0xe8, bt, 65001, uid, 6))
|
||||
|
||||
@ -479,7 +480,7 @@ class MobiWriter(object):
|
||||
self.records[0] = align_block(record0)
|
||||
# }}}
|
||||
|
||||
def build_exth(self): # EXTH Header {{{
|
||||
def build_exth(self, mobi_doctype): # EXTH Header {{{
|
||||
oeb = self.oeb
|
||||
exth = StringIO()
|
||||
nrecs = 0
|
||||
@ -535,16 +536,17 @@ class MobiWriter(object):
|
||||
nrecs += 1
|
||||
|
||||
# Write cdetype
|
||||
if not self.is_periodical and not self.opts.share_not_sync:
|
||||
exth.write(pack(b'>II', 501, 12))
|
||||
exth.write(b'EBOK')
|
||||
nrecs += 1
|
||||
if not self.is_periodical:
|
||||
if not self.opts.share_not_sync:
|
||||
exth.write(pack(b'>II', 501, 12))
|
||||
exth.write(b'EBOK')
|
||||
nrecs += 1
|
||||
else:
|
||||
# Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype
|
||||
# of 0x103 but the old writer didn't write them, and I dont know
|
||||
# what it should be for type 0x102 (b'BLOG'?) so write nothing
|
||||
# instead
|
||||
pass
|
||||
ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
|
||||
if ids:
|
||||
exth.write(pack(b'>II', 501, 12))
|
||||
exth.write(ids)
|
||||
nrecs += 1
|
||||
|
||||
# Add a publication date entry
|
||||
if oeb.metadata['date']:
|
||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML
|
||||
from calibre.library.comments import comments_to_html
|
||||
from calibre.utils.date import is_date_undefined
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
|
||||
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
|
||||
|
||||
@ -175,15 +176,20 @@ def render_jacket(mi, output_profile,
|
||||
try:
|
||||
display_name, val = mi.format_field_extended(key)[:2]
|
||||
key = key.replace('#', '_')
|
||||
args[key] = val
|
||||
args[key+'_label'] = display_name
|
||||
args[key] = escape(val)
|
||||
args[key+'_label'] = escape(display_name)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Used in the comment describing use of custom columns in templates
|
||||
args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
|
||||
args['_genre'] = args.get('_genre', '{_genre}')
|
||||
|
||||
generated_html = P('jacket/template.xhtml',
|
||||
data=True).decode('utf-8').format(**args)
|
||||
|
||||
# Post-process the generated html to strip out empty header items
|
||||
|
||||
soup = BeautifulSoup(generated_html)
|
||||
if not series:
|
||||
series_tag = soup.find(attrs={'class':'cbj_series'})
|
||||
@ -206,7 +212,8 @@ def render_jacket(mi, output_profile,
|
||||
if hr_tag is not None:
|
||||
hr_tag.extract()
|
||||
|
||||
return soup.renderContents(None)
|
||||
return strip_encoding_declarations(
|
||||
soup.renderContents('utf-8').decode('utf-8'))
|
||||
|
||||
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
||||
|
||||
|
@ -77,7 +77,8 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
|
||||
|
||||
result = False
|
||||
with closing(br.open(url, timeout=timeout)) as f:
|
||||
doc = html.fromstring(f.read())
|
||||
raw = xml_to_unicode(f.read(), verbose=True)[0]
|
||||
doc = html.fromstring(raw)
|
||||
|
||||
# example where we are going to find formats
|
||||
# <div class="l">
|
||||
@ -88,7 +89,7 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
|
||||
# <div class="l">
|
||||
# <p>.epub, .fb2.zip, .pdf</p>
|
||||
# </div>
|
||||
xpt = u'normalize-space(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
|
||||
xpt = u'normalize-space(//div[contains(@id, "saleBlock")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
|
||||
formats = doc.xpath(xpt)
|
||||
if formats:
|
||||
result = True
|
||||
|
@ -12539,7 +12539,7 @@ msgstr "За&грузить метаданные"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:226
|
||||
msgid "Configure download metadata"
|
||||
msgstr ""
|
||||
msgstr "Настроить загрузку метаданных"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:230
|
||||
msgid "Change how calibre downloads metadata"
|
||||
@ -12595,7 +12595,7 @@ msgstr "&Пользовательские метаданные"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:788
|
||||
msgid "&Comments"
|
||||
msgstr "Комментарии"
|
||||
msgstr "&Комментарии"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:854
|
||||
msgid "Basic metadata"
|
||||
@ -12603,11 +12603,11 @@ msgstr "Основные метаданные"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
|
||||
msgid "Has cover"
|
||||
msgstr "Есть обложка"
|
||||
msgstr "Обложка"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
|
||||
msgid "Has summary"
|
||||
msgstr ""
|
||||
msgstr "Аннотация"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:190
|
||||
msgid ""
|
||||
@ -12619,7 +12619,7 @@ msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:268
|
||||
msgid "See at"
|
||||
msgstr ""
|
||||
msgstr "Посмотреть на"
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:403
|
||||
msgid "calibre is downloading metadata from: "
|
||||
|
Loading…
x
Reference in New Issue
Block a user