commit dd8960eeac
0.8.27+, GwR patches for 875726, 892468
@@ -1,11 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class AdvancedUserRecipe(BasicNewsRecipe):

-title = 'heise online'
+title = 'Heise-online'
 description = 'News vom Heise-Verlag'
 __author__ = 'schuster'
+masthead_url = 'http://www.heise.de/icons/ho/heise_online_logo.gif'
+publisher = 'Heise Zeitschriften Verlag GmbH & Co. KG'
 use_embedded_content = False
 language = 'de'
 oldest_article = 2
@@ -14,11 +14,10 @@ class AdvancedUserRecipe(BasicNewsRecipe):
 remove_empty_feeds = True
 timeout = 5
 no_stylesheets = True
-encoding = 'utf-8'

 remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-remove_tags = [{'class':'navi_top_container'},
+remove_tags = [dict(id='navi_top_container'),
 dict(id='navi_bottom'),
 dict(id='mitte_rechts'),
 dict(id='navigation'),
@@ -29,27 +28,31 @@ class AdvancedUserRecipe(BasicNewsRecipe):
 dict(id='seiten_navi'),
 dict(id='adbottom'),
 dict(id='sitemap'),
-dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
+dict(name='div', attrs={'id':'sitemap'}),
-]
+dict(name='ul', attrs={'class':'erste_zeile'}),
+dict(name='ul', attrs={'class':'zweite_zeile'}),
+dict(name='div', attrs={'class':'navi_top_container'})]

 feeds = [
 ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-('iX', 'http://www.heise.de/ix/news/news.rdf'),
+('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
-('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
-('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-('Security', 'http://www.heise.de/security/news/news-atom.xml'),
-('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
-('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
 ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
+('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
-('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
+('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
+('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
+('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
+('iX', 'http://www.heise.de/ix/news/news.rdf'),
+('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
 ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
 ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
 ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
-('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
+('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')]
-]

 def print_version(self, url):
 return url + '?view=print'

@@ -5,7 +5,6 @@ www.theweek.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class TheWeek(BasicNewsRecipe):
 title = 'The Week Magazine'
@@ -21,23 +20,7 @@ class TheWeek(BasicNewsRecipe):
 encoding = 'utf-8'
 use_embedded_content = False
 language = 'en'
-preprocess_regexps = [(re.compile(r'<h3><a href=.*</body>', re.DOTALL), lambda match: '</body>')]
+auto_cleanup = True
-remove_tags_before = dict(name='h1')
-remove_tags_after = dict(name='div', attrs={'class':'articleSubscribe4free'})
-remove_tags = [
-dict(name='div', attrs={'class':['floatLeft','imageCaption','slideshowImageAttribution','postDate','utilities','cartoonInfo','left','middle','col300','articleSubscribe4free',' articleFlyout','articleFlyout floatRight','fourFreeBar']})
-,dict(name='div', attrs={'id':['cartoonThumbs','rightColumn','header','partners']})
-,dict(name='ul', attrs={'class':['slideshowNav','hotTopicsList topicList']})
-]
-remove_attributes = ['width','height', 'style', 'font', 'color']
-extra_css = '''
-h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
-h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
-h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
-p {font-family:Arial,Helvetica,sans-serif;}
-'''
-filter_regexps = [r'www\.palmcoastdata\.com']

 feeds = [
 (u'News-Opinion', u'http://theweek.com/section/index/news_opinion.rss'),
 (u'Business', u'http://theweek.com/section/index/business.rss'),

@@ -38,10 +38,12 @@
 <hr class="cbj_kindle_banner_hr" />
 <!--
 In addition you can add code to show the values of custom columns here.
-The value is available as _column_name and the title as _column_name_label.
+The value is available as _column_name and the title as
-For example, if you have a custom column with label #genre, you can add it to
+_column_name_label. For example, if you have a custom column with
-this template with:
+label #genre, you can add it to this template with _genre_label and
-<div>{_genre_label}: {_genre}</div>
+_genre. Note that the # is replaced by an underscore. For example

+<div><b>{_genre_label}:</b> {_genre}</div>
 -->

 <div class="cbj_comments">{comments}</div>

@@ -33,7 +33,7 @@ class IREXDR1000(USBMS):

 MAIN_MEMORY_VOLUME_LABEL = 'IRex Digital Reader 1000 Main Memory'

-EBOOK_DIR_MAIN = 'ebooks'
+EBOOK_DIR_MAIN = ''
 DELETE_EXTS = ['.mbp']
 SUPPORTS_SUB_DIRS = True

@@ -44,7 +44,7 @@ class IREXDR800(IREXDR1000):
 WINDOWS_MAIN_MEM = 'DR800'
 FORMATS = ['epub', 'pdb', 'html', 'pdf', 'txt']

-EBOOK_DIR_MAIN = 'Books'
+EBOOK_DIR_MAIN = ''
 DELETE_EXTS = []
 SUPPORTS_SUB_DIRS = True

@@ -388,13 +388,9 @@ class KINDLE_FIRE(KINDLE2):

 EBOOK_DIR_MAIN = 'Documents'
 SUPPORTS_SUB_DIRS = False
+SCAN_FROM_ROOT = True
+SUPPORTS_SUB_DIRS_FOR_SCAN = True
 VENDOR_NAME = 'AMAZON'
 WINDOWS_MAIN_MEM = 'KINDLE'

-def get_main_ebook_dir(self, for_upload=False):
-if for_upload:
-return self.EBOOK_DIR_MAIN
-return ''

@@ -81,7 +81,7 @@ class NOOK(USBMS):
 return [x.replace('#', '_') for x in components]

 class NOOK_COLOR(NOOK):
-description = _('Communicate with the Nook Color and TSR eBook readers.')
+description = _('Communicate with the Nook Color, TSR and Tablet eBook readers.')

 PRODUCT_ID = [0x002, 0x003, 0x004]
 BCD = [0x216]

@@ -28,6 +28,8 @@ class DeviceConfig(object):
 EXTRA_CUSTOMIZATION_DEFAULT = None

 SUPPORTS_SUB_DIRS = False
+SUPPORTS_SUB_DIRS_FOR_SCAN = False # This setting is used when scanning for
+# books when SUPPORTS_SUB_DIRS is False
 MUST_READ_METADATA = False
 SUPPORTS_USE_AUTHOR_SORT = False

@@ -202,7 +202,7 @@ class USBMS(CLI, Device):
 debug_print('USBMS: scan from root', self.SCAN_FROM_ROOT, ebook_dir)
 if not os.path.exists(ebook_dir): continue
 # Get all books in the ebook_dir directory
-if self.SUPPORTS_SUB_DIRS:
+if self.SUPPORTS_SUB_DIRS or self.SUPPORTS_SUB_DIRS_FOR_SCAN:
 # build a list of files to check, so we can accurately report progress
 flist = []
 for path, dirs, files in os.walk(ebook_dir):

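The DeviceConfig/USBMS change above adds SUPPORTS_SUB_DIRS_FOR_SCAN so a device can ask for a recursive book scan even when it does not accept uploads into sub-directories. A standalone sketch of that decision logic follows; it is not calibre's actual driver code, and the helper name and extension list are invented for illustration:

    import os

    # Hypothetical helper mirroring the branch shown above: walk recursively
    # when either flag is set, otherwise list only the top-level directory.
    def list_ebook_files(ebook_dir, supports_sub_dirs=False,
                         supports_sub_dirs_for_scan=False,
                         extensions=('.mobi', '.azw', '.epub', '.pdf')):
        found = []
        if supports_sub_dirs or supports_sub_dirs_for_scan:
            for path, dirs, files in os.walk(ebook_dir):
                found.extend(os.path.join(path, f) for f in files
                             if f.lower().endswith(extensions))
        else:
            found.extend(os.path.join(ebook_dir, f) for f in os.listdir(ebook_dir)
                         if f.lower().endswith(extensions))
        return found

    # A Kindle Fire-like device would pass supports_sub_dirs_for_scan=True so
    # that books placed in sub-folders of 'Documents' are still found.
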
@@ -710,7 +710,8 @@ class Metadata(object):
 fmt('Title sort', self.title_sort)
 if self.authors:
 fmt('Author(s)', authors_to_string(self.authors) + \
-((' [' + self.author_sort + ']') if self.author_sort else ''))
+((' [' + self.author_sort + ']')
+if self.author_sort and self.author_sort != _('Unknown') else ''))
 if self.publisher:
 fmt('Publisher', self.publisher)
 if getattr(self, 'book_producer', False):

@@ -6,11 +6,12 @@ Created on 4 Jun 2010

 from base64 import b64encode, b64decode
 import json, traceback
+from datetime import datetime, time

 from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
 from calibre.constants import filesystem_encoding, preferred_encoding
 from calibre.library.field_metadata import FieldMetadata
-from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE
+from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz
 from calibre.utils.magick import Image
 from calibre import isbytestring

@@ -22,7 +23,13 @@ def string_to_datetime(src):
 return parse_date(src)

 def datetime_to_string(dateval):
-if dateval is None or dateval == UNDEFINED_DATE:
+if dateval is None:
+return "None"
+if not isinstance(dateval, datetime):
+dateval = datetime.combine(dateval, time())
+if hasattr(dateval, 'tzinfo') and dateval.tzinfo is None:
+dateval = dateval.replace(tzinfo=local_tz)
+if dateval <= UNDEFINED_DATE:
 return "None"
 return isoformat(dateval)

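The reworked datetime_to_string promotes plain dates to datetimes and attaches the local timezone before calling isoformat, so date-only values serialize consistently and the comparison with the timezone-aware UNDEFINED_DATE is well defined. A self-contained illustration of the same normalisation using only the standard library (timezone.utc stands in for calibre's local_tz):

    from datetime import date, datetime, time, timezone

    LOCAL_TZ = timezone.utc  # stand-in for calibre's local_tz

    def to_iso_string(value):
        # Plain dates carry no time or zone; promote them first.
        if value is None:
            return "None"
        if not isinstance(value, datetime):
            value = datetime.combine(value, time())
        # Naive datetimes get a zone so they compare and serialise consistently.
        if value.tzinfo is None:
            value = value.replace(tzinfo=LOCAL_TZ)
        return value.isoformat()

    print(to_iso_string(date(2011, 11, 4)))   # -> 2011-11-04T00:00:00+00:00
    print(to_iso_string(None))                # -> None
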
@@ -11,7 +11,7 @@ import datetime
 from urllib import quote_plus
 from Queue import Queue, Empty
 from lxml import etree, html
-from calibre import as_unicode
+from calibre import prints, as_unicode

 from calibre.ebooks.chardet import xml_to_unicode

@@ -54,7 +54,8 @@ class Ozon(Source):
 def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
 # div_book -> search only books, ebooks and audio books
 search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='

+# for ozon.ru search we have to format ISBN with '-'
 isbn = _format_isbn(log, identifiers.get('isbn', None))
 # TODO: format isbn!
 qItems = set([isbn, title])
@@ -64,7 +65,7 @@ class Ozon(Source):
 qItems.discard('')
 qItems = map(_quoteString, qItems)

-q = ' '.join(qItems).strip()
+q = u' '.join(qItems).strip()
 log.info(u'search string: ' + q)

 if isinstance(q, unicode):
@@ -78,13 +79,13 @@ class Ozon(Source):
 return search_url
 # }}}

-def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
+def identify(self, log, result_queue, abort, title=None, authors=None,
-identifiers={}, timeout=30):
+identifiers={}, timeout=30): # {{{
 if not self.is_configured():
 return
 query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
 if not query:
-err = 'Insufficient metadata to construct query'
+err = u'Insufficient metadata to construct query'
 log.error(err)
 return err

@@ -109,7 +110,7 @@ class Ozon(Source):
 # }}}

 def get_metadata(self, log, entries, title, authors, identifiers): # {{{
-# some book titles have extra charactes like this
+# some book titles have extra characters like this
 # TODO: make a twick
 reRemoveFromTitle = None
 #reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
@@ -160,7 +161,7 @@ class Ozon(Source):
 mi.source_relevance = i
 if ensure_metadata_match(mi):
 metadata.append(mi)
-# log.debug(u'added metadata %s %s. '%(mi.title, mi.authors))
+#log.debug(u'added metadata %s %s.'%(mi.title, mi.authors))
 else:
 log.debug(u'skipped metadata %s %s. (does not match the query)'%(mi.title, mi.authors))
 return metadata
@@ -285,12 +286,12 @@ class Ozon(Source):
 url = self.get_book_url(metadata.get_identifiers())[2]

 raw = self.browser.open_novisit(url, timeout=timeout).read()
-doc = html.fromstring(raw)
+doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])

 xpt_prod_det_at = u'string(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "%s")]/a[1]/@title)'
 xpt_prod_det_tx = u'substring-after(//div[contains(@class, "product-detail")]//text()[contains(., "%s")], ":")'

-# series
+# series Серия/Серии
 xpt = xpt_prod_det_at % u'Сери'
 # % u'Серия:'
 series = doc.xpath(xpt)
@@ -300,7 +301,7 @@ class Ozon(Source):
 xpt = u'normalize-space(substring-after(//meta[@name="description"]/@content, "ISBN"))'
 isbn_str = doc.xpath(xpt)
 if isbn_str:
-all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)]
+all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if _verifyISBNIntegrity(log, isbn)]
 if all_isbns:
 metadata.all_isbns = all_isbns
 metadata.isbn = all_isbns[0]
@@ -333,10 +334,10 @@ class Ozon(Source):
 xpt = u'//table[@id="detail_description"]//tr/td'
 comment_elem = doc.xpath(xpt)
 if comment_elem:
-comments = unicode(etree.tostring(comment_elem[0]))
+comments = unicode(etree.tostring(comment_elem[0], encoding=unicode))
 if comments:
 # cleanup root tag, TODO: remove tags like object/embeded
-comments = re.sub(r'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
+comments = re.sub(ur'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
 if comments and (not metadata.comments or len(comments) > len(metadata.comments)):
 metadata.comments = comments
 else:
@@ -345,8 +346,16 @@ class Ozon(Source):
 log.debug('No book description found in HTML')
 # }}}

-def _quoteString(str): # {{{
+def _quoteString(strToQuote): # {{{
-return '"' + str + '"' if str and str.find(' ') != -1 else str
+return '"' + strToQuote + '"' if strToQuote and strToQuote.find(' ') != -1 else strToQuote
+# }}}

+def _verifyISBNIntegrity(log, isbn): # {{{
+# Online ISBN-Check http://www.isbn-check.de/
+res = check_isbn(isbn)
+if not res:
+log.error(u'ISBN integrity check failed for "%s"'%isbn)
+return res is not None
 # }}}

 # TODO: make customizable
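The new _verifyISBNIntegrity helper only wraps check_isbn and logs a failure instead of silently dropping the candidate. For reference, the arithmetic behind an ISBN-13 integrity check is the weighted checksum sketched below; this is a standalone illustration, not calibre's check_isbn implementation:

    def isbn13_checksum_ok(isbn):
        # An ISBN-13 is valid when its digits, weighted alternately 1 and 3,
        # sum to a multiple of 10.
        digits = [int(c) for c in isbn.replace('-', '') if c.isdigit()]
        if len(digits) != 13:
            return False
        total = sum(d * (3 if i % 2 else 1) for i, d in enumerate(digits))
        return total % 10 == 0

    print(isbn13_checksum_ok('9780306406157'))  # known-valid ISBN-13 -> True
    print(isbn13_checksum_ok('9780306406158'))  # wrong check digit -> False
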
@@ -438,7 +447,7 @@ def _normalizeAuthorNameWithInitials(name): # {{{
 return res
 # }}}

-def toPubdate(log, yearAsString):
+def toPubdate(log, yearAsString): # {{{
 res = None
 if yearAsString:
 try:
@@ -448,7 +457,11 @@ def toPubdate(log, yearAsString):
 except:
 log.error('cannot parse to date %s'%yearAsString)
 return res
+# }}}

+def _listToUnicodePrintStr(lst): # {{{
+return u'[' + u', '.join(unicode(x) for x in lst) + u']'
+# }}}

 if __name__ == '__main__': # tests {{{
 # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/ozon.py

@@ -16,6 +16,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML
 from calibre.library.comments import comments_to_html
 from calibre.utils.date import is_date_undefined
+from calibre.ebooks.chardet import strip_encoding_declarations

 JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'

@@ -175,15 +176,20 @@ def render_jacket(mi, output_profile,
 try:
 display_name, val = mi.format_field_extended(key)[:2]
 key = key.replace('#', '_')
-args[key] = val
+args[key] = escape(val)
-args[key+'_label'] = display_name
+args[key+'_label'] = escape(display_name)
 except:
 pass

+# Used in the comment describing use of custom columns in templates
+args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
+args['_genre'] = args.get('_genre', '{_genre}')

 generated_html = P('jacket/template.xhtml',
 data=True).decode('utf-8').format(**args)

 # Post-process the generated html to strip out empty header items

 soup = BeautifulSoup(generated_html)
 if not series:
 series_tag = soup.find(attrs={'class':'cbj_series'})
@@ -206,7 +212,8 @@ def render_jacket(mi, output_profile,
 if hr_tag is not None:
 hr_tag.extract()

-return soup.renderContents(None)
+return strip_encoding_declarations(
+soup.renderContents('utf-8').decode('utf-8'))

 from calibre.ebooks.oeb.base import RECOVER_PARSER

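The _genre_label/_genre defaults added to render_jacket exist because the jacket template is filled with str.format(**args): when no #genre custom column is present, the placeholders are substituted with themselves, so the commented example in template.xhtml survives intact. A short sketch of that behaviour, with illustrative values only:

    # Template fragment as it appears in the commented example above.
    template = '<div><b>{_genre_label}:</b> {_genre}</div>'

    args = {}  # book without a #genre custom column
    args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
    args['_genre'] = args.get('_genre', '{_genre}')
    print(template.format(**args))
    # -> <div><b>{_genre_label}:</b> {_genre}</div>  (placeholders left intact)

    args = {'_genre_label': 'Genre', '_genre': 'Fiction'}  # column present
    print(template.format(**args))
    # -> <div><b>Genre:</b> Fiction</div>
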
@@ -372,13 +372,13 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
 self.apply_pubdate.setChecked(True)

 def clear_pubdate(self, *args):
-self.pubdate.setMinimumDateTime(UNDEFINED_QDATETIME)
+self.pubdate.setDateTime(UNDEFINED_QDATETIME)

 def do_apply_adddate(self, *args):
 self.apply_adddate.setChecked(True)

 def clear_adddate(self, *args):
-self.adddate.setMinimumDateTime(UNDEFINED_QDATETIME)
+self.adddate.setDateTime(UNDEFINED_QDATETIME)

 def button_clicked(self, which):
 if which == self.button_box.button(QDialogButtonBox.Apply):

@@ -77,7 +77,8 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):

 result = False
 with closing(br.open(url, timeout=timeout)) as f:
-doc = html.fromstring(f.read())
+raw = xml_to_unicode(f.read(), verbose=True)[0]
+doc = html.fromstring(raw)

 # example where we are going to find formats
 # <div class="l">
@@ -88,7 +89,7 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
 # <div class="l">
 # <p>.epub, .fb2.zip, .pdf</p>
 # </div>
-xpt = u'normalize-space(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
+xpt = u'normalize-space(//div[contains(@id, "saleBlock")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
 formats = doc.xpath(xpt)
 if formats:
 result = True

@@ -12539,7 +12539,7 @@ msgstr "За&грузить метаданные"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:226
 msgid "Configure download metadata"
-msgstr ""
+msgstr "Настроить загрузку метаданных"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:230
 msgid "Change how calibre downloads metadata"
@@ -12595,7 +12595,7 @@ msgstr "&Пользовательские метаданные"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:788
 msgid "&Comments"
-msgstr "Комментарии"
+msgstr "&Комментарии"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:854
 msgid "Basic metadata"
@@ -12603,11 +12603,11 @@ msgstr "Основные метаданные"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
 msgid "Has cover"
-msgstr "Есть обложка"
+msgstr "Обложка"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:133
 msgid "Has summary"
-msgstr ""
+msgstr "Аннотация"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:190
 msgid ""
@@ -12619,7 +12619,7 @@ msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:268
 msgid "See at"
-msgstr ""
+msgstr "Посмотреть на"

 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:403
 msgid "calibre is downloading metadata from: "

@@ -291,6 +291,11 @@ def clean_date_for_sort(dt, format):
 if not isinstance(dt, datetime):
 dt = datetime.combine(dt, time())

+if hasattr(dt, 'tzinfo'):
+if dt.tzinfo is None:
+dt = dt.replace(tzinfo=_local_tz)
+dt = as_local_time(dt)

 if format == 'iso':
 format = 'yyMdhms'