mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Get rid of cssselect in the amazon metadata download plugin
This commit is contained in:
parent
3c8984d2a5
commit
88dbbefa7b
@ -19,11 +19,6 @@ from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
|
|||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
def CSSSelect(expr):
|
|
||||||
from cssselect import HTMLTranslator
|
|
||||||
from lxml.etree import XPath
|
|
||||||
return XPath(HTMLTranslator().css_to_xpath(expr))
|
|
||||||
|
|
||||||
class Worker(Thread): # Get details {{{
|
class Worker(Thread): # Get details {{{
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -269,6 +264,8 @@ class Worker(Thread): # Get details {{{
|
|||||||
self.log.error(msg)
|
self.log.error(msg)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
from css_selectors import Select
|
||||||
|
self.selector = Select(root)
|
||||||
self.parse_details(oraw, root)
|
self.parse_details(oraw, root)
|
||||||
|
|
||||||
def parse_details(self, raw, root):
|
def parse_details(self, raw, root):
|
||||||
@ -337,7 +334,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
self.log.exception('Error parsing cover for url: %r'%self.url)
|
self.log.exception('Error parsing cover for url: %r'%self.url)
|
||||||
mi.has_cover = bool(self.cover_url)
|
mi.has_cover = bool(self.cover_url)
|
||||||
|
|
||||||
non_hero = CSSSelect('div#bookDetails_container_div div#nonHeroSection')(root)
|
non_hero = tuple(self.selector('div#bookDetails_container_div div#nonHeroSection'))
|
||||||
if non_hero:
|
if non_hero:
|
||||||
# New style markup
|
# New style markup
|
||||||
try:
|
try:
|
||||||
@ -417,9 +414,9 @@ class Worker(Thread): # Get details {{{
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def parse_authors(self, root):
|
def parse_authors(self, root):
|
||||||
matches = CSSSelect('#byline .author .contributorNameID')(root)
|
matches = tuple(self.selector('#byline .author .contributorNameID'))
|
||||||
if not matches:
|
if not matches:
|
||||||
matches = CSSSelect('#byline .author a.a-link-normal')(root)
|
matches = tuple(self.selector('#byline .author a.a-link-normal'))
|
||||||
if matches:
|
if matches:
|
||||||
authors = [self.totext(x) for x in matches]
|
authors = [self.totext(x) for x in matches]
|
||||||
return [a for a in authors if a]
|
return [a for a in authors if a]
|
||||||
@ -490,7 +487,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
|
|
||||||
def parse_comments(self, root):
|
def parse_comments(self, root):
|
||||||
ans = ''
|
ans = ''
|
||||||
ns = CSSSelect('#bookDescription_feature_div noscript')(root)
|
ns = tuple(self.selector('#bookDescription_feature_div noscript'))
|
||||||
if ns:
|
if ns:
|
||||||
ns = ns[0]
|
ns = ns[0]
|
||||||
if len(ns) == 0 and ns.text:
|
if len(ns) == 0 and ns.text:
|
||||||
@ -1125,13 +1122,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
authors_test(['F. Scott Fitzgerald'])]
|
authors_test(['F. Scott Fitzgerald'])]
|
||||||
),
|
),
|
||||||
|
|
||||||
( # A newer book
|
|
||||||
{'identifiers':{'amazon': 'B004JHY6OG'}},
|
|
||||||
[title_test('The Heroes', exact=False),
|
|
||||||
authors_test(['Joe Abercrombie'])]
|
|
||||||
|
|
||||||
),
|
|
||||||
|
|
||||||
] # }}}
|
] # }}}
|
||||||
|
|
||||||
de_tests = [ # {{{
|
de_tests = [ # {{{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user