mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Deal with Amazon changing their websites to use (randomly?) two different HTML constructs around the format text
This commit is contained in:
parent
0ca49a65f1
commit
993b9f6bcb
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 6 # Needed for dynamic plugin loading
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
@ -59,6 +59,7 @@ class AmazonDEKindleStore(StorePlugin):
|
||||
allText = f.read()
|
||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||
|
||||
format_xpath2 = ''
|
||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||
#print('grid form')
|
||||
data_xpath = '//div[contains(@class, "prod")]'
|
||||
@ -89,8 +90,8 @@ class AmazonDEKindleStore(StorePlugin):
|
||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||
#print('list form')
|
||||
data_xpath = '//li[@class="s-result-item"]'
|
||||
format_xpath = (
|
||||
'.//h3[contains(@class, "s-inline")]/text()')
|
||||
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||
asin_xpath = '@data-asin'
|
||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||
@ -115,6 +116,10 @@ class AmazonDEKindleStore(StorePlugin):
|
||||
# if it isn't.
|
||||
format_ = ''.join(data.xpath(format_xpath))
|
||||
if 'kindle' not in format_.lower():
|
||||
if format_xpath2:
|
||||
format_ = ''.join(data.xpath(format_xpath2))
|
||||
if 'kindle' not in format_.lower():
|
||||
# print(etree.tostring(data, pretty_print=True))
|
||||
continue
|
||||
|
||||
# We must have an asin otherwise we can't easily reference the
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 6 # Needed for dynamic plugin loading
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
@ -58,6 +58,7 @@ class AmazonESKindleStore(StorePlugin):
|
||||
allText = f.read()
|
||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||
|
||||
format_xpath2 = ''
|
||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||
#print('grid form')
|
||||
data_xpath = '//div[contains(@class, "prod")]'
|
||||
@ -88,8 +89,8 @@ class AmazonESKindleStore(StorePlugin):
|
||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||
#print('list form')
|
||||
data_xpath = '//li[@class="s-result-item"]'
|
||||
format_xpath = (
|
||||
'.//h3[contains(@class, "s-inline")]/text()')
|
||||
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||
asin_xpath = '@data-asin'
|
||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||
@ -114,6 +115,10 @@ class AmazonESKindleStore(StorePlugin):
|
||||
# if it isn't.
|
||||
format_ = ''.join(data.xpath(format_xpath))
|
||||
if 'kindle' not in format_.lower():
|
||||
if format_xpath2:
|
||||
format_ = ''.join(data.xpath(format_xpath2))
|
||||
if 'kindle' not in format_.lower():
|
||||
# print(etree.tostring(data, pretty_print=True))
|
||||
continue
|
||||
|
||||
# We must have an asin otherwise we can't easily reference the
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 6 # Needed for dynamic plugin loading
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
@ -55,6 +55,7 @@ class AmazonFRKindleStore(StorePlugin):
|
||||
allText = f.read()
|
||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||
|
||||
format_xpath2 = ''
|
||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||
#print('grid form')
|
||||
data_xpath = '//div[contains(@class, "prod")]'
|
||||
@ -85,8 +86,8 @@ class AmazonFRKindleStore(StorePlugin):
|
||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||
#print('list form')
|
||||
data_xpath = '//li[@class="s-result-item"]'
|
||||
format_xpath = (
|
||||
'.//h3[contains(@class, "s-inline")]/text()')
|
||||
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||
asin_xpath = '@data-asin'
|
||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||
@ -111,6 +112,10 @@ class AmazonFRKindleStore(StorePlugin):
|
||||
# if it isn't.
|
||||
format_ = ''.join(data.xpath(format_xpath))
|
||||
if 'kindle' not in format_.lower():
|
||||
if format_xpath2:
|
||||
format_ = ''.join(data.xpath(format_xpath2))
|
||||
if 'kindle' not in format_.lower():
|
||||
# print(etree.tostring(data, pretty_print=True))
|
||||
continue
|
||||
|
||||
# We must have an asin otherwise we can't easily reference the
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 6 # Needed for dynamic plugin loading
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
@ -58,6 +58,7 @@ class AmazonITKindleStore(StorePlugin):
|
||||
allText = f.read()
|
||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||
|
||||
format_xpath2 = ''
|
||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||
#print('grid form')
|
||||
data_xpath = '//div[contains(@class, "prod")]'
|
||||
@ -88,8 +89,8 @@ class AmazonITKindleStore(StorePlugin):
|
||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||
#print('list form')
|
||||
data_xpath = '//li[@class="s-result-item"]'
|
||||
format_xpath = (
|
||||
'.//h3[contains(@class, "s-inline")]/text()')
|
||||
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||
asin_xpath = '@data-asin'
|
||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||
@ -114,6 +115,10 @@ class AmazonITKindleStore(StorePlugin):
|
||||
# if it isn't.
|
||||
format_ = ''.join(data.xpath(format_xpath))
|
||||
if 'kindle' not in format_.lower():
|
||||
if format_xpath2:
|
||||
format_ = ''.join(data.xpath(format_xpath2))
|
||||
if 'kindle' not in format_.lower():
|
||||
# print(etree.tostring(data, pretty_print=True))
|
||||
continue
|
||||
|
||||
# We must have an asin otherwise we can't easily reference the
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
store_version = 6 # Needed for dynamic plugin loading
|
||||
store_version = 7 # Needed for dynamic plugin loading
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import re
|
||||
from contextlib import closing
|
||||
from lxml import html
|
||||
# from lxml import html
|
||||
|
||||
from PyQt5.Qt import QUrl
|
||||
|
||||
@ -62,6 +63,7 @@ class AmazonUKKindleStore(StorePlugin):
|
||||
allText = f.read()
|
||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||
|
||||
format_xpath2 = ''
|
||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||
#print('grid form')
|
||||
data_xpath = '//div[contains(@class, "prod")]'
|
||||
@ -92,8 +94,8 @@ class AmazonUKKindleStore(StorePlugin):
|
||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||
#print('list form')
|
||||
data_xpath = '//li[@class="s-result-item"]'
|
||||
format_xpath = (
|
||||
'.//h3[contains(@class, "s-inline")]/text()')
|
||||
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||
asin_xpath = '@data-asin'
|
||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||
@ -118,6 +120,10 @@ class AmazonUKKindleStore(StorePlugin):
|
||||
# if it isn't.
|
||||
format_ = ''.join(data.xpath(format_xpath))
|
||||
if 'kindle' not in format_.lower():
|
||||
if format_xpath2:
|
||||
format_ = ''.join(data.xpath(format_xpath2))
|
||||
if 'kindle' not in format_.lower():
|
||||
# print(etree.tostring(data, pretty_print=True))
|
||||
continue
|
||||
|
||||
# We must have an asin otherwise we can't easily reference the
|
||||
|
Loading…
x
Reference in New Issue
Block a user