mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Deal with Amazon changing their websites to use (randomly?) two different HTML constructs around the format text
This commit is contained in:
parent
0ca49a65f1
commit
993b9f6bcb
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 6 # Needed for dynamic plugin loading
|
store_version = 7 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -59,6 +59,7 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
allText = f.read()
|
allText = f.read()
|
||||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
format_xpath2 = ''
|
||||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
#print('grid form')
|
#print('grid form')
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
@ -89,8 +90,8 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
#print('list form')
|
#print('list form')
|
||||||
data_xpath = '//li[@class="s-result-item"]'
|
data_xpath = '//li[@class="s-result-item"]'
|
||||||
format_xpath = (
|
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||||
'.//h3[contains(@class, "s-inline")]/text()')
|
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||||
asin_xpath = '@data-asin'
|
asin_xpath = '@data-asin'
|
||||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||||
@ -115,7 +116,11 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
# if it isn't.
|
# if it isn't.
|
||||||
format_ = ''.join(data.xpath(format_xpath))
|
format_ = ''.join(data.xpath(format_xpath))
|
||||||
if 'kindle' not in format_.lower():
|
if 'kindle' not in format_.lower():
|
||||||
continue
|
if format_xpath2:
|
||||||
|
format_ = ''.join(data.xpath(format_xpath2))
|
||||||
|
if 'kindle' not in format_.lower():
|
||||||
|
# print(etree.tostring(data, pretty_print=True))
|
||||||
|
continue
|
||||||
|
|
||||||
# We must have an asin otherwise we can't easily reference the
|
# We must have an asin otherwise we can't easily reference the
|
||||||
# book later.
|
# book later.
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 6 # Needed for dynamic plugin loading
|
store_version = 7 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -58,6 +58,7 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
allText = f.read()
|
allText = f.read()
|
||||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
format_xpath2 = ''
|
||||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
#print('grid form')
|
#print('grid form')
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
@ -88,8 +89,8 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
#print('list form')
|
#print('list form')
|
||||||
data_xpath = '//li[@class="s-result-item"]'
|
data_xpath = '//li[@class="s-result-item"]'
|
||||||
format_xpath = (
|
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||||
'.//h3[contains(@class, "s-inline")]/text()')
|
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||||
asin_xpath = '@data-asin'
|
asin_xpath = '@data-asin'
|
||||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||||
@ -114,7 +115,11 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
# if it isn't.
|
# if it isn't.
|
||||||
format_ = ''.join(data.xpath(format_xpath))
|
format_ = ''.join(data.xpath(format_xpath))
|
||||||
if 'kindle' not in format_.lower():
|
if 'kindle' not in format_.lower():
|
||||||
continue
|
if format_xpath2:
|
||||||
|
format_ = ''.join(data.xpath(format_xpath2))
|
||||||
|
if 'kindle' not in format_.lower():
|
||||||
|
# print(etree.tostring(data, pretty_print=True))
|
||||||
|
continue
|
||||||
|
|
||||||
# We must have an asin otherwise we can't easily reference the
|
# We must have an asin otherwise we can't easily reference the
|
||||||
# book later.
|
# book later.
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 6 # Needed for dynamic plugin loading
|
store_version = 7 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -55,6 +55,7 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
allText = f.read()
|
allText = f.read()
|
||||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
format_xpath2 = ''
|
||||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
#print('grid form')
|
#print('grid form')
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
@ -85,8 +86,8 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
#print('list form')
|
#print('list form')
|
||||||
data_xpath = '//li[@class="s-result-item"]'
|
data_xpath = '//li[@class="s-result-item"]'
|
||||||
format_xpath = (
|
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||||
'.//h3[contains(@class, "s-inline")]/text()')
|
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||||
asin_xpath = '@data-asin'
|
asin_xpath = '@data-asin'
|
||||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||||
@ -111,7 +112,11 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
# if it isn't.
|
# if it isn't.
|
||||||
format_ = ''.join(data.xpath(format_xpath))
|
format_ = ''.join(data.xpath(format_xpath))
|
||||||
if 'kindle' not in format_.lower():
|
if 'kindle' not in format_.lower():
|
||||||
continue
|
if format_xpath2:
|
||||||
|
format_ = ''.join(data.xpath(format_xpath2))
|
||||||
|
if 'kindle' not in format_.lower():
|
||||||
|
# print(etree.tostring(data, pretty_print=True))
|
||||||
|
continue
|
||||||
|
|
||||||
# We must have an asin otherwise we can't easily reference the
|
# We must have an asin otherwise we can't easily reference the
|
||||||
# book later.
|
# book later.
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 6 # Needed for dynamic plugin loading
|
store_version = 7 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -58,6 +58,7 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
allText = f.read()
|
allText = f.read()
|
||||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
format_xpath2 = ''
|
||||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
#print('grid form')
|
#print('grid form')
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
@ -88,8 +89,8 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
#print('list form')
|
#print('list form')
|
||||||
data_xpath = '//li[@class="s-result-item"]'
|
data_xpath = '//li[@class="s-result-item"]'
|
||||||
format_xpath = (
|
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||||
'.//h3[contains(@class, "s-inline")]/text()')
|
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||||
asin_xpath = '@data-asin'
|
asin_xpath = '@data-asin'
|
||||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||||
@ -114,7 +115,11 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
# if it isn't.
|
# if it isn't.
|
||||||
format_ = ''.join(data.xpath(format_xpath))
|
format_ = ''.join(data.xpath(format_xpath))
|
||||||
if 'kindle' not in format_.lower():
|
if 'kindle' not in format_.lower():
|
||||||
continue
|
if format_xpath2:
|
||||||
|
format_ = ''.join(data.xpath(format_xpath2))
|
||||||
|
if 'kindle' not in format_.lower():
|
||||||
|
# print(etree.tostring(data, pretty_print=True))
|
||||||
|
continue
|
||||||
|
|
||||||
# We must have an asin otherwise we can't easily reference the
|
# We must have an asin otherwise we can't easily reference the
|
||||||
# book later.
|
# book later.
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
store_version = 6 # Needed for dynamic plugin loading
|
store_version = 7 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import re
|
import re
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
# from lxml import html
|
||||||
|
|
||||||
from PyQt5.Qt import QUrl
|
from PyQt5.Qt import QUrl
|
||||||
|
|
||||||
@ -62,6 +63,7 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
allText = f.read()
|
allText = f.read()
|
||||||
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
doc = html.fromstring(allText)#.decode('latin-1', 'replace'))
|
||||||
|
|
||||||
|
format_xpath2 = ''
|
||||||
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
|
||||||
#print('grid form')
|
#print('grid form')
|
||||||
data_xpath = '//div[contains(@class, "prod")]'
|
data_xpath = '//div[contains(@class, "prod")]'
|
||||||
@ -92,8 +94,8 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
|
||||||
#print('list form')
|
#print('list form')
|
||||||
data_xpath = '//li[@class="s-result-item"]'
|
data_xpath = '//li[@class="s-result-item"]'
|
||||||
format_xpath = (
|
format_xpath = './/a[contains(@class, "a-size-small")]/text()'
|
||||||
'.//h3[contains(@class, "s-inline")]/text()')
|
format_xpath2 = './/h3[contains(@class, "s-inline")]/text()'
|
||||||
asin_xpath = '@data-asin'
|
asin_xpath = '@data-asin'
|
||||||
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
cover_xpath = './/img[contains(@class, "cfMarker")]/@src'
|
||||||
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
title_xpath = './/h2[contains(@class, "s-access-title")]/text()'
|
||||||
@ -118,7 +120,11 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
# if it isn't.
|
# if it isn't.
|
||||||
format_ = ''.join(data.xpath(format_xpath))
|
format_ = ''.join(data.xpath(format_xpath))
|
||||||
if 'kindle' not in format_.lower():
|
if 'kindle' not in format_.lower():
|
||||||
continue
|
if format_xpath2:
|
||||||
|
format_ = ''.join(data.xpath(format_xpath2))
|
||||||
|
if 'kindle' not in format_.lower():
|
||||||
|
# print(etree.tostring(data, pretty_print=True))
|
||||||
|
continue
|
||||||
|
|
||||||
# We must have an asin otherwise we can't easily reference the
|
# We must have an asin otherwise we can't easily reference the
|
||||||
# book later.
|
# book later.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user