mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle change of markup on google search pages
This commit is contained in:
parent
2529c2104c
commit
c477444367
@@ -17,7 +17,7 @@ from calibre import browser as _browser, prints, random_user_agent
|
|||||||
from calibre.utils.monotonic import monotonic
|
from calibre.utils.monotonic import monotonic
|
||||||
from calibre.utils.random_ua import accept_header_for_ua
|
from calibre.utils.random_ua import accept_header_for_ua
|
||||||
|
|
||||||
current_version = (1, 0, 1)
|
current_version = (1, 0, 2)
|
||||||
minimum_calibre_version = (2, 80, 0)
|
minimum_calibre_version = (2, 80, 0)
|
||||||
|
|
||||||
|
|
||||||
@@ -230,13 +230,13 @@ def google_search(terms, site=None, br=None, log=prints, safe_search=False, dump
|
|||||||
ans = []
|
ans = []
|
||||||
for div in root.xpath('//*[@id="search"]//*[@id="rso"]//*[@class="g"]'):
|
for div in root.xpath('//*[@id="search"]//*[@id="rso"]//*[@class="g"]'):
|
||||||
try:
|
try:
|
||||||
a = div.xpath('descendant::h3[@class="r"]/a[@href]')[0]
|
a = div.xpath('descendant::div[@class="r"]/a[@href]')[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
log('Ignoring div with no descendant')
|
log('Ignoring div with no descendant')
|
||||||
continue
|
continue
|
||||||
title = tostring(a)
|
title = tostring(a)
|
||||||
try:
|
try:
|
||||||
c = div.xpath('descendant::div[@class="s"]//a[@class="fl"]')[0]
|
c = div.xpath('descendant::div[@role="menu"]//a[@class="fl"]')[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
log('Ignoring {!r} as it has no cached page'.format(title))
|
log('Ignoring {!r} as it has no cached page'.format(title))
|
||||||
continue
|
continue
|
||||||
@@ -248,9 +248,9 @@ def google_search(terms, site=None, br=None, log=prints, safe_search=False, dump
|
|||||||
return ans, url
|
return ans, url
|
||||||
|
|
||||||
|
|
||||||
def google_develop():
|
def google_develop(search_terms='1423146786'):
|
||||||
br = browser()
|
br = browser()
|
||||||
for result in google_search('1423146786'.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]:
|
for result in google_search(search_terms.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]:
|
||||||
if '/dp/' in result.url:
|
if '/dp/' in result.url:
|
||||||
print(result.title)
|
print(result.title)
|
||||||
print(' ', result.url)
|
print(' ', result.url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user