mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8477 (Series/Sequence Info no longer being downloaded)
This commit is contained in:
parent
f573d07e81
commit
1820832fa8
@ -241,7 +241,7 @@ def get_parsed_proxy(typ='http', debug=True):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def browser(honor_time=True, max_time=2, mobile_browser=False):
|
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||||
'''
|
'''
|
||||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||||
@ -253,8 +253,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
|
|||||||
opener = Browser()
|
opener = Browser()
|
||||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||||
opener.set_handle_robots(False)
|
opener.set_handle_robots(False)
|
||||||
opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
if user_agent is None:
|
||||||
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13')]
|
user_agent = ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
||||||
|
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
|
||||||
|
opener.addheaders = [('User-agent', user_agent)]
|
||||||
http_proxy = get_proxies().get('http', None)
|
http_proxy = get_proxies().get('http', None)
|
||||||
if http_proxy:
|
if http_proxy:
|
||||||
opener.set_proxies({'http':http_proxy})
|
opener.set_proxies({'http':http_proxy})
|
||||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Fetch cover from LibraryThing.com based on ISBN number.
|
Fetch cover from LibraryThing.com based on ISBN number.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, socket, os, re
|
import sys, socket, os, re, random
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import mechanize
|
import mechanize
|
||||||
@ -16,13 +16,26 @@ from calibre.ebooks.chardet import strip_encoding_declarations
|
|||||||
|
|
||||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||||
|
|
||||||
|
def get_ua():
    '''
    Return a User-Agent string chosen at random from a small pool of
    browser identification strings.

    Each entry in the pool must be followed by a comma. Adjacent string
    literals with no comma between them are joined by Python into a single
    string, which would leave a one-element pool whose only member is all
    of the User-Agents fused together.
    '''
    choices = [
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)',
        'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
    ]
    # random.choice picks uniformly from the pool; no index arithmetic needed.
    return random.choice(choices)
|
||||||
|
|
||||||
|
|
||||||
class HeadRequest(mechanize.Request):
|
class HeadRequest(mechanize.Request):
|
||||||
|
|
||||||
def get_method(self):
|
def get_method(self):
|
||||||
return 'HEAD'
|
return 'HEAD'
|
||||||
|
|
||||||
def check_for_cover(isbn, timeout=5.):
|
def check_for_cover(isbn, timeout=5.):
|
||||||
br = browser()
|
br = browser(user_agent=get_ua())
|
||||||
br.set_handle_redirect(False)
|
br.set_handle_redirect(False)
|
||||||
try:
|
try:
|
||||||
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
|
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
|
||||||
@ -51,7 +64,7 @@ def login(br, username, password, force=True):
|
|||||||
|
|
||||||
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
|
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
|
||||||
src = None
|
src = None
|
||||||
br = browser()
|
br = browser(user_agent=get_ua())
|
||||||
try:
|
try:
|
||||||
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
|
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
|
||||||
except:
|
except:
|
||||||
@ -100,7 +113,7 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
|
|||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
mi = MetaInformation(title, authors)
|
mi = MetaInformation(title, authors)
|
||||||
if isbn:
|
if isbn:
|
||||||
br = browser()
|
br = browser(user_agent=get_ua())
|
||||||
if username and password:
|
if username and password:
|
||||||
try:
|
try:
|
||||||
login(br, username, password, force=False)
|
login(br, username, password, force=False)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user