mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8477 (Series/Sequence Info no longer being downloaded)
This commit is contained in:
parent
f573d07e81
commit
1820832fa8
@ -241,7 +241,7 @@ def get_parsed_proxy(typ='http', debug=True):
|
||||
return ans
|
||||
|
||||
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False):
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
'''
|
||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||
@ -253,8 +253,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
|
||||
opener = Browser()
|
||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||
opener.set_handle_robots(False)
|
||||
opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
||||
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13')]
|
||||
if user_agent is None:
|
||||
user_agent = ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
||||
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
|
||||
opener.addheaders = [('User-agent', user_agent)]
|
||||
http_proxy = get_proxies().get('http', None)
|
||||
if http_proxy:
|
||||
opener.set_proxies({'http':http_proxy})
|
||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Fetch cover from LibraryThing.com based on ISBN number.
|
||||
'''
|
||||
|
||||
import sys, socket, os, re
|
||||
import sys, socket, os, re, random
|
||||
|
||||
from lxml import html
|
||||
import mechanize
|
||||
@ -16,13 +16,26 @@ from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
|
||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||
|
||||
def get_ua():
    '''
    Return a randomly chosen browser User-Agent string.

    Each call picks one entry from a fixed pool of desktop and mobile
    browser identifiers, so successive requests do not all present the
    same User-Agent.
    '''
    # Every entry MUST end with a comma. Adjacent string literals are
    # silently concatenated by Python, which would collapse the whole pool
    # into a single, invalid User-Agent string.
    # NOTE(review): the Firefox 3.6.11 entry appears twice, so it is picked
    # twice as often as the others -- confirm whether that is intentional.
    choices = (
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)',
        'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
    )
    return random.choice(choices)
|
||||
|
||||
|
||||
class HeadRequest(mechanize.Request):
    '''
    A mechanize request whose HTTP method is reported as HEAD.
    '''

    def get_method(self):
        '''Return the HTTP verb for this request: always ``'HEAD'``.'''
        return 'HEAD'
|
||||
|
||||
def check_for_cover(isbn, timeout=5.):
|
||||
br = browser()
|
||||
br = browser(user_agent=get_ua())
|
||||
br.set_handle_redirect(False)
|
||||
try:
|
||||
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
|
||||
@ -51,7 +64,7 @@ def login(br, username, password, force=True):
|
||||
|
||||
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
|
||||
src = None
|
||||
br = browser()
|
||||
br = browser(user_agent=get_ua())
|
||||
try:
|
||||
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
|
||||
except:
|
||||
@ -100,7 +113,7 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
mi = MetaInformation(title, authors)
|
||||
if isbn:
|
||||
br = browser()
|
||||
br = browser(user_agent=get_ua())
|
||||
if username and password:
|
||||
try:
|
||||
login(br, username, password, force=False)
|
||||
|
Loading…
x
Reference in New Issue
Block a user