Fix #8477 (Series/Sequence Info no longer being downloaded)

This commit is contained in:
Kovid Goyal 2011-01-20 11:59:19 -07:00
parent f573d07e81
commit 1820832fa8
2 changed files with 22 additions and 7 deletions

View File

@ -241,7 +241,7 @@ def get_parsed_proxy(typ='http', debug=True):
return ans return ans
def browser(honor_time=True, max_time=2, mobile_browser=False): def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
''' '''
Create a mechanize browser for web scraping. The browser handles cookies, Create a mechanize browser for web scraping. The browser handles cookies,
refresh requests and ignores robots.txt. Also uses proxy if avaialable. refresh requests and ignores robots.txt. Also uses proxy if avaialable.
@ -253,8 +253,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
opener = Browser() opener = Browser()
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time) opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
opener.set_handle_robots(False) opener.set_handle_robots(False)
opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \ if user_agent is None:
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13')] user_agent = ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
opener.addheaders = [('User-agent', user_agent)]
http_proxy = get_proxies().get('http', None) http_proxy = get_proxies().get('http', None)
if http_proxy: if http_proxy:
opener.set_proxies({'http':http_proxy}) opener.set_proxies({'http':http_proxy})

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch cover from LibraryThing.com based on ISBN number. Fetch cover from LibraryThing.com based on ISBN number.
''' '''
import sys, socket, os, re import sys, socket, os, re, random
from lxml import html from lxml import html
import mechanize import mechanize
@ -16,13 +16,26 @@ from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
def get_ua():
choices = [
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)'
'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16'
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19'
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
]
return choices[random.randint(0, len(choices)-1)]
class HeadRequest(mechanize.Request): class HeadRequest(mechanize.Request):
def get_method(self): def get_method(self):
return 'HEAD' return 'HEAD'
def check_for_cover(isbn, timeout=5.): def check_for_cover(isbn, timeout=5.):
br = browser() br = browser(user_agent=get_ua())
br.set_handle_redirect(False) br.set_handle_redirect(False)
try: try:
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout) br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
@ -51,7 +64,7 @@ def login(br, username, password, force=True):
def cover_from_isbn(isbn, timeout=5., username=None, password=None): def cover_from_isbn(isbn, timeout=5., username=None, password=None):
src = None src = None
br = browser() br = browser(user_agent=get_ua())
try: try:
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg' return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
except: except:
@ -100,7 +113,7 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
if isbn: if isbn:
br = browser() br = browser(user_agent=get_ua())
if username and password: if username and password:
try: try:
login(br, username, password, force=False) login(br, username, password, force=False)