Use mechanize for fetching web pages

This commit is contained in:
Kovid Goyal 2007-08-17 02:43:07 +00:00
parent 0c95bc3d6d
commit 653cd41d50
3 changed files with 6 additions and 7 deletions

View File

@ -233,7 +233,7 @@ setup(
'argv_emulation' : True, 'argv_emulation' : True,
'iconfile' : 'icons/library.icns', 'iconfile' : 'icons/library.icns',
'frameworks': ['libusb.dylib', 'libunrar.dylib'], 'frameworks': ['libusb.dylib', 'libunrar.dylib'],
'includes' : ['sip', 'pkg_resources', 'PyQt4.QtSvg'], 'includes' : ['sip', 'pkg_resources', 'PyQt4.QtSvg', 'mechanize', 'ClientForm'],
'packages' : ['PIL', 'Authorization',], 'packages' : ['PIL', 'Authorization',],
'excludes' : ['pydoc'], 'excludes' : ['pydoc'],
'plist' : { 'CFBundleGetInfoString' : '''libprs500, an E-book management application.''' 'plist' : { 'CFBundleGetInfoString' : '''libprs500, an E-book management application.'''

View File

@ -15,12 +15,12 @@
''' '''
Fetch a webpage and its links recursively. Fetch a webpage and its links recursively.
''' '''
import sys, socket, urllib2, os, urlparse, codecs, logging, re, time, copy import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2
from urllib import url2pathname from urllib import url2pathname
from httplib import responses from httplib import responses
from optparse import OptionParser from optparse import OptionParser
from libprs500 import __version__, __appname__, __author__, setup_cli_handlers from libprs500 import __version__, __appname__, __author__, setup_cli_handlers, browser
from libprs500.ebooks.BeautifulSoup import BeautifulSoup from libprs500.ebooks.BeautifulSoup import BeautifulSoup
class FetchError(Exception): class FetchError(Exception):
@ -57,6 +57,7 @@ class RecursiveFetcher(object):
os.makedirs(self.base_dir) os.makedirs(self.base_dir)
self.default_timeout = socket.getdefaulttimeout() self.default_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(options.timeout) socket.setdefaulttimeout(options.timeout)
self.browser = options.browser if hasattr(options, 'browser') else browser()
self.max_recursions = options.max_recursions self.max_recursions = options.max_recursions
self.match_regexps = [re.compile(i, re.IGNORECASE) for i in options.match_regexps] self.match_regexps = [re.compile(i, re.IGNORECASE) for i in options.match_regexps]
self.filter_regexps = [re.compile(i, re.IGNORECASE) for i in options.filter_regexps] self.filter_regexps = [re.compile(i, re.IGNORECASE) for i in options.filter_regexps]
@ -84,9 +85,7 @@ class RecursiveFetcher(object):
if delta < self.delay: if delta < self.delay:
time.sleep(delta) time.sleep(delta)
try: try:
opener = urllib2.build_opener() f = self.browser.open(url)
opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')]
f = opener.open(url)
except urllib2.URLError, err: except urllib2.URLError, err:
if hasattr(err, 'code') and responses.has_key(err.code): if hasattr(err, 'code') and responses.has_key(err.code):
raise FetchError, responses[err.code] raise FetchError, responses[err.code]

View File

@ -433,7 +433,7 @@ setup(
options = { 'py2exe' : {'compressed': 1, options = { 'py2exe' : {'compressed': 1,
'optimize' : 2, 'optimize' : 2,
'dist_dir' : PY2EXE_DIR, 'dist_dir' : PY2EXE_DIR,
'includes' : ['sip', 'pkg_resources', 'PyQt4.QtSvg'], 'includes' : ['sip', 'pkg_resources', 'PyQt4.QtSvg', 'mechanize', 'ClientForm'],
'packages' : ['PIL', 'WmfPlugin'], 'packages' : ['PIL', 'WmfPlugin'],
'excludes' : ["Tkconstants", "Tkinter", "tcl", 'excludes' : ["Tkconstants", "Tkinter", "tcl",
"_imagingtk", "ImageTk", "FixTk", "_imagingtk", "ImageTk", "FixTk",