Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Add a browse by tags mode

commit 5eb92ac4ee (parent bc115198c7)
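The diff below wires a tag-browsing view into the GUI (self.tags_view, the match_all/match_any widgets and the status bar's tag_view_button), connects its tags_marked signal to the search box, and teaches the HTML-to-EPUB pipeline to build a generated table of contents. As a quick orientation, here is a minimal, illustrative sketch of how the new SearchBox.search_from_tokens() turns the tokens emitted by the tags view into a search string; the standalone helper and the example tokens are hypothetical, only the join/bracket logic mirrors the method added in this commit.

def tokens_to_search_string(tokens, match_all):
    # tokens appear to be (category, value) pairs coming from the tags view,
    # e.g. ('tag', 'Fiction'); join them into calibre's category:value syntax
    ans = u' '.join([u'%s:%s' % x for x in tokens])
    if not match_all:
        # brackets seem to mark a match-any (rather than match-all) group
        ans = '[' + ans + ']'
    return ans

print tokens_to_search_string([('tag', 'Fiction'), ('tag', 'History')], False)
# -> [tag:Fiction tag:History]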
@@ -14,7 +14,7 @@ from calibre.constants import iswindows, isosx, islinux, isfrozen, \
        terminal_controller, preferred_encoding, \
        __appname__, __version__, __author__, \
        win32event, win32api, winerror, fcntl
from calibre.utils import mechanize
import mechanize

def unicode_path(path, abs=False):
    if not isinstance(path, unicode):
@@ -66,8 +66,6 @@ to auto-generate a Table of Contents.
        help=_('Maximum number of links from each HTML file to insert into the TOC. Set to 0 to disable. Default is: %default.'))
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
        help=_("Don't add auto-detected chapters to the Table of Contents."))
    toc('add_files_to_toc', ['--add-files-to-toc'], default=False,
        help=_('If more than one HTML file is found, create a TOC entry for each file.'))


    return c
@@ -5,19 +5,19 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys, re, shutil, cStringIO
from lxml.etree import XPath
from lxml import etree

from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.toc import TOC


class HTMLProcessor(Parser):
class HTMLProcessor(Processor):

    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, toc=None):
        Parser.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles):
        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
                name='html2epub')
        if opts.verbose > 2:
            self.debug_tree('parsed')
@@ -27,36 +27,11 @@ class HTMLProcessor(Parser):
        if opts.verbose > 2:
            self.debug_tree('nocss')

        if toc is not None:
            self.populate_toc(toc)

        self.collect_font_statistics()

        self.split()

    def detect_chapters(self):
        self.detected_chapters = self.opts.chapter(self.root)
        for elem in self.detected_chapters:
            style = elem.get('style', '')
            style += ';page-break-before: always'
            elem.set(style, style)

    def save(self):
        head = self.root.xpath('//head')
        if head:
            head = head[0]
        else:
            head = self.root.xpath('//body')
            head = head[0] if head else self.root
        style = etree.SubElement(head, 'style', attrib={'type':'text/css'})
        style.text='\n'+self.css
        style.tail = '\n\n'
        Parser.save(self)

    def populate_toc(self, toc):
        if self.level >= self.opts.max_toc_recursion:
            return


    def collect_font_statistics(self):
        '''
@@ -93,11 +68,13 @@ the <spine> element of the OPF file.
def parse_content(filelist, opts, tdir):
    os.makedirs(os.path.join(tdir, 'content', 'resources'))
    resource_map = {}
    toc = TOC(base_path=tdir)
    for htmlfile in filelist:
        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                resource_map, filelist)
        hp.populate_toc(toc)
        hp.save()
    return resource_map, hp.htmlfile_map
    return resource_map, hp.htmlfile_map, toc

def convert(htmlfile, opts, notification=None):
    htmlfile = os.path.abspath(htmlfile)
@@ -115,7 +92,7 @@ def convert(htmlfile, opts, notification=None):
            namespaces={'re':'http://exslt.org/regular-expressions'})

    with TemporaryDirectory('_html2epub') as tdir:
        resource_map, htmlfile_map = parse_content(filelist, opts, tdir)
        resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
        resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]

        if opf.cover and os.access(opf.cover, os.R_OK):
@@ -130,6 +107,8 @@ def convert(htmlfile, opts, notification=None):
        buf = cStringIO.StringIO()
        if mi.toc:
            rebase_toc(mi.toc, htmlfile_map, opts.output)
        if mi.toc is None or len(mi.toc) < 2:
            mi.toc = generated_toc
        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
            mi.render(f, buf)
        toc = buf.getvalue()
@@ -12,7 +12,7 @@ import sys, re, os, shutil, logging, tempfile, cStringIO
from urlparse import urlparse
from urllib import unquote

from lxml import html
from lxml import html, etree
from lxml.etree import XPath
get_text = XPath("//text()")

@@ -83,20 +83,24 @@ class HTMLFile(object):
    The encoding of the file is available as :member:`encoding`.
    '''

    HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
    LINK_PAT = re.compile(
    HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
    TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
    LINK_PAT = re.compile(
        r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s]+))',
        re.DOTALL|re.IGNORECASE)

    def __init__(self, path_to_html_file, level, encoding, verbose):
    def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
        '''
        :param level: The level of this file. Should be 0 for the root file.
        :param encoding: Use `encoding` to decode HTML.
        :param referrer: The :class:`HTMLFile` that first refers to this file.
        '''
        self.path = unicode_path(path_to_html_file, abs=True)
        self.base = os.path.dirname(self.path)
        self.level = level
        self.links = []
        self.path = unicode_path(path_to_html_file, abs=True)
        self.title = os.path.splitext(os.path.basename(self.path))[0]
        self.base = os.path.dirname(self.path)
        self.level = level
        self.referrer = referrer
        self.links = []

        try:
            with open(self.path, 'rb') as f:
@@ -115,6 +119,9 @@ class HTMLFile(object):
            self.encoding = encoding

        src = src.decode(encoding, 'replace')
        match = self.TITLE_PAT.search(src)
        if match is not None:
            self.title = match.group(1)
        self.find_links(src)

@@ -187,7 +194,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
            if link.path is None or link.path in flat:
                continue
            try:
                nf = HTMLFile(link.path, level, encoding, verbose)
                nf = HTMLFile(link.path, level, encoding, verbose, referrer=hf)
                nl.append(nf)
                flat.append(nf)
            except IgnoreFile, err:
@@ -383,12 +390,110 @@ class Parser(PreProcessor, LoggingInterface):
            name = 'resources/' + name
        self.resource_map[link.path] = name
        return name

class Processor(Parser):
    '''
    This class builds on :class:`Parser` to provide additional methods
    to perform various processing/modification tasks on HTML files.
    '''

    LINKS_PATH = XPath('//a[@href]')

    def detect_chapters(self):
        self.detected_chapters = self.opts.chapter(self.root)
        for elem in self.detected_chapters:
            style = elem.get('style', '').strip()
            if style and not style.endswith(';'):
                style += '; '
            style += 'page-break-before: always'
            elem.set(style, style)

    def save(self):
        head = self.root.xpath('//head')
        if head:
            head = head[0]
        else:
            head = self.root.xpath('//body')
            head = head[0] if head else self.root
        style = etree.SubElement(head, 'style', attrib={'type':'text/css'})
        style.text='\n'+self.css
        style.tail = '\n\n'
        Parser.save(self)

    def populate_toc(self, toc):
        if self.level >= self.opts.max_toc_recursion:
            return

        referrer = toc
        if self.htmlfile.referrer is not None:
            name = self.htmlfile_map[self.htmlfile.referrer]
            href = 'content/'+name
            for i in toc.flat():
                if href == i.href and i.fragment is None:
                    referrer = i
                    break

        def add_item(href, fragment, text, target):
            for entry in toc.flat():
                if entry.href == href and entry.fragment ==fragment:
                    return entry
            if len(text) > 50:
                text = text[:50] + u'\u2026'
            return target.add_item(href, fragment, text)

        name = self.htmlfile_map[self.htmlfile]
        href = 'content/'+name

        if referrer.href != href: # Happens for root file
            target = add_item(href, None, self.htmlfile.title, referrer)

        # Add links to TOC
        if self.opts.max_toc_links > 0:
            for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
                text = (u''.join(link.xpath('string()'))).strip()
                if text:
                    href = link.get('href', '')
                    if href:
                        href = 'content/'+href
                        parts = href.split('#')
                        href, fragment = parts[0], None
                        if len(parts) > 1:
                            fragment = parts[1]
                        if self.htmlfile.referrer is not None:
                            name = self.htmlfile_map[self.htmlfile.referrer.path]
                        add_item(href, fragment, text, target)

        # Add chapters to TOC
        if not self.opts.no_chapters_in_toc:
            for elem in getattr(self, 'detected_chapters', []):
                text = (u''.join(elem.xpath('string()'))).strip()
                if text:
                    name = self.htmlfile_map[self.path]
                    href = 'content/'+name
                    add_item(href, None, text, target)


    def extract_css(self):
        '''
        Remove all CSS information from the document and store in self.raw_css.
        This includes <font> tags.
        '''
        counter = 0
        def get_id(chapter, prefix='calibre_css_'):
            new_id = '%s_%d'%(prefix, counter)
            counter += 1
            if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
                chapter.attrib['id'] = id = chapter.get('name')
                if not id:
                    chapter.attrib['id'] = chapter.attrib['name'] = new_id
                    return new_id
            if 'id' in chapter.keys():
                id = chapter.get('id')
            else:
                id = new_id
                chapter.set('id', id)
            return id

        css = []
        for link in self.root.xpath('//link'):
            if 'css' in link.get('type', 'text/css').lower():
@@ -402,7 +507,6 @@ class Parser(PreProcessor, LoggingInterface):
                css.append('\n'.join(get_text(style)))
                style.getparent().remove(style)

        font_id = 1
        for font in self.root.xpath('//font'):
            try:
                size = int(font.attrib.pop('size', '3'))
@@ -415,37 +519,15 @@ class Parser(PreProcessor, LoggingInterface):
                color = font.attrib.pop('color', None)
                if color is not None:
                    setting += 'color:%s'%color
                id = 'calibre_font_id_%d'%font_id
                font.set('id', 'calibre_font_id_%d'%font_id)
                font_id += 1
                id = get_id(font)
                css.append('#%s { %s }'%(id, setting))


        css_counter = 1
        for elem in self.root.xpath('//*[@style]'):
            if 'id' not in elem.keys():
                elem.set('id', 'calibre_css_id_%d'%css_counter)
                css_counter += 1
            css.append('#%s {%s}'%(elem.get('id'), elem.get('style')))
            id = get_id(elem)
            css.append('#%s {%s}'%(id, elem.get('style')))
            elem.attrib.pop('style')
        chapter_counter = 1
        for chapter in self.detected_chapters:
            if chapter.tag.lower() == 'a':
                if 'name' in chapter.keys():
                    chapter.attrib['id'] = id = chapter.get('name')
                elif 'id' in chapter.keys():
                    id = chapter.get('id')
                else:
                    id = 'calibre_detected_chapter_%d'%chapter_counter
                    chapter_counter += 1
                    chapter.set('id', id)
            else:
                if 'id' not in chapter.keys():
                    id = 'calibre_detected_chapter_%d'%chapter_counter
                    chapter_counter += 1
                    chapter.set('id', id)
            css.append('#%s {%s}'%(id, 'page-break-before:always'))


        self.raw_css = '\n\n'.join(css)
        self.css = unicode(self.raw_css)
        # TODO: Figure out what to do about CSS imports from linked stylesheets
@@ -48,10 +48,19 @@ class TOC(list):
                depth = c + 1
        return depth

    def flat(self):
        'Depth first iteration over the tree rooted at self'
        yield self
        for obj in self:
            for i in obj.flat():
                yield i

    @apply
    def abspath():
        doc='Return the file this toc entry points to as a absolute path to a file on the system.'
        def fget(self):
            if self.href is None:
                return None
            path = self.href.replace('/', os.sep)
            if not os.path.isabs(path):
                path = os.path.join(self.base_path, path)
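Aside: the flat() generator added to TOC above is a plain depth-first walk over the entry and its children. A minimal illustrative sketch follows; the ExampleTOC class is hypothetical and only its flat() mirrors the method added in this hunk.

class ExampleTOC(list):
    def __init__(self, title=''):
        list.__init__(self)
        self.title = title

    def flat(self):
        'Depth first iteration over the tree rooted at self'
        yield self
        for obj in self:
            for i in obj.flat():
                yield i

root = ExampleTOC('root')
chapter = ExampleTOC('Chapter 1')
root.append(chapter)
chapter.append(ExampleTOC('Section 1.1'))
print [t.title for t in root.flat()]
# -> ['root', 'Chapter 1', 'Section 1.1']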
@@ -3,7 +3,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import sys, os, re, StringIO, traceback
from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \
                         QByteArray, QLocale, QUrl, QTranslator, QCoreApplication
                         QByteArray, QLocale, QUrl, QTranslator, QCoreApplication, \
                         QModelIndex
from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
                        QIcon, QTableView, QDialogButtonBox, QApplication

@@ -159,7 +160,7 @@ class TableView(QTableView):
        else:
            cols = dynamic[key]
            if not cols:
                cols = [True for i in range(self.model().columnCount(self))]
                cols = [True for i in range(self.model().columnCount(QModelIndex()))]

        for i in range(len(cols)):
            hidden = self.isColumnHidden(i)
@@ -304,8 +304,8 @@ class MetadataSingleDialog(QDialog, Ui_MetadataSingleDialog):
        self.title.setText(book.title)
        self.authors.setText(', '.join(book.authors))
        if book.author_sort: self.author_sort.setText(book.author_sort)
        self.publisher.setText(book.publisher)
        self.isbn.setText(book.isbn)
        if book.publisher: self.publisher.setText(book.publisher)
        if book.isbn: self.isbn.setText(book.isbn)
        summ = book.comments
        if summ:
            prefix = qstring_to_unicode(self.comments.toPlainText())
325  src/calibre/gui2/images/minus.svg  (new file, 14 KiB)
@@ -0,0 +1,325 @@
[SVG source omitted: a 128x128 Oxygen-style "list-remove" (minus) icon drawn in Inkscape; the full markup is not reproduced here.]
File diff suppressed because it is too large (image: 24 KiB before, 21 KiB after)
BIN  src/calibre/gui2/images/publisher.png  (new file, 17 KiB; binary file not shown)
1096  src/calibre/gui2/images/series.svg  (new file, 50 KiB; file diff suppressed because it is too large)
503  src/calibre/gui2/images/tags.svg  (new file, 27 KiB)
@@ -0,0 +1,503 @@
[SVG source omitted: a 128x128 Oxygen-style tag icon (sodipodi docname "rss_tag.svg") drawn in Inkscape, added alongside the new browse-by-tags mode; the full markup is not reproduced here.]
@ -201,9 +201,13 @@ class BooksModel(QAbstractTableModel):
                LibraryDatabase.sizeof_old_database(path) > 0

    def columnCount(self, parent):
        if parent and parent.isValid():
            return 0
        return len(self.cols)

    def rowCount(self, parent):
        if parent and parent.isValid():
            return 0
        return self.db.rows() if self.db else 0

    def count(self):
@ -676,9 +680,13 @@ class DeviceBooksModel(BooksModel):
        self.reset()

    def columnCount(self, parent):
        if parent and parent.isValid():
            return 0
        return 5

    def rowCount(self, parent):
        if parent and parent.isValid():
            return 0
        return len(self.map)

    def set_database(self, db):
@ -855,6 +863,13 @@ class SearchBox(QLineEdit):
            self.prev_search = text
            self.emit(SIGNAL('search(PyQt_PyObject, PyQt_PyObject)'), text, refinement)

    def search_from_tokens(self, tokens, all):
        ans = u' '.join([u'%s:%s'%x for x in tokens])
        if not all:
            ans = '[' + ans + ']'
        self.set_search_string(ans)

    def set_search_string(self, txt):
        self.normalize_state()
        self.setText(txt)
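A note on the SearchBox hunk above: the tokens it receives are (category, value) pairs, and a value prefixed with '!' marks an excluded tag. A minimal, illustrative sketch of the query string it builds -- the sample tokens here are hypothetical, not part of the commit:

    tokens = [('tag', 'fiction'), ('author', '!unknown')]   # hypothetical sample
    match_all = False                                       # i.e. the "Match any" radio button is selected
    ans = u' '.join([u'%s:%s' % x for x in tokens])         # u'tag:fiction author:!unknown'
    if not match_all:
        ans = '[' + ans + ']'                               # u'[tag:fiction author:!unknown]'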
@ -245,6 +245,13 @@ in which you want to store your books files. Any existing books will be automati
        self.cover_cache = CoverCache(self.library_path)
        self.cover_cache.start()
        self.library_view.model().cover_cache = self.cover_cache
        self.tags_view.setVisible(False)
        self.match_all.setVisible(False)
        self.match_any.setVisible(False)
        self.tags_view.set_database(db, self.match_all)
        self.connect(self.tags_view, SIGNAL('tags_marked(PyQt_PyObject, PyQt_PyObject)'),
                     self.search.search_from_tokens)
        self.connect(self.status_bar.tag_view_button, SIGNAL('toggled(bool)'), self.toggle_tags_view)
        ########################### Cover Flow ################################
        self.cover_flow = None
        if CoverFlow is not None:
@ -284,6 +291,16 @@ in which you want to store your books files. Any existing books will be automati
            self.status_bar.book_info.book_data.setMaximumHeight(1000)
            self.setMaximumHeight(available_height())

    def toggle_tags_view(self, show):
        if show:
            self.tags_view.setVisible(True)
            self.match_all.setVisible(True)
            self.match_any.setVisible(True)
            self.tags_view.setFocus(Qt.OtherFocusReason)
        else:
            self.tags_view.setVisible(False)
            self.match_all.setVisible(False)
            self.match_any.setVisible(False)

    def sync_cf_to_listview(self, index, *args):
        if not hasattr(index, 'row') and self.library_view.currentIndex().row() != index:
@ -787,7 +804,8 @@ in which you want to store your books files. Any existing books will be automati
            self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000)
            self.persistent_files.append(pt)
            try:
                os.remove(pt.name)
                if not to_device:
                    os.remove(pt.name)
            except:
                pass

@ -24,14 +24,6 @@
|
||||
<normaloff>:/library</normaloff>:/library</iconset>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralwidget" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>79</y>
|
||||
<width>865</width>
|
||||
<height>716</height>
|
||||
</rect>
|
||||
</property>
|
||||
<layout class="QGridLayout" >
|
||||
<item row="0" column="0" >
|
||||
<layout class="QHBoxLayout" >
|
||||
@ -242,60 +234,88 @@
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="currentIndex" >
|
||||
<number>2</number>
|
||||
<number>0</number>
|
||||
</property>
|
||||
<widget class="QWidget" name="library" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>100</width>
|
||||
<height>30</height>
|
||||
</rect>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" >
|
||||
<layout class="QVBoxLayout" name="verticalLayout_2" >
|
||||
<item>
|
||||
<widget class="BooksView" name="library_view" >
|
||||
<property name="sizePolicy" >
|
||||
<sizepolicy vsizetype="Expanding" hsizetype="Expanding" >
|
||||
<horstretch>100</horstretch>
|
||||
<verstretch>10</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="acceptDrops" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="dragEnabled" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="dragDropOverwriteMode" >
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="dragDropMode" >
|
||||
<enum>QAbstractItemView::DragDrop</enum>
|
||||
</property>
|
||||
<property name="alternatingRowColors" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="selectionBehavior" >
|
||||
<enum>QAbstractItemView::SelectRows</enum>
|
||||
</property>
|
||||
<property name="showGrid" >
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout" >
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="verticalLayout" >
|
||||
<item>
|
||||
<widget class="QRadioButton" name="match_any" >
|
||||
<property name="text" >
|
||||
<string>Match any</string>
|
||||
</property>
|
||||
<property name="checked" >
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QRadioButton" name="match_all" >
|
||||
<property name="text" >
|
||||
<string>Match all</string>
|
||||
</property>
|
||||
<property name="checked" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="TagsView" name="tags_view" >
|
||||
<property name="tabKeyNavigation" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="alternatingRowColors" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="animated" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="headerHidden" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="BooksView" name="library_view" >
|
||||
<property name="sizePolicy" >
|
||||
<sizepolicy vsizetype="Expanding" hsizetype="Expanding" >
|
||||
<horstretch>100</horstretch>
|
||||
<verstretch>10</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="acceptDrops" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="dragEnabled" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="dragDropOverwriteMode" >
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="dragDropMode" >
|
||||
<enum>QAbstractItemView::DragDrop</enum>
|
||||
</property>
|
||||
<property name="alternatingRowColors" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="selectionBehavior" >
|
||||
<enum>QAbstractItemView::SelectRows</enum>
|
||||
</property>
|
||||
<property name="showGrid" >
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="main_memory" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>100</width>
|
||||
<height>30</height>
|
||||
</rect>
|
||||
</property>
|
||||
<layout class="QGridLayout" >
|
||||
<item row="0" column="0" >
|
||||
<widget class="DeviceBooksView" name="memory_view" >
|
||||
@ -331,14 +351,6 @@
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="page" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>857</width>
|
||||
<height>552</height>
|
||||
</rect>
|
||||
</property>
|
||||
<layout class="QGridLayout" >
|
||||
<item row="0" column="0" >
|
||||
<widget class="DeviceBooksView" name="card_view" >
|
||||
@ -378,14 +390,6 @@
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QToolBar" name="tool_bar" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>865</width>
|
||||
<height>79</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="minimumSize" >
|
||||
<size>
|
||||
<width>0</width>
|
||||
@ -425,14 +429,6 @@
|
||||
<addaction name="action_view" />
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusBar" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>795</y>
|
||||
<width>865</width>
|
||||
<height>27</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="mouseTracking" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
@ -564,6 +560,11 @@
|
||||
<extends>QTableView</extends>
|
||||
<header>library.h</header>
|
||||
</customwidget>
|
||||
<customwidget>
|
||||
<class>TagsView</class>
|
||||
<extends>QTreeView</extends>
|
||||
<header>tags.h</header>
|
||||
</customwidget>
|
||||
</customwidgets>
|
||||
<resources>
|
||||
<include location="images.qrc" />
|
||||
|
@ -140,13 +140,28 @@ class CoverFlowButton(QToolButton):
    def disable(self, reason):
        self.setDisabled(True)
        self.setToolTip(_('<p>Browsing books by their covers is disabled.<br>Import of pictureflow module failed:<br>')+reason)

class TagViewButton(QToolButton):

    def __init__(self, parent=None):
        QToolButton.__init__(self, parent)
        self.setIconSize(QSize(80, 80))
        self.setIcon(QIcon(':/images/tags.svg'))
        self.setToolTip(_('Click to browse books by tags'))
        self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding))
        self.setCheckable(True)
        self.setChecked(False)
        self.setAutoRaise(True)


class StatusBar(QStatusBar):
    def __init__(self, jobs_dialog):
        QStatusBar.__init__(self)
        self.movie_button = MovieButton(QMovie(':/images/jobs-animated.mng'), jobs_dialog)
        self.cover_flow_button = CoverFlowButton()
        self.tag_view_button = TagViewButton()
        self.addPermanentWidget(self.cover_flow_button)
        self.addPermanentWidget(self.tag_view_button)
        self.addPermanentWidget(self.movie_button)
        self.book_info = BookInfoDisplay(self.clearMessage)
        self.connect(self.book_info, SIGNAL('show_book_info()'), self.show_book_info)
143 src/calibre/gui2/tags.py Normal file
@ -0,0 +1,143 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Browsing book collection by tags.
'''

from PyQt4.Qt import QAbstractItemModel, Qt, QVariant, QTreeView, QModelIndex, \
                     QFont, SIGNAL, QSize, QColor, QIcon

NONE = QVariant()

class TagsView(QTreeView):

    def __init__(self, *args):
        QTreeView.__init__(self, *args)
        self.setUniformRowHeights(True)
        self.setCursor(Qt.PointingHandCursor)
        self.setIconSize(QSize(30, 30))

    def set_database(self, db, match_all):
        self._model = TagsModel(db)
        self.match_all = match_all
        self.setModel(self._model)
        self.connect(self, SIGNAL('clicked(QModelIndex)'), self.toggle)

    def toggle(self, index):
        if self._model.toggle(index):
            self.emit(SIGNAL('tags_marked(PyQt_PyObject, PyQt_PyObject)'),
                      self._model.tokens(), self.match_all.isChecked())

class Tag(unicode):

    def __init__(self, name):
        unicode.__init__(self, name)
        self.state = 0

class TagsModel(QAbstractItemModel):

    categories = [_('Authors'), _('Series'), _('Formats'), _('Publishers'), _('Tags')]
    row_map = {0: 'author', 1:'series', 2:'format', 3:'publisher', 4:'tag'}

    def __init__(self, db):
        QAbstractItemModel.__init__(self)
        self.db = db
        self.refresh()
        self.bold_font = QFont()
        self.bold_font.setBold(True)
        self.bold_font = QVariant(self.bold_font)
        self.status_map = [QColor(200,200,200, 0), QIcon(':/images/plus.svg'), QIcon(':/images/minus.svg')]
        self.status_map = list(map(QVariant, self.status_map))
        self.cmap = [QIcon(':/images/user_profile.svg'), QIcon(':/images/series.svg'), QIcon(':/images/book.svg'), QIcon(':/images/publisher.png'), QIcon(':/images/tags.svg')]
        self.cmap = list(map(QVariant, self.cmap))

    def refresh(self):
        self._data = self.db.get_categories()
        for key in self._data:
            self._data[key] = list(map(Tag, self._data[key]))
        self.reset()

    def toggle(self, index):
        if index.parent().isValid():
            category = self.row_map[index.parent().row()]
            tag = self._data[category][index.row()]
            tag.state = (tag.state + 1)%3
            self.emit(SIGNAL('dataChanged(QModelIndex,QModelIndex)'), index, index)
            return True
        return False

    def tokens(self):
        ans = []
        for key in self.row_map.values():
            for tag in self._data[key]:
                if tag.state > 0:
                    if tag.state == 2:
                        tag = '!'+tag
                    ans.append((key, tag))
        return ans

    def index(self, row, col, parent=QModelIndex()):
        if parent.isValid():
            if parent.parent().isValid(): # parent is a tag
                return QModelIndex()
            try:
                category = self.row_map[parent.row()]
            except KeyError:
                return QModelIndex()
            if col == 0 and row < len(self._data[category]):
                return self.createIndex(row, col, parent.row())
            return QModelIndex()
        if col == 0 and row < len(self.categories):
            return self.createIndex(row, col, -1)
        return QModelIndex()

    def parent(self, index):
        if not index.isValid() or index.internalId() < 0:
            return QModelIndex()
        return self.createIndex(index.internalId(), 0, -1)

    def rowCount(self, parent):
        if not parent or not parent.isValid():
            return len(self.categories)
        if not parent.parent().isValid():
            return len(self._data[self.row_map[parent.row()]])
        return 0

    def columnCount(self, parent):
        return 1

    def flags(self, index):
        if not index.isValid():
            return Qt.NoItemFlags
        return Qt.ItemIsEnabled

    def category_data(self, index, role):
        if role == Qt.DisplayRole:
            row = index.row()
            return QVariant(self.categories[row])
        if role == Qt.FontRole:
            return self.bold_font
        if role == Qt.SizeHintRole:
            return QVariant(QSize(100, 40))
        if role == Qt.DecorationRole:
            return self.cmap[index.row()]
        return NONE

    def tag_data(self, index, role):
        category = self.row_map[index.parent().row()]
        if role == Qt.DisplayRole:
            return QVariant(self._data[category][index.row()])
        if role == Qt.DecorationRole:
            return self.status_map[self._data[category][index.row()].state]
        return NONE

    def data(self, index, role):
        if not index.parent().isValid():
            return self.category_data(index, role)
        if not index.parent().parent().isValid():
            return self.tag_data(index, role)
        return NONE
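The model above gives every Tag a three-valued state that a click cycles through: 0 (ignored), 1 (include, plus icon), 2 (exclude, minus icon, reported by tokens() with a '!' prefix). A Qt-free sketch of that cycle, for illustration only; the class name here is hypothetical and only the state logic mirrors the new file:

    class PlainTag(unicode):
        def __init__(self, name):
            unicode.__init__(self, name)
            self.state = 0                 # 0 = ignored, 1 = include, 2 = exclude

    tag = PlainTag(u'fiction')
    for click in range(4):
        print tag, tag.state               # prints states 0, 1, 2, 0 on successive clicks
        tag.state = (tag.state + 1) % 3
    # tokens() would emit ('tag', u'fiction') at state 1 and ('tag', u'!fiction') at state 2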
@ -214,7 +214,7 @@ class ResultCache(object):
        for id in ids:
            self._data[id] = conn.execute('SELECT * from meta WHERE id=?', (id,)).fetchone()
        return map(self.row, ids)

    def refresh(self, db, field, ascending):
        field = field.lower()
        method = getattr(self, 'sort_on_' + self.METHOD_MAP[field])
@ -396,6 +396,25 @@ class LibraryDatabase2(LibraryDatabase):
        CREATE INDEX series_idx ON series (name COLLATE NOCASE);
        CREATE INDEX series_sort_idx ON books (series_index, id);
        '''))

    def upgrade_version_2(self):
        ''' Fix Foreign key constraints for deleting from link tables. '''
        script = textwrap.dedent('''\
        DROP TRIGGER fkc_delete_books_%(ltable)s_link;
        CREATE TRIGGER fkc_delete_on_%(table)s
        BEFORE DELETE ON %(table)s
        BEGIN
            SELECT CASE
                WHEN (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=OLD.id) > 0
                THEN RAISE(ABORT, 'Foreign key violation: %(table)s is still referenced')
            END;
        END;
        DELETE FROM %(table)s WHERE (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=%(table)s.id) < 1;
        ''')
        self.conn.executescript(script%dict(ltable='authors', table='authors', ltable_col='author'))
        self.conn.executescript(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
        self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))

    def path(self, index, index_is_id=False):
        'Return the relative path to the directory containing this books files as a unicode string.'
@ -596,6 +615,33 @@ class LibraryDatabase2(LibraryDatabase):
        self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
        self.conn.commit()

    def clean(self):
        '''
        Remove orphaned entries.
        '''
        st = 'DELETE FROM %(table)s WHERE (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=%(table)s.id) < 1;'
        self.conn.execute(st%dict(ltable='authors', table='authors', ltable_col='author'))
        self.conn.execute(st%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
        self.conn.execute(st%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.execute(st%dict(ltable='series', table='series', ltable_col='series'))
        self.conn.commit()

    def get_categories(self):
        categories = {}
        def get(name, category, field='name'):
            ans = self.conn.execute('SELECT DISTINCT %s FROM %s'%(field, name)).fetchall()
            ans = [x[0].strip() for x in ans]
            try:
                ans.remove('')
            except ValueError: pass
            ans.sort()
            categories[category] = ans
        for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'), ('series', 'series')):
            get(*x)
        get('data', 'format', 'format')
        return categories


    def set(self, row, column, val):
        '''
        Convenience method for setting the title, authors, publisher or rating
@ -650,6 +696,7 @@ class LibraryDatabase2(LibraryDatabase):
        `authors`: A list of authors.
        '''
        self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))
        self.conn.execute('DELETE FROM authors WHERE (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) < 1')
        for a in authors:
            if not a:
                continue
@ -672,9 +719,47 @@ class LibraryDatabase2(LibraryDatabase):
            return
        self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
        self.set_path(id, True)


    def set_publisher(self, id, publisher):
        self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
        self.conn.execute('DELETE FROM publishers WHERE (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) < 1')
        if publisher:
            pub = self.conn.execute('SELECT id from publishers WHERE name=?', (publisher,)).fetchone()
            if pub:
                aid = pub[0]
            else:
                aid = self.conn.execute('INSERT INTO publishers(name) VALUES (?)', (publisher,)).lastrowid
            self.conn.execute('INSERT INTO books_publishers_link(book, publisher) VALUES (?,?)', (id, aid))
        self.conn.commit()

    def set_tags(self, id, tags, append=False):
        '''
        @param tags: list of strings
        @param append: If True existing tags are not removed
        '''
        if not append:
            self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,))
            self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1')
        for tag in set(tags):
            tag = tag.lower().strip()
            if not tag:
                continue
            t = self.conn.execute('SELECT id FROM tags WHERE name=?', (tag,)).fetchone()
            if t:
                tid = t[0]
            else:
                tid = self.conn.execute('INSERT INTO tags(name) VALUES(?)', (tag,)).lastrowid

            if not self.conn.execute('SELECT book FROM books_tags_link WHERE book=? AND tag=?',
                            (id, tid)).fetchone():
                self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
                            (id, tid))
        self.conn.commit()


    def set_series(self, id, series):
        self.conn.execute('DELETE FROM books_series_link WHERE book=?',(id,))
        self.conn.execute('DELETE FROM series WHERE (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) < 1')
        if series:
            s = self.conn.execute('SELECT id from series WHERE name=?', (series,)).fetchone()
            if s:
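For clarity, the %-substitution in upgrade_version_2 above renders the trigger template once per link table, e.g. for tags:

    print script % dict(ltable='tags', table='tags', ltable_col='tag')

which prints roughly the following SQL (the exact whitespace here is illustrative, not stored anywhere in the commit):

    DROP TRIGGER fkc_delete_books_tags_link;
    CREATE TRIGGER fkc_delete_on_tags
    BEFORE DELETE ON tags
    BEGIN
        SELECT CASE
            WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
            THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
        END;
    END;
    DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1;

The same expansion is applied to authors, publishers and series with their respective link tables.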
@ -31,7 +31,7 @@ class Distribution(object):
            ('libusb', '0.1.12', None, None, None),
            ('Qt', '4.4.0', 'qt', 'libqt4-core libqt4-gui', 'qt4'),
            ('PyQt', '4.4.2', 'PyQt4', 'python-qt4', 'PyQt4'),
            ('mechanize for python', '0.1.7b', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
            ('mechanize for python', '0.1.8', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
            ('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
            ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
            ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
@ -1,125 +0,0 @@
|
||||
__all__ = [
|
||||
'AbstractBasicAuthHandler',
|
||||
'AbstractDigestAuthHandler',
|
||||
'BaseHandler',
|
||||
'Browser',
|
||||
'BrowserStateError',
|
||||
'CacheFTPHandler',
|
||||
'ContentTooShortError',
|
||||
'Cookie',
|
||||
'CookieJar',
|
||||
'CookiePolicy',
|
||||
'DefaultCookiePolicy',
|
||||
'DefaultFactory',
|
||||
'FTPHandler',
|
||||
'Factory',
|
||||
'FileCookieJar',
|
||||
'FileHandler',
|
||||
'FormNotFoundError',
|
||||
'FormsFactory',
|
||||
'GopherError',
|
||||
'GopherHandler',
|
||||
'HTTPBasicAuthHandler',
|
||||
'HTTPCookieProcessor',
|
||||
'HTTPDefaultErrorHandler',
|
||||
'HTTPDigestAuthHandler',
|
||||
'HTTPEquivProcessor',
|
||||
'HTTPError',
|
||||
'HTTPErrorProcessor',
|
||||
'HTTPHandler',
|
||||
'HTTPPasswordMgr',
|
||||
'HTTPPasswordMgrWithDefaultRealm',
|
||||
'HTTPProxyPasswordMgr',
|
||||
'HTTPRedirectDebugProcessor',
|
||||
'HTTPRedirectHandler',
|
||||
'HTTPRefererProcessor',
|
||||
'HTTPRefreshProcessor',
|
||||
'HTTPRequestUpgradeProcessor',
|
||||
'HTTPResponseDebugProcessor',
|
||||
'HTTPRobotRulesProcessor',
|
||||
'HTTPSClientCertMgr',
|
||||
'HTTPSHandler',
|
||||
'HeadParser',
|
||||
'History',
|
||||
'LWPCookieJar',
|
||||
'Link',
|
||||
'LinkNotFoundError',
|
||||
'LinksFactory',
|
||||
'LoadError',
|
||||
'MSIECookieJar',
|
||||
'MozillaCookieJar',
|
||||
'OpenerDirector',
|
||||
'OpenerFactory',
|
||||
'ParseError',
|
||||
'ProxyBasicAuthHandler',
|
||||
'ProxyDigestAuthHandler',
|
||||
'ProxyHandler',
|
||||
'Request',
|
||||
'ResponseUpgradeProcessor',
|
||||
'RobotExclusionError',
|
||||
'RobustFactory',
|
||||
'RobustFormsFactory',
|
||||
'RobustLinksFactory',
|
||||
'RobustTitleFactory',
|
||||
'SeekableProcessor',
|
||||
'SeekableResponseOpener',
|
||||
'TitleFactory',
|
||||
'URLError',
|
||||
'USE_BARE_EXCEPT',
|
||||
'UnknownHandler',
|
||||
'UserAgent',
|
||||
'UserAgentBase',
|
||||
'XHTMLCompatibleHeadParser',
|
||||
'__version__',
|
||||
'build_opener',
|
||||
'install_opener',
|
||||
'lwp_cookie_str',
|
||||
'make_response',
|
||||
'request_host',
|
||||
'response_seek_wrapper', # XXX deprecate in public interface?
|
||||
'seek_wrapped_response' # XXX should probably use this internally in place of response_seek_wrapper()
|
||||
'str2time',
|
||||
'urlopen',
|
||||
'urlretrieve']
|
||||
|
||||
from _mechanize import __version__
|
||||
|
||||
# high-level stateful browser-style interface
|
||||
from _mechanize import \
|
||||
Browser, History, \
|
||||
BrowserStateError, LinkNotFoundError, FormNotFoundError
|
||||
|
||||
# configurable URL-opener interface
|
||||
from _useragent import UserAgentBase, UserAgent
|
||||
from _html import \
|
||||
ParseError, \
|
||||
Link, \
|
||||
Factory, DefaultFactory, RobustFactory, \
|
||||
FormsFactory, LinksFactory, TitleFactory, \
|
||||
RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
|
||||
|
||||
# urllib2 work-alike interface (part from mechanize, part from urllib2)
|
||||
# This is a superset of the urllib2 interface.
|
||||
from _urllib2 import *
|
||||
|
||||
# misc
|
||||
from _opener import ContentTooShortError, OpenerFactory, urlretrieve
|
||||
from _util import http2time as str2time
|
||||
from _response import \
|
||||
response_seek_wrapper, seek_wrapped_response, make_response
|
||||
from _http import HeadParser
|
||||
try:
|
||||
from _http import XHTMLCompatibleHeadParser
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# cookies
|
||||
from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
|
||||
CookieJar, FileCookieJar, LoadError, request_host
|
||||
from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
|
||||
from _mozillacookiejar import MozillaCookieJar
|
||||
from _msiecookiejar import MSIECookieJar
|
||||
|
||||
# If you hate the idea of turning bugs into warnings, do:
|
||||
# import mechanize; mechanize.USE_BARE_EXCEPT = False
|
||||
USE_BARE_EXCEPT = True
|
@ -1,500 +0,0 @@
|
||||
"""HTTP Authentication and Proxy support.
|
||||
|
||||
All but HTTPProxyPasswordMgr come from Python 2.5.
|
||||
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import re, base64, urlparse, posixpath, md5, sha, sys, copy
|
||||
|
||||
from urllib2 import BaseHandler
|
||||
from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
|
||||
splitport
|
||||
|
||||
|
||||
def _parse_proxy(proxy):
|
||||
"""Return (scheme, user, password, host/port) given a URL or an authority.
|
||||
|
||||
If a URL is supplied, it must have an authority (host:port) component.
|
||||
According to RFC 3986, having an authority component means the URL must
|
||||
have two slashes after the scheme:
|
||||
|
||||
>>> _parse_proxy('file:/ftp.example.com/')
|
||||
Traceback (most recent call last):
|
||||
ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
|
||||
|
||||
The first three items of the returned tuple may be None.
|
||||
|
||||
Examples of authority parsing:
|
||||
|
||||
>>> _parse_proxy('proxy.example.com')
|
||||
(None, None, None, 'proxy.example.com')
|
||||
>>> _parse_proxy('proxy.example.com:3128')
|
||||
(None, None, None, 'proxy.example.com:3128')
|
||||
|
||||
The authority component may optionally include userinfo (assumed to be
|
||||
username:password):
|
||||
|
||||
>>> _parse_proxy('joe:password@proxy.example.com')
|
||||
(None, 'joe', 'password', 'proxy.example.com')
|
||||
>>> _parse_proxy('joe:password@proxy.example.com:3128')
|
||||
(None, 'joe', 'password', 'proxy.example.com:3128')
|
||||
|
||||
Same examples, but with URLs instead:
|
||||
|
||||
>>> _parse_proxy('http://proxy.example.com/')
|
||||
('http', None, None, 'proxy.example.com')
|
||||
>>> _parse_proxy('http://proxy.example.com:3128/')
|
||||
('http', None, None, 'proxy.example.com:3128')
|
||||
>>> _parse_proxy('http://joe:password@proxy.example.com/')
|
||||
('http', 'joe', 'password', 'proxy.example.com')
|
||||
>>> _parse_proxy('http://joe:password@proxy.example.com:3128')
|
||||
('http', 'joe', 'password', 'proxy.example.com:3128')
|
||||
|
||||
Everything after the authority is ignored:
|
||||
|
||||
>>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
|
||||
('ftp', 'joe', 'password', 'proxy.example.com')
|
||||
|
||||
Test for no trailing '/' case:
|
||||
|
||||
>>> _parse_proxy('http://joe:password@proxy.example.com')
|
||||
('http', 'joe', 'password', 'proxy.example.com')
|
||||
|
||||
"""
|
||||
scheme, r_scheme = splittype(proxy)
|
||||
if not r_scheme.startswith("/"):
|
||||
# authority
|
||||
scheme = None
|
||||
authority = proxy
|
||||
else:
|
||||
# URL
|
||||
if not r_scheme.startswith("//"):
|
||||
raise ValueError("proxy URL with no authority: %r" % proxy)
|
||||
# We have an authority, so for RFC 3986-compliant URLs (by ss 3.
|
||||
# and 3.3.), path is empty or starts with '/'
|
||||
end = r_scheme.find("/", 2)
|
||||
if end == -1:
|
||||
end = None
|
||||
authority = r_scheme[2:end]
|
||||
userinfo, hostport = splituser(authority)
|
||||
if userinfo is not None:
|
||||
user, password = splitpasswd(userinfo)
|
||||
else:
|
||||
user = password = None
|
||||
return scheme, user, password, hostport
|
||||
|
||||
class ProxyHandler(BaseHandler):
|
||||
# Proxies must be in front
|
||||
handler_order = 100
|
||||
|
||||
def __init__(self, proxies=None):
|
||||
if proxies is None:
|
||||
proxies = getproxies()
|
||||
assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
|
||||
self.proxies = proxies
|
||||
for type, url in proxies.items():
|
||||
setattr(self, '%s_open' % type,
|
||||
lambda r, proxy=url, type=type, meth=self.proxy_open: \
|
||||
meth(r, proxy, type))
|
||||
|
||||
def proxy_open(self, req, proxy, type):
|
||||
orig_type = req.get_type()
|
||||
proxy_type, user, password, hostport = _parse_proxy(proxy)
|
||||
if proxy_type is None:
|
||||
proxy_type = orig_type
|
||||
if user and password:
|
||||
user_pass = '%s:%s' % (unquote(user), unquote(password))
|
||||
creds = base64.encodestring(user_pass).strip()
|
||||
req.add_header('Proxy-authorization', 'Basic ' + creds)
|
||||
hostport = unquote(hostport)
|
||||
req.set_proxy(hostport, proxy_type)
|
||||
if orig_type == proxy_type:
|
||||
# let other handlers take care of it
|
||||
return None
|
||||
else:
|
||||
# need to start over, because the other handlers don't
|
||||
# grok the proxy's URL type
|
||||
# e.g. if we have a constructor arg proxies like so:
|
||||
# {'http': 'ftp://proxy.example.com'}, we may end up turning
|
||||
# a request for http://acme.example.com/a into one for
|
||||
# ftp://proxy.example.com/a
|
||||
return self.parent.open(req)
|
||||
|
||||
class HTTPPasswordMgr:
|
||||
|
||||
def __init__(self):
|
||||
self.passwd = {}
|
||||
|
||||
def add_password(self, realm, uri, user, passwd):
|
||||
# uri could be a single URI or a sequence
|
||||
if isinstance(uri, basestring):
|
||||
uri = [uri]
|
||||
if not realm in self.passwd:
|
||||
self.passwd[realm] = {}
|
||||
for default_port in True, False:
|
||||
reduced_uri = tuple(
|
||||
[self.reduce_uri(u, default_port) for u in uri])
|
||||
self.passwd[realm][reduced_uri] = (user, passwd)
|
||||
|
||||
def find_user_password(self, realm, authuri):
|
||||
domains = self.passwd.get(realm, {})
|
||||
for default_port in True, False:
|
||||
reduced_authuri = self.reduce_uri(authuri, default_port)
|
||||
for uris, authinfo in domains.iteritems():
|
||||
for uri in uris:
|
||||
if self.is_suburi(uri, reduced_authuri):
|
||||
return authinfo
|
||||
return None, None
|
||||
|
||||
def reduce_uri(self, uri, default_port=True):
|
||||
"""Accept authority or URI and extract only the authority and path."""
|
||||
# note HTTP URLs do not have a userinfo component
|
||||
parts = urlparse.urlsplit(uri)
|
||||
if parts[1]:
|
||||
# URI
|
||||
scheme = parts[0]
|
||||
authority = parts[1]
|
||||
path = parts[2] or '/'
|
||||
else:
|
||||
# host or host:port
|
||||
scheme = None
|
||||
authority = uri
|
||||
path = '/'
|
||||
host, port = splitport(authority)
|
||||
if default_port and port is None and scheme is not None:
|
||||
dport = {"http": 80,
|
||||
"https": 443,
|
||||
}.get(scheme)
|
||||
if dport is not None:
|
||||
authority = "%s:%d" % (host, dport)
|
||||
return authority, path
|
||||
|
||||
def is_suburi(self, base, test):
|
||||
"""Check if test is below base in a URI tree
|
||||
|
||||
Both args must be URIs in reduced form.
|
||||
"""
|
||||
if base == test:
|
||||
return True
|
||||
if base[0] != test[0]:
|
||||
return False
|
||||
common = posixpath.commonprefix((base[1], test[1]))
|
||||
if len(common) == len(base[1]):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
|
||||
|
||||
def find_user_password(self, realm, authuri):
|
||||
user, password = HTTPPasswordMgr.find_user_password(self, realm,
|
||||
authuri)
|
||||
if user is not None:
|
||||
return user, password
|
||||
return HTTPPasswordMgr.find_user_password(self, None, authuri)
|
||||
|
||||
|
||||
class AbstractBasicAuthHandler:
|
||||
|
||||
rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)
|
||||
|
||||
# XXX there can actually be multiple auth-schemes in a
|
||||
# www-authenticate header. should probably be a lot more careful
|
||||
# in parsing them to extract multiple alternatives
|
||||
|
||||
def __init__(self, password_mgr=None):
|
||||
if password_mgr is None:
|
||||
password_mgr = HTTPPasswordMgr()
|
||||
self.passwd = password_mgr
|
||||
self.add_password = self.passwd.add_password
|
||||
|
||||
def http_error_auth_reqed(self, authreq, host, req, headers):
|
||||
# host may be an authority (without userinfo) or a URL with an
|
||||
# authority
|
||||
# XXX could be multiple headers
|
||||
authreq = headers.get(authreq, None)
|
||||
if authreq:
|
||||
mo = AbstractBasicAuthHandler.rx.search(authreq)
|
||||
if mo:
|
||||
scheme, realm = mo.groups()
|
||||
if scheme.lower() == 'basic':
|
||||
return self.retry_http_basic_auth(host, req, realm)
|
||||
|
||||
def retry_http_basic_auth(self, host, req, realm):
|
||||
user, pw = self.passwd.find_user_password(realm, host)
|
||||
if pw is not None:
|
||||
raw = "%s:%s" % (user, pw)
|
||||
auth = 'Basic %s' % base64.encodestring(raw).strip()
|
||||
if req.headers.get(self.auth_header, None) == auth:
|
||||
return None
|
||||
newreq = copy.copy(req)
|
||||
newreq.add_header(self.auth_header, auth)
|
||||
newreq.visit = False
|
||||
return self.parent.open(newreq)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
|
||||
|
||||
auth_header = 'Authorization'
|
||||
|
||||
def http_error_401(self, req, fp, code, msg, headers):
|
||||
url = req.get_full_url()
|
||||
return self.http_error_auth_reqed('www-authenticate',
|
||||
url, req, headers)
|
||||
|
||||
|
||||
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
|
||||
|
||||
auth_header = 'Proxy-authorization'
|
||||
|
||||
def http_error_407(self, req, fp, code, msg, headers):
|
||||
# http_error_auth_reqed requires that there is no userinfo component in
|
||||
# authority. Assume there isn't one, since urllib2 does not (and
|
||||
# should not, RFC 3986 s. 3.2.1) support requests for URLs containing
|
||||
# userinfo.
|
||||
authority = req.get_host()
|
||||
return self.http_error_auth_reqed('proxy-authenticate',
|
||||
authority, req, headers)
|
||||
|
||||
|
||||
def randombytes(n):
|
||||
"""Return n random bytes."""
|
||||
# Use /dev/urandom if it is available. Fall back to random module
|
||||
# if not. It might be worthwhile to extend this function to use
|
||||
# other platform-specific mechanisms for getting random bytes.
|
||||
if os.path.exists("/dev/urandom"):
|
||||
f = open("/dev/urandom")
|
||||
s = f.read(n)
|
||||
f.close()
|
||||
return s
|
||||
else:
|
||||
L = [chr(random.randrange(0, 256)) for i in range(n)]
|
||||
return "".join(L)
|
||||
|
||||
class AbstractDigestAuthHandler:
|
||||
# Digest authentication is specified in RFC 2617.
|
||||
|
||||
# XXX The client does not inspect the Authentication-Info header
|
||||
# in a successful response.
|
||||
|
||||
# XXX It should be possible to test this implementation against
|
||||
# a mock server that just generates a static set of challenges.
|
||||
|
||||
# XXX qop="auth-int" supports is shaky
|
||||
|
||||
def __init__(self, passwd=None):
|
||||
if passwd is None:
|
||||
passwd = HTTPPasswordMgr()
|
||||
self.passwd = passwd
|
||||
self.add_password = self.passwd.add_password
|
||||
self.retried = 0
|
||||
self.nonce_count = 0
|
||||
|
||||
def reset_retry_count(self):
|
||||
self.retried = 0
|
||||
|
||||
def http_error_auth_reqed(self, auth_header, host, req, headers):
|
||||
authreq = headers.get(auth_header, None)
|
||||
if self.retried > 5:
|
||||
# Don't fail endlessly - if we failed once, we'll probably
|
||||
# fail a second time. Hm. Unless the Password Manager is
|
||||
# prompting for the information. Crap. This isn't great
|
||||
# but it's better than the current 'repeat until recursion
|
||||
# depth exceeded' approach <wink>
|
||||
raise HTTPError(req.get_full_url(), 401, "digest auth failed",
|
||||
headers, None)
|
||||
else:
|
||||
self.retried += 1
|
||||
if authreq:
|
||||
scheme = authreq.split()[0]
|
||||
if scheme.lower() == 'digest':
|
||||
return self.retry_http_digest_auth(req, authreq)
|
||||
|
||||
def retry_http_digest_auth(self, req, auth):
|
||||
token, challenge = auth.split(' ', 1)
|
||||
chal = parse_keqv_list(parse_http_list(challenge))
|
||||
auth = self.get_authorization(req, chal)
|
||||
if auth:
|
||||
auth_val = 'Digest %s' % auth
|
||||
if req.headers.get(self.auth_header, None) == auth_val:
|
||||
return None
|
||||
newreq = copy.copy(req)
|
||||
newreq.add_unredirected_header(self.auth_header, auth_val)
|
||||
newreq.visit = False
|
||||
return self.parent.open(newreq)
|
||||
|
||||
def get_cnonce(self, nonce):
|
||||
# The cnonce-value is an opaque
|
||||
# quoted string value provided by the client and used by both client
|
||||
# and server to avoid chosen plaintext attacks, to provide mutual
|
||||
# authentication, and to provide some message integrity protection.
|
||||
# This isn't a fabulous effort, but it's probably Good Enough.
|
||||
dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
|
||||
randombytes(8))).hexdigest()
|
||||
return dig[:16]
|
||||
|
||||
def get_authorization(self, req, chal):
|
||||
try:
|
||||
realm = chal['realm']
|
||||
nonce = chal['nonce']
|
||||
qop = chal.get('qop')
|
||||
algorithm = chal.get('algorithm', 'MD5')
|
||||
# mod_digest doesn't send an opaque, even though it isn't
|
||||
# supposed to be optional
|
||||
opaque = chal.get('opaque', None)
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
H, KD = self.get_algorithm_impls(algorithm)
|
||||
if H is None:
|
||||
return None
|
||||
|
||||
user, pw = self.passwd.find_user_password(realm, req.get_full_url())
|
||||
if user is None:
|
||||
return None
|
||||
|
||||
# XXX not implemented yet
|
||||
if req.has_data():
|
||||
entdig = self.get_entity_digest(req.get_data(), chal)
|
||||
else:
|
||||
entdig = None
|
||||
|
||||
A1 = "%s:%s:%s" % (user, realm, pw)
|
||||
A2 = "%s:%s" % (req.get_method(),
|
||||
# XXX selector: what about proxies and full urls
|
||||
req.get_selector())
|
||||
if qop == 'auth':
|
||||
self.nonce_count += 1
|
||||
ncvalue = '%08x' % self.nonce_count
|
||||
cnonce = self.get_cnonce(nonce)
|
||||
noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
|
||||
respdig = KD(H(A1), noncebit)
|
||||
elif qop is None:
|
||||
respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
|
||||
else:
|
||||
# XXX handle auth-int.
|
||||
pass
|
||||
|
||||
# XXX should the partial digests be encoded too?
|
||||
|
||||
base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
|
||||
'response="%s"' % (user, realm, nonce, req.get_selector(),
|
||||
respdig)
|
||||
if opaque:
|
||||
base += ', opaque="%s"' % opaque
|
||||
if entdig:
|
||||
base += ', digest="%s"' % entdig
|
||||
base += ', algorithm="%s"' % algorithm
|
||||
if qop:
|
||||
base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
|
||||
return base
|
||||
|
||||
def get_algorithm_impls(self, algorithm):
|
||||
# lambdas assume digest modules are imported at the top level
|
||||
if algorithm == 'MD5':
|
||||
H = lambda x: md5.new(x).hexdigest()
|
||||
elif algorithm == 'SHA':
|
||||
H = lambda x: sha.new(x).hexdigest()
|
||||
# XXX MD5-sess
|
||||
KD = lambda s, d: H("%s:%s" % (s, d))
|
||||
return H, KD
|
||||
|
||||
def get_entity_digest(self, data, chal):
|
||||
# XXX not implemented yet
|
||||
return None
|
||||
|
||||
|
||||
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
|
||||
"""An authentication protocol defined by RFC 2069
|
||||
|
||||
Digest authentication improves on basic authentication because it
|
||||
does not transmit passwords in the clear.
|
||||
"""
|
||||
|
||||
auth_header = 'Authorization'
|
||||
handler_order = 490
|
||||
|
||||
def http_error_401(self, req, fp, code, msg, headers):
|
||||
host = urlparse.urlparse(req.get_full_url())[1]
|
||||
retry = self.http_error_auth_reqed('www-authenticate',
|
||||
host, req, headers)
|
||||
self.reset_retry_count()
|
||||
return retry
|
||||
|
||||
|
||||
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
|
||||
|
||||
auth_header = 'Proxy-Authorization'
|
||||
handler_order = 490
|
||||
|
||||
def http_error_407(self, req, fp, code, msg, headers):
|
||||
host = req.get_host()
|
||||
retry = self.http_error_auth_reqed('proxy-authenticate',
|
||||
host, req, headers)
|
||||
self.reset_retry_count()
|
||||
return retry
|
||||
|
||||
|
||||
# XXX ugly implementation, should probably not bother deriving
|
||||
class HTTPProxyPasswordMgr(HTTPPasswordMgr):
|
||||
# has default realm and host/port
|
||||
def add_password(self, realm, uri, user, passwd):
|
||||
# uri could be a single URI or a sequence
|
||||
if uri is None or isinstance(uri, basestring):
|
||||
uris = [uri]
|
||||
else:
|
||||
uris = uri
|
||||
passwd_by_domain = self.passwd.setdefault(realm, {})
|
||||
for uri in uris:
|
||||
for default_port in True, False:
|
||||
reduced_uri = self.reduce_uri(uri, default_port)
|
||||
passwd_by_domain[reduced_uri] = (user, passwd)
|
||||
|
||||
def find_user_password(self, realm, authuri):
|
||||
attempts = [(realm, authuri), (None, authuri)]
|
||||
# bleh, want default realm to take precedence over default
|
||||
# URI/authority, hence this outer loop
|
||||
for default_uri in False, True:
|
||||
for realm, authuri in attempts:
|
||||
authinfo_by_domain = self.passwd.get(realm, {})
|
||||
for default_port in True, False:
|
||||
reduced_authuri = self.reduce_uri(authuri, default_port)
|
||||
for uri, authinfo in authinfo_by_domain.iteritems():
|
||||
if uri is None and not default_uri:
|
||||
continue
|
||||
if self.is_suburi(uri, reduced_authuri):
|
||||
return authinfo
|
||||
user, password = None, None
|
||||
|
||||
if user is not None:
|
||||
break
|
||||
return user, password
|
||||
|
||||
def reduce_uri(self, uri, default_port=True):
|
||||
if uri is None:
|
||||
return None
|
||||
return HTTPPasswordMgr.reduce_uri(self, uri, default_port)
|
||||
|
||||
def is_suburi(self, base, test):
|
||||
if base is None:
|
||||
# default to the proxy's host/port
|
||||
hostport, path = test
|
||||
base = (hostport, "/")
|
||||
return HTTPPasswordMgr.is_suburi(self, base, test)
|
||||
|
||||
|
||||
class HTTPSClientCertMgr(HTTPPasswordMgr):
|
||||
# implementation inheritance: this is not a proper subclass
|
||||
def add_key_cert(self, uri, key_file, cert_file):
|
||||
self.add_password(None, uri, key_file, cert_file)
|
||||
def find_key_cert(self, authuri):
|
||||
return HTTPPasswordMgr.find_user_password(self, None, authuri)
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,28 +0,0 @@
|
||||
import logging
|
||||
|
||||
from urllib2 import BaseHandler
|
||||
from _response import response_seek_wrapper
|
||||
|
||||
|
||||
class HTTPResponseDebugProcessor(BaseHandler):
|
||||
handler_order = 900 # before redirections, after everything else
|
||||
|
||||
def http_response(self, request, response):
|
||||
if not hasattr(response, "seek"):
|
||||
response = response_seek_wrapper(response)
|
||||
info = logging.getLogger("mechanize.http_responses").info
|
||||
try:
|
||||
info(response.read())
|
||||
finally:
|
||||
response.seek(0)
|
||||
info("*****************************************************")
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
|
||||
class HTTPRedirectDebugProcessor(BaseHandler):
|
||||
def http_request(self, request):
|
||||
if hasattr(request, "redirect_dict"):
|
||||
info = logging.getLogger("mechanize.http_redirects").info
|
||||
info("redirecting to %s", request.get_full_url())
|
||||
return request
|
@ -1,103 +0,0 @@
|
||||
import urllib2
|
||||
from cStringIO import StringIO
|
||||
import _response
|
||||
|
||||
# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
|
||||
class GzipConsumer:
|
||||
|
||||
def __init__(self, consumer):
|
||||
self.__consumer = consumer
|
||||
self.__decoder = None
|
||||
self.__data = ""
|
||||
|
||||
def __getattr__(self, key):
|
||||
return getattr(self.__consumer, key)
|
||||
|
||||
def feed(self, data):
|
||||
if self.__decoder is None:
|
||||
# check if we have a full gzip header
|
||||
data = self.__data + data
|
||||
try:
|
||||
i = 10
|
||||
flag = ord(data[3])
|
||||
if flag & 4: # extra
|
||||
x = ord(data[i]) + 256*ord(data[i+1])
|
||||
i = i + 2 + x
|
||||
if flag & 8: # filename
|
||||
while ord(data[i]):
|
||||
i = i + 1
|
||||
i = i + 1
|
||||
if flag & 16: # comment
|
||||
while ord(data[i]):
|
||||
i = i + 1
|
||||
i = i + 1
|
||||
if flag & 2: # crc
|
||||
i = i + 2
|
||||
if len(data) < i:
|
||||
raise IndexError("not enough data")
|
||||
if data[:3] != "\x1f\x8b\x08":
|
||||
raise IOError("invalid gzip data")
|
||||
data = data[i:]
|
||||
except IndexError:
|
||||
self.__data = data
|
||||
return # need more data
|
||||
import zlib
|
||||
self.__data = ""
|
||||
self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
|
||||
data = self.__decoder.decompress(data)
|
||||
if data:
|
||||
self.__consumer.feed(data)
|
||||
|
||||
def close(self):
|
||||
if self.__decoder:
|
||||
data = self.__decoder.flush()
|
||||
if data:
|
||||
self.__consumer.feed(data)
|
||||
self.__consumer.close()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# the rest of this module is John Lee's stupid code, not
|
||||
# Fredrik's nice code :-)
|
||||
|
||||
class stupid_gzip_consumer:
|
||||
def __init__(self): self.data = []
|
||||
def feed(self, data): self.data.append(data)
|
||||
|
||||
class stupid_gzip_wrapper(_response.closeable_response):
|
||||
def __init__(self, response):
|
||||
self._response = response
|
||||
|
||||
c = stupid_gzip_consumer()
|
||||
gzc = GzipConsumer(c)
|
||||
gzc.feed(response.read())
|
||||
self.__data = StringIO("".join(c.data))
|
||||
|
||||
def read(self, size=-1):
|
||||
return self.__data.read(size)
|
||||
def readline(self, size=-1):
|
||||
return self.__data.readline(size)
|
||||
def readlines(self, sizehint=-1):
|
||||
return self.__data.readlines(size)
|
||||
|
||||
def __getattr__(self, name):
|
||||
# delegate unknown methods/attributes
|
||||
return getattr(self._response, name)
|
||||
|
||||
class HTTPGzipProcessor(urllib2.BaseHandler):
|
||||
handler_order = 200 # response processing before HTTPEquivProcessor
|
||||
|
||||
def http_request(self, request):
|
||||
request.add_header("Accept-Encoding", "gzip")
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
# post-process response
|
||||
enc_hdrs = response.info().getheaders("Content-encoding")
|
||||
for enc_hdr in enc_hdrs:
|
||||
if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
|
||||
return stupid_gzip_wrapper(response)
|
||||
return response
|
||||
|
||||
https_response = http_response
|
@ -1,226 +0,0 @@
|
||||
"""Utility functions for HTTP header value parsing and construction.
|
||||
|
||||
Copyright 1997-1998, Gisle Aas
|
||||
Copyright 2002-2006, John J. Lee
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import os, re
|
||||
from types import StringType
|
||||
from types import UnicodeType
|
||||
STRING_TYPES = StringType, UnicodeType
|
||||
|
||||
from _util import http2time
|
||||
import _rfc3986
|
||||
|
||||
def is_html(ct_headers, url, allow_xhtml=False):
|
||||
"""
|
||||
ct_headers: Sequence of Content-Type headers
|
||||
url: Response URL
|
||||
|
||||
"""
|
||||
if not ct_headers:
|
||||
# guess
|
||||
ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
|
||||
html_exts = [".htm", ".html"]
|
||||
if allow_xhtml:
|
||||
html_exts += [".xhtml"]
|
||||
return ext in html_exts
|
||||
# use first header
|
||||
ct = split_header_words(ct_headers)[0][0][0]
|
||||
html_types = ["text/html"]
|
||||
if allow_xhtml:
|
||||
html_types += [
|
||||
"text/xhtml", "text/xml",
|
||||
"application/xml", "application/xhtml+xml",
|
||||
]
|
||||
return ct in html_types
|
||||
|
||||
def unmatched(match):
|
||||
"""Return unmatched part of re.Match object."""
|
||||
start, end = match.span(0)
|
||||
return match.string[:start]+match.string[end:]
|
||||
|
||||
token_re = re.compile(r"^\s*([^=\s;,]+)")
|
||||
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
|
||||
value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
|
||||
escape_re = re.compile(r"\\(.)")
|
||||
def split_header_words(header_values):
|
||||
r"""Parse header values into a list of lists containing key,value pairs.
|
||||
|
||||
The function knows how to deal with ",", ";" and "=" as well as quoted
|
||||
values after "=". A list of space separated tokens are parsed as if they
|
||||
were separated by ";".
|
||||
|
||||
If the header_values passed as argument contains multiple values, then they
|
||||
are treated as if they were a single value separated by comma ",".
|
||||
|
||||
This means that this function is useful for parsing header fields that
|
||||
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
|
||||
the requirement for tokens).
|
||||
|
||||
headers = #header
|
||||
header = (token | parameter) *( [";"] (token | parameter))
|
||||
|
||||
token = 1*<any CHAR except CTLs or separators>
|
||||
separators = "(" | ")" | "<" | ">" | "@"
|
||||
| "," | ";" | ":" | "\" | <">
|
||||
| "/" | "[" | "]" | "?" | "="
|
||||
| "{" | "}" | SP | HT
|
||||
|
||||
quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
|
||||
qdtext = <any TEXT except <">>
|
||||
quoted-pair = "\" CHAR
|
||||
|
||||
parameter = attribute "=" value
|
||||
attribute = token
|
||||
value = token | quoted-string
|
||||
|
||||
Each header is represented by a list of key/value pairs. The value for a
|
||||
simple token (not part of a parameter) is None. Syntactically incorrect
|
||||
headers will not necessarily be parsed as you would want.
|
||||
|
||||
This is easier to describe with some examples:
|
||||
|
||||
>>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
|
||||
[[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
|
||||
>>> split_header_words(['text/html; charset="iso-8859-1"'])
|
||||
[[('text/html', None), ('charset', 'iso-8859-1')]]
|
||||
>>> split_header_words([r'Basic realm="\"foo\bar\""'])
|
||||
[[('Basic', None), ('realm', '"foobar"')]]
|
||||
|
||||
"""
|
||||
assert type(header_values) not in STRING_TYPES
|
||||
result = []
|
||||
for text in header_values:
|
||||
orig_text = text
|
||||
pairs = []
|
||||
while text:
|
||||
m = token_re.search(text)
|
||||
if m:
|
||||
text = unmatched(m)
|
||||
name = m.group(1)
|
||||
m = quoted_value_re.search(text)
|
||||
if m: # quoted value
|
||||
text = unmatched(m)
|
||||
value = m.group(1)
|
||||
value = escape_re.sub(r"\1", value)
|
||||
else:
|
||||
m = value_re.search(text)
|
||||
if m: # unquoted value
|
||||
text = unmatched(m)
|
||||
value = m.group(1)
|
||||
value = value.rstrip()
|
||||
else:
|
||||
# no value, a lone token
|
||||
value = None
|
||||
pairs.append((name, value))
|
||||
elif text.lstrip().startswith(","):
|
||||
# concatenated headers, as per RFC 2616 section 4.2
|
||||
text = text.lstrip()[1:]
|
||||
if pairs: result.append(pairs)
|
||||
pairs = []
|
||||
else:
|
||||
# skip junk
|
||||
non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
|
||||
assert nr_junk_chars > 0, (
|
||||
"split_header_words bug: '%s', '%s', %s" %
|
||||
(orig_text, text, pairs))
|
||||
text = non_junk
|
||||
if pairs: result.append(pairs)
|
||||
return result
|
||||
|
||||
join_escape_re = re.compile(r"([\"\\])")
|
||||
def join_header_words(lists):
|
||||
"""Do the inverse of the conversion done by split_header_words.
|
||||
|
||||
Takes a list of lists of (key, value) pairs and produces a single header
|
||||
value. Attribute values are quoted if needed.
|
||||
|
||||
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
|
||||
'text/plain; charset="iso-8859/1"'
|
||||
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
|
||||
'text/plain, charset="iso-8859/1"'
|
||||
|
||||
"""
|
||||
headers = []
|
||||
for pairs in lists:
|
||||
attr = []
|
||||
for k, v in pairs:
|
||||
if v is not None:
|
||||
if not re.search(r"^\w+$", v):
|
||||
v = join_escape_re.sub(r"\\\1", v) # escape " and \
|
||||
v = '"%s"' % v
|
||||
if k is None: # Netscape cookies may have no name
|
||||
k = v
|
||||
else:
|
||||
k = "%s=%s" % (k, v)
|
||||
attr.append(k)
|
||||
if attr: headers.append("; ".join(attr))
|
||||
return ", ".join(headers)
|
||||
|
||||
def parse_ns_headers(ns_headers):
|
||||
"""Ad-hoc parser for Netscape protocol cookie-attributes.
|
||||
|
||||
The old Netscape cookie format for Set-Cookie can for instance contain
|
||||
an unquoted "," in the expires field, so we have to use this ad-hoc
|
||||
parser instead of split_header_words.
|
||||
|
||||
XXX This may not make the best possible effort to parse all the crap
|
||||
that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
|
||||
parser is probably better, so could do worse than following that if
|
||||
this ever gives any trouble.
|
||||
|
||||
Currently, this is also used for parsing RFC 2109 cookies.
|
||||
|
||||
"""
|
||||
known_attrs = ("expires", "domain", "path", "secure",
|
||||
# RFC 2109 attrs (may turn up in Netscape cookies, too)
|
||||
"port", "max-age")
|
||||
|
||||
result = []
|
||||
for ns_header in ns_headers:
|
||||
pairs = []
|
||||
version_set = False
|
||||
params = re.split(r";\s*", ns_header)
|
||||
for ii in range(len(params)):
|
||||
param = params[ii]
|
||||
param = param.rstrip()
|
||||
if param == "": continue
|
||||
if "=" not in param:
|
||||
k, v = param, None
|
||||
else:
|
||||
k, v = re.split(r"\s*=\s*", param, 1)
|
||||
k = k.lstrip()
|
||||
if ii != 0:
|
||||
lc = k.lower()
|
||||
if lc in known_attrs:
|
||||
k = lc
|
||||
if k == "version":
|
||||
# This is an RFC 2109 cookie.
|
||||
version_set = True
|
||||
if k == "expires":
|
||||
# convert expires date to seconds since epoch
|
||||
if v.startswith('"'): v = v[1:]
|
||||
if v.endswith('"'): v = v[:-1]
|
||||
v = http2time(v) # None if invalid
|
||||
pairs.append((k, v))
|
||||
|
||||
if pairs:
|
||||
if not version_set:
|
||||
pairs.append(("version", "0"))
|
||||
result.append(pairs)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _test():
|
||||
import doctest, _headersutil
|
||||
return doctest.testmod(_headersutil)
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test()
|
@ -1,607 +0,0 @@
|
||||
"""HTML handling.
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import re, copy, htmlentitydefs
|
||||
import sgmllib, HTMLParser, ClientForm
|
||||
|
||||
import _request
|
||||
from _headersutil import split_header_words, is_html as _is_html
|
||||
import _rfc3986
|
||||
|
||||
DEFAULT_ENCODING = "latin-1"
|
||||
|
||||
|
||||
# the base class is purely for backwards compatibility
|
||||
class ParseError(ClientForm.ParseError): pass
|
||||
|
||||
|
||||
class CachingGeneratorFunction(object):
|
||||
"""Caching wrapper around a no-arguments iterable."""
|
||||
|
||||
def __init__(self, iterable):
|
||||
self._cache = []
|
||||
# wrap iterable to make it non-restartable (otherwise, repeated
|
||||
# __call__ would give incorrect results)
|
||||
self._iterator = iter(iterable)
|
||||
|
||||
def __call__(self):
|
||||
cache = self._cache
|
||||
for item in cache:
|
||||
yield item
|
||||
for item in self._iterator:
|
||||
cache.append(item)
|
||||
yield item
|
||||
|
||||
|
||||
class EncodingFinder:
|
||||
def __init__(self, default_encoding):
|
||||
self._default_encoding = default_encoding
|
||||
def encoding(self, response):
|
||||
# HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
|
||||
# headers may be in the response. HTTP-EQUIV headers come last,
|
||||
# so try in order from first to last.
|
||||
for ct in response.info().getheaders("content-type"):
|
||||
for k, v in split_header_words([ct])[0]:
|
||||
if k == "charset":
|
||||
return v
|
||||
return self._default_encoding
|
||||
|
||||
class ResponseTypeFinder:
|
||||
def __init__(self, allow_xhtml):
|
||||
self._allow_xhtml = allow_xhtml
|
||||
def is_html(self, response, encoding):
|
||||
ct_hdrs = response.info().getheaders("content-type")
|
||||
url = response.geturl()
|
||||
# XXX encoding
|
||||
return _is_html(ct_hdrs, url, self._allow_xhtml)
|
||||
|
||||
|
||||
# idea for this argument-processing trick is from Peter Otten
|
||||
class Args:
|
||||
def __init__(self, args_map):
|
||||
self.dictionary = dict(args_map)
|
||||
def __getattr__(self, key):
|
||||
try:
|
||||
return self.dictionary[key]
|
||||
except KeyError:
|
||||
return getattr(self.__class__, key)
|
||||
|
||||
def form_parser_args(
|
||||
select_default=False,
|
||||
form_parser_class=None,
|
||||
request_class=None,
|
||||
backwards_compat=False,
|
||||
):
|
||||
return Args(locals())
|
||||
|
||||
|
||||
class Link:
|
||||
def __init__(self, base_url, url, text, tag, attrs):
|
||||
assert None not in [url, tag, attrs]
|
||||
self.base_url = base_url
|
||||
self.absolute_url = _rfc3986.urljoin(base_url, url)
|
||||
self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
|
||||
def __cmp__(self, other):
|
||||
try:
|
||||
for name in "url", "text", "tag", "attrs":
|
||||
if getattr(self, name) != getattr(other, name):
|
||||
return -1
|
||||
except AttributeError:
|
||||
return -1
|
||||
return 0
|
||||
def __repr__(self):
|
||||
return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
|
||||
self.base_url, self.url, self.text, self.tag, self.attrs)
|
||||
|
||||
|
||||
class LinksFactory:
|
||||
|
||||
def __init__(self,
|
||||
link_parser_class=None,
|
||||
link_class=Link,
|
||||
urltags=None,
|
||||
):
|
||||
import _pullparser
|
||||
if link_parser_class is None:
|
||||
link_parser_class = _pullparser.TolerantPullParser
|
||||
self.link_parser_class = link_parser_class
|
||||
self.link_class = link_class
|
||||
if urltags is None:
|
||||
urltags = {
|
||||
"a": "href",
|
||||
"area": "href",
|
||||
"frame": "src",
|
||||
"iframe": "src",
|
||||
}
|
||||
self.urltags = urltags
|
||||
self._response = None
|
||||
self._encoding = None
|
||||
|
||||
def set_response(self, response, base_url, encoding):
|
||||
self._response = response
|
||||
self._encoding = encoding
|
||||
self._base_url = base_url
|
||||
|
||||
def links(self):
|
||||
"""Return an iterator that provides links of the document."""
|
||||
response = self._response
|
||||
encoding = self._encoding
|
||||
base_url = self._base_url
|
||||
p = self.link_parser_class(response, encoding=encoding)
|
||||
|
||||
try:
|
||||
for token in p.tags(*(self.urltags.keys()+["base"])):
|
||||
if token.type == "endtag":
|
||||
continue
|
||||
if token.data == "base":
|
||||
base_href = dict(token.attrs).get("href")
|
||||
if base_href is not None:
|
||||
base_url = base_href
|
||||
continue
|
||||
attrs = dict(token.attrs)
|
||||
tag = token.data
|
||||
name = attrs.get("name")
|
||||
text = None
|
||||
# XXX use attr_encoding for ref'd doc if that doc does not
|
||||
# provide one by other means
|
||||
#attr_encoding = attrs.get("charset")
|
||||
url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
|
||||
if not url:
|
||||
# Probably an <A NAME="blah"> link or <AREA NOHREF...>.
|
||||
# For our purposes a link is something with a URL, so
|
||||
# ignore this.
|
||||
continue
|
||||
|
||||
url = _rfc3986.clean_url(url, encoding)
|
||||
if tag == "a":
|
||||
if token.type != "startendtag":
|
||||
# hmm, this'd break if end tag is missing
|
||||
text = p.get_compressed_text(("endtag", tag))
|
||||
# but this doesn't work for eg.
|
||||
# <a href="blah"><b>Andy</b></a>
|
||||
#text = p.get_compressed_text()
|
||||
|
||||
yield Link(base_url, url, text, tag, token.attrs)
|
||||
except sgmllib.SGMLParseError, exc:
|
||||
raise ParseError(exc)
|
||||
|
||||
class FormsFactory:
|
||||
|
||||
"""Makes a sequence of objects satisfying ClientForm.HTMLForm interface.
|
||||
|
||||
After calling .forms(), the .global_form attribute is a form object
|
||||
containing all controls not a descendant of any FORM element.
|
||||
|
||||
For constructor argument docs, see ClientForm.ParseResponse
|
||||
argument docs.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
select_default=False,
|
||||
form_parser_class=None,
|
||||
request_class=None,
|
||||
backwards_compat=False,
|
||||
):
|
||||
import ClientForm
|
||||
self.select_default = select_default
|
||||
if form_parser_class is None:
|
||||
form_parser_class = ClientForm.FormParser
|
||||
self.form_parser_class = form_parser_class
|
||||
if request_class is None:
|
||||
request_class = _request.Request
|
||||
self.request_class = request_class
|
||||
self.backwards_compat = backwards_compat
|
||||
self._response = None
|
||||
self.encoding = None
|
||||
self.global_form = None
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self.encoding = encoding
|
||||
self.global_form = None
|
||||
|
||||
def forms(self):
|
||||
import ClientForm
|
||||
encoding = self.encoding
|
||||
try:
|
||||
forms = ClientForm.ParseResponseEx(
|
||||
self._response,
|
||||
select_default=self.select_default,
|
||||
form_parser_class=self.form_parser_class,
|
||||
request_class=self.request_class,
|
||||
encoding=encoding,
|
||||
_urljoin=_rfc3986.urljoin,
|
||||
_urlparse=_rfc3986.urlsplit,
|
||||
_urlunparse=_rfc3986.urlunsplit,
|
||||
)
|
||||
except ClientForm.ParseError, exc:
|
||||
raise ParseError(exc)
|
||||
self.global_form = forms[0]
|
||||
return forms[1:]
|
||||
|
||||
class TitleFactory:
|
||||
def __init__(self):
|
||||
self._response = self._encoding = None
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self._encoding = encoding
|
||||
|
||||
def title(self):
|
||||
import _pullparser
|
||||
p = _pullparser.TolerantPullParser(
|
||||
self._response, encoding=self._encoding)
|
||||
try:
|
||||
try:
|
||||
p.get_tag("title")
|
||||
except _pullparser.NoMoreTokensError:
|
||||
return None
|
||||
else:
|
||||
return p.get_text()
|
||||
except sgmllib.SGMLParseError, exc:
|
||||
raise ParseError(exc)
|
||||
|
||||
|
||||
def unescape(data, entities, encoding):
|
||||
if data is None or "&" not in data:
|
||||
return data
|
||||
|
||||
def replace_entities(match):
|
||||
ent = match.group()
|
||||
if ent[1] == "#":
|
||||
return unescape_charref(ent[2:-1], encoding)
|
||||
|
||||
repl = entities.get(ent[1:-1])
|
||||
if repl is not None:
|
||||
repl = unichr(repl)
|
||||
if type(repl) != type(""):
|
||||
try:
|
||||
repl = repl.encode(encoding)
|
||||
except UnicodeError:
|
||||
repl = ent
|
||||
else:
|
||||
repl = ent
|
||||
return repl
|
||||
|
||||
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
|
||||
|
||||
def unescape_charref(data, encoding):
|
||||
name, base = data, 10
|
||||
if name.startswith("x"):
|
||||
name, base= name[1:], 16
|
||||
uc = unichr(int(name, base))
|
||||
if encoding is None:
|
||||
return uc
|
||||
else:
|
||||
try:
|
||||
repl = uc.encode(encoding)
|
||||
except UnicodeError:
|
||||
repl = "&#%s;" % data
|
||||
return repl
|
||||
|
||||
|
||||
# bizarre import gymnastics for bundled BeautifulSoup
|
||||
import _beautifulsoup
|
||||
import ClientForm
|
||||
RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes(
|
||||
_beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
|
||||
)
|
||||
# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
|
||||
import sgmllib
|
||||
sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
|
||||
|
||||
class MechanizeBs(_beautifulsoup.BeautifulSoup):
|
||||
_entitydefs = htmlentitydefs.name2codepoint
|
||||
# don't want the magic Microsoft-char workaround
|
||||
PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
|
||||
lambda(x):x.group(1) + ' />'),
|
||||
(re.compile('<!\s+([^<>]*)>'),
|
||||
lambda(x):'<!' + x.group(1) + '>')
|
||||
]
|
||||
|
||||
def __init__(self, encoding, text=None, avoidParserProblems=True,
|
||||
initialTextIsEverything=True):
|
||||
self._encoding = encoding
|
||||
_beautifulsoup.BeautifulSoup.__init__(
|
||||
self, text, avoidParserProblems, initialTextIsEverything)
|
||||
|
||||
def handle_charref(self, ref):
|
||||
t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
|
||||
self.handle_data(t)
|
||||
def handle_entityref(self, ref):
|
||||
t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
|
||||
self.handle_data(t)
|
||||
def unescape_attrs(self, attrs):
|
||||
escaped_attrs = []
|
||||
for key, val in attrs:
|
||||
val = unescape(val, self._entitydefs, self._encoding)
|
||||
escaped_attrs.append((key, val))
|
||||
return escaped_attrs
|
||||
|
||||
class RobustLinksFactory:
|
||||
|
||||
compress_re = re.compile(r"\s+")
|
||||
|
||||
def __init__(self,
|
||||
link_parser_class=None,
|
||||
link_class=Link,
|
||||
urltags=None,
|
||||
):
|
||||
import _beautifulsoup
|
||||
if link_parser_class is None:
|
||||
link_parser_class = MechanizeBs
|
||||
self.link_parser_class = link_parser_class
|
||||
self.link_class = link_class
|
||||
if urltags is None:
|
||||
urltags = {
|
||||
"a": "href",
|
||||
"area": "href",
|
||||
"frame": "src",
|
||||
"iframe": "src",
|
||||
}
|
||||
self.urltags = urltags
|
||||
self._bs = None
|
||||
self._encoding = None
|
||||
self._base_url = None
|
||||
|
||||
def set_soup(self, soup, base_url, encoding):
|
||||
self._bs = soup
|
||||
self._base_url = base_url
|
||||
self._encoding = encoding
|
||||
|
||||
def links(self):
|
||||
import _beautifulsoup
|
||||
bs = self._bs
|
||||
base_url = self._base_url
|
||||
encoding = self._encoding
|
||||
gen = bs.recursiveChildGenerator()
|
||||
for ch in bs.recursiveChildGenerator():
|
||||
if (isinstance(ch, _beautifulsoup.Tag) and
|
||||
ch.name in self.urltags.keys()+["base"]):
|
||||
link = ch
|
||||
attrs = bs.unescape_attrs(link.attrs)
|
||||
attrs_dict = dict(attrs)
|
||||
if link.name == "base":
|
||||
base_href = attrs_dict.get("href")
|
||||
if base_href is not None:
|
||||
base_url = base_href
|
||||
continue
|
||||
url_attr = self.urltags[link.name]
|
||||
url = attrs_dict.get(url_attr)
|
||||
if not url:
|
||||
continue
|
||||
url = _rfc3986.clean_url(url, encoding)
|
||||
text = link.firstText(lambda t: True)
|
||||
if text is _beautifulsoup.Null:
|
||||
# follow _pullparser's weird behaviour rigidly
|
||||
if link.name == "a":
|
||||
text = ""
|
||||
else:
|
||||
text = None
|
||||
else:
|
||||
text = self.compress_re.sub(" ", text.strip())
|
||||
yield Link(base_url, url, text, link.name, attrs)
|
||||
|
||||
|
||||
class RobustFormsFactory(FormsFactory):
|
||||
def __init__(self, *args, **kwds):
|
||||
import ClientForm
|
||||
args = form_parser_args(*args, **kwds)
|
||||
if args.form_parser_class is None:
|
||||
args.form_parser_class = RobustFormParser
|
||||
FormsFactory.__init__(self, **args.dictionary)
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self.encoding = encoding
|
||||
|
||||
|
||||
class RobustTitleFactory:
|
||||
def __init__(self):
|
||||
self._bs = self._encoding = None
|
||||
|
||||
def set_soup(self, soup, encoding):
|
||||
self._bs = soup
|
||||
self._encoding = encoding
|
||||
|
||||
def title(self):
|
||||
import _beautifulsoup
|
||||
title = self._bs.first("title")
|
||||
if title == _beautifulsoup.Null:
|
||||
return None
|
||||
else:
|
||||
return title.firstText(lambda t: True)
|
||||
|
||||
|
||||
class Factory:
|
||||
"""Factory for forms, links, etc.
|
||||
|
||||
This interface may expand in future.
|
||||
|
||||
Public methods:
|
||||
|
||||
set_request_class(request_class)
|
||||
set_response(response)
|
||||
forms()
|
||||
links()
|
||||
|
||||
Public attributes:
|
||||
|
||||
Note that accessing these attributes may raise ParseError.
|
||||
|
||||
encoding: string specifying the encoding of response if it contains a text
|
||||
document (this value is left unspecified for documents that do not have
|
||||
an encoding, e.g. an image file)
|
||||
is_html: true if response contains an HTML document (XHTML may be
|
||||
regarded as HTML too)
|
||||
title: page title, or None if no title or not HTML
|
||||
global_form: form object containing all controls that are not descendants
|
||||
of any FORM element, or None if the forms_factory does not support
|
||||
supplying a global form
|
||||
|
||||
"""
|
||||
|
||||
LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"]
|
||||
|
||||
def __init__(self, forms_factory, links_factory, title_factory,
|
||||
encoding_finder=EncodingFinder(DEFAULT_ENCODING),
|
||||
response_type_finder=ResponseTypeFinder(allow_xhtml=False),
|
||||
):
|
||||
"""
|
||||
|
||||
Pass keyword arguments only.
|
||||
|
||||
default_encoding: character encoding to use if encoding cannot be
|
||||
determined (or guessed) from the response. You should turn on
|
||||
HTTP-EQUIV handling if you want the best chance of getting this right
|
||||
without resorting to this default. The default value of this
|
||||
parameter (currently latin-1) may change in future.
|
||||
|
||||
"""
|
||||
self._forms_factory = forms_factory
|
||||
self._links_factory = links_factory
|
||||
self._title_factory = title_factory
|
||||
self._encoding_finder = encoding_finder
|
||||
self._response_type_finder = response_type_finder
|
||||
|
||||
self.set_response(None)
|
||||
|
||||
def set_request_class(self, request_class):
|
||||
"""Set urllib2.Request class.
|
||||
|
||||
ClientForm.HTMLForm instances returned by .forms() will return
|
||||
instances of this class when .click()ed.
|
||||
|
||||
"""
|
||||
self._forms_factory.request_class = request_class
|
||||
|
||||
def set_response(self, response):
|
||||
"""Set response.
|
||||
|
||||
The response must either be None or implement the same interface as
|
||||
objects returned by urllib2.urlopen().
|
||||
|
||||
"""
|
||||
self._response = response
|
||||
self._forms_genf = self._links_genf = None
|
||||
self._get_title = None
|
||||
for name in self.LAZY_ATTRS:
|
||||
try:
|
||||
delattr(self, name)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name not in self.LAZY_ATTRS:
|
||||
return getattr(self.__class__, name)
|
||||
|
||||
if name == "encoding":
|
||||
self.encoding = self._encoding_finder.encoding(
|
||||
copy.copy(self._response))
|
||||
return self.encoding
|
||||
elif name == "is_html":
|
||||
self.is_html = self._response_type_finder.is_html(
|
||||
copy.copy(self._response), self.encoding)
|
||||
return self.is_html
|
||||
elif name == "title":
|
||||
if self.is_html:
|
||||
self.title = self._title_factory.title()
|
||||
else:
|
||||
self.title = None
|
||||
return self.title
|
||||
elif name == "global_form":
|
||||
self.forms()
|
||||
return self.global_form
|
||||
|
||||
def forms(self):
|
||||
"""Return iterable over ClientForm.HTMLForm-like objects.
|
||||
|
||||
Raises mechanize.ParseError on failure.
|
||||
"""
|
||||
# this implementation sets .global_form as a side-effect, for benefit
|
||||
# of __getattr__ impl
|
||||
if self._forms_genf is None:
|
||||
try:
|
||||
self._forms_genf = CachingGeneratorFunction(
|
||||
self._forms_factory.forms())
|
||||
except: # XXXX define exception!
|
||||
self.set_response(self._response)
|
||||
raise
|
||||
self.global_form = getattr(
|
||||
self._forms_factory, "global_form", None)
|
||||
return self._forms_genf()
|
||||
|
||||
def links(self):
|
||||
"""Return iterable over mechanize.Link-like objects.
|
||||
|
||||
Raises mechanize.ParseError on failure.
|
||||
"""
|
||||
if self._links_genf is None:
|
||||
try:
|
||||
self._links_genf = CachingGeneratorFunction(
|
||||
self._links_factory.links())
|
||||
except: # XXXX define exception!
|
||||
self.set_response(self._response)
|
||||
raise
|
||||
return self._links_genf()
|
||||
|
||||
class DefaultFactory(Factory):
|
||||
"""Based on sgmllib."""
|
||||
def __init__(self, i_want_broken_xhtml_support=False):
|
||||
Factory.__init__(
|
||||
self,
|
||||
forms_factory=FormsFactory(),
|
||||
links_factory=LinksFactory(),
|
||||
title_factory=TitleFactory(),
|
||||
response_type_finder=ResponseTypeFinder(
|
||||
allow_xhtml=i_want_broken_xhtml_support),
|
||||
)
|
||||
|
||||
def set_response(self, response):
|
||||
Factory.set_response(self, response)
|
||||
if response is not None:
|
||||
self._forms_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
self._links_factory.set_response(
|
||||
copy.copy(response), response.geturl(), self.encoding)
|
||||
self._title_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
|
||||
class RobustFactory(Factory):
|
||||
"""Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
|
||||
DefaultFactory.
|
||||
|
||||
"""
|
||||
def __init__(self, i_want_broken_xhtml_support=False,
|
||||
soup_class=None):
|
||||
Factory.__init__(
|
||||
self,
|
||||
forms_factory=RobustFormsFactory(),
|
||||
links_factory=RobustLinksFactory(),
|
||||
title_factory=RobustTitleFactory(),
|
||||
response_type_finder=ResponseTypeFinder(
|
||||
allow_xhtml=i_want_broken_xhtml_support),
|
||||
)
|
||||
if soup_class is None:
|
||||
soup_class = MechanizeBs
|
||||
self._soup_class = soup_class
|
||||
|
||||
def set_response(self, response):
|
||||
import _beautifulsoup
|
||||
Factory.set_response(self, response)
|
||||
if response is not None:
|
||||
data = response.read()
|
||||
soup = self._soup_class(self.encoding, data)
|
||||
self._forms_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
self._links_factory.set_soup(
|
||||
soup, response.geturl(), self.encoding)
|
||||
self._title_factory.set_soup(soup, self.encoding)
|
@ -1,729 +0,0 @@
|
||||
"""HTTP related handlers.
|
||||
|
||||
Note that some other HTTP handlers live in more specific modules: _auth.py,
|
||||
_gzip.py, etc.
|
||||
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import copy, time, tempfile, htmlentitydefs, re, logging, socket, \
|
||||
urllib2, urllib, httplib, sgmllib
|
||||
from urllib2 import URLError, HTTPError, BaseHandler
|
||||
from cStringIO import StringIO
|
||||
|
||||
from _request import Request
|
||||
from _util import isstringlike
|
||||
from _response import closeable_response, response_seek_wrapper
|
||||
from _html import unescape, unescape_charref
|
||||
from _headersutil import is_html
|
||||
from _clientcookie import CookieJar, request_host
|
||||
import _rfc3986
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
|
||||
# monkeypatch urllib2.HTTPError to show URL
|
||||
## def urllib2_str(self):
|
||||
## return 'HTTP Error %s: %s (%s)' % (
|
||||
## self.code, self.msg, self.geturl())
|
||||
## urllib2.HTTPError.__str__ = urllib2_str
|
||||
|
||||
|
||||
CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
|
||||
DEFAULT_ENCODING = 'latin-1'
|
||||
|
||||
|
||||
# This adds "refresh" to the list of redirectables and provides a redirection
|
||||
# algorithm that doesn't go into a loop in the presence of cookies
|
||||
# (Python 2.4 has this new algorithm, 2.3 doesn't).
|
||||
class HTTPRedirectHandler(BaseHandler):
|
||||
# maximum number of redirections to any single URL
|
||||
# this is needed because of the state that cookies introduce
|
||||
max_repeats = 4
|
||||
# maximum total number of redirections (regardless of URL) before
|
||||
# assuming we're in a loop
|
||||
max_redirections = 10
|
||||
|
||||
# Implementation notes:
|
||||
|
||||
# To avoid the server sending us into an infinite loop, the request
|
||||
# object needs to track what URLs we have already seen. Do this by
|
||||
# adding a handler-specific attribute to the Request object. The value
|
||||
# of the dict is used to count the number of times the same URL has
|
||||
# been visited. This is needed because visiting the same URL twice
|
||||
# does not necessarily imply a loop, thanks to state introduced by
|
||||
# cookies.
|
||||
|
||||
# Always unhandled redirection codes:
|
||||
# 300 Multiple Choices: should not handle this here.
|
||||
# 304 Not Modified: no need to handle here: only of interest to caches
|
||||
# that do conditional GETs
|
||||
# 305 Use Proxy: probably not worth dealing with here
|
||||
# 306 Unused: what was this for in previous versions of the protocol??
|
||||
|
||||
def redirect_request(self, newurl, req, fp, code, msg, headers):
|
||||
"""Return a Request or None in response to a redirect.
|
||||
|
||||
This is called by the http_error_30x methods when a redirection
|
||||
response is received. If a redirection should take place, return a
|
||||
new Request to allow http_error_30x to perform the redirect;
|
||||
otherwise, return None to indicate that an HTTPError should be
|
||||
raised.
|
||||
|
||||
"""
|
||||
if code in (301, 302, 303, "refresh") or \
|
||||
(code == 307 and not req.has_data()):
|
||||
# Strictly (according to RFC 2616), 301 or 302 in response to
|
||||
# a POST MUST NOT cause a redirection without confirmation
|
||||
# from the user (of urllib2, in this case). In practice,
|
||||
# essentially all clients do redirect in this case, so we do
|
||||
# the same.
|
||||
# XXX really refresh redirections should be visiting; tricky to
|
||||
# fix, so this will wait until post-stable release
|
||||
new = Request(newurl,
|
||||
headers=req.headers,
|
||||
origin_req_host=req.get_origin_req_host(),
|
||||
unverifiable=True,
|
||||
visit=False,
|
||||
)
|
||||
new._origin_req = getattr(req, "_origin_req", req)
|
||||
return new
|
||||
else:
|
||||
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
|
||||
|
||||
def http_error_302(self, req, fp, code, msg, headers):
|
||||
# Some servers (incorrectly) return multiple Location headers
|
||||
# (so probably same goes for URI). Use first header.
|
||||
if headers.has_key('location'):
|
||||
newurl = headers.getheaders('location')[0]
|
||||
elif headers.has_key('uri'):
|
||||
newurl = headers.getheaders('uri')[0]
|
||||
else:
|
||||
return
|
||||
newurl = _rfc3986.clean_url(newurl, "latin-1")
|
||||
newurl = _rfc3986.urljoin(req.get_full_url(), newurl)
|
||||
|
||||
# XXX Probably want to forget about the state of the current
|
||||
# request, although that might interact poorly with other
|
||||
# handlers that also use handler-specific request attributes
|
||||
new = self.redirect_request(newurl, req, fp, code, msg, headers)
|
||||
if new is None:
|
||||
return
|
||||
|
||||
# loop detection
|
||||
# .redirect_dict has a key url if url was previously visited.
|
||||
if hasattr(req, 'redirect_dict'):
|
||||
visited = new.redirect_dict = req.redirect_dict
|
||||
if (visited.get(newurl, 0) >= self.max_repeats or
|
||||
len(visited) >= self.max_redirections):
|
||||
raise HTTPError(req.get_full_url(), code,
|
||||
self.inf_msg + msg, headers, fp)
|
||||
else:
|
||||
visited = new.redirect_dict = req.redirect_dict = {}
|
||||
visited[newurl] = visited.get(newurl, 0) + 1
|
||||
|
||||
# Don't close the fp until we are sure that we won't use it
|
||||
# with HTTPError.
|
||||
fp.read()
|
||||
fp.close()
|
||||
|
||||
return self.parent.open(new)
|
||||
|
||||
http_error_301 = http_error_303 = http_error_307 = http_error_302
|
||||
http_error_refresh = http_error_302
|
||||
|
||||
inf_msg = "The HTTP server returned a redirect error that would " \
|
||||
"lead to an infinite loop.\n" \
|
||||
"The last 30x error message was:\n"
|
||||
|
||||
|
||||
# XXX would self.reset() work, instead of raising this exception?
|
||||
class EndOfHeadError(Exception): pass
|
||||
class AbstractHeadParser:
|
||||
# only these elements are allowed in or before HEAD of document
|
||||
head_elems = ("html", "head",
|
||||
"title", "base",
|
||||
"script", "style", "meta", "link", "object")
|
||||
_entitydefs = htmlentitydefs.name2codepoint
|
||||
_encoding = DEFAULT_ENCODING
|
||||
|
||||
def __init__(self):
|
||||
self.http_equiv = []
|
||||
|
||||
def start_meta(self, attrs):
|
||||
http_equiv = content = None
|
||||
for key, value in attrs:
|
||||
if key == "http-equiv":
|
||||
http_equiv = self.unescape_attr_if_required(value)
|
||||
elif key == "content":
|
||||
content = self.unescape_attr_if_required(value)
|
||||
if http_equiv is not None and content is not None:
|
||||
self.http_equiv.append((http_equiv, content))
|
||||
|
||||
def end_head(self):
|
||||
raise EndOfHeadError()
|
||||
|
||||
def handle_entityref(self, name):
|
||||
#debug("%s", name)
|
||||
self.handle_data(unescape(
|
||||
'&%s;' % name, self._entitydefs, self._encoding))
|
||||
|
||||
def handle_charref(self, name):
|
||||
#debug("%s", name)
|
||||
self.handle_data(unescape_charref(name, self._encoding))
|
||||
|
||||
def unescape_attr(self, name):
|
||||
#debug("%s", name)
|
||||
return unescape(name, self._entitydefs, self._encoding)
|
||||
|
||||
def unescape_attrs(self, attrs):
|
||||
#debug("%s", attrs)
|
||||
escaped_attrs = {}
|
||||
for key, val in attrs.items():
|
||||
escaped_attrs[key] = self.unescape_attr(val)
|
||||
return escaped_attrs
|
||||
|
||||
def unknown_entityref(self, ref):
|
||||
self.handle_data("&%s;" % ref)
|
||||
|
||||
def unknown_charref(self, ref):
|
||||
self.handle_data("&#%s;" % ref)
|
||||
|
||||
|
||||
try:
|
||||
import HTMLParser
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
class XHTMLCompatibleHeadParser(AbstractHeadParser,
|
||||
HTMLParser.HTMLParser):
|
||||
def __init__(self):
|
||||
HTMLParser.HTMLParser.__init__(self)
|
||||
AbstractHeadParser.__init__(self)
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
try:
|
||||
method = getattr(self, 'start_' + tag)
|
||||
except AttributeError:
|
||||
try:
|
||||
method = getattr(self, 'do_' + tag)
|
||||
except AttributeError:
|
||||
pass # unknown tag
|
||||
else:
|
||||
method(attrs)
|
||||
else:
|
||||
method(attrs)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
try:
|
||||
method = getattr(self, 'end_' + tag)
|
||||
except AttributeError:
|
||||
pass # unknown tag
|
||||
else:
|
||||
method()
|
||||
|
||||
def unescape(self, name):
|
||||
# Use the entitydefs passed into constructor, not
|
||||
# HTMLParser.HTMLParser's entitydefs.
|
||||
return self.unescape_attr(name)
|
||||
|
||||
def unescape_attr_if_required(self, name):
|
||||
return name # HTMLParser.HTMLParser already did it
|
||||
|
||||
class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
|
||||
|
||||
def _not_called(self):
|
||||
assert False
|
||||
|
||||
def __init__(self):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
AbstractHeadParser.__init__(self)
|
||||
|
||||
def handle_starttag(self, tag, method, attrs):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
if tag == "meta":
|
||||
method(attrs)
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
self.handle_starttag(tag, self._not_called, attrs)
|
||||
|
||||
def handle_endtag(self, tag, method):
|
||||
if tag in self.head_elems:
|
||||
method()
|
||||
else:
|
||||
raise EndOfHeadError()
|
||||
|
||||
def unescape_attr_if_required(self, name):
|
||||
return self.unescape_attr(name)
|
||||
|
||||
def parse_head(fileobj, parser):
|
||||
"""Return a list of key, value pairs."""
|
||||
while 1:
|
||||
data = fileobj.read(CHUNK)
|
||||
try:
|
||||
parser.feed(data)
|
||||
except EndOfHeadError:
|
||||
break
|
||||
if len(data) != CHUNK:
|
||||
# this should only happen if there is no HTML body, or if
|
||||
# CHUNK is big
|
||||
break
|
||||
return parser.http_equiv
|
||||
|
||||
class HTTPEquivProcessor(BaseHandler):
|
||||
"""Append META HTTP-EQUIV headers to regular HTTP headers."""
|
||||
|
||||
handler_order = 300 # before handlers that look at HTTP headers
|
||||
|
||||
def __init__(self, head_parser_class=HeadParser,
|
||||
i_want_broken_xhtml_support=False,
|
||||
):
|
||||
self.head_parser_class = head_parser_class
|
||||
self._allow_xhtml = i_want_broken_xhtml_support
|
||||
|
||||
def http_response(self, request, response):
|
||||
if not hasattr(response, "seek"):
|
||||
response = response_seek_wrapper(response)
|
||||
http_message = response.info()
|
||||
url = response.geturl()
|
||||
ct_hdrs = http_message.getheaders("content-type")
|
||||
if is_html(ct_hdrs, url, self._allow_xhtml):
|
||||
try:
|
||||
try:
|
||||
html_headers = parse_head(response, self.head_parser_class())
|
||||
finally:
|
||||
response.seek(0)
|
||||
except (HTMLParser.HTMLParseError,
|
||||
sgmllib.SGMLParseError):
|
||||
pass
|
||||
else:
|
||||
for hdr, val in html_headers:
|
||||
# add a header
|
||||
http_message.dict[hdr.lower()] = val
|
||||
text = hdr + ": " + val
|
||||
for line in text.split("\n"):
|
||||
http_message.headers.append(line + "\n")
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
|
||||
class HTTPCookieProcessor(BaseHandler):
|
||||
"""Handle HTTP cookies.
|
||||
|
||||
Public attributes:
|
||||
|
||||
cookiejar: CookieJar instance
|
||||
|
||||
"""
|
||||
def __init__(self, cookiejar=None):
|
||||
if cookiejar is None:
|
||||
cookiejar = CookieJar()
|
||||
self.cookiejar = cookiejar
|
||||
|
||||
def http_request(self, request):
|
||||
self.cookiejar.add_cookie_header(request)
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
self.cookiejar.extract_cookies(response, request)
|
||||
return response
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
try:
|
||||
import robotparser
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
class MechanizeRobotFileParser(robotparser.RobotFileParser):
|
||||
|
||||
def __init__(self, url='', opener=None):
|
||||
import _opener
|
||||
robotparser.RobotFileParser.__init__(self, url)
|
||||
self._opener = opener
|
||||
|
||||
def set_opener(self, opener=None):
|
||||
if opener is None:
|
||||
opener = _opener.OpenerDirector()
|
||||
self._opener = opener
|
||||
|
||||
def read(self):
|
||||
"""Reads the robots.txt URL and feeds it to the parser."""
|
||||
if self._opener is None:
|
||||
self.set_opener()
|
||||
req = Request(self.url, unverifiable=True, visit=False)
|
||||
try:
|
||||
f = self._opener.open(req)
|
||||
except HTTPError, f:
|
||||
pass
|
||||
except (IOError, socket.error, OSError), exc:
|
||||
robotparser._debug("ignoring error opening %r: %s" %
|
||||
(self.url, exc))
|
||||
return
|
||||
lines = []
|
||||
line = f.readline()
|
||||
while line:
|
||||
lines.append(line.strip())
|
||||
line = f.readline()
|
||||
status = f.code
|
||||
if status == 401 or status == 403:
|
||||
self.disallow_all = True
|
||||
robotparser._debug("disallow all")
|
||||
elif status >= 400:
|
||||
self.allow_all = True
|
||||
robotparser._debug("allow all")
|
||||
elif status == 200 and lines:
|
||||
robotparser._debug("parse lines")
|
||||
self.parse(lines)
|
||||
|
||||
class RobotExclusionError(urllib2.HTTPError):
|
||||
def __init__(self, request, *args):
|
||||
apply(urllib2.HTTPError.__init__, (self,)+args)
|
||||
self.request = request
|
||||
|
||||
class HTTPRobotRulesProcessor(BaseHandler):
|
||||
# before redirections, after everything else
|
||||
handler_order = 800
|
||||
|
||||
try:
|
||||
from httplib import HTTPMessage
|
||||
except:
|
||||
from mimetools import Message
|
||||
http_response_class = Message
|
||||
else:
|
||||
http_response_class = HTTPMessage
|
||||
|
||||
def __init__(self, rfp_class=MechanizeRobotFileParser):
|
||||
self.rfp_class = rfp_class
|
||||
self.rfp = None
|
||||
self._host = None
|
||||
|
||||
def http_request(self, request):
|
||||
scheme = request.get_type()
|
||||
if scheme not in ["http", "https"]:
|
||||
# robots exclusion only applies to HTTP
|
||||
return request
|
||||
|
||||
if request.get_selector() == "/robots.txt":
|
||||
# /robots.txt is always OK to fetch
|
||||
return request
|
||||
|
||||
host = request.get_host()
|
||||
|
||||
# robots.txt requests don't need to be allowed by robots.txt :-)
|
||||
origin_req = getattr(request, "_origin_req", None)
|
||||
if (origin_req is not None and
|
||||
origin_req.get_selector() == "/robots.txt" and
|
||||
origin_req.get_host() == host
|
||||
):
|
||||
return request
|
||||
|
||||
if host != self._host:
|
||||
self.rfp = self.rfp_class()
|
||||
try:
|
||||
self.rfp.set_opener(self.parent)
|
||||
except AttributeError:
|
||||
debug("%r instance does not support set_opener" %
|
||||
self.rfp.__class__)
|
||||
self.rfp.set_url(scheme+"://"+host+"/robots.txt")
|
||||
self.rfp.read()
|
||||
self._host = host
|
||||
|
||||
ua = request.get_header("User-agent", "")
|
||||
if self.rfp.can_fetch(ua, request.get_full_url()):
|
||||
return request
|
||||
else:
|
||||
# XXX This should really have raised URLError. Too late now...
|
||||
msg = "request disallowed by robots.txt"
|
||||
raise RobotExclusionError(
|
||||
request,
|
||||
request.get_full_url(),
|
||||
403, msg,
|
||||
self.http_response_class(StringIO()), StringIO(msg))
|
||||
|
||||
https_request = http_request
|
||||
|
||||
class HTTPRefererProcessor(BaseHandler):
|
||||
"""Add Referer header to requests.
|
||||
|
||||
This only makes sense if you use each RefererProcessor for a single
|
||||
chain of requests only (so, for example, if you use a single
|
||||
HTTPRefererProcessor to fetch a series of URLs extracted from a single
|
||||
page, this will break).
|
||||
|
||||
There's a proper implementation of this in mechanize.Browser.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self.referer = None
|
||||
|
||||
def http_request(self, request):
|
||||
if ((self.referer is not None) and
|
||||
not request.has_header("Referer")):
|
||||
request.add_unredirected_header("Referer", self.referer)
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
self.referer = response.geturl()
|
||||
return response
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
|
||||
def clean_refresh_url(url):
|
||||
# e.g. Firefox 1.5 does (something like) this
|
||||
if ((url.startswith('"') and url.endswith('"')) or
|
||||
(url.startswith("'") and url.endswith("'"))):
|
||||
url = url[1:-1]
|
||||
return _rfc3986.clean_url(url, "latin-1") # XXX encoding
|
||||
|
||||
def parse_refresh_header(refresh):
|
||||
"""
|
||||
>>> parse_refresh_header("1; url=http://example.com/")
|
||||
(1.0, 'http://example.com/')
|
||||
>>> parse_refresh_header("1; url='http://example.com/'")
|
||||
(1.0, 'http://example.com/')
|
||||
>>> parse_refresh_header("1")
|
||||
(1.0, None)
|
||||
>>> parse_refresh_header("blah")
|
||||
Traceback (most recent call last):
|
||||
ValueError: invalid literal for float(): blah
|
||||
|
||||
"""
|
||||
|
||||
ii = refresh.find(";")
|
||||
if ii != -1:
|
||||
pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
|
||||
jj = newurl_spec.find("=")
|
||||
key = None
|
||||
if jj != -1:
|
||||
key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
|
||||
newurl = clean_refresh_url(newurl)
|
||||
if key is None or key.strip().lower() != "url":
|
||||
raise ValueError()
|
||||
else:
|
||||
pause, newurl = float(refresh), None
|
||||
return pause, newurl
|
||||
|
||||
class HTTPRefreshProcessor(BaseHandler):
|
||||
"""Perform HTTP Refresh redirections.
|
||||
|
||||
Note that if a non-200 HTTP code has occurred (for example, a 30x
|
||||
redirect), this processor will do nothing.
|
||||
|
||||
By default, only zero-time Refresh headers are redirected. Use the
|
||||
max_time attribute / constructor argument to allow Refresh with longer
|
||||
pauses. Use the honor_time attribute / constructor argument to control
|
||||
whether the requested pause is honoured (with a time.sleep()) or
|
||||
skipped in favour of immediate redirection.
|
||||
|
||||
Public attributes:
|
||||
|
||||
max_time: see above
|
||||
honor_time: see above
|
||||
|
||||
"""
|
||||
handler_order = 1000
|
||||
|
||||
def __init__(self, max_time=0, honor_time=True):
|
||||
self.max_time = max_time
|
||||
self.honor_time = honor_time
|
||||
|
||||
def http_response(self, request, response):
|
||||
code, msg, hdrs = response.code, response.msg, response.info()
|
||||
|
||||
if code == 200 and hdrs.has_key("refresh"):
|
||||
refresh = hdrs.getheaders("refresh")[0]
|
||||
try:
|
||||
pause, newurl = parse_refresh_header(refresh)
|
||||
except ValueError:
|
||||
debug("bad Refresh header: %r" % refresh)
|
||||
return response
|
||||
if newurl is None:
|
||||
newurl = response.geturl()
|
||||
if (self.max_time is None) or (pause <= self.max_time):
|
||||
if pause > 1E-3 and self.honor_time:
|
||||
time.sleep(pause)
|
||||
hdrs["location"] = newurl
|
||||
# hardcoded http is NOT a bug
|
||||
response = self.parent.error(
|
||||
"http", request, response,
|
||||
"refresh", msg, hdrs)
|
||||
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
|
||||
class HTTPErrorProcessor(BaseHandler):
|
||||
"""Process HTTP error responses.
|
||||
|
||||
The purpose of this handler is to allow other response processors a
|
||||
look-in by removing the call to parent.error() from
|
||||
AbstractHTTPHandler.
|
||||
|
||||
For non-200 error codes, this just passes the job on to the
|
||||
Handler.<proto>_error_<code> methods, via the OpenerDirector.error
|
||||
method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an
|
||||
HTTPError if no other handler handles the error.
|
||||
|
||||
"""
|
||||
handler_order = 1000 # after all other processors
|
||||
|
||||
def http_response(self, request, response):
|
||||
code, msg, hdrs = response.code, response.msg, response.info()
|
||||
|
||||
if code != 200:
|
||||
# hardcoded http is NOT a bug
|
||||
response = self.parent.error(
|
||||
"http", request, response, code, msg, hdrs)
|
||||
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
|
||||
|
||||
class HTTPDefaultErrorHandler(BaseHandler):
|
||||
def http_error_default(self, req, fp, code, msg, hdrs):
|
||||
# why these error methods took the code, msg, headers args in the first
|
||||
# place rather than a response object, I don't know, but to avoid
|
||||
# multiple wrapping, we're discarding them
|
||||
|
||||
if isinstance(fp, urllib2.HTTPError):
|
||||
response = fp
|
||||
else:
|
||||
response = urllib2.HTTPError(
|
||||
req.get_full_url(), code, msg, hdrs, fp)
|
||||
assert code == response.code
|
||||
assert msg == response.msg
|
||||
assert hdrs == response.hdrs
|
||||
raise response
|
||||
|
||||
|
||||
class AbstractHTTPHandler(BaseHandler):
|
||||
|
||||
def __init__(self, debuglevel=0):
|
||||
self._debuglevel = debuglevel
|
||||
|
||||
def set_http_debuglevel(self, level):
|
||||
self._debuglevel = level
|
||||
|
||||
def do_request_(self, request):
|
||||
host = request.get_host()
|
||||
if not host:
|
||||
raise URLError('no host given')
|
||||
|
||||
if request.has_data(): # POST
|
||||
data = request.get_data()
|
||||
if not request.has_header('Content-type'):
|
||||
request.add_unredirected_header(
|
||||
'Content-type',
|
||||
'application/x-www-form-urlencoded')
|
||||
|
||||
scheme, sel = urllib.splittype(request.get_selector())
|
||||
sel_host, sel_path = urllib.splithost(sel)
|
||||
if not request.has_header('Host'):
|
||||
request.add_unredirected_header('Host', sel_host or host)
|
||||
for name, value in self.parent.addheaders:
|
||||
name = name.capitalize()
|
||||
if not request.has_header(name):
|
||||
request.add_unredirected_header(name, value)
|
||||
|
||||
return request
|
||||
|
||||
def do_open(self, http_class, req):
|
||||
"""Return an addinfourl object for the request, using http_class.
|
||||
|
||||
http_class must implement the HTTPConnection API from httplib.
|
||||
The addinfourl return value is a file-like object. It also
|
||||
has methods and attributes including:
|
||||
- info(): return a mimetools.Message object for the headers
|
||||
- geturl(): return the original request URL
|
||||
- code: HTTP status code
|
||||
"""
|
||||
host = req.get_host()
|
||||
if not host:
|
||||
raise URLError('no host given')
|
||||
|
||||
h = http_class(host) # will parse host:port
|
||||
h.set_debuglevel(self._debuglevel)
|
||||
|
||||
headers = dict(req.headers)
|
||||
headers.update(req.unredirected_hdrs)
|
||||
# We want to make an HTTP/1.1 request, but the addinfourl
|
||||
# class isn't prepared to deal with a persistent connection.
|
||||
# It will try to read all remaining data from the socket,
|
||||
# which will block while the server waits for the next request.
|
||||
# So make sure the connection gets closed after the (only)
|
||||
# request.
|
||||
headers["Connection"] = "close"
|
||||
headers = dict(
|
||||
[(name.title(), val) for name, val in headers.items()])
|
||||
try:
|
||||
h.request(req.get_method(), req.get_selector(), req.data, headers)
|
||||
r = h.getresponse()
|
||||
except socket.error, err: # XXX what error?
|
||||
raise URLError(err)
|
||||
|
||||
# Pick apart the HTTPResponse object to get the addinfourl
|
||||
# object initialized properly.
|
||||
|
||||
# Wrap the HTTPResponse object in socket's file object adapter
|
||||
# for Windows. That adapter calls recv(), so delegate recv()
|
||||
# to read(). This weird wrapping allows the returned object to
|
||||
# have readline() and readlines() methods.
|
||||
|
||||
# XXX It might be better to extract the read buffering code
|
||||
# out of socket._fileobject() and into a base class.
|
||||
|
||||
r.recv = r.read
|
||||
fp = socket._fileobject(r)
|
||||
|
||||
resp = closeable_response(fp, r.msg, req.get_full_url(),
|
||||
r.status, r.reason)
|
||||
return resp
|
||||
|
||||
|
||||
class HTTPHandler(AbstractHTTPHandler):
|
||||
def http_open(self, req):
|
||||
return self.do_open(httplib.HTTPConnection, req)
|
||||
|
||||
http_request = AbstractHTTPHandler.do_request_
|
||||
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
|
||||
class HTTPSConnectionFactory:
|
||||
def __init__(self, key_file, cert_file):
|
||||
self._key_file = key_file
|
||||
self._cert_file = cert_file
|
||||
def __call__(self, hostport):
|
||||
return httplib.HTTPSConnection(
|
||||
hostport,
|
||||
key_file=self._key_file, cert_file=self._cert_file)
|
||||
|
||||
class HTTPSHandler(AbstractHTTPHandler):
|
||||
def __init__(self, client_cert_manager=None):
|
||||
AbstractHTTPHandler.__init__(self)
|
||||
self.client_cert_manager = client_cert_manager
|
||||
|
||||
def https_open(self, req):
|
||||
if self.client_cert_manager is not None:
|
||||
key_file, cert_file = self.client_cert_manager.find_key_cert(
|
||||
req.get_full_url())
|
||||
conn_factory = HTTPSConnectionFactory(key_file, cert_file)
|
||||
else:
|
||||
conn_factory = httplib.HTTPSConnection
|
||||
return self.do_open(conn_factory, req)
|
||||
|
||||
https_request = AbstractHTTPHandler.do_request_
|
@ -1,185 +0,0 @@
|
||||
"""Load / save to libwww-perl (LWP) format files.
|
||||
|
||||
Actually, the format is slightly extended from that used by LWP's
|
||||
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
|
||||
not recorded by LWP.
|
||||
|
||||
It uses the version string "2.0", though really there isn't an LWP Cookies
|
||||
2.0 format. This indicates that there is extra information in here
|
||||
(domain_dot and port_spec) while still being compatible with libwww-perl,
|
||||
I hope.
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import time, re, logging
|
||||
|
||||
from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, LoadError
|
||||
from _headersutil import join_header_words, split_header_words
|
||||
from _util import iso2time, time2isoz
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
|
||||
|
||||
def lwp_cookie_str(cookie):
|
||||
"""Return string representation of Cookie in an the LWP cookie file format.
|
||||
|
||||
Actually, the format is extended a bit -- see module docstring.
|
||||
|
||||
"""
|
||||
h = [(cookie.name, cookie.value),
|
||||
("path", cookie.path),
|
||||
("domain", cookie.domain)]
|
||||
if cookie.port is not None: h.append(("port", cookie.port))
|
||||
if cookie.path_specified: h.append(("path_spec", None))
|
||||
if cookie.port_specified: h.append(("port_spec", None))
|
||||
if cookie.domain_initial_dot: h.append(("domain_dot", None))
|
||||
if cookie.secure: h.append(("secure", None))
|
||||
if cookie.expires: h.append(("expires",
|
||||
time2isoz(float(cookie.expires))))
|
||||
if cookie.discard: h.append(("discard", None))
|
||||
if cookie.comment: h.append(("comment", cookie.comment))
|
||||
if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
|
||||
if cookie.rfc2109: h.append(("rfc2109", None))
|
||||
|
||||
keys = cookie.nonstandard_attr_keys()
|
||||
keys.sort()
|
||||
for k in keys:
|
||||
h.append((k, str(cookie.get_nonstandard_attr(k))))
|
||||
|
||||
h.append(("version", str(cookie.version)))
|
||||
|
||||
return join_header_words([h])
|
||||
|
||||
class LWPCookieJar(FileCookieJar):
|
||||
"""
|
||||
The LWPCookieJar saves a sequence of"Set-Cookie3" lines.
|
||||
"Set-Cookie3" is the format used by the libwww-perl libary, not known
|
||||
to be compatible with any browser, but which is easy to read and
|
||||
doesn't lose information about RFC 2965 cookies.
|
||||
|
||||
Additional methods
|
||||
|
||||
as_lwp_str(ignore_discard=True, ignore_expired=True)
|
||||
|
||||
"""
|
||||
|
||||
magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
|
||||
|
||||
def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
|
||||
"""Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
|
||||
|
||||
ignore_discard and ignore_expires: see docstring for FileCookieJar.save
|
||||
|
||||
"""
|
||||
now = time.time()
|
||||
r = []
|
||||
for cookie in self:
|
||||
if not ignore_discard and cookie.discard:
|
||||
debug(" Not saving %s: marked for discard", cookie.name)
|
||||
continue
|
||||
if not ignore_expires and cookie.is_expired(now):
|
||||
debug(" Not saving %s: expired", cookie.name)
|
||||
continue
|
||||
r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
|
||||
return "\n".join(r+[""])
|
||||
|
||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
if filename is None:
|
||||
if self.filename is not None: filename = self.filename
|
||||
else: raise ValueError(MISSING_FILENAME_TEXT)
|
||||
|
||||
f = open(filename, "w")
|
||||
try:
|
||||
debug("Saving LWP cookies file")
|
||||
# There really isn't an LWP Cookies 2.0 format, but this indicates
|
||||
# that there is extra information in here (domain_dot and
|
||||
# port_spec) while still being compatible with libwww-perl, I hope.
|
||||
f.write("#LWP-Cookies-2.0\n")
|
||||
f.write(self.as_lwp_str(ignore_discard, ignore_expires))
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
def _really_load(self, f, filename, ignore_discard, ignore_expires):
|
||||
magic = f.readline()
|
||||
if not re.search(self.magic_re, magic):
|
||||
msg = "%s does not seem to contain cookies" % filename
|
||||
raise LoadError(msg)
|
||||
|
||||
now = time.time()
|
||||
|
||||
header = "Set-Cookie3:"
|
||||
boolean_attrs = ("port_spec", "path_spec", "domain_dot",
|
||||
"secure", "discard", "rfc2109")
|
||||
value_attrs = ("version",
|
||||
"port", "path", "domain",
|
||||
"expires",
|
||||
"comment", "commenturl")
|
||||
|
||||
try:
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if line == "": break
|
||||
if not line.startswith(header):
|
||||
continue
|
||||
line = line[len(header):].strip()
|
||||
|
||||
for data in split_header_words([line]):
|
||||
name, value = data[0]
|
||||
standard = {}
|
||||
rest = {}
|
||||
for k in boolean_attrs:
|
||||
standard[k] = False
|
||||
for k, v in data[1:]:
|
||||
if k is not None:
|
||||
lc = k.lower()
|
||||
else:
|
||||
lc = None
|
||||
# don't lose case distinction for unknown fields
|
||||
if (lc in value_attrs) or (lc in boolean_attrs):
|
||||
k = lc
|
||||
if k in boolean_attrs:
|
||||
if v is None: v = True
|
||||
standard[k] = v
|
||||
elif k in value_attrs:
|
||||
standard[k] = v
|
||||
else:
|
||||
rest[k] = v
|
||||
|
||||
h = standard.get
|
||||
expires = h("expires")
|
||||
discard = h("discard")
|
||||
if expires is not None:
|
||||
expires = iso2time(expires)
|
||||
if expires is None:
|
||||
discard = True
|
||||
domain = h("domain")
|
||||
domain_specified = domain.startswith(".")
|
||||
c = Cookie(h("version"), name, value,
|
||||
h("port"), h("port_spec"),
|
||||
domain, domain_specified, h("domain_dot"),
|
||||
h("path"), h("path_spec"),
|
||||
h("secure"),
|
||||
expires,
|
||||
discard,
|
||||
h("comment"),
|
||||
h("commenturl"),
|
||||
rest,
|
||||
h("rfc2109"),
|
||||
)
|
||||
if not ignore_discard and c.discard:
|
||||
continue
|
||||
if not ignore_expires and c.is_expired(now):
|
||||
continue
|
||||
self.set_cookie(c)
|
||||
except:
|
||||
reraise_unmasked_exceptions((IOError,))
|
||||
raise LoadError("invalid Set-Cookie3 format file %s" % filename)
|
||||
|
@ -1,656 +0,0 @@
|
||||
"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
Copyright 2003 Andy Lester (original Perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import urllib2, sys, copy, re
|
||||
|
||||
from _useragent import UserAgentBase
|
||||
from _html import DefaultFactory
|
||||
import _response
|
||||
import _request
|
||||
import _rfc3986
|
||||
|
||||
__version__ = (0, 1, 7, "b", None) # 0.1.7b
|
||||
|
||||
class BrowserStateError(Exception): pass
|
||||
class LinkNotFoundError(Exception): pass
|
||||
class FormNotFoundError(Exception): pass
|
||||
|
||||
|
||||
class History:
|
||||
"""
|
||||
|
||||
Though this will become public, the implied interface is not yet stable.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self._history = [] # LIFO
|
||||
def add(self, request, response):
|
||||
self._history.append((request, response))
|
||||
def back(self, n, _response):
|
||||
response = _response # XXX move Browser._response into this class?
|
||||
while n > 0 or response is None:
|
||||
try:
|
||||
request, response = self._history.pop()
|
||||
except IndexError:
|
||||
raise BrowserStateError("already at start of history")
|
||||
n -= 1
|
||||
return request, response
|
||||
def clear(self):
|
||||
del self._history[:]
|
||||
def close(self):
|
||||
for request, response in self._history:
|
||||
if response is not None:
|
||||
response.close()
|
||||
del self._history[:]
|
||||
|
||||
|
||||
class HTTPRefererProcessor(urllib2.BaseHandler):
|
||||
def http_request(self, request):
|
||||
# See RFC 2616 14.36. The only times we know the source of the
|
||||
# request URI has a URI associated with it are redirect, and
|
||||
# Browser.click() / Browser.submit() / Browser.follow_link().
|
||||
# Otherwise, it's the user's job to add any Referer header before
|
||||
# .open()ing.
|
||||
if hasattr(request, "redirect_dict"):
|
||||
request = self.parent._add_referer_header(
|
||||
request, origin_request=False)
|
||||
return request
|
||||
|
||||
https_request = http_request
|
||||
|
||||
|
||||
class Browser(UserAgentBase):
|
||||
"""Browser-like class with support for history, forms and links.
|
||||
|
||||
BrowserStateError is raised whenever the browser is in the wrong state to
|
||||
complete the requested operation - e.g., when .back() is called when the
|
||||
browser history is empty, or when .follow_link() is called when the current
|
||||
response does not contain HTML data.
|
||||
|
||||
Public attributes:
|
||||
|
||||
request: current request (mechanize.Request or urllib2.Request)
|
||||
form: currently selected form (see .select_form())
|
||||
|
||||
"""
|
||||
|
||||
handler_classes = copy.copy(UserAgentBase.handler_classes)
|
||||
handler_classes["_referer"] = HTTPRefererProcessor
|
||||
default_features = copy.copy(UserAgentBase.default_features)
|
||||
default_features.append("_referer")
|
||||
|
||||
def __init__(self,
|
||||
factory=None,
|
||||
history=None,
|
||||
request_class=None,
|
||||
):
|
||||
"""
|
||||
|
||||
Only named arguments should be passed to this constructor.
|
||||
|
||||
factory: object implementing the mechanize.Factory interface.
|
||||
history: object implementing the mechanize.History interface. Note
|
||||
this interface is still experimental and may change in future.
|
||||
request_class: Request class to use. Defaults to mechanize.Request
|
||||
for Pythons older than 2.4, urllib2.Request otherwise.
|
||||
|
||||
The Factory and History objects passed in are 'owned' by the Browser,
|
||||
so they should not be shared across Browsers. In particular,
|
||||
factory.set_response() should not be called except by the owning
|
||||
Browser itself.
|
||||
|
||||
Note that the supplied factory's request_class is overridden by this
|
||||
constructor, to ensure only one Request class is used.
|
||||
|
||||
"""
|
||||
self._handle_referer = True
|
||||
|
||||
if history is None:
|
||||
history = History()
|
||||
self._history = history
|
||||
|
||||
if request_class is None:
|
||||
if not hasattr(urllib2.Request, "add_unredirected_header"):
|
||||
request_class = _request.Request
|
||||
else:
|
||||
request_class = urllib2.Request # Python >= 2.4
|
||||
|
||||
if factory is None:
|
||||
factory = DefaultFactory()
|
||||
factory.set_request_class(request_class)
|
||||
self._factory = factory
|
||||
self.request_class = request_class
|
||||
|
||||
self.request = None
|
||||
self._set_response(None, False)
|
||||
|
||||
# do this last to avoid __getattr__ problems
|
||||
UserAgentBase.__init__(self)
|
||||
|
||||
def close(self):
|
||||
UserAgentBase.close(self)
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
if self._history is not None:
|
||||
self._history.close()
|
||||
self._history = None
|
||||
|
||||
# make use after .close easy to spot
|
||||
self.form = None
|
||||
self.request = self._response = None
|
||||
self.request = self.response = self.set_response = None
|
||||
self.geturl = self.reload = self.back = None
|
||||
self.clear_history = self.set_cookie = self.links = self.forms = None
|
||||
self.viewing_html = self.encoding = self.title = None
|
||||
self.select_form = self.click = self.submit = self.click_link = None
|
||||
self.follow_link = self.find_link = None
|
||||
|
||||
def set_handle_referer(self, handle):
|
||||
"""Set whether to add Referer header to each request.
|
||||
|
||||
This base class does not implement this feature (so don't turn this on
|
||||
if you're using this base class directly), but the subclass
|
||||
mechanize.Browser does.
|
||||
|
||||
"""
|
||||
self._set_handler("_referer", handle)
|
||||
self._handle_referer = bool(handle)
|
||||
|
||||
def _add_referer_header(self, request, origin_request=True):
|
||||
if self.request is None:
|
||||
return request
|
||||
scheme = request.get_type()
|
||||
original_scheme = self.request.get_type()
|
||||
if scheme not in ["http", "https"]:
|
||||
return request
|
||||
if not origin_request and not self.request.has_header("Referer"):
|
||||
return request
|
||||
|
||||
if (self._handle_referer and
|
||||
original_scheme in ["http", "https"] and
|
||||
not (original_scheme == "https" and scheme != "https")):
|
||||
# strip URL fragment (RFC 2616 14.36)
|
||||
parts = _rfc3986.urlsplit(self.request.get_full_url())
|
||||
parts = parts[:-1]+(None,)
|
||||
referer = _rfc3986.urlunsplit(parts)
|
||||
request.add_unredirected_header("Referer", referer)
|
||||
return request
|
||||
|
||||
def open_novisit(self, url, data=None):
|
||||
"""Open a URL without visiting it.
|
||||
|
||||
The browser state (including .request, .response(), history, forms and
links) is left unchanged by calling this function.
|
||||
|
||||
The interface is the same as for .open().
|
||||
|
||||
This is useful for things like fetching images.
|
||||
|
||||
See also .retrieve().
|
||||
|
||||
"""
|
||||
return self._mech_open(url, data, visit=False)
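
# --- Illustrative sketch (not part of the original source) ---------------
# open_novisit() fetches a resource without touching browser state; handy
# for images referenced by the page currently being viewed. URLs are
# placeholders.
import mechanize

br = mechanize.Browser()
br.open("http://example.com/")                      # state now points here
img = br.open_novisit("http://example.com/logo.png")
data = img.read()
assert br.geturl() == "http://example.com/"         # .request/.response unchanged
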
def open(self, url, data=None):
|
||||
return self._mech_open(url, data)
|
||||
|
||||
def _mech_open(self, url, data=None, update_history=True, visit=None):
|
||||
try:
|
||||
url.get_full_url
|
||||
except AttributeError:
|
||||
# string URL -- convert to absolute URL if required
|
||||
scheme, authority = _rfc3986.urlsplit(url)[:2]
|
||||
if scheme is None:
|
||||
# relative URL
|
||||
if self._response is None:
|
||||
raise BrowserStateError(
|
||||
"can't fetch relative reference: "
|
||||
"not viewing any document")
|
||||
url = _rfc3986.urljoin(self._response.geturl(), url)
|
||||
|
||||
request = self._request(url, data, visit)
|
||||
visit = request.visit
|
||||
if visit is None:
|
||||
visit = True
|
||||
|
||||
if visit:
|
||||
self._visit_request(request, update_history)
|
||||
|
||||
success = True
|
||||
try:
|
||||
response = UserAgentBase.open(self, request, data)
|
||||
except urllib2.HTTPError, error:
|
||||
success = False
|
||||
if error.fp is None: # not a response
|
||||
raise
|
||||
response = error
|
||||
## except (IOError, socket.error, OSError), error:
|
||||
## # Yes, urllib2 really does raise all these :-((
|
||||
## # See test_urllib2.py for examples of socket.gaierror and OSError,
|
||||
## # plus note that FTPHandler raises IOError.
|
||||
## # XXX I don't seem to have an example of exactly socket.error being
|
||||
## # raised, only socket.gaierror...
|
||||
## # I don't want to start fixing these here, though, since this is a
|
||||
## # subclass of OpenerDirector, and it would break old code. Even in
|
||||
## # Python core, a fix would need some backwards-compat. hack to be
|
||||
## # acceptable.
|
||||
## raise
|
||||
|
||||
if visit:
|
||||
self._set_response(response, False)
|
||||
response = copy.copy(self._response)
|
||||
elif response is not None:
|
||||
response = _response.upgrade_response(response)
|
||||
|
||||
if not success:
|
||||
raise response
|
||||
return response
|
||||
|
||||
def __str__(self):
|
||||
text = []
|
||||
text.append("<%s " % self.__class__.__name__)
|
||||
if self._response:
|
||||
text.append("visiting %s" % self._response.geturl())
|
||||
else:
|
||||
text.append("(not visiting a URL)")
|
||||
if self.form:
|
||||
text.append("\n selected form:\n %s\n" % str(self.form))
|
||||
text.append(">")
|
||||
return "".join(text)
|
||||
|
||||
def response(self):
|
||||
"""Return a copy of the current response.
|
||||
|
||||
The returned object has the same interface as the object returned by
|
||||
.open() (or urllib2.urlopen()).
|
||||
|
||||
"""
|
||||
return copy.copy(self._response)
|
||||
|
||||
def set_response(self, response):
|
||||
"""Replace current response with (a copy of) response.
|
||||
|
||||
response may be None.
|
||||
|
||||
This is intended mostly for HTML-preprocessing.
|
||||
"""
|
||||
self._set_response(response, True)
|
||||
|
||||
def _set_response(self, response, close_current):
|
||||
# sanity check, necessary but far from sufficient
|
||||
if not (response is None or
|
||||
(hasattr(response, "info") and hasattr(response, "geturl") and
|
||||
hasattr(response, "read")
|
||||
)
|
||||
):
|
||||
raise ValueError("not a response object")
|
||||
|
||||
self.form = None
|
||||
if response is not None:
|
||||
response = _response.upgrade_response(response)
|
||||
if close_current and self._response is not None:
|
||||
self._response.close()
|
||||
self._response = response
|
||||
self._factory.set_response(response)
|
||||
|
||||
def visit_response(self, response, request=None):
|
||||
"""Visit the response, as if it had been .open()ed.
|
||||
|
||||
Unlike .set_response(), this updates history rather than replacing the
|
||||
current response.
|
||||
"""
|
||||
if request is None:
|
||||
request = _request.Request(response.geturl())
|
||||
self._visit_request(request, True)
|
||||
self._set_response(response, False)
|
||||
|
||||
def _visit_request(self, request, update_history):
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
if self.request is not None and update_history:
|
||||
self._history.add(self.request, self._response)
|
||||
self._response = None
|
||||
# we want self.request to be assigned even if UserAgentBase.open
|
||||
# fails
|
||||
self.request = request
|
||||
|
||||
def geturl(self):
|
||||
"""Get URL of current document."""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._response.geturl()
|
||||
|
||||
def reload(self):
|
||||
"""Reload current document, and return response object."""
|
||||
if self.request is None:
|
||||
raise BrowserStateError("no URL has yet been .open()ed")
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
return self._mech_open(self.request, update_history=False)
|
||||
|
||||
def back(self, n=1):
|
||||
"""Go back n steps in history, and return response object.
|
||||
|
||||
n: go back this number of steps (default 1 step)
|
||||
|
||||
"""
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
self.request, response = self._history.back(n, self._response)
|
||||
self.set_response(response)
|
||||
if not response.read_complete:
|
||||
return self.reload()
|
||||
return copy.copy(response)
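
# --- Illustrative sketch (not part of the original source) ---------------
# History navigation with back(): open two pages, then step back to the
# first. URLs are placeholders.
import mechanize

br = mechanize.Browser()
br.open("http://example.com/page1")
br.open("http://example.com/page2")
response = br.back()          # back one step; returns the page1 response
print br.geturl()             # -> http://example.com/page1
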
def clear_history(self):
|
||||
self._history.clear()
|
||||
|
||||
def set_cookie(self, cookie_string):
|
||||
"""Request to set a cookie.
|
||||
|
||||
Note that it is NOT necessary to call this method under ordinary
|
||||
circumstances: cookie handling is normally entirely automatic. The
|
||||
intended use case is rather to simulate the setting of a cookie by
|
||||
client script in a web page (e.g. JavaScript). In that case, use of
|
||||
this method is necessary because mechanize currently does not support
|
||||
JavaScript, VBScript, etc.
|
||||
|
||||
The cookie is added in the same way as if it had arrived with the
|
||||
current response, as a result of the current request. This means that,
|
||||
for example, if it is not appropriate to set the cookie based on the
current request, no cookie will be set.
|
||||
|
||||
The cookie will be returned automatically with subsequent responses
|
||||
made by the Browser instance whenever that's appropriate.
|
||||
|
||||
cookie_string should be a valid value of the Set-Cookie header.
|
||||
|
||||
For example:
|
||||
|
||||
browser.set_cookie(
|
||||
"sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT")
|
||||
|
||||
Currently, this method does not allow for adding RFC 2965 cookies.
|
||||
This limitation will be lifted if anybody requests it.
|
||||
|
||||
"""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
if self.request.get_type() not in ["http", "https"]:
|
||||
raise BrowserStateError("can't set cookie for non-HTTP/HTTPS "
|
||||
"transactions")
|
||||
cookiejar = self._ua_handlers["_cookies"].cookiejar
|
||||
response = self.response() # copy
|
||||
headers = response.info()
|
||||
headers["Set-cookie"] = cookie_string
|
||||
cookiejar.extract_cookies(response, self.request)
|
||||
|
||||
def links(self, **kwds):
|
||||
"""Return iterable over links (mechanize.Link objects)."""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
links = self._factory.links()
|
||||
if kwds:
|
||||
return self._filter_links(links, **kwds)
|
||||
else:
|
||||
return links
|
||||
|
||||
def forms(self):
|
||||
"""Return iterable over forms.
|
||||
|
||||
The returned form objects implement the ClientForm.HTMLForm interface.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.forms()
|
||||
|
||||
def global_form(self):
|
||||
"""Return the global form object, or None if the factory implementation
|
||||
did not supply one.
|
||||
|
||||
The "global" form object contains all controls that are not descendants of
|
||||
any FORM element.
|
||||
|
||||
The returned form object implements the ClientForm.HTMLForm interface.
|
||||
|
||||
This is a separate method since the global form is not regarded as part
|
||||
of the sequence of forms in the document -- mostly for
|
||||
backwards-compatibility.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.global_form
|
||||
|
||||
def viewing_html(self):
|
||||
"""Return whether the current response contains HTML data."""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._factory.is_html
|
||||
|
||||
def encoding(self):
|
||||
""""""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._factory.encoding
|
||||
|
||||
def title(self):
|
||||
"""Return title, or None if there is no title element in the document.
|
||||
|
||||
Tags are stripped or textified as described in docs for
|
||||
PullParser.get_text() method of pullparser module.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.title
|
||||
|
||||
def select_form(self, name=None, predicate=None, nr=None):
|
||||
"""Select an HTML form for input.
|
||||
|
||||
This is a bit like giving a form the "input focus" in a browser.
|
||||
|
||||
If a form is selected, the Browser object supports the HTMLForm
|
||||
interface, so you can call methods like .set_value(), .set(), and
|
||||
.click().
|
||||
|
||||
Another way to select a form is to assign to the .form attribute. The
|
||||
form assigned should be one of the objects returned by the .forms()
|
||||
method.
|
||||
|
||||
At least one of the name, predicate and nr arguments must be supplied.
|
||||
If no matching form is found, mechanize.FormNotFoundError is raised.
|
||||
|
||||
If name is specified, then the form must have the indicated name.
|
||||
|
||||
If predicate is specified, then the form must match that function. The
|
||||
predicate function is passed the HTMLForm as its single argument, and
|
||||
should return a boolean value indicating whether the form matched.
|
||||
|
||||
nr, if supplied, is the sequence number of the form (where 0 is the
|
||||
first). Note that form 0 is the first form matching all the other
arguments (if supplied); it is not necessarily the first form in the
document.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
if (name is None) and (predicate is None) and (nr is None):
|
||||
raise ValueError(
|
||||
"at least one argument must be supplied to specify form")
|
||||
|
||||
orig_nr = nr
|
||||
for form in self.forms():
|
||||
if name is not None and name != form.name:
|
||||
continue
|
||||
if predicate is not None and not predicate(form):
|
||||
continue
|
||||
if nr:
|
||||
nr -= 1
|
||||
continue
|
||||
self.form = form
|
||||
break # success
|
||||
else:
|
||||
# failure
|
||||
description = []
|
||||
if name is not None: description.append("name '%s'" % name)
|
||||
if predicate is not None:
|
||||
description.append("predicate %s" % predicate)
|
||||
if orig_nr is not None: description.append("nr %d" % orig_nr)
|
||||
description = ", ".join(description)
|
||||
raise FormNotFoundError("no form matching "+description)
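
# --- Illustrative sketch (not part of the original source) ---------------
# Selecting a form as described in the docstring above, then filling and
# submitting it. The URL and the "q" field name are made up.
import mechanize

br = mechanize.Browser()
br.open("http://example.com/search")      # placeholder URL
br.select_form(nr=0)                      # first form in the page
br.form["q"] = "calibre"                  # hypothetical text field
response = br.submit()
print response.geturl()
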
def click(self, *args, **kwds):
|
||||
"""See ClientForm.HTMLForm.click for documentation."""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
request = self.form.click(*args, **kwds)
|
||||
return self._add_referer_header(request)
|
||||
|
||||
def submit(self, *args, **kwds):
|
||||
"""Submit current form.
|
||||
|
||||
Arguments are as for ClientForm.HTMLForm.click().
|
||||
|
||||
Return value is same as for Browser.open().
|
||||
|
||||
"""
|
||||
return self.open(self.click(*args, **kwds))
|
||||
|
||||
def click_link(self, link=None, **kwds):
|
||||
"""Find a link and return a Request object for it.
|
||||
|
||||
Arguments are as for .find_link(), except that a link may be supplied
|
||||
as the first argument.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
if not link:
|
||||
link = self.find_link(**kwds)
|
||||
else:
|
||||
if kwds:
|
||||
raise ValueError(
|
||||
"either pass a Link, or keyword arguments, not both")
|
||||
request = self.request_class(link.absolute_url)
|
||||
return self._add_referer_header(request)
|
||||
|
||||
def follow_link(self, link=None, **kwds):
|
||||
"""Find a link and .open() it.
|
||||
|
||||
Arguments are as for .click_link().
|
||||
|
||||
Return value is same as for Browser.open().
|
||||
|
||||
"""
|
||||
return self.open(self.click_link(link, **kwds))
|
||||
|
||||
def find_link(self, **kwds):
|
||||
"""Find a link in current page.
|
||||
|
||||
Links are returned as mechanize.Link objects.
|
||||
|
||||
# Return third link that .search()-matches the regexp "python"
|
||||
# (by ".search()-matches", I mean that the regular expression method
|
||||
# .search() is used, rather than .match()).
|
||||
find_link(text_regex=re.compile("python"), nr=2)
|
||||
|
||||
# Return first http link in the current page that points to somewhere
|
||||
# on python.org whose link text (after tags have been removed) is
|
||||
# exactly "monty python".
|
||||
find_link(text="monty python",
|
||||
url_regex=re.compile("http.*python.org"))
|
||||
|
||||
# Return first link with exactly three HTML attributes.
|
||||
find_link(predicate=lambda link: len(link.attrs) == 3)
|
||||
|
||||
Links include anchors (<a>), image maps (<area>), and frames (<frame>,
|
||||
<iframe>).
|
||||
|
||||
All arguments must be passed by keyword, not position. Zero or more
|
||||
arguments may be supplied. In order to find a link, all arguments
|
||||
supplied must match.
|
||||
|
||||
If a matching link is not found, mechanize.LinkNotFoundError is raised.
|
||||
|
||||
text: link text between link tags: eg. <a href="blah">this bit</a> (as
|
||||
returned by pullparser.get_compressed_text(), ie. without tags but
|
||||
with opening tags "textified" as per the pullparser docs) must compare
|
||||
equal to this argument, if supplied
|
||||
text_regex: link text between tags (as defined above) must match the
|
||||
regular expression object or regular expression string passed as this
|
||||
argument, if supplied
|
||||
name, name_regex: as for text and text_regex, but matched against the
|
||||
name HTML attribute of the link tag
|
||||
url, url_regex: as for text and text_regex, but matched against the
|
||||
URL of the link tag (note this matches against Link.url, which is a
|
||||
relative or absolute URL according to how it was written in the HTML)
|
||||
tag: element name of opening tag, eg. "a"
|
||||
predicate: a function taking a Link object as its single argument,
|
||||
returning a boolean result, indicating whether the link matches
nr: matches the nth link that matches all other criteria (default 0)
|
||||
|
||||
"""
|
||||
try:
|
||||
return self._filter_links(self._factory.links(), **kwds).next()
|
||||
except StopIteration:
|
||||
raise LinkNotFoundError()
|
||||
|
||||
def __getattr__(self, name):
|
||||
# pass through ClientForm / DOMForm methods and attributes
|
||||
form = self.__dict__.get("form")
|
||||
if form is None:
|
||||
raise AttributeError(
|
||||
"%s instance has no attribute %s (perhaps you forgot to "
|
||||
".select_form()?)" % (self.__class__, name))
|
||||
return getattr(form, name)
|
||||
|
||||
def _filter_links(self, links,
|
||||
text=None, text_regex=None,
|
||||
name=None, name_regex=None,
|
||||
url=None, url_regex=None,
|
||||
tag=None,
|
||||
predicate=None,
|
||||
nr=0
|
||||
):
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
|
||||
found_links = []
|
||||
orig_nr = nr
|
||||
|
||||
for link in links:
|
||||
if url is not None and url != link.url:
|
||||
continue
|
||||
if url_regex is not None and not re.search(url_regex, link.url):
|
||||
continue
|
||||
if (text is not None and
|
||||
(link.text is None or text != link.text)):
|
||||
continue
|
||||
if (text_regex is not None and
|
||||
(link.text is None or not re.search(text_regex, link.text))):
|
||||
continue
|
||||
if name is not None and name != dict(link.attrs).get("name"):
|
||||
continue
|
||||
if name_regex is not None:
|
||||
link_name = dict(link.attrs).get("name")
|
||||
if link_name is None or not re.search(name_regex, link_name):
|
||||
continue
|
||||
if tag is not None and tag != link.tag:
|
||||
continue
|
||||
if predicate is not None and not predicate(link):
|
||||
continue
|
||||
if nr:
|
||||
nr -= 1
|
||||
continue
|
||||
yield link
|
||||
nr = orig_nr
|
@ -1,159 +0,0 @@
|
||||
"""Mozilla / Netscape cookie loading / saving.
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import re, time, logging
|
||||
|
||||
from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, LoadError
|
||||
debug = logging.getLogger("ClientCookie").debug
|
||||
|
||||
|
||||
class MozillaCookieJar(FileCookieJar):
|
||||
"""
|
||||
|
||||
WARNING: you may want to backup your browser's cookies file if you use
|
||||
this class to save cookies. I *think* it works, but there have been
|
||||
bugs in the past!
|
||||
|
||||
This class differs from CookieJar only in the format it uses to save and
|
||||
load cookies to and from a file. This class uses the Mozilla/Netscape
|
||||
`cookies.txt' format. lynx uses this file format, too.
|
||||
|
||||
Don't expect cookies saved while the browser is running to be noticed by
|
||||
the browser (in fact, Mozilla on unix will overwrite your saved cookies if
|
||||
you change them on disk while it's running; on Windows, you probably can't
|
||||
save at all while the browser is running).
|
||||
|
||||
Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
|
||||
Netscape cookies on saving.
|
||||
|
||||
In particular, the cookie version and port number information is lost,
|
||||
together with information about whether or not Path, Port and Discard were
|
||||
specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
|
||||
domain as set in the HTTP header started with a dot (yes, I'm aware some
|
||||
domains in Netscape files start with a dot and some don't -- trust me, you
|
||||
really don't want to know any more about this).
|
||||
|
||||
Note that though Mozilla and Netscape use the same format, they use
|
||||
slightly different headers. The class saves cookies using the Netscape
|
||||
header by default (Mozilla can cope with that).
|
||||
|
||||
"""
magic_re = "#( Netscape)? HTTP Cookie File"
|
||||
header = """\
|
||||
# Netscape HTTP Cookie File
|
||||
# http://www.netscape.com/newsref/std/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
"""
|
||||
|
||||
def _really_load(self, f, filename, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
|
||||
magic = f.readline()
|
||||
if not re.search(self.magic_re, magic):
|
||||
f.close()
|
||||
raise LoadError(
|
||||
"%s does not look like a Netscape format cookies file" %
|
||||
filename)
|
||||
|
||||
try:
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if line == "": break
|
||||
|
||||
# last field may be absent, so keep any trailing tab
|
||||
if line.endswith("\n"): line = line[:-1]
|
||||
|
||||
# skip comments and blank lines XXX what is $ for?
|
||||
if (line.strip().startswith("#") or
|
||||
line.strip().startswith("$") or
|
||||
line.strip() == ""):
|
||||
continue
|
||||
|
||||
domain, domain_specified, path, secure, expires, name, value = \
|
||||
line.split("\t")
|
||||
secure = (secure == "TRUE")
|
||||
domain_specified = (domain_specified == "TRUE")
|
||||
if name == "":
|
||||
name = value
|
||||
value = None
|
||||
|
||||
initial_dot = domain.startswith(".")
|
||||
assert domain_specified == initial_dot
|
||||
|
||||
discard = False
|
||||
if expires == "":
|
||||
expires = None
|
||||
discard = True
|
||||
|
||||
# assume path_specified is false
|
||||
c = Cookie(0, name, value,
|
||||
None, False,
|
||||
domain, domain_specified, initial_dot,
|
||||
path, False,
|
||||
secure,
|
||||
expires,
|
||||
discard,
|
||||
None,
|
||||
None,
|
||||
{})
|
||||
if not ignore_discard and c.discard:
|
||||
continue
|
||||
if not ignore_expires and c.is_expired(now):
|
||||
continue
|
||||
self.set_cookie(c)
|
||||
|
||||
except:
|
||||
reraise_unmasked_exceptions((IOError,))
|
||||
raise LoadError("invalid Netscape format file %s: %s" %
|
||||
(filename, line))
|
||||
|
||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
if filename is None:
|
||||
if self.filename is not None: filename = self.filename
|
||||
else: raise ValueError(MISSING_FILENAME_TEXT)
|
||||
|
||||
f = open(filename, "w")
|
||||
try:
|
||||
debug("Saving Netscape cookies.txt file")
|
||||
f.write(self.header)
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if not ignore_discard and cookie.discard:
|
||||
debug(" Not saving %s: marked for discard", cookie.name)
|
||||
continue
|
||||
if not ignore_expires and cookie.is_expired(now):
|
||||
debug(" Not saving %s: expired", cookie.name)
|
||||
continue
|
||||
if cookie.secure: secure = "TRUE"
|
||||
else: secure = "FALSE"
|
||||
if cookie.domain.startswith("."): initial_dot = "TRUE"
|
||||
else: initial_dot = "FALSE"
|
||||
if cookie.expires is not None:
|
||||
expires = str(cookie.expires)
|
||||
else:
|
||||
expires = ""
|
||||
if cookie.value is None:
|
||||
# cookies.txt regards 'Set-Cookie: foo' as a cookie
|
||||
# with no name, whereas cookielib regards it as a
|
||||
# cookie with no value.
|
||||
name = ""
|
||||
value = cookie.name
|
||||
else:
|
||||
name = cookie.name
|
||||
value = cookie.value
|
||||
f.write(
|
||||
"\t".join([cookie.domain, initial_dot, cookie.path,
|
||||
secure, expires, name, value])+
|
||||
"\n")
|
||||
finally:
|
||||
f.close()
|
@ -1,387 +0,0 @@
|
||||
"""Microsoft Internet Explorer cookie loading on Windows.
|
||||
|
||||
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
# XXX names and comments are not great here
|
||||
|
||||
import os, re, time, struct, logging
|
||||
if os.name == "nt":
|
||||
import _winreg
|
||||
|
||||
from _clientcookie import FileCookieJar, CookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, LoadError
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
|
||||
|
||||
def regload(path, leaf):
|
||||
key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
|
||||
_winreg.KEY_ALL_ACCESS)
|
||||
try:
|
||||
value = _winreg.QueryValueEx(key, leaf)[0]
|
||||
except WindowsError:
|
||||
value = None
|
||||
return value
|
||||
|
||||
WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
|
||||
|
||||
def epoch_time_offset_from_win32_filetime(filetime):
|
||||
"""Convert from win32 filetime to seconds-since-epoch value.
|
||||
|
||||
MSIE stores create and expire times as Win32 FILETIME, which is 64
|
||||
bits of 100 nanosecond intervals since Jan 01 1601.
|
||||
|
||||
mechanize expects time in 32-bit value expressed in seconds since the
|
||||
epoch (Jan 01 1970).
|
||||
|
||||
"""
|
||||
if filetime < WIN32_EPOCH:
|
||||
raise ValueError("filetime (%d) is before epoch (%d)" %
|
||||
(filetime, WIN32_EPOCH))
|
||||
|
||||
return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
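
# --- Illustrative sketch (not part of the original source) ---------------
# Worked example of the conversion above, using the WIN32_EPOCH constant and
# the function defined in this module: a FILETIME counts 100ns ticks since
# 1601, so adding N seconds adds N * 10**7 ticks.
secs_since_epoch = 1000000000                       # arbitrary Unix timestamp
filetime = WIN32_EPOCH + secs_since_epoch * 10000000L
assert epoch_time_offset_from_win32_filetime(filetime) == secs_since_epoch
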
def binary_to_char(c): return "%02X" % ord(c)
|
||||
def binary_to_str(d): return "".join(map(binary_to_char, list(d)))
|
||||
|
||||
class MSIEBase:
|
||||
magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
|
||||
padding = "\x0d\xf0\xad\x0b"
|
||||
|
||||
msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
|
||||
cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
|
||||
"(.+\@[\x21-\xFF]+\.txt)")
|
||||
|
||||
# path under HKEY_CURRENT_USER from which to get location of index.dat
|
||||
reg_path = r"software\microsoft\windows" \
|
||||
r"\currentversion\explorer\shell folders"
|
||||
reg_key = "Cookies"
|
||||
|
||||
def __init__(self):
|
||||
self._delayload_domains = {}
|
||||
|
||||
def _delayload_domain(self, domain):
|
||||
# if necessary, lazily load cookies for this domain
|
||||
delayload_info = self._delayload_domains.get(domain)
|
||||
if delayload_info is not None:
|
||||
cookie_file, ignore_discard, ignore_expires = delayload_info
|
||||
try:
|
||||
self.load_cookie_data(cookie_file,
|
||||
ignore_discard, ignore_expires)
|
||||
except (LoadError, IOError):
|
||||
debug("error reading cookie file, skipping: %s", cookie_file)
|
||||
else:
|
||||
del self._delayload_domains[domain]
|
||||
|
||||
def _load_cookies_from_file(self, filename):
|
||||
debug("Loading MSIE cookies file: %s", filename)
|
||||
cookies = []
|
||||
|
||||
cookies_fh = open(filename)
|
||||
|
||||
try:
|
||||
while 1:
|
||||
key = cookies_fh.readline()
|
||||
if key == "": break
|
||||
|
||||
rl = cookies_fh.readline
|
||||
def getlong(rl=rl): return long(rl().rstrip())
|
||||
def getstr(rl=rl): return rl().rstrip()
|
||||
|
||||
key = key.rstrip()
|
||||
value = getstr()
|
||||
domain_path = getstr()
|
||||
flags = getlong() # 0x2000 bit is for secure I think
|
||||
lo_expire = getlong()
|
||||
hi_expire = getlong()
|
||||
lo_create = getlong()
|
||||
hi_create = getlong()
|
||||
sep = getstr()
|
||||
|
||||
if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
|
||||
hi_create, lo_create, sep) or (sep != "*"):
|
||||
break
|
||||
|
||||
m = self.msie_domain_re.search(domain_path)
|
||||
if m:
|
||||
domain = m.group(1)
|
||||
path = m.group(2)
|
||||
|
||||
cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
|
||||
"PATH": path, "FLAGS": flags, "HIXP": hi_expire,
|
||||
"LOXP": lo_expire, "HICREATE": hi_create,
|
||||
"LOCREATE": lo_create})
|
||||
finally:
|
||||
cookies_fh.close()
|
||||
|
||||
return cookies
|
||||
|
||||
def load_cookie_data(self, filename,
|
||||
ignore_discard=False, ignore_expires=False):
|
||||
"""Load cookies from file containing actual cookie data.
|
||||
|
||||
Old cookies are kept unless overwritten by newly loaded ones.
|
||||
|
||||
You should not call this method if the delayload attribute is set.
|
||||
|
||||
I think each of these files contains all cookies for one user, domain,
|
||||
and path.
|
||||
|
||||
filename: file containing cookies -- usually found in a file like
|
||||
C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
|
||||
|
||||
"""
|
||||
now = int(time.time())
|
||||
|
||||
cookie_data = self._load_cookies_from_file(filename)
|
||||
|
||||
for cookie in cookie_data:
|
||||
flags = cookie["FLAGS"]
|
||||
secure = ((flags & 0x2000) != 0)
|
||||
filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
|
||||
expires = epoch_time_offset_from_win32_filetime(filetime)
|
||||
if expires < now:
|
||||
discard = True
|
||||
else:
|
||||
discard = False
|
||||
domain = cookie["DOMAIN"]
|
||||
initial_dot = domain.startswith(".")
|
||||
if initial_dot:
|
||||
domain_specified = True
|
||||
else:
|
||||
# MSIE 5 does not record whether the domain cookie-attribute
|
||||
# was specified.
|
||||
# Assuming it wasn't is conservative, because with strict
|
||||
# domain matching this will match less frequently; with regular
|
||||
# Netscape tail-matching, this will match at exactly the same
|
||||
# times that domain_specified = True would. It also means we
|
||||
# don't have to prepend a dot to achieve consistency with our
|
||||
# own & Mozilla's domain-munging scheme.
|
||||
domain_specified = False
|
||||
|
||||
# assume path_specified is false
|
||||
# XXX is there other stuff in here? -- eg. comment, commentURL?
|
||||
c = Cookie(0,
|
||||
cookie["KEY"], cookie["VALUE"],
|
||||
None, False,
|
||||
domain, domain_specified, initial_dot,
|
||||
cookie["PATH"], False,
|
||||
secure,
|
||||
expires,
|
||||
discard,
|
||||
None,
|
||||
None,
|
||||
{"flags": flags})
|
||||
if not ignore_discard and c.discard:
|
||||
continue
|
||||
if not ignore_expires and c.is_expired(now):
|
||||
continue
|
||||
CookieJar.set_cookie(self, c)
|
||||
|
||||
def load_from_registry(self, ignore_discard=False, ignore_expires=False,
|
||||
username=None):
|
||||
"""
|
||||
username: only required on win9x
|
||||
|
||||
"""
|
||||
cookies_dir = regload(self.reg_path, self.reg_key)
|
||||
filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
|
||||
self.load(filename, ignore_discard, ignore_expires, username)
|
||||
|
||||
def _really_load(self, index, filename, ignore_discard, ignore_expires,
|
||||
username):
|
||||
now = int(time.time())
|
||||
|
||||
if username is None:
|
||||
username = os.environ['USERNAME'].lower()
|
||||
|
||||
cookie_dir = os.path.dirname(filename)
|
||||
|
||||
data = index.read(256)
|
||||
if len(data) != 256:
|
||||
raise LoadError("%s file is too short" % filename)
|
||||
|
||||
# Cookies' index.dat file starts with 32 bytes of signature
|
||||
# followed by an offset to the first record, stored as a little-
|
||||
# endian DWORD.
|
||||
sig, size, data = data[:32], data[32:36], data[36:]
|
||||
size = struct.unpack("<L", size)[0]
|
||||
|
||||
# check that sig is valid
|
||||
if not self.magic_re.match(sig) or size != 0x4000:
|
||||
raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
|
||||
(str(filename), sig, size))
|
||||
|
||||
# skip to start of first record
|
||||
index.seek(size, 0)
|
||||
|
||||
sector = 128 # size of sector in bytes
|
||||
|
||||
while 1:
|
||||
data = ""
|
||||
|
||||
# Cookies are usually in two contiguous sectors, so read in two
|
||||
# sectors and adjust if not a Cookie.
|
||||
to_read = 2 * sector
|
||||
d = index.read(to_read)
|
||||
if len(d) != to_read:
|
||||
break
|
||||
data = data + d
|
||||
|
||||
# Each record starts with a 4-byte signature and a count
|
||||
# (little-endian DWORD) of sectors for the record.
|
||||
sig, size, data = data[:4], data[4:8], data[8:]
|
||||
size = struct.unpack("<L", size)[0]
|
||||
|
||||
to_read = (size - 2) * sector
|
||||
|
||||
## from urllib import quote
|
||||
## print "data", quote(data)
|
||||
## print "sig", quote(sig)
|
||||
## print "size in sectors", size
|
||||
## print "size in bytes", size*sector
|
||||
## print "size in units of 16 bytes", (size*sector) / 16
|
||||
## print "size to read in bytes", to_read
|
||||
## print
|
||||
|
||||
if sig != "URL ":
|
||||
assert (sig in ("HASH", "LEAK",
|
||||
self.padding, "\x00\x00\x00\x00"),
|
||||
"unrecognized MSIE index.dat record: %s" %
|
||||
binary_to_str(sig))
|
||||
if sig == "\x00\x00\x00\x00":
|
||||
# assume we've got all the cookies, and stop
|
||||
break
|
||||
if sig == self.padding:
|
||||
continue
|
||||
# skip the rest of this record
|
||||
assert to_read >= 0
|
||||
if size != 2:
|
||||
assert to_read != 0
|
||||
index.seek(to_read, 1)
|
||||
continue
|
||||
|
||||
# read in rest of record if necessary
|
||||
if size > 2:
|
||||
more_data = index.read(to_read)
|
||||
if len(more_data) != to_read: break
|
||||
data = data + more_data
|
||||
|
||||
cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
|
||||
"(%s\@[\x21-\xFF]+\.txt)" % username)
|
||||
m = re.search(cookie_re, data, re.I)
|
||||
if m:
|
||||
cookie_file = os.path.join(cookie_dir, m.group(2))
|
||||
if not self.delayload:
|
||||
try:
|
||||
self.load_cookie_data(cookie_file,
|
||||
ignore_discard, ignore_expires)
|
||||
except (LoadError, IOError):
|
||||
debug("error reading cookie file, skipping: %s",
|
||||
cookie_file)
|
||||
else:
|
||||
domain = m.group(1)
|
||||
i = domain.find("/")
|
||||
if i != -1:
|
||||
domain = domain[:i]
|
||||
|
||||
self._delayload_domains[domain] = (
|
||||
cookie_file, ignore_discard, ignore_expires)
|
||||
|
||||
|
||||
class MSIECookieJar(MSIEBase, FileCookieJar):
|
||||
"""FileCookieJar that reads from the Windows MSIE cookies database.
|
||||
|
||||
MSIECookieJar can read the cookie files of Microsoft Internet Explorer
|
||||
(MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
|
||||
Windows 98. Other configurations may also work, but are untested. Saving
|
||||
cookies in MSIE format is NOT supported. If you save cookies, they'll be
|
||||
in the usual Set-Cookie3 format, which you can read back in using an
|
||||
instance of the plain old CookieJar class. Don't save using the same
|
||||
filename that you loaded cookies from, because you may succeed in
|
||||
clobbering your MSIE cookies index file!
|
||||
|
||||
You should be able to have mechanize share Internet Explorer's cookies like
|
||||
this (note you need to supply a username to load_from_registry if you're on
|
||||
Windows 9x or Windows ME):
|
||||
|
||||
cj = MSIECookieJar(delayload=1)
|
||||
# find cookies index file in registry and load cookies from it
|
||||
cj.load_from_registry()
|
||||
opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
|
||||
response = opener.open("http://example.com/")
|
||||
|
||||
Iterating over a delayloaded MSIECookieJar instance will not cause any
|
||||
cookies to be read from disk. To force reading of all cookies from disk,
|
||||
call read_all_cookies. Note that the following methods iterate over self:
|
||||
clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
|
||||
and as_string.
|
||||
|
||||
Additional methods:
|
||||
|
||||
load_from_registry(ignore_discard=False, ignore_expires=False,
|
||||
username=None)
|
||||
load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
|
||||
read_all_cookies()
|
||||
|
||||
"""
|
||||
def __init__(self, filename=None, delayload=False, policy=None):
|
||||
MSIEBase.__init__(self)
|
||||
FileCookieJar.__init__(self, filename, delayload, policy)
|
||||
|
||||
def set_cookie(self, cookie):
|
||||
if self.delayload:
|
||||
self._delayload_domain(cookie.domain)
|
||||
CookieJar.set_cookie(self, cookie)
|
||||
|
||||
def _cookies_for_request(self, request):
|
||||
"""Return a list of cookies to be returned to server."""
|
||||
domains = self._cookies.copy()
|
||||
domains.update(self._delayload_domains)
|
||||
domains = domains.keys()
|
||||
|
||||
cookies = []
|
||||
for domain in domains:
|
||||
cookies.extend(self._cookies_for_domain(domain, request))
|
||||
return cookies
|
||||
|
||||
def _cookies_for_domain(self, domain, request):
|
||||
if not self._policy.domain_return_ok(domain, request):
|
||||
return []
|
||||
debug("Checking %s for cookies to return", domain)
|
||||
if self.delayload:
|
||||
self._delayload_domain(domain)
|
||||
return CookieJar._cookies_for_domain(self, domain, request)
|
||||
|
||||
def read_all_cookies(self):
|
||||
"""Eagerly read in all cookies."""
|
||||
if self.delayload:
|
||||
for domain in self._delayload_domains.keys():
|
||||
self._delayload_domain(domain)
|
||||
|
||||
def load(self, filename, ignore_discard=False, ignore_expires=False,
|
||||
username=None):
|
||||
"""Load cookies from an MSIE 'index.dat' cookies index file.
|
||||
|
||||
filename: full path to cookie index file
|
||||
username: only required on win9x
|
||||
|
||||
"""
|
||||
if filename is None:
|
||||
if self.filename is not None: filename = self.filename
|
||||
else: raise ValueError(MISSING_FILENAME_TEXT)
|
||||
|
||||
index = open(filename, "rb")
|
||||
|
||||
try:
|
||||
self._really_load(index, filename, ignore_discard, ignore_expires,
|
||||
username)
|
||||
finally:
|
||||
index.close()
|
@ -1,421 +0,0 @@
|
||||
"""Integration with Python standard library module urllib2: OpenerDirector
|
||||
class.
|
||||
|
||||
Copyright 2004-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import os, urllib2, bisect, urllib, httplib, types, tempfile
|
||||
try:
|
||||
import threading as _threading
|
||||
except ImportError:
|
||||
import dummy_threading as _threading
|
||||
try:
|
||||
set
|
||||
except NameError:
|
||||
import sets
|
||||
set = sets.Set
|
||||
|
||||
import _http
|
||||
import _upgrade
|
||||
import _rfc3986
|
||||
import _response
|
||||
from _util import isstringlike
|
||||
from _request import Request
|
||||
|
||||
|
||||
class ContentTooShortError(urllib2.URLError):
|
||||
def __init__(self, reason, result):
|
||||
urllib2.URLError.__init__(self, reason)
|
||||
self.result = result
|
||||
|
||||
|
||||
class OpenerDirector(urllib2.OpenerDirector):
|
||||
def __init__(self):
|
||||
urllib2.OpenerDirector.__init__(self)
|
||||
# really none of these are (sanely) public -- the lack of initial
|
||||
# underscore on some is just due to following urllib2
|
||||
self.process_response = {}
|
||||
self.process_request = {}
|
||||
self._any_request = {}
|
||||
self._any_response = {}
|
||||
self._handler_index_valid = True
|
||||
self._tempfiles = []
|
||||
|
||||
def add_handler(self, handler):
|
||||
if handler in self.handlers:
|
||||
return
|
||||
# XXX why does self.handlers need to be sorted?
|
||||
bisect.insort(self.handlers, handler)
|
||||
handler.add_parent(self)
|
||||
self._handler_index_valid = False
|
||||
|
||||
def _maybe_reindex_handlers(self):
|
||||
if self._handler_index_valid:
|
||||
return
|
||||
|
||||
handle_error = {}
|
||||
handle_open = {}
|
||||
process_request = {}
|
||||
process_response = {}
|
||||
any_request = set()
|
||||
any_response = set()
|
||||
unwanted = []
|
||||
|
||||
for handler in self.handlers:
|
||||
added = False
|
||||
for meth in dir(handler):
|
||||
if meth in ["redirect_request", "do_open", "proxy_open"]:
|
||||
# oops, coincidental match
|
||||
continue
|
||||
|
||||
if meth == "any_request":
|
||||
any_request.add(handler)
|
||||
added = True
|
||||
continue
|
||||
elif meth == "any_response":
|
||||
any_response.add(handler)
|
||||
added = True
|
||||
continue
|
||||
|
||||
ii = meth.find("_")
|
||||
scheme = meth[:ii]
|
||||
condition = meth[ii+1:]
|
||||
|
||||
if condition.startswith("error"):
|
||||
jj = meth[ii+1:].find("_") + ii + 1
|
||||
kind = meth[jj+1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
lookup = handle_error.setdefault(scheme, {})
|
||||
elif condition == "open":
|
||||
kind = scheme
|
||||
lookup = handle_open
|
||||
elif condition == "request":
|
||||
kind = scheme
|
||||
lookup = process_request
|
||||
elif condition == "response":
|
||||
kind = scheme
|
||||
lookup = process_response
|
||||
else:
|
||||
continue
|
||||
|
||||
lookup.setdefault(kind, set()).add(handler)
|
||||
added = True
|
||||
|
||||
if not added:
|
||||
unwanted.append(handler)
|
||||
|
||||
for handler in unwanted:
|
||||
self.handlers.remove(handler)
|
||||
|
||||
# sort indexed methods
|
||||
# XXX could be cleaned up
|
||||
for lookup in [process_request, process_response]:
|
||||
for scheme, handlers in lookup.iteritems():
|
||||
lookup[scheme] = handlers
|
||||
for scheme, lookup in handle_error.iteritems():
|
||||
for code, handlers in lookup.iteritems():
|
||||
handlers = list(handlers)
|
||||
handlers.sort()
|
||||
lookup[code] = handlers
|
||||
for scheme, handlers in handle_open.iteritems():
|
||||
handlers = list(handlers)
|
||||
handlers.sort()
|
||||
handle_open[scheme] = handlers
|
||||
|
||||
# cache the indexes
|
||||
self.handle_error = handle_error
|
||||
self.handle_open = handle_open
|
||||
self.process_request = process_request
|
||||
self.process_response = process_response
|
||||
self._any_request = any_request
|
||||
self._any_response = any_response
|
||||
|
||||
def _request(self, url_or_req, data, visit):
|
||||
if isstringlike(url_or_req):
|
||||
req = Request(url_or_req, data, visit=visit)
|
||||
else:
|
||||
# already a urllib2.Request or mechanize.Request instance
|
||||
req = url_or_req
|
||||
if data is not None:
|
||||
req.add_data(data)
|
||||
# XXX yuck, give request a .visit attribute if it doesn't have one
|
||||
try:
|
||||
req.visit
|
||||
except AttributeError:
|
||||
req.visit = None
|
||||
if visit is not None:
|
||||
req.visit = visit
|
||||
return req
|
||||
|
||||
def open(self, fullurl, data=None):
|
||||
req = self._request(fullurl, data, None)
|
||||
req_scheme = req.get_type()
|
||||
|
||||
self._maybe_reindex_handlers()
|
||||
|
||||
# pre-process request
|
||||
# XXX should we allow a Processor to change the URL scheme
|
||||
# of the request?
|
||||
request_processors = set(self.process_request.get(req_scheme, []))
|
||||
request_processors.update(self._any_request)
|
||||
request_processors = list(request_processors)
|
||||
request_processors.sort()
|
||||
for processor in request_processors:
|
||||
for meth_name in ["any_request", req_scheme+"_request"]:
|
||||
meth = getattr(processor, meth_name, None)
|
||||
if meth:
|
||||
req = meth(req)
|
||||
|
||||
# In Python >= 2.4, .open() supports processors already, so we must
|
||||
# call ._open() instead.
|
||||
urlopen = getattr(urllib2.OpenerDirector, "_open",
|
||||
urllib2.OpenerDirector.open)
|
||||
response = urlopen(self, req, data)
|
||||
|
||||
# post-process response
|
||||
response_processors = set(self.process_response.get(req_scheme, []))
|
||||
response_processors.update(self._any_response)
|
||||
response_processors = list(response_processors)
|
||||
response_processors.sort()
|
||||
for processor in response_processors:
|
||||
for meth_name in ["any_response", req_scheme+"_response"]:
|
||||
meth = getattr(processor, meth_name, None)
|
||||
if meth:
|
||||
response = meth(req, response)
|
||||
|
||||
return response
|
||||
|
||||
def error(self, proto, *args):
|
||||
if proto in ['http', 'https']:
|
||||
# XXX http[s] protocols are special-cased
|
||||
dict = self.handle_error['http'] # https is handled the same as http here
|
||||
proto = args[2] # YUCK!
|
||||
meth_name = 'http_error_%s' % proto
|
||||
http_err = 1
|
||||
orig_args = args
|
||||
else:
|
||||
dict = self.handle_error
|
||||
meth_name = proto + '_error'
|
||||
http_err = 0
|
||||
args = (dict, proto, meth_name) + args
|
||||
result = apply(self._call_chain, args)
|
||||
if result:
|
||||
return result
|
||||
|
||||
if http_err:
|
||||
args = (dict, 'default', 'http_error_default') + orig_args
|
||||
return apply(self._call_chain, args)
|
||||
|
||||
BLOCK_SIZE = 1024*8
|
||||
def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
|
||||
"""Returns (filename, headers).
|
||||
|
||||
For remote objects, the default filename will refer to a temporary
|
||||
file. Temporary files are removed when the OpenerDirector.close()
|
||||
method is called.
|
||||
|
||||
For file: URLs, at present the returned filename is None. This may
|
||||
change in future.
|
||||
|
||||
If the actual number of bytes read is less than indicated by the
|
||||
Content-Length header, raises ContentTooShortError (a URLError
|
||||
subclass). The exception's .result attribute contains the (filename,
|
||||
headers) that would have been returned.
|
||||
|
||||
"""
|
||||
req = self._request(fullurl, data, False)
|
||||
scheme = req.get_type()
|
||||
fp = self.open(req)
|
||||
headers = fp.info()
|
||||
if filename is None and scheme == 'file':
|
||||
# XXX req.get_selector() seems broken here, return None,
|
||||
# pending sanity :-/
|
||||
return None, headers
|
||||
#return urllib.url2pathname(req.get_selector()), headers
|
||||
if filename:
|
||||
tfp = open(filename, 'wb')
|
||||
else:
|
||||
path = _rfc3986.urlsplit(fullurl)[2]
|
||||
suffix = os.path.splitext(path)[1]
|
||||
fd, filename = tempfile.mkstemp(suffix)
|
||||
self._tempfiles.append(filename)
|
||||
tfp = os.fdopen(fd, 'wb')
|
||||
|
||||
result = filename, headers
|
||||
bs = self.BLOCK_SIZE
|
||||
size = -1
|
||||
read = 0
|
||||
blocknum = 0
|
||||
if reporthook:
|
||||
if "content-length" in headers:
|
||||
size = int(headers["Content-Length"])
|
||||
reporthook(blocknum, bs, size)
|
||||
while 1:
|
||||
block = fp.read(bs)
|
||||
if block == "":
|
||||
break
|
||||
read += len(block)
|
||||
tfp.write(block)
|
||||
blocknum += 1
|
||||
if reporthook:
|
||||
reporthook(blocknum, bs, size)
|
||||
fp.close()
|
||||
tfp.close()
|
||||
del fp
|
||||
del tfp
|
||||
|
||||
# raise exception if actual size does not match content-length header
|
||||
if size >= 0 and read < size:
|
||||
raise ContentTooShortError(
|
||||
"retrieval incomplete: "
|
||||
"got only %i out of %i bytes" % (read, size),
|
||||
result
|
||||
)
|
||||
|
||||
return result
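
# --- Illustrative sketch (not part of the original source) ---------------
# Using retrieve() with a progress callback, via the build_opener() helper
# defined further down in this module. The URL is a placeholder.
def report(blocknum, blocksize, totalsize):
    # totalsize is -1 when the server sends no Content-Length header
    print "block %d of %d bytes (total %d)" % (blocknum, blocksize, totalsize)

opener = build_opener()
filename, headers = opener.retrieve("http://example.com/big.bin",
                                    reporthook=report)
print "saved to", filename
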
def close(self):
|
||||
urllib2.OpenerDirector.close(self)
|
||||
|
||||
# make it very obvious this object is no longer supposed to be used
|
||||
self.open = self.error = self.retrieve = self.add_handler = None
|
||||
|
||||
if self._tempfiles:
|
||||
for filename in self._tempfiles:
|
||||
try:
|
||||
os.unlink(filename)
|
||||
except OSError:
|
||||
pass
|
||||
del self._tempfiles[:]
|
||||
|
||||
|
||||
def wrapped_open(urlopen, process_response_object, fullurl, data=None):
|
||||
success = True
|
||||
try:
|
||||
response = urlopen(fullurl, data)
|
||||
except urllib2.HTTPError, error:
|
||||
success = False
|
||||
if error.fp is None: # not a response
|
||||
raise
|
||||
response = error
|
||||
|
||||
if response is not None:
|
||||
response = process_response_object(response)
|
||||
|
||||
if not success:
|
||||
raise response
|
||||
return response
|
||||
|
||||
class ResponseProcessingOpener(OpenerDirector):
|
||||
|
||||
def open(self, fullurl, data=None):
|
||||
def bound_open(fullurl, data=None):
|
||||
return OpenerDirector.open(self, fullurl, data)
|
||||
return wrapped_open(
|
||||
bound_open, self.process_response_object, fullurl, data)
|
||||
|
||||
def process_response_object(self, response):
|
||||
return response
|
||||
|
||||
|
||||
class SeekableResponseOpener(ResponseProcessingOpener):
|
||||
def process_response_object(self, response):
|
||||
return _response.seek_wrapped_response(response)
|
||||
|
||||
|
||||
class OpenerFactory:
|
||||
"""This class's interface is quite likely to change."""
|
||||
|
||||
default_classes = [
|
||||
# handlers
|
||||
urllib2.ProxyHandler,
|
||||
urllib2.UnknownHandler,
|
||||
_http.HTTPHandler, # derived from new AbstractHTTPHandler
|
||||
_http.HTTPDefaultErrorHandler,
|
||||
_http.HTTPRedirectHandler, # bugfixed
|
||||
urllib2.FTPHandler,
|
||||
urllib2.FileHandler,
|
||||
# processors
|
||||
_upgrade.HTTPRequestUpgradeProcessor,
|
||||
_http.HTTPCookieProcessor,
|
||||
_http.HTTPErrorProcessor,
|
||||
]
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
default_classes.append(_http.HTTPSHandler)
|
||||
handlers = []
|
||||
replacement_handlers = []
|
||||
|
||||
def __init__(self, klass=OpenerDirector):
|
||||
self.klass = klass
|
||||
|
||||
def build_opener(self, *handlers):
|
||||
"""Create an opener object from a list of handlers and processors.
|
||||
|
||||
The opener will use several default handlers and processors, including
|
||||
support for HTTP and FTP.
|
||||
|
||||
If any of the handlers passed as arguments are subclasses of the
|
||||
default handlers, the default handlers will not be used.
|
||||
|
||||
"""
|
||||
opener = self.klass()
|
||||
default_classes = list(self.default_classes)
|
||||
skip = []
|
||||
for klass in default_classes:
|
||||
for check in handlers:
|
||||
if type(check) == types.ClassType:
|
||||
if issubclass(check, klass):
|
||||
skip.append(klass)
|
||||
elif type(check) == types.InstanceType:
|
||||
if isinstance(check, klass):
|
||||
skip.append(klass)
|
||||
for klass in skip:
|
||||
default_classes.remove(klass)
|
||||
|
||||
for klass in default_classes:
|
||||
opener.add_handler(klass())
|
||||
for h in handlers:
|
||||
if type(h) == types.ClassType:
|
||||
h = h()
|
||||
opener.add_handler(h)
|
||||
|
||||
return opener
|
||||
|
||||
|
||||
build_opener = OpenerFactory().build_opener
|
||||
|
||||
_opener = None
|
||||
urlopen_lock = _threading.Lock()
|
||||
def urlopen(url, data=None):
|
||||
global _opener
|
||||
if _opener is None:
|
||||
urlopen_lock.acquire()
|
||||
try:
|
||||
if _opener is None:
|
||||
_opener = build_opener()
|
||||
finally:
|
||||
urlopen_lock.release()
|
||||
return _opener.open(url, data)
|
||||
|
||||
def urlretrieve(url, filename=None, reporthook=None, data=None):
|
||||
global _opener
|
||||
if _opener is None:
|
||||
urlopen_lock.acquire()
|
||||
try:
|
||||
if _opener is None:
|
||||
_opener = build_opener()
|
||||
finally:
|
||||
urlopen_lock.release()
|
||||
return _opener.retrieve(url, filename, reporthook, data)
|
||||
|
||||
def install_opener(opener):
|
||||
global _opener
|
||||
_opener = opener
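
# --- Illustrative sketch (not part of the original source) ---------------
# Installing a custom global opener, mirroring urllib2: later calls to
# urlopen()/urlretrieve() in this module then go through it. The proxy
# address and URL are placeholders.
import urllib2

opener = build_opener(urllib2.ProxyHandler({"http": "http://proxy:3128/"}))
install_opener(opener)
response = urlopen("http://example.com/")
print response.geturl()
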
@ -1,334 +0,0 @@
|
||||
"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.
|
||||
|
||||
Examples
|
||||
|
||||
This program extracts all links from a document. It will print one
|
||||
line for each link, containing the URL and the textual description
|
||||
between the <A>...</A> tags:
|
||||
|
||||
import pullparser, sys
|
||||
f = file(sys.argv[1])
|
||||
p = pullparser.PullParser(f)
|
||||
for token in p.tags("a"):
|
||||
if token.type == "endtag": continue
|
||||
url = dict(token.attrs).get("href", "-")
|
||||
text = p.get_compressed_text(endat=("endtag", "a"))
|
||||
print "%s\t%s" % (url, text)
|
||||
|
||||
This program extracts the <TITLE> from the document:
|
||||
|
||||
import pullparser, sys
|
||||
f = file(sys.argv[1])
|
||||
p = pullparser.PullParser(f)
|
||||
if p.get_tag("title"):
|
||||
title = p.get_compressed_text()
|
||||
print "Title: %s" % title
|
||||
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
Copyright 1998-2001 Gisle Aas (original libwww-perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses.
|
||||
|
||||
"""
|
||||
|
||||
import re, htmlentitydefs
|
||||
import sgmllib, HTMLParser
|
||||
|
||||
from _html import unescape, unescape_charref
|
||||
|
||||
|
||||
class NoMoreTokensError(Exception): pass
|
||||
|
||||
class Token:
|
||||
"""Represents an HTML tag, declaration, processing instruction etc.
|
||||
|
||||
Behaves as both a tuple-like object (ie. iterable) and has attributes
|
||||
.type, .data and .attrs.
|
||||
|
||||
>>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
|
||||
>>> t == ("starttag", "a", [("href", "http://www.python.org/")])
|
||||
True
|
||||
>>> (t.type, t.data) == ("starttag", "a")
|
||||
True
|
||||
>>> t.attrs == [("href", "http://www.python.org/")]
|
||||
True
|
||||
|
||||
Public attributes
|
||||
|
||||
type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
|
||||
"data", "comment", "decl", "pi", after the corresponding methods of
|
||||
HTMLParser.HTMLParser
|
||||
data: For a tag, the tag name; otherwise, the relevant data carried by the
|
||||
tag, as a string
|
||||
attrs: list of (name, value) pairs representing HTML attributes
|
||||
(or None if token does not represent an opening tag)
|
||||
|
||||
"""
|
||||
def __init__(self, type, data, attrs=None):
|
||||
self.type = type
|
||||
self.data = data
|
||||
self.attrs = attrs
|
||||
def __iter__(self):
|
||||
return iter((self.type, self.data, self.attrs))
|
||||
def __eq__(self, other):
|
||||
type, data, attrs = other
|
||||
if (self.type == type and
|
||||
self.data == data and
|
||||
self.attrs == attrs):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
def __ne__(self, other): return not self.__eq__(other)
|
||||
def __repr__(self):
|
||||
args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
|
||||
return self.__class__.__name__+"(%s)" % args
|
||||
|
||||
def iter_until_exception(fn, exception, *args, **kwds):
|
||||
while 1:
|
||||
try:
|
||||
yield fn(*args, **kwds)
|
||||
except exception:
|
||||
raise StopIteration
|
||||
|
||||
|
||||
class _AbstractParser:
|
||||
chunk = 1024
|
||||
compress_re = re.compile(r"\s+")
|
||||
def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
|
||||
encoding="ascii", entitydefs=None):
|
||||
"""
|
||||
fh: file-like object (only a .read() method is required) from which to
|
||||
read HTML to be parsed
|
||||
textify: mapping used by .get_text() and .get_compressed_text() methods
|
||||
to represent opening tags as text
|
||||
encoding: encoding used to encode numeric character references by
|
||||
.get_text() and .get_compressed_text() ("ascii" by default)
|
||||
|
||||
entitydefs: mapping like {"amp": "&", ...} containing HTML entity
|
||||
definitions (a sensible default is used). This is used to unescape
|
||||
entities in .get_text() (and .get_compressed_text()) and attribute
|
||||
values. If the encoding can not represent the character, the entity
|
||||
reference is left unescaped. Note that entity references (both
|
||||
numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are
|
||||
unescaped in attribute values and the return value of .get_text(), but
|
||||
not in data outside of tags. Instead, entity references outside of
|
||||
tags are represented as tokens. This is a bit odd, it's true :-/
|
||||
|
||||
If the element name of an opening tag matches a key in the textify
|
||||
mapping then that tag is converted to text. The corresponding value is
|
||||
used to specify which tag attribute to obtain the text from. textify
|
||||
maps from element names to either:
|
||||
|
||||
- an HTML attribute name, in which case the HTML attribute value is
|
||||
used as its text value along with the element name in square
|
||||
brackets (eg."alt text goes here[IMG]", or, if the alt attribute
|
||||
were missing, just "[IMG]")
|
||||
- a callable object (eg. a function) which takes a Token and returns
|
||||
the string to be used as its text value
|
||||
|
||||
If textify has no key for an element name, nothing is substituted for
|
||||
the opening tag.
|
||||
|
||||
Public attributes:
|
||||
|
||||
encoding and textify: see above
|
||||
|
||||
"""
|
||||
self._fh = fh
|
||||
self._tokenstack = [] # FIFO
|
||||
self.textify = textify
|
||||
self.encoding = encoding
|
||||
if entitydefs is None:
|
||||
entitydefs = htmlentitydefs.name2codepoint
|
||||
self._entitydefs = entitydefs
|
||||
|
||||
def __iter__(self): return self
|
||||
|
||||
    def tags(self, *names):
        return iter_until_exception(self.get_tag, NoMoreTokensError, *names)

    def tokens(self, *tokentypes):
        return iter_until_exception(self.get_token, NoMoreTokensError, *tokentypes)

    def next(self):
        try:
            return self.get_token()
        except NoMoreTokensError:
            raise StopIteration()

    def get_token(self, *tokentypes):
        """Pop the next Token object from the stack of parsed tokens.

        If arguments are given, they are taken to be token types in which the
        caller is interested: tokens representing other elements will be
        skipped. Element names must be given in lower case.

        Raises NoMoreTokensError.

        """
        while 1:
            while self._tokenstack:
                token = self._tokenstack.pop(0)
                if tokentypes:
                    if token.type in tokentypes:
                        return token
                else:
                    return token
            data = self._fh.read(self.chunk)
            if not data:
                raise NoMoreTokensError()
            self.feed(data)

    def unget_token(self, token):
        """Push a Token back onto the stack."""
        self._tokenstack.insert(0, token)

    def get_tag(self, *names):
        """Return the next Token that represents an opening or closing tag.

        If arguments are given, they are taken to be element names in which the
        caller is interested: tags representing other elements will be skipped.
        Element names must be given in lower case.

        Raises NoMoreTokensError.

        """
        while 1:
            tok = self.get_token()
            if tok.type not in ["starttag", "endtag", "startendtag"]:
                continue
            if names:
                if tok.data in names:
                    return tok
            else:
                return tok

    def get_text(self, endat=None):
        """Get some text.

        endat: stop reading text at this tag (the tag is included in the
         returned text); endtag is a tuple (type, name) where type is
         "starttag", "endtag" or "startendtag", and name is the element name of
         the tag (element names must be given in lower case)

        If endat is not given, .get_text() will stop at the next opening or
        closing tag, or when there are no more tokens (no exception is raised).
        Note that .get_text() includes the text representation (if any) of the
        opening tag, but pushes the opening tag back onto the stack. As a
        result, if you want to call .get_text() again, you need to call
        .get_tag() first (unless you want an empty string returned when you
        next call .get_text()).

        Entity references are translated using the value of the entitydefs
        constructor argument (a mapping from names to characters like that
        provided by the standard module htmlentitydefs). Named entity
        references that are not in this mapping are left unchanged.

        The textify attribute is used to translate opening tags into text: see
        the class docstring.

        """
        text = []
        tok = None
        while 1:
            try:
                tok = self.get_token()
            except NoMoreTokensError:
                # unget last token (not the one we just failed to get)
                if tok: self.unget_token(tok)
                break
            if tok.type == "data":
                text.append(tok.data)
            elif tok.type == "entityref":
                t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
                text.append(t)
            elif tok.type == "charref":
                t = unescape_charref(tok.data, self.encoding)
                text.append(t)
            elif tok.type in ["starttag", "endtag", "startendtag"]:
                tag_name = tok.data
                if tok.type in ["starttag", "startendtag"]:
                    alt = self.textify.get(tag_name)
                    if alt is not None:
                        if callable(alt):
                            text.append(alt(tok))
                        elif tok.attrs is not None:
                            for k, v in tok.attrs:
                                if k == alt:
                                    text.append(v)
                            text.append("[%s]" % tag_name.upper())
                if endat is None or endat == (tok.type, tag_name):
                    self.unget_token(tok)
                    break
        return "".join(text)

    def get_compressed_text(self, *args, **kwds):
        """
        As .get_text(), but collapses each group of contiguous whitespace to a
        single space character, and removes all initial and trailing
        whitespace.

        """
        text = self.get_text(*args, **kwds)
        text = text.strip()
        return self.compress_re.sub(" ", text)

    def handle_startendtag(self, tag, attrs):
        self._tokenstack.append(Token("startendtag", tag, attrs))
    def handle_starttag(self, tag, attrs):
        self._tokenstack.append(Token("starttag", tag, attrs))
    def handle_endtag(self, tag):
        self._tokenstack.append(Token("endtag", tag))
    def handle_charref(self, name):
        self._tokenstack.append(Token("charref", name))
    def handle_entityref(self, name):
        self._tokenstack.append(Token("entityref", name))
    def handle_data(self, data):
        self._tokenstack.append(Token("data", data))
    def handle_comment(self, data):
        self._tokenstack.append(Token("comment", data))
    def handle_decl(self, decl):
        self._tokenstack.append(Token("decl", decl))
    def unknown_decl(self, data):
        # XXX should this call self.error instead?
        #self.error("unknown declaration: " + `data`)
        self._tokenstack.append(Token("decl", data))
    def handle_pi(self, data):
        self._tokenstack.append(Token("pi", data))

    def unescape_attr(self, name):
        return unescape(name, self._entitydefs, self.encoding)
    def unescape_attrs(self, attrs):
        escaped_attrs = []
        for key, val in attrs:
            escaped_attrs.append((key, self.unescape_attr(val)))
        return escaped_attrs

class PullParser(_AbstractParser, HTMLParser.HTMLParser):
    def __init__(self, *args, **kwds):
        HTMLParser.HTMLParser.__init__(self)
        _AbstractParser.__init__(self, *args, **kwds)
    def unescape(self, name):
        # Use the entitydefs passed into constructor, not
        # HTMLParser.HTMLParser's entitydefs.
        return self.unescape_attr(name)

class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
    def __init__(self, *args, **kwds):
        sgmllib.SGMLParser.__init__(self)
        _AbstractParser.__init__(self, *args, **kwds)
    def unknown_starttag(self, tag, attrs):
        attrs = self.unescape_attrs(attrs)
        self._tokenstack.append(Token("starttag", tag, attrs))
    def unknown_endtag(self, tag):
        self._tokenstack.append(Token("endtag", tag))


def _test():
    import doctest, _pullparser
    return doctest.testmod(_pullparser)

if __name__ == "__main__":
    _test()
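For orientation, a minimal, hypothetical usage sketch of the pull parser deleted above; the HTML snippet, URL and variable names are illustrative and not part of the commit:

# Hypothetical sketch only, assuming the TolerantPullParser API shown above.
from cStringIO import StringIO
p = TolerantPullParser(StringIO('<a href="http://example.com/">link text</a>'))
tok = p.get_tag("a")              # Token("starttag", "a", [("href", "http://example.com/")])
print tok.attrs                   # [('href', 'http://example.com/')]
print p.get_compressed_text()     # 'link text'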
@ -1,86 +0,0 @@
|
||||
"""Integration with Python standard library module urllib2: Request class.
|
||||
|
||||
Copyright 2004-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import urllib2, urllib, logging
|
||||
|
||||
from _clientcookie import request_host
|
||||
import _rfc3986
|
||||
|
||||
warn = logging.getLogger("mechanize").warning
|
||||
# don't complain about missing logging handler
|
||||
logging.getLogger("mechanize").setLevel(logging.ERROR)
|
||||
|
||||
|
||||
class Request(urllib2.Request):
|
||||
def __init__(self, url, data=None, headers={},
|
||||
origin_req_host=None, unverifiable=False, visit=None):
|
||||
# In mechanize 0.2, the interpretation of a unicode url argument will
|
||||
# change: A unicode url argument will be interpreted as an IRI, and a
|
||||
# bytestring as a URI. For now, we accept unicode or bytestring. We
|
||||
# don't insist that the value is always a URI (specifically, must only
|
||||
# contain characters which are legal), because that might break working
|
||||
# code (who knows what bytes some servers want to see, especially with
|
||||
# browser plugins for internationalised URIs).
|
||||
if not _rfc3986.is_clean_uri(url):
|
||||
warn("url argument is not a URI "
|
||||
"(contains illegal characters) %r" % url)
|
||||
urllib2.Request.__init__(self, url, data, headers)
|
||||
self.selector = None
|
||||
self.unredirected_hdrs = {}
|
||||
self.visit = visit
|
||||
|
||||
# All the terminology below comes from RFC 2965.
|
||||
self.unverifiable = unverifiable
|
||||
# Set request-host of origin transaction.
|
||||
# The origin request-host is needed in order to decide whether
|
||||
# unverifiable sub-requests (automatic redirects, images embedded
|
||||
# in HTML, etc.) are to third-party hosts. If they are, the
|
||||
# resulting transactions might need to be conducted with cookies
|
||||
# turned off.
|
||||
if origin_req_host is None:
|
||||
origin_req_host = request_host(self)
|
||||
self.origin_req_host = origin_req_host
|
||||
|
||||
def get_selector(self):
|
||||
return urllib.splittag(self.__r_host)[0]
|
||||
|
||||
def get_origin_req_host(self):
|
||||
return self.origin_req_host
|
||||
|
||||
def is_unverifiable(self):
|
||||
return self.unverifiable
|
||||
|
||||
def add_unredirected_header(self, key, val):
|
||||
"""Add a header that will not be added to a redirected request."""
|
||||
self.unredirected_hdrs[key.capitalize()] = val
|
||||
|
||||
def has_header(self, header_name):
|
||||
"""True iff request has named header (regular or unredirected)."""
|
||||
return (header_name in self.headers or
|
||||
header_name in self.unredirected_hdrs)
|
||||
|
||||
def get_header(self, header_name, default=None):
|
||||
return self.headers.get(
|
||||
header_name,
|
||||
self.unredirected_hdrs.get(header_name, default))
|
||||
|
||||
def header_items(self):
|
||||
hdrs = self.unredirected_hdrs.copy()
|
||||
hdrs.update(self.headers)
|
||||
return hdrs.items()
|
||||
|
||||
def __str__(self):
|
||||
return "<Request for %s>" % self.get_full_url()
|
||||
|
||||
def get_method(self):
|
||||
if self.has_data():
|
||||
return "POST"
|
||||
else:
|
||||
return "GET"
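A brief, hypothetical sketch of the Request subclass above; the header values and URL are made up, not taken from the commit:

# Hypothetical sketch only, assuming the mechanize Request class shown above.
req = Request("http://example.com/", data=None, headers={"Accept": "text/html"})
req.add_unredirected_header("User-agent", "Mozilla/5.0 (compatible)")
print req.get_method()              # 'GET', since no POST data was supplied
print req.has_header("User-agent")  # True; unredirected headers are checked too
print req.header_items()            # merged view of normal and unredirected headers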
@ -1,515 +0,0 @@
|
||||
"""Response classes.
|
||||
|
||||
The seek_wrapper code is not used if you're using UserAgent with
|
||||
.set_seekable_responses(False), or if you're using the urllib2-level interface
|
||||
without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is
|
||||
instantiated by some handlers (AbstractHTTPHandler), but the closeable_response
|
||||
interface is only depended upon by Browser-level code. Function
|
||||
upgrade_response is only used if you're using Browser or
|
||||
ResponseUpgradeProcessor.
|
||||
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import copy, mimetools
|
||||
from cStringIO import StringIO
|
||||
import urllib2
|
||||
|
||||
# XXX Andrew Dalke kindly sent me a similar class in response to my request on
|
||||
# comp.lang.python, which I then proceeded to lose. I wrote this class
|
||||
# instead, but I think he's released his code publicly since, could pinch the
|
||||
# tests from it, at least...
|
||||
|
||||
# For testing seek_wrapper invariant (note that
|
||||
# test_urllib2.HandlerTest.test_seekable is expected to fail when this
|
||||
# invariant checking is turned on). The invariant checking is done by module
|
||||
# ipdc, which is available here:
|
||||
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
|
||||
## from ipdbc import ContractBase
|
||||
## class seek_wrapper(ContractBase):
|
||||
class seek_wrapper:
|
||||
"""Adds a seek method to a file object.
|
||||
|
||||
This is only designed for seeking on readonly file-like objects.
|
||||
|
||||
Wrapped file-like object must have a read method. The readline method is
|
||||
only supported if that method is present on the wrapped object. The
|
||||
readlines method is always supported. xreadlines and iteration are
|
||||
supported only for Python 2.2 and above.
|
||||
|
||||
Public attributes:
|
||||
|
||||
wrapped: the wrapped file object
|
||||
is_closed: true iff .close() has been called
|
||||
|
||||
WARNING: All other attributes of the wrapped object (ie. those that are not
|
||||
one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
|
||||
are passed through unaltered, which may or may not make sense for your
|
||||
particular file object.
|
||||
|
||||
"""
|
||||
# General strategy is to check that cache is full enough, then delegate to
|
||||
# the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
|
||||
# position (self.__pos) is maintained independently of the cache, in order
|
||||
# that a single cache may be shared between multiple seek_wrapper objects.
|
||||
# Copying using module copy shares the cache in this way.
|
||||
|
||||
def __init__(self, wrapped):
|
||||
self.wrapped = wrapped
|
||||
self.__read_complete_state = [False]
|
||||
self.__is_closed_state = [False]
|
||||
self.__have_readline = hasattr(self.wrapped, "readline")
|
||||
self.__cache = StringIO()
|
||||
self.__pos = 0 # seek position
|
||||
|
||||
def invariant(self):
|
||||
# The end of the cache is always at the same place as the end of the
|
||||
# wrapped file.
|
||||
return self.wrapped.tell() == len(self.__cache.getvalue())
|
||||
|
||||
def close(self):
|
||||
self.wrapped.close()
|
||||
self.is_closed = True
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name == "is_closed":
|
||||
return self.__is_closed_state[0]
|
||||
elif name == "read_complete":
|
||||
return self.__read_complete_state[0]
|
||||
|
||||
wrapped = self.__dict__.get("wrapped")
|
||||
if wrapped:
|
||||
return getattr(wrapped, name)
|
||||
|
||||
return getattr(self.__class__, name)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name == "is_closed":
|
||||
self.__is_closed_state[0] = bool(value)
|
||||
elif name == "read_complete":
|
||||
if not self.is_closed:
|
||||
self.__read_complete_state[0] = bool(value)
|
||||
else:
|
||||
self.__dict__[name] = value
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
assert whence in [0,1,2]
|
||||
|
||||
# how much data, if any, do we need to read?
|
||||
if whence == 2: # 2: relative to end of *wrapped* file
|
||||
if offset < 0: raise ValueError("negative seek offset")
|
||||
# since we don't know yet where the end of that file is, we must
|
||||
# read everything
|
||||
to_read = None
|
||||
else:
|
||||
if whence == 0: # 0: absolute
|
||||
if offset < 0: raise ValueError("negative seek offset")
|
||||
dest = offset
|
||||
else: # 1: relative to current position
|
||||
pos = self.__pos
|
||||
if pos < offset:
|
||||
raise ValueError("seek to before start of file")
|
||||
dest = pos + offset
|
||||
end = len(self.__cache.getvalue())
|
||||
to_read = dest - end
|
||||
if to_read < 0:
|
||||
to_read = 0
|
||||
|
||||
if to_read != 0:
|
||||
self.__cache.seek(0, 2)
|
||||
if to_read is None:
|
||||
assert whence == 2
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
self.__pos = self.__cache.tell() - offset
|
||||
else:
|
||||
data = self.wrapped.read(to_read)
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
# Don't raise an exception even if we've seek()ed past the end
|
||||
# of .wrapped, since fseek() doesn't complain in that case.
|
||||
# Also like fseek(), pretend we have seek()ed past the end,
|
||||
# i.e. not:
|
||||
#self.__pos = self.__cache.tell()
|
||||
# but rather:
|
||||
self.__pos = dest
|
||||
else:
|
||||
self.__pos = dest
|
||||
|
||||
def tell(self):
|
||||
return self.__pos
|
||||
|
||||
def __copy__(self):
|
||||
cpy = self.__class__(self.wrapped)
|
||||
cpy.__cache = self.__cache
|
||||
cpy.__read_complete_state = self.__read_complete_state
|
||||
cpy.__is_closed_state = self.__is_closed_state
|
||||
return cpy
|
||||
|
||||
def get_data(self):
|
||||
pos = self.__pos
|
||||
try:
|
||||
self.seek(0)
|
||||
return self.read(-1)
|
||||
finally:
|
||||
self.__pos = pos
|
||||
|
||||
def read(self, size=-1):
|
||||
pos = self.__pos
|
||||
end = len(self.__cache.getvalue())
|
||||
available = end - pos
|
||||
|
||||
# enough data already cached?
|
||||
if size <= available and size != -1:
|
||||
self.__cache.seek(pos)
|
||||
self.__pos = pos+size
|
||||
return self.__cache.read(size)
|
||||
|
||||
# no, so read sufficient data from wrapped file and cache it
|
||||
self.__cache.seek(0, 2)
|
||||
if size == -1:
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
else:
|
||||
to_read = size - available
|
||||
assert to_read > 0
|
||||
data = self.wrapped.read(to_read)
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
self.__cache.seek(pos)
|
||||
|
||||
data = self.__cache.read(size)
|
||||
self.__pos = self.__cache.tell()
|
||||
assert self.__pos == pos + len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1):
|
||||
if not self.__have_readline:
|
||||
raise NotImplementedError("no readline method on wrapped object")
|
||||
|
||||
# line we're about to read might not be complete in the cache, so
|
||||
# read another line first
|
||||
pos = self.__pos
|
||||
self.__cache.seek(0, 2)
|
||||
data = self.wrapped.readline()
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
self.__cache.seek(pos)
|
||||
|
||||
data = self.__cache.readline()
|
||||
if size != -1:
|
||||
r = data[:size]
|
||||
self.__pos = pos+size
|
||||
else:
|
||||
r = data
|
||||
self.__pos = pos+len(data)
|
||||
return r
|
||||
|
||||
def readlines(self, sizehint=-1):
|
||||
pos = self.__pos
|
||||
self.__cache.seek(0, 2)
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
self.__cache.seek(pos)
|
||||
data = self.__cache.readlines(sizehint)
|
||||
self.__pos = self.__cache.tell()
|
||||
return data
|
||||
|
||||
def __iter__(self): return self
|
||||
def next(self):
|
||||
line = self.readline()
|
||||
if line == "": raise StopIteration
|
||||
return line
|
||||
|
||||
xreadlines = __iter__
|
||||
|
||||
def __repr__(self):
|
||||
return ("<%s at %s whose wrapped object = %r>" %
|
||||
(self.__class__.__name__, hex(abs(id(self))), self.wrapped))
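A small, hypothetical example of the seek_wrapper behaviour described in the docstring above; the sample data is made up:

# Hypothetical sketch only; StringIO stands in for a read-only response body.
f = seek_wrapper(StringIO("abcdefgh"))
print f.read(3)         # 'abc' (read from the wrapped object and cached)
f.seek(0)               # rewind using the cache, not the wrapped object
print f.read()          # 'abcdefgh'
print f.read_complete   # True, the wrapped object has been fully read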
class response_seek_wrapper(seek_wrapper):
|
||||
|
||||
"""
|
||||
Supports copying response objects and setting response body data.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, wrapped):
|
||||
seek_wrapper.__init__(self, wrapped)
|
||||
self._headers = self.wrapped.info()
|
||||
|
||||
def __copy__(self):
|
||||
cpy = seek_wrapper.__copy__(self)
|
||||
# copy headers from delegate
|
||||
cpy._headers = copy.copy(self.info())
|
||||
return cpy
|
||||
|
||||
# Note that .info() and .geturl() (the only two urllib2 response methods
|
||||
# that are not implemented by seek_wrapper) must be here explicitly rather
|
||||
# than by seek_wrapper's __getattr__ delegation) so that the nasty
|
||||
# dynamically-created HTTPError classes in get_seek_wrapper_class() get the
|
||||
# wrapped object's implementation, and not HTTPError's.
|
||||
|
||||
def info(self):
|
||||
return self._headers
|
||||
|
||||
def geturl(self):
|
||||
return self.wrapped.geturl()
|
||||
|
||||
def set_data(self, data):
|
||||
self.seek(0)
|
||||
self.read()
|
||||
self.close()
|
||||
cache = self._seek_wrapper__cache = StringIO()
|
||||
cache.write(data)
|
||||
self.seek(0)
|
||||
|
||||
|
||||
class eoffile:
|
||||
# file-like object that always claims to be at end-of-file...
|
||||
def read(self, size=-1): return ""
|
||||
def readline(self, size=-1): return ""
|
||||
def __iter__(self): return self
|
||||
def next(self): return ""
|
||||
def close(self): pass
|
||||
|
||||
class eofresponse(eoffile):
|
||||
def __init__(self, url, headers, code, msg):
|
||||
self._url = url
|
||||
self._headers = headers
|
||||
self.code = code
|
||||
self.msg = msg
|
||||
def geturl(self): return self._url
|
||||
def info(self): return self._headers
|
||||
|
||||
|
||||
class closeable_response:
|
||||
"""Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
|
||||
|
||||
Only supports responses returned by mechanize.HTTPHandler.
|
||||
|
||||
After .close(), the following methods are supported:
|
||||
|
||||
.read()
|
||||
.readline()
|
||||
.info()
|
||||
.geturl()
|
||||
.__iter__()
|
||||
.next()
|
||||
.close()
|
||||
|
||||
and the following attributes are supported:
|
||||
|
||||
.code
|
||||
.msg
|
||||
|
||||
Also supports pickling (but the stdlib currently does something to prevent
|
||||
it: http://python.org/sf/1144636).
|
||||
|
||||
"""
|
||||
# presence of this attr indicates the response is usable after .close()
|
||||
closeable_response = None
|
||||
|
||||
def __init__(self, fp, headers, url, code, msg):
|
||||
self._set_fp(fp)
|
||||
self._headers = headers
|
||||
self._url = url
|
||||
self.code = code
|
||||
self.msg = msg
|
||||
|
||||
def _set_fp(self, fp):
|
||||
self.fp = fp
|
||||
self.read = self.fp.read
|
||||
self.readline = self.fp.readline
|
||||
if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
|
||||
if hasattr(self.fp, "fileno"):
|
||||
self.fileno = self.fp.fileno
|
||||
else:
|
||||
self.fileno = lambda: None
|
||||
self.__iter__ = self.fp.__iter__
|
||||
self.next = self.fp.next
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s at %s whose fp = %r>' % (
|
||||
self.__class__.__name__, hex(abs(id(self))), self.fp)
|
||||
|
||||
def info(self):
|
||||
return self._headers
|
||||
|
||||
def geturl(self):
|
||||
return self._url
|
||||
|
||||
def close(self):
|
||||
self.fp._close = True
|
||||
wrapped = self.fp
|
||||
wrapped.close()
|
||||
new_wrapped = eofresponse(
|
||||
self._url, self._headers, self.code, self.msg)
|
||||
self._set_fp(new_wrapped)
|
||||
|
||||
def __getstate__(self):
|
||||
# There are three obvious options here:
|
||||
# 1. truncate
|
||||
# 2. read to end
|
||||
# 3. close socket, pickle state including read position, then open
|
||||
# again on unpickle and use Range header
|
||||
# XXXX um, 4. refuse to pickle unless .close()d. This is better,
|
||||
# actually ("errors should never pass silently"). Pickling doesn't
|
||||
# work anyway ATM, because of http://python.org/sf/1144636 so fix
|
||||
# this later
|
||||
|
||||
# 2 breaks pickle protocol, because one expects the original object
|
||||
# to be left unscathed by pickling. 3 is too complicated and
|
||||
# surprising (and too much work ;-) to happen in a sane __getstate__.
|
||||
# So we do 1.
|
||||
|
||||
state = self.__dict__.copy()
|
||||
new_wrapped = eofresponse(
|
||||
self._url, self._headers, self.code, self.msg)
|
||||
state["wrapped"] = new_wrapped
|
||||
return state
|
||||
|
||||
def test_response(data='test data', headers=[],
|
||||
url="http://example.com/", code=200, msg="OK"):
|
||||
return make_response(data, headers, url, code, msg)
|
||||
|
||||
def test_html_response(data='test data', headers=[],
|
||||
url="http://example.com/", code=200, msg="OK"):
|
||||
headers += [("Content-type", "text/html")]
|
||||
return make_response(data, headers, url, code, msg)
|
||||
|
||||
def make_response(data, headers, url, code, msg):
|
||||
"""Convenient factory for objects implementing response interface.
|
||||
|
||||
data: string containing response body data
|
||||
headers: sequence of (name, value) pairs
|
||||
url: URL of response
|
||||
code: integer response code (e.g. 200)
|
||||
msg: string response code message (e.g. "OK")
|
||||
|
||||
"""
|
||||
mime_headers = make_headers(headers)
|
||||
r = closeable_response(StringIO(data), mime_headers, url, code, msg)
|
||||
return response_seek_wrapper(r)
|
||||
|
||||
|
||||
def make_headers(headers):
|
||||
"""
|
||||
headers: sequence of (name, value) pairs
|
||||
"""
|
||||
hdr_text = []
|
||||
for name_value in headers:
|
||||
hdr_text.append("%s: %s" % name_value)
|
||||
return mimetools.Message(StringIO("\n".join(hdr_text)))
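To illustrate the factory functions above, a short hypothetical example; the body, header and URL values are invented:

# Hypothetical sketch only, using the make_response() factory defined above.
r = make_response("hello world", [("Content-type", "text/plain")],
                  "http://example.com/", 200, "OK")
print r.geturl()                  # 'http://example.com/'
print r.info()["Content-type"]    # 'text/plain'
print r.read()                    # 'hello world'
r.seek(0)                         # seekable, thanks to response_seek_wrapper
print r.get_data()                # whole body, regardless of current position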
# Rest of this module is especially horrible, but needed, at least until fork
|
||||
# urllib2. Even then, may want to preserve urllib2 compatibility.
|
||||
|
||||
def get_seek_wrapper_class(response):
|
||||
# in order to wrap response objects that are also exceptions, we must
|
||||
# dynamically subclass the exception :-(((
|
||||
if (isinstance(response, urllib2.HTTPError) and
|
||||
not hasattr(response, "seek")):
|
||||
if response.__class__.__module__ == "__builtin__":
|
||||
exc_class_name = response.__class__.__name__
|
||||
else:
|
||||
exc_class_name = "%s.%s" % (
|
||||
response.__class__.__module__, response.__class__.__name__)
|
||||
|
||||
class httperror_seek_wrapper(response_seek_wrapper, response.__class__):
|
||||
# this only derives from HTTPError in order to be a subclass --
|
||||
# the HTTPError behaviour comes from delegation
|
||||
|
||||
_exc_class_name = exc_class_name
|
||||
|
||||
def __init__(self, wrapped):
|
||||
response_seek_wrapper.__init__(self, wrapped)
|
||||
# be compatible with undocumented HTTPError attributes :-(
|
||||
self.hdrs = wrapped.info()
|
||||
self.filename = wrapped.geturl()
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
"<%s (%s instance) at %s "
|
||||
"whose wrapped object = %r>" % (
|
||||
self.__class__.__name__, self._exc_class_name,
|
||||
hex(abs(id(self))), self.wrapped)
|
||||
)
|
||||
wrapper_class = httperror_seek_wrapper
|
||||
else:
|
||||
wrapper_class = response_seek_wrapper
|
||||
return wrapper_class
|
||||
|
||||
def seek_wrapped_response(response):
|
||||
"""Return a copy of response that supports seekable response interface.
|
||||
|
||||
Accepts responses from both mechanize and urllib2 handlers.
|
||||
|
||||
Copes with both ordinary response instances and HTTPError instances (which
|
||||
can't be simply wrapped due to the requirement of preserving the exception
|
||||
base class).
|
||||
"""
|
||||
if not hasattr(response, "seek"):
|
||||
wrapper_class = get_seek_wrapper_class(response)
|
||||
response = wrapper_class(response)
|
||||
assert hasattr(response, "get_data")
|
||||
return response
|
||||
|
||||
def upgrade_response(response):
|
||||
"""Return a copy of response that supports Browser response interface.
|
||||
|
||||
Browser response interface is that of "seekable responses"
|
||||
(response_seek_wrapper), plus the requirement that responses must be
|
||||
useable after .close() (closeable_response).
|
||||
|
||||
Accepts responses from both mechanize and urllib2 handlers.
|
||||
|
||||
Copes with both ordinary response instances and HTTPError instances (which
|
||||
can't be simply wrapped due to the requirement of preserving the exception
|
||||
base class).
|
||||
"""
|
||||
wrapper_class = get_seek_wrapper_class(response)
|
||||
if hasattr(response, "closeable_response"):
|
||||
if not hasattr(response, "seek"):
|
||||
response = wrapper_class(response)
|
||||
assert hasattr(response, "get_data")
|
||||
return copy.copy(response)
|
||||
|
||||
# a urllib2 handler constructed the response, i.e. the response is an
|
||||
# urllib.addinfourl or a urllib2.HTTPError, instead of a
|
||||
# _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
|
||||
try:
|
||||
code = response.code
|
||||
except AttributeError:
|
||||
code = None
|
||||
try:
|
||||
msg = response.msg
|
||||
except AttributeError:
|
||||
msg = None
|
||||
|
||||
# may have already-.read() data from .seek() cache
|
||||
data = None
|
||||
get_data = getattr(response, "get_data", None)
|
||||
if get_data:
|
||||
data = get_data()
|
||||
|
||||
response = closeable_response(
|
||||
response.fp, response.info(), response.geturl(), code, msg)
|
||||
response = wrapper_class(response)
|
||||
if data:
|
||||
response.set_data(data)
|
||||
return response
|
@ -1,240 +0,0 @@
|
||||
"""RFC 3986 URI parsing and relative reference resolution / absolutization.
|
||||
|
||||
(aka splitting and joining)
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
|
||||
|
||||
import sys, re, posixpath, urllib
|
||||
|
||||
## def chr_range(a, b):
|
||||
## return "".join(map(chr, range(ord(a), ord(b)+1)))
|
||||
|
||||
## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
## "abcdefghijklmnopqrstuvwxyz"
|
||||
## "0123456789"
|
||||
## "-_.~")
|
||||
## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
|
||||
## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
|
||||
# this re matches any character that's not in URI_CHARS
|
||||
BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
|
||||
|
||||
|
||||
def clean_url(url, encoding):
|
||||
# percent-encode illegal URI characters
|
||||
# Trying to come up with test cases for this gave me a headache, revisit
|
||||
# when do switch to unicode.
|
||||
# Somebody else's comments (lost the attribution):
|
||||
## - IE will return you the url in the encoding you send it
|
||||
## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
|
||||
## characters in your link. It will send you utf-8 however if there are...
|
||||
if type(url) == type(""):
|
||||
url = url.decode(encoding, "replace")
|
||||
url = url.strip()
|
||||
# for second param to urllib.quote(), we want URI_CHARS, minus the
|
||||
# 'always_safe' characters that urllib.quote() never percent-encodes
|
||||
return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
|
||||
|
||||
def is_clean_uri(uri):
|
||||
"""
|
||||
>>> is_clean_uri("ABC!")
|
||||
True
|
||||
>>> is_clean_uri(u"ABC!")
|
||||
True
|
||||
>>> is_clean_uri("ABC|")
|
||||
False
|
||||
>>> is_clean_uri(u"ABC|")
|
||||
False
|
||||
>>> is_clean_uri("http://example.com/0")
|
||||
True
|
||||
>>> is_clean_uri(u"http://example.com/0")
|
||||
True
|
||||
"""
|
||||
# note module re treats bytestrings as though they were decoded as latin-1
|
||||
# so this function accepts both unicode and bytestrings
|
||||
return not bool(BAD_URI_CHARS_RE.search(uri))
|
||||
|
||||
|
||||
SPLIT_MATCH = re.compile(
|
||||
r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
|
||||
def urlsplit(absolute_uri):
|
||||
"""Return scheme, authority, path, query, fragment."""
|
||||
match = SPLIT_MATCH(absolute_uri)
|
||||
if match:
|
||||
g = match.groups()
|
||||
return g[1], g[3], g[4], g[6], g[8]
|
||||
|
||||
def urlunsplit(parts):
|
||||
scheme, authority, path, query, fragment = parts
|
||||
r = []
|
||||
append = r.append
|
||||
if scheme is not None:
|
||||
append(scheme)
|
||||
append(":")
|
||||
if authority is not None:
|
||||
append("//")
|
||||
append(authority)
|
||||
append(path)
|
||||
if query is not None:
|
||||
append("?")
|
||||
append(query)
|
||||
if fragment is not None:
|
||||
append("#")
|
||||
append(fragment)
|
||||
return "".join(r)
|
||||
|
||||
def urljoin(base_uri, uri_reference):
|
||||
return urlunsplit(urljoin_parts(urlsplit(base_uri),
|
||||
urlsplit(uri_reference)))
|
||||
|
||||
# oops, this doesn't do the same thing as the literal translation
|
||||
# from the RFC below
|
||||
## def urljoin_parts(base_parts, reference_parts):
|
||||
## scheme, authority, path, query, fragment = base_parts
|
||||
## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
|
||||
|
||||
## # compute target URI path
|
||||
## if rpath == "":
|
||||
## tpath = path
|
||||
## else:
|
||||
## tpath = rpath
|
||||
## if not tpath.startswith("/"):
|
||||
## tpath = merge(authority, path, tpath)
|
||||
## tpath = posixpath.normpath(tpath)
|
||||
|
||||
## if rscheme is not None:
|
||||
## return (rscheme, rauthority, tpath, rquery, rfragment)
|
||||
## elif rauthority is not None:
|
||||
## return (scheme, rauthority, tpath, rquery, rfragment)
|
||||
## elif rpath == "":
|
||||
## if rquery is not None:
|
||||
## tquery = rquery
|
||||
## else:
|
||||
## tquery = query
|
||||
## return (scheme, authority, tpath, tquery, rfragment)
|
||||
## else:
|
||||
## return (scheme, authority, tpath, rquery, rfragment)
|
||||
|
||||
def urljoin_parts(base_parts, reference_parts):
|
||||
scheme, authority, path, query, fragment = base_parts
|
||||
rscheme, rauthority, rpath, rquery, rfragment = reference_parts
|
||||
|
||||
if rscheme == scheme:
|
||||
rscheme = None
|
||||
|
||||
if rscheme is not None:
|
||||
tscheme, tauthority, tpath, tquery = (
|
||||
rscheme, rauthority, remove_dot_segments(rpath), rquery)
|
||||
else:
|
||||
if rauthority is not None:
|
||||
tauthority, tpath, tquery = (
|
||||
rauthority, remove_dot_segments(rpath), rquery)
|
||||
else:
|
||||
if rpath == "":
|
||||
tpath = path
|
||||
if rquery is not None:
|
||||
tquery = rquery
|
||||
else:
|
||||
tquery = query
|
||||
else:
|
||||
if rpath.startswith("/"):
|
||||
tpath = remove_dot_segments(rpath)
|
||||
else:
|
||||
tpath = merge(authority, path, rpath)
|
||||
tpath = remove_dot_segments(tpath)
|
||||
tquery = rquery
|
||||
tauthority = authority
|
||||
tscheme = scheme
|
||||
tfragment = rfragment
|
||||
return (tscheme, tauthority, tpath, tquery, tfragment)
|
||||
|
||||
# um, something *vaguely* like this is what I want, but I have to generate
|
||||
# lots of test cases first, if only to understand what it is that
|
||||
# remove_dot_segments really does...
|
||||
## def remove_dot_segments(path):
|
||||
## if path == '':
|
||||
## return ''
|
||||
## comps = path.split('/')
|
||||
## new_comps = []
|
||||
## for comp in comps:
|
||||
## if comp in ['.', '']:
|
||||
## if not new_comps or new_comps[-1]:
|
||||
## new_comps.append('')
|
||||
## continue
|
||||
## if comp != '..':
|
||||
## new_comps.append(comp)
|
||||
## elif new_comps:
|
||||
## new_comps.pop()
|
||||
## return '/'.join(new_comps)
|
||||
|
||||
|
||||
def remove_dot_segments(path):
|
||||
r = []
|
||||
while path:
|
||||
# A
|
||||
if path.startswith("../"):
|
||||
path = path[3:]
|
||||
continue
|
||||
if path.startswith("./"):
|
||||
path = path[2:]
|
||||
continue
|
||||
# B
|
||||
if path.startswith("/./"):
|
||||
path = path[2:]
|
||||
continue
|
||||
if path == "/.":
|
||||
path = "/"
|
||||
continue
|
||||
# C
|
||||
if path.startswith("/../"):
|
||||
path = path[3:]
|
||||
if r:
|
||||
r.pop()
|
||||
continue
|
||||
if path == "/..":
|
||||
path = "/"
|
||||
if r:
|
||||
r.pop()
|
||||
continue
|
||||
# D
|
||||
if path == ".":
|
||||
path = path[1:]
|
||||
continue
|
||||
if path == "..":
|
||||
path = path[2:]
|
||||
continue
|
||||
# E
|
||||
start = 0
|
||||
if path.startswith("/"):
|
||||
start = 1
|
||||
ii = path.find("/", start)
|
||||
if ii < 0:
|
||||
ii = None
|
||||
r.append(path[:ii])
|
||||
if ii is None:
|
||||
break
|
||||
path = path[ii:]
|
||||
return "".join(r)
|
||||
|
||||
def merge(base_authority, base_path, ref_path):
|
||||
# XXXX Oddly, the sample Perl implementation of this by Roy Fielding
|
||||
# doesn't even take base_authority as a parameter, despite the wording in
|
||||
# the RFC suggesting otherwise. Perhaps I'm missing some obvious identity.
|
||||
#if base_authority is not None and base_path == "":
|
||||
if base_path == "":
|
||||
return "/" + ref_path
|
||||
ii = base_path.rfind("/")
|
||||
if ii >= 0:
|
||||
return base_path[:ii+1] + ref_path
|
||||
return ref_path
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
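A few hypothetical calls showing what the RFC 3986 helpers above compute; the expected results are given as comments:

# Hypothetical sketch only, exercising the functions defined above.
print urlsplit("http://example.com/a/b?q=1#frag")
# ('http', 'example.com', '/a/b', 'q=1', 'frag')
print urljoin("http://example.com/a/b/c", "../d")
# 'http://example.com/a/d'
print remove_dot_segments("/a/b/../c/./d")
# '/a/c/d'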
@ -1,16 +0,0 @@
|
||||
from urllib2 import BaseHandler
|
||||
from _util import deprecation
|
||||
from _response import response_seek_wrapper
|
||||
|
||||
|
||||
class SeekableProcessor(BaseHandler):
|
||||
"""Deprecated: Make responses seekable."""
|
||||
|
||||
def __init__(self):
|
||||
deprecation(
|
||||
"See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
|
||||
|
||||
def any_response(self, request, response):
|
||||
if not hasattr(response, "seek"):
|
||||
return response_seek_wrapper(response)
|
||||
return response
|
@ -1,40 +0,0 @@
|
||||
from urllib2 import BaseHandler
|
||||
|
||||
from _request import Request
|
||||
from _response import upgrade_response
|
||||
from _util import deprecation
|
||||
|
||||
|
||||
class HTTPRequestUpgradeProcessor(BaseHandler):
|
||||
# upgrade urllib2.Request to this module's Request
|
||||
# yuck!
|
||||
handler_order = 0 # before anything else
|
||||
|
||||
def http_request(self, request):
|
||||
if not hasattr(request, "add_unredirected_header"):
|
||||
newrequest = Request(request._Request__original, request.data,
|
||||
request.headers)
|
||||
try: newrequest.origin_req_host = request.origin_req_host
|
||||
except AttributeError: pass
|
||||
try: newrequest.unverifiable = request.unverifiable
|
||||
except AttributeError: pass
|
||||
try: newrequest.visit = request.visit
|
||||
except AttributeError: pass
|
||||
request = newrequest
|
||||
return request
|
||||
|
||||
https_request = http_request
|
||||
|
||||
|
||||
class ResponseUpgradeProcessor(BaseHandler):
|
||||
# upgrade responses to be .close()able without becoming unusable
|
||||
handler_order = 0 # before anything else
|
||||
|
||||
def __init__(self):
|
||||
deprecation(
|
||||
"See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
|
||||
|
||||
def any_response(self, request, response):
|
||||
if not hasattr(response, 'closeable_response'):
|
||||
response = upgrade_response(response)
|
||||
return response
|
@ -1,62 +0,0 @@
|
||||
# urllib2 work-alike interface
|
||||
# ...from urllib2...
|
||||
from urllib2 import \
|
||||
URLError, \
|
||||
HTTPError, \
|
||||
GopherError
|
||||
# ...and from mechanize
|
||||
from _opener import OpenerDirector, \
|
||||
SeekableResponseOpener, \
|
||||
build_opener, install_opener, urlopen
|
||||
from _auth import \
|
||||
HTTPPasswordMgr, \
|
||||
HTTPPasswordMgrWithDefaultRealm, \
|
||||
AbstractBasicAuthHandler, \
|
||||
AbstractDigestAuthHandler, \
|
||||
HTTPProxyPasswordMgr, \
|
||||
ProxyHandler, \
|
||||
ProxyBasicAuthHandler, \
|
||||
ProxyDigestAuthHandler, \
|
||||
HTTPBasicAuthHandler, \
|
||||
HTTPDigestAuthHandler, \
|
||||
HTTPSClientCertMgr
|
||||
from _request import \
|
||||
Request
|
||||
from _http import \
|
||||
RobotExclusionError
|
||||
|
||||
# handlers...
|
||||
# ...from urllib2...
|
||||
from urllib2 import \
|
||||
BaseHandler, \
|
||||
UnknownHandler, \
|
||||
FTPHandler, \
|
||||
CacheFTPHandler, \
|
||||
FileHandler, \
|
||||
GopherHandler
|
||||
# ...and from mechanize
|
||||
from _http import \
|
||||
HTTPHandler, \
|
||||
HTTPDefaultErrorHandler, \
|
||||
HTTPRedirectHandler, \
|
||||
HTTPEquivProcessor, \
|
||||
HTTPCookieProcessor, \
|
||||
HTTPRefererProcessor, \
|
||||
HTTPRefreshProcessor, \
|
||||
HTTPErrorProcessor, \
|
||||
HTTPRobotRulesProcessor
|
||||
from _upgrade import \
|
||||
HTTPRequestUpgradeProcessor, \
|
||||
ResponseUpgradeProcessor
|
||||
from _debug import \
|
||||
HTTPResponseDebugProcessor, \
|
||||
HTTPRedirectDebugProcessor
|
||||
from _seek import \
|
||||
SeekableProcessor
|
||||
# crap ATM
|
||||
## from _gzip import \
|
||||
## HTTPGzipProcessor
|
||||
import httplib
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
from _http import HTTPSHandler
|
||||
del httplib
|
@ -1,348 +0,0 @@
|
||||
"""Convenient HTTP UserAgent class.
|
||||
|
||||
This is a subclass of urllib2.OpenerDirector.
|
||||
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import sys, warnings, urllib2
|
||||
|
||||
import _opener
|
||||
import _urllib2
|
||||
import _auth
|
||||
import _gzip
|
||||
import _response
|
||||
|
||||
|
||||
class UserAgentBase(_opener.OpenerDirector):
|
||||
"""Convenient user-agent class.
|
||||
|
||||
Do not use .add_handler() to add a handler for something already dealt with
|
||||
by this code.
|
||||
|
||||
The only reason at present for the distinction between UserAgent and
|
||||
UserAgentBase is so that classes that depend on .seek()able responses
|
||||
(e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass
|
||||
UserAgent exposes a .set_seekable_responses() method that allows switching
|
||||
off the adding of a .seek() method to responses.
|
||||
|
||||
Public attributes:
|
||||
|
||||
addheaders: list of (name, value) pairs specifying headers to send with
|
||||
every request, unless they are overridden in the Request instance.
|
||||
|
||||
>>> ua = UserAgentBase()
|
||||
>>> ua.addheaders = [
|
||||
... ("User-agent", "Mozilla/5.0 (compatible)"),
|
||||
... ("From", "responsible.person@example.com")]
|
||||
|
||||
"""
|
||||
|
||||
handler_classes = {
|
||||
# scheme handlers
|
||||
"http": _urllib2.HTTPHandler,
|
||||
# CacheFTPHandler is buggy, at least in 2.3, so we don't use it
|
||||
"ftp": _urllib2.FTPHandler,
|
||||
"file": _urllib2.FileHandler,
|
||||
"gopher": _urllib2.GopherHandler,
|
||||
|
||||
# other handlers
|
||||
"_unknown": _urllib2.UnknownHandler,
|
||||
# HTTP{S,}Handler depend on HTTPErrorProcessor too
|
||||
"_http_error": _urllib2.HTTPErrorProcessor,
|
||||
"_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
|
||||
"_http_default_error": _urllib2.HTTPDefaultErrorHandler,
|
||||
|
||||
# feature handlers
|
||||
"_basicauth": _urllib2.HTTPBasicAuthHandler,
|
||||
"_digestauth": _urllib2.HTTPDigestAuthHandler,
|
||||
"_redirect": _urllib2.HTTPRedirectHandler,
|
||||
"_cookies": _urllib2.HTTPCookieProcessor,
|
||||
"_refresh": _urllib2.HTTPRefreshProcessor,
|
||||
"_equiv": _urllib2.HTTPEquivProcessor,
|
||||
"_proxy": _urllib2.ProxyHandler,
|
||||
"_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
|
||||
"_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
|
||||
"_robots": _urllib2.HTTPRobotRulesProcessor,
|
||||
"_gzip": _gzip.HTTPGzipProcessor, # experimental!
|
||||
|
||||
# debug handlers
|
||||
"_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
|
||||
"_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
|
||||
}
|
||||
|
||||
default_schemes = ["http", "ftp", "file", "gopher"]
|
||||
default_others = ["_unknown", "_http_error", "_http_request_upgrade",
|
||||
"_http_default_error",
|
||||
]
|
||||
default_features = ["_redirect", "_cookies",
|
||||
"_refresh", "_equiv",
|
||||
"_basicauth", "_digestauth",
|
||||
"_proxy", "_proxy_basicauth", "_proxy_digestauth",
|
||||
"_robots",
|
||||
]
|
||||
if hasattr(_urllib2, 'HTTPSHandler'):
|
||||
handler_classes["https"] = _urllib2.HTTPSHandler
|
||||
default_schemes.append("https")
|
||||
|
||||
def __init__(self):
|
||||
_opener.OpenerDirector.__init__(self)
|
||||
|
||||
ua_handlers = self._ua_handlers = {}
|
||||
for scheme in (self.default_schemes+
|
||||
self.default_others+
|
||||
self.default_features):
|
||||
klass = self.handler_classes[scheme]
|
||||
ua_handlers[scheme] = klass()
|
||||
for handler in ua_handlers.itervalues():
|
||||
self.add_handler(handler)
|
||||
|
||||
# Yuck.
|
||||
# Ensure correct default constructor args were passed to
|
||||
# HTTPRefreshProcessor and HTTPEquivProcessor.
|
||||
if "_refresh" in ua_handlers:
|
||||
self.set_handle_refresh(True)
|
||||
if "_equiv" in ua_handlers:
|
||||
self.set_handle_equiv(True)
|
||||
# Ensure default password managers are installed.
|
||||
pm = ppm = None
|
||||
if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
|
||||
pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
|
||||
if ("_proxy_basicauth" in ua_handlers or
|
||||
"_proxy_digestauth" in ua_handlers):
|
||||
ppm = _auth.HTTPProxyPasswordMgr()
|
||||
self.set_password_manager(pm)
|
||||
self.set_proxy_password_manager(ppm)
|
||||
# set default certificate manager
|
||||
if "https" in ua_handlers:
|
||||
cm = _urllib2.HTTPSClientCertMgr()
|
||||
self.set_client_cert_manager(cm)
|
||||
|
||||
def close(self):
|
||||
_opener.OpenerDirector.close(self)
|
||||
self._ua_handlers = None
|
||||
|
||||
# XXX
|
||||
## def set_timeout(self, timeout):
|
||||
## self._timeout = timeout
|
||||
## def set_http_connection_cache(self, conn_cache):
|
||||
## self._http_conn_cache = conn_cache
|
||||
## def set_ftp_connection_cache(self, conn_cache):
|
||||
## # XXX ATM, FTP has cache as part of handler; should it be separate?
|
||||
## self._ftp_conn_cache = conn_cache
|
||||
|
||||
def set_handled_schemes(self, schemes):
|
||||
"""Set sequence of URL scheme (protocol) strings.
|
||||
|
||||
For example: ua.set_handled_schemes(["http", "ftp"])
|
||||
|
||||
If this fails (with ValueError) because you've passed an unknown
|
||||
scheme, the set of handled schemes will not be changed.
|
||||
|
||||
"""
|
||||
want = {}
|
||||
for scheme in schemes:
|
||||
if scheme.startswith("_"):
|
||||
raise ValueError("not a scheme '%s'" % scheme)
|
||||
if scheme not in self.handler_classes:
|
||||
raise ValueError("unknown scheme '%s'" % scheme)
|
||||
want[scheme] = None
|
||||
|
||||
# get rid of scheme handlers we don't want
|
||||
for scheme, oldhandler in self._ua_handlers.items():
|
||||
if scheme.startswith("_"): continue # not a scheme handler
|
||||
if scheme not in want:
|
||||
self._replace_handler(scheme, None)
|
||||
else:
|
||||
del want[scheme] # already got it
|
||||
# add the scheme handlers that are missing
|
||||
for scheme in want.keys():
|
||||
self._set_handler(scheme, True)
|
||||
|
||||
def set_cookiejar(self, cookiejar):
|
||||
"""Set a mechanize.CookieJar, or None."""
|
||||
self._set_handler("_cookies", obj=cookiejar)
|
||||
|
||||
# XXX could use Greg Stein's httpx for some of this instead?
|
||||
# or httplib2??
|
||||
def set_proxies(self, proxies):
|
||||
"""Set a dictionary mapping URL scheme to proxy specification, or None.
|
||||
|
||||
e.g. {"http": "joe:password@myproxy.example.com:3128",
|
||||
"ftp": "proxy.example.com"}
|
||||
|
||||
"""
|
||||
self._set_handler("_proxy", obj=proxies)
|
||||
|
||||
def add_password(self, url, user, password, realm=None):
|
||||
self._password_manager.add_password(realm, url, user, password)
|
||||
def add_proxy_password(self, user, password, hostport=None, realm=None):
|
||||
self._proxy_password_manager.add_password(
|
||||
realm, hostport, user, password)
|
||||
|
||||
def add_client_certificate(self, url, key_file, cert_file):
|
||||
"""Add an SSL client certificate, for HTTPS client auth.
|
||||
|
||||
key_file and cert_file must be filenames of the key and certificate
|
||||
files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS
|
||||
12) file to PEM format:
|
||||
|
||||
openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
|
||||
openssl pkcs12 -nocerts -in cert.p12 -out key.pem
|
||||
|
||||
|
||||
Note that client certificate password input is very inflexible ATM. At
|
||||
the moment this seems to be console only, which is presumably the
|
||||
default behaviour of libopenssl. In future mechanize may support
|
||||
third-party libraries that (I assume) allow more options here.
|
||||
|
||||
"""
|
||||
self._client_cert_manager.add_key_cert(url, key_file, cert_file)
|
||||
|
||||
# the following are rarely useful -- use add_password / add_proxy_password
|
||||
# instead
|
||||
def set_password_manager(self, password_manager):
|
||||
"""Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
|
||||
self._password_manager = password_manager
|
||||
self._set_handler("_basicauth", obj=password_manager)
|
||||
self._set_handler("_digestauth", obj=password_manager)
|
||||
def set_proxy_password_manager(self, password_manager):
|
||||
"""Set a mechanize.HTTPProxyPasswordMgr, or None."""
|
||||
self._proxy_password_manager = password_manager
|
||||
self._set_handler("_proxy_basicauth", obj=password_manager)
|
||||
self._set_handler("_proxy_digestauth", obj=password_manager)
|
||||
def set_client_cert_manager(self, cert_manager):
|
||||
"""Set a mechanize.HTTPClientCertMgr, or None."""
|
||||
self._client_cert_manager = cert_manager
|
||||
handler = self._ua_handlers["https"]
|
||||
handler.client_cert_manager = cert_manager
|
||||
|
||||
# these methods all take a boolean parameter
|
||||
def set_handle_robots(self, handle):
|
||||
"""Set whether to observe rules from robots.txt."""
|
||||
self._set_handler("_robots", handle)
|
||||
def set_handle_redirect(self, handle):
|
||||
"""Set whether to handle HTTP 30x redirections."""
|
||||
self._set_handler("_redirect", handle)
|
||||
def set_handle_refresh(self, handle, max_time=None, honor_time=True):
|
||||
"""Set whether to handle HTTP Refresh headers."""
|
||||
self._set_handler("_refresh", handle, constructor_kwds=
|
||||
{"max_time": max_time, "honor_time": honor_time})
|
||||
def set_handle_equiv(self, handle, head_parser_class=None):
|
||||
"""Set whether to treat HTML http-equiv headers like HTTP headers.
|
||||
|
||||
Response objects may be .seek()able if this is set (currently returned
|
||||
responses are, raised HTTPError exception responses are not).
|
||||
|
||||
"""
|
||||
if head_parser_class is not None:
|
||||
constructor_kwds = {"head_parser_class": head_parser_class}
|
||||
else:
|
||||
constructor_kwds={}
|
||||
self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
|
||||
def set_handle_gzip(self, handle):
|
||||
"""Handle gzip transfer encoding.
|
||||
|
||||
"""
|
||||
if handle:
|
||||
warnings.warn(
|
||||
"gzip transfer encoding is experimental!", stacklevel=2)
|
||||
self._set_handler("_gzip", handle)
|
||||
def set_debug_redirects(self, handle):
|
||||
"""Log information about HTTP redirects (including refreshes).
|
||||
|
||||
Logging is performed using module logging. The logger name is
|
||||
"mechanize.http_redirects". To actually print some debug output,
|
||||
eg:
|
||||
|
||||
import sys, logging
|
||||
logger = logging.getLogger("mechanize.http_redirects")
|
||||
logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
Other logger names relevant to this module:
|
||||
|
||||
"mechanize.http_responses"
|
||||
"mechanize.cookies" (or "cookielib" if running Python 2.4)
|
||||
|
||||
To turn on everything:
|
||||
|
||||
import sys, logging
|
||||
logger = logging.getLogger("mechanize")
|
||||
logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
"""
|
||||
self._set_handler("_debug_redirect", handle)
|
||||
def set_debug_responses(self, handle):
|
||||
"""Log HTTP response bodies.
|
||||
|
||||
See docstring for .set_debug_redirects() for details of logging.
|
||||
|
||||
Response objects may be .seek()able if this is set (currently returned
|
||||
responses are, raised HTTPError exception responses are not).
|
||||
|
||||
"""
|
||||
self._set_handler("_debug_response_body", handle)
|
||||
def set_debug_http(self, handle):
|
||||
"""Print HTTP headers to sys.stdout."""
|
||||
level = int(bool(handle))
|
||||
for scheme in "http", "https":
|
||||
h = self._ua_handlers.get(scheme)
|
||||
if h is not None:
|
||||
h.set_http_debuglevel(level)
|
||||
|
||||
def _set_handler(self, name, handle=None, obj=None,
|
||||
constructor_args=(), constructor_kwds={}):
|
||||
if handle is None:
|
||||
handle = obj is not None
|
||||
if handle:
|
||||
handler_class = self.handler_classes[name]
|
||||
if obj is not None:
|
||||
newhandler = handler_class(obj)
|
||||
else:
|
||||
newhandler = handler_class(*constructor_args, **constructor_kwds)
|
||||
else:
|
||||
newhandler = None
|
||||
self._replace_handler(name, newhandler)
|
||||
|
||||
def _replace_handler(self, name, newhandler=None):
|
||||
# first, if handler was previously added, remove it
|
||||
if name is not None:
|
||||
handler = self._ua_handlers.get(name)
|
||||
if handler:
|
||||
try:
|
||||
self.handlers.remove(handler)
|
||||
except ValueError:
|
||||
pass
|
||||
# then add the replacement, if any
|
||||
if newhandler is not None:
|
||||
self.add_handler(newhandler)
|
||||
self._ua_handlers[name] = newhandler
|
||||
|
||||
|
||||
class UserAgent(UserAgentBase):
|
||||
|
||||
def __init__(self):
|
||||
UserAgentBase.__init__(self)
|
||||
self._seekable = False
|
||||
|
||||
def set_seekable_responses(self, handle):
|
||||
"""Make response objects .seek()able."""
|
||||
self._seekable = bool(handle)
|
||||
|
||||
def open(self, fullurl, data=None):
|
||||
if self._seekable:
|
||||
def bound_open(fullurl, data=None):
|
||||
return UserAgentBase.open(self, fullurl, data)
|
||||
response = _opener.wrapped_open(
|
||||
bound_open, _response.seek_wrapped_response, fullurl, data)
|
||||
else:
|
||||
response = UserAgentBase.open(self, fullurl, data)
|
||||
return response
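A hypothetical configuration sketch for the UserAgent class above; the URL is illustrative, and the https scheme is only available when HTTPSHandler was built in:

# Hypothetical sketch only, assuming the UserAgent API shown above.
ua = UserAgent()
ua.set_handled_schemes(["http", "https"])  # ValueError if HTTPS support is missing
ua.set_handle_robots(False)
ua.set_seekable_responses(True)
response = ua.open("http://example.com/")
print response.geturl()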
@ -1,279 +0,0 @@
|
||||
"""Utility functions and date/time routines.
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import re, string, time, warnings
|
||||
|
||||
def deprecation(message):
|
||||
warnings.warn(message, DeprecationWarning, stacklevel=3)
|
||||
def hide_deprecations():
|
||||
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
||||
def reset_deprecations():
|
||||
warnings.filterwarnings('default', category=DeprecationWarning)
|
||||
|
||||
|
||||
def isstringlike(x):
|
||||
try: x+""
|
||||
except: return False
|
||||
else: return True
|
||||
|
||||
## def caller():
|
||||
## try:
|
||||
## raise SyntaxError
|
||||
## except:
|
||||
## import sys
|
||||
## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
|
||||
|
||||
|
||||
from calendar import timegm
|
||||
|
||||
# Date/time conversion routines for formats used by the HTTP protocol.
|
||||
|
||||
EPOCH = 1970
|
||||
def my_timegm(tt):
|
||||
year, month, mday, hour, min, sec = tt[:6]
|
||||
if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
|
||||
(0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
|
||||
return timegm(tt)
|
||||
else:
|
||||
return None
|
||||
|
||||
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
|
||||
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
|
||||
months_lower = []
|
||||
for month in months: months_lower.append(month.lower())
|
||||
|
||||
|
||||
def time2isoz(t=None):
|
||||
"""Return a string representing time in seconds since epoch, t.
|
||||
|
||||
If the function is called without an argument, it will use the current
|
||||
time.
|
||||
|
||||
The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
|
||||
representing Universal Time (UTC, aka GMT). An example of this format is:
|
||||
|
||||
1994-11-24 08:49:37Z
|
||||
|
||||
"""
|
||||
if t is None: t = time.time()
|
||||
year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
|
||||
return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
|
||||
year, mon, mday, hour, min, sec)
|
||||
|
||||
def time2netscape(t=None):
|
||||
"""Return a string representing time in seconds since epoch, t.
|
||||
|
||||
If the function is called without an argument, it will use the current
|
||||
time.
|
||||
|
||||
The format of the returned string is like this:
|
||||
|
||||
Wed, DD-Mon-YYYY HH:MM:SS GMT
|
||||
|
||||
"""
|
||||
if t is None: t = time.time()
|
||||
year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
|
||||
return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
|
||||
days[wday], mday, months[mon-1], year, hour, min, sec)
|
||||
|
||||
|
||||
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
|
||||
|
||||
timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
|
||||
def offset_from_tz_string(tz):
|
||||
offset = None
|
||||
if UTC_ZONES.has_key(tz):
|
||||
offset = 0
|
||||
else:
|
||||
m = timezone_re.search(tz)
|
||||
if m:
|
||||
offset = 3600 * int(m.group(2))
|
||||
if m.group(3):
|
||||
offset = offset + 60 * int(m.group(3))
|
||||
if m.group(1) == '-':
|
||||
offset = -offset
|
||||
return offset
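Hypothetical examples of what offset_from_tz_string() returns for a few timezone strings; offsets are in seconds:

# Hypothetical sketch only, exercising offset_from_tz_string() above.
print offset_from_tz_string("GMT")       # 0
print offset_from_tz_string("-0800")     # -28800 (8 hours west of UTC)
print offset_from_tz_string("+05:30")    # 19800
print offset_from_tz_string("nonsense")  # None, unrecognized timezone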
|
||||
|
||||
def _str2time(day, mon, yr, hr, min, sec, tz):
|
||||
# translate month name to number
|
||||
# month numbers start with 1 (January)
|
||||
try:
|
||||
mon = months_lower.index(mon.lower())+1
|
||||
except ValueError:
|
||||
# maybe it's already a number
|
||||
try:
|
||||
imon = int(mon)
|
||||
except ValueError:
|
||||
return None
|
||||
if 1 <= imon <= 12:
|
||||
mon = imon
|
||||
else:
|
||||
return None
|
||||
|
||||
# make sure clock elements are defined
|
||||
if hr is None: hr = 0
|
||||
if min is None: min = 0
|
||||
if sec is None: sec = 0
|
||||
|
||||
yr = int(yr)
|
||||
day = int(day)
|
||||
hr = int(hr)
|
||||
min = int(min)
|
||||
sec = int(sec)
|
||||
|
||||
if yr < 1000:
|
||||
# find "obvious" year
|
||||
cur_yr = time.localtime(time.time())[0]
|
||||
m = cur_yr % 100
|
||||
tmp = yr
|
||||
yr = yr + cur_yr - m
|
||||
m = m - tmp
|
||||
if abs(m) > 50:
|
||||
if m > 0: yr = yr + 100
|
||||
else: yr = yr - 100
|
||||
|
||||
# convert UTC time tuple to seconds since epoch (not timezone-adjusted)
|
||||
t = my_timegm((yr, mon, day, hr, min, sec, tz))
|
||||
|
||||
if t is not None:
|
||||
# adjust time using timezone string, to get absolute time since epoch
|
||||
if tz is None:
|
||||
tz = "UTC"
|
||||
tz = tz.upper()
|
||||
offset = offset_from_tz_string(tz)
|
||||
if offset is None:
|
||||
return None
|
||||
t = t - offset
|
||||
|
||||
return t
|
||||
|
||||
|
||||
strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
|
||||
wkday_re = re.compile(
|
||||
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
|
||||
loose_http_re = re.compile(
|
||||
r"""^
|
||||
(\d\d?) # day
|
||||
(?:\s+|[-\/])
|
||||
(\w+) # month
|
||||
(?:\s+|[-\/])
|
||||
(\d+) # year
|
||||
(?:
|
||||
(?:\s+|:) # separator before clock
|
||||
(\d\d?):(\d\d) # hour:min
|
||||
(?::(\d\d))? # optional seconds
|
||||
)? # optional clock
|
||||
\s*
|
||||
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
|
||||
\s*
|
||||
(?:\(\w+\))? # ASCII representation of timezone in parens.
|
||||
\s*$""", re.X)
|
||||
def http2time(text):
|
||||
"""Returns time in seconds since epoch of time represented by a string.
|
||||
|
||||
Return value is an integer.
|
||||
|
||||
None is returned if the format of str is unrecognized, the time is outside
|
||||
the representable range, or the timezone string is not recognized. If the
|
||||
string contains no timezone, UTC is assumed.
|
||||
|
||||
The timezone in the string may be numerical (like "-0800" or "+0100") or a
|
||||
string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
|
||||
timezone strings equivalent to UTC (zero offset) are known to the function.
|
||||
|
||||
The function loosely parses the following formats:
|
||||
|
||||
Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
|
||||
Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
|
||||
Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
|
||||
09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
|
||||
08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
|
||||
08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
|
||||
|
||||
The parser ignores leading and trailing whitespace. The time may be
|
||||
absent.
|
||||
|
||||
If the year is given with only 2 digits, the function will select the
|
||||
century that makes the year closest to the current date.
|
||||
|
||||
"""
|
||||
# fast exit for strictly conforming string
|
||||
m = strict_re.search(text)
|
||||
if m:
|
||||
g = m.groups()
|
||||
mon = months_lower.index(g[1].lower()) + 1
|
||||
tt = (int(g[2]), mon, int(g[0]),
|
||||
int(g[3]), int(g[4]), float(g[5]))
|
||||
return my_timegm(tt)
|
||||
|
||||
# No, we need some messy parsing...
|
||||
|
||||
# clean up
|
||||
text = text.lstrip()
|
||||
text = wkday_re.sub("", text, 1) # Useless weekday
|
||||
|
||||
# tz is time zone specifier string
|
||||
day, mon, yr, hr, min, sec, tz = [None]*7
|
||||
|
||||
# loose regexp parse
|
||||
m = loose_http_re.search(text)
|
||||
if m is not None:
|
||||
day, mon, yr, hr, min, sec, tz = m.groups()
|
||||
else:
|
||||
return None # bad format
|
||||
|
||||
return _str2time(day, mon, yr, hr, min, sec, tz)
|
||||
|
||||
|
||||
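A few hypothetical calls (not part of the original module) exercising the loose-parsing path and the two-digit-year correction described in the docstring:

    >>> http2time("06 Nov 1994 08:49:37 GMT")
    784111777
    >>> http2time("Sunday, 06-Nov-94 08:49:37 GMT")
    784111777
    >>> http2time("not a date") is None
    True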
iso_re = re.compile(
    """^
    (\d{4})              # year
    [-\/]?
    (\d\d?)              # numerical month
    [-\/]?
    (\d\d?)              # day
    (?:
        (?:\s+|[-:Tt])   # separator before clock
        (\d\d?):?(\d\d)  # hour:min
        (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
    )?                   # optional clock
    \s*
    ([-+]?\d\d?:?(:?\d\d)?
     |Z|z)?              # timezone (Z is "zero meridian", i.e. GMT)
    \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100   -- ISO 8601 format
    1994-02-03 14:15:29         -- zone is optional
    1994-02-03                  -- only date
    1994-02-03T14:15:29         -- Use T as separator
    19940203T141529Z            -- ISO 8601 compact format
    19940203                    -- only date

    """
    # clean up
    text = text.lstrip()

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = iso_re.search(text)
    if m is not None:
        # XXX there's an extra bit of the timezone I'm ignoring here: is
        # this the right thing to do?
        yr, mon, day, hr, min, sec, tz, _ = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)
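Purely illustrative calls (not part of the deleted file), covering the expanded and compact ISO 8601 forms listed in the docstring:

    >>> iso2time("1994-11-06 08:49:37Z")
    784111777
    >>> iso2time("19941106T084937Z")
    784111777
    >>> iso2time("06 Nov 1994") is None   # not an ISO 8601 form
    True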
@ -10,6 +10,7 @@ __docformat__ = "restructuredtext en"
import logging, os, cStringIO, time, traceback, re, urlparse, sys
from collections import defaultdict
from functools import partial
from contextlib import nested, closing

from calibre import browser, __appname__, iswindows, LoggingInterface, strftime
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
@ -546,10 +547,8 @@ class BasicNewsRecipe(object, LoggingInterface):
        if bn:
            img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn)))
            try:
                with open(img, 'wb') as fi:
                    r = self.browser.open(feed.image_url)
                with nested(open(img, 'wb'), closing(self.browser.open(feed.image_url))) as (fi, r):
                    fi.write(r.read())
                    r.close()
                self.image_counter += 1
                feed.image_url = img
                self.image_map[feed.image_url] = img
@ -695,10 +694,8 @@ class BasicNewsRecipe(object, LoggingInterface):
            ext = ext.lower() if ext else 'jpg'
            self.report_progress(1, _('Downloading cover from %s')%cu)
            cpath = os.path.join(self.output_dir, 'cover.'+ext)
            with open(cpath, 'wb') as cfile:
                r = self.browser.open(cu)
            with nested(open(cpath, 'wb'), closing(self.browser.open(cu))) as (cfile, r):
                cfile.write(r.read())
                r.close()
            self.cover_path = cpath

@ -765,9 +762,8 @@ class BasicNewsRecipe(object, LoggingInterface):
        opf.create_spine(entries)
        opf.set_toc(toc)

        with open(opf_path, 'wb') as opf_file:
            with open(ncx_path, 'wb') as ncx_file:
                opf.render(opf_file, ncx_file)
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)


    def article_downloaded(self, request, result):
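The three hunks above all apply the same idiom: contextlib.nested combined with closing releases both handles even if the intervening write raises. A minimal sketch of the pattern under those assumptions (save_url and the urllib2 call are illustrative, not calibre code):

    from contextlib import closing, nested
    import urllib2

    def save_url(url, path):
        # Both the local file and the HTTP response are closed on exit,
        # whether or not read() or write() raises.
        with nested(open(path, 'wb'), closing(urllib2.urlopen(url))) as (out, resp):
            out.write(resp.read())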