Sync to trunk.

This commit is contained in:
John Schember 2011-09-20 19:13:14 -04:00
commit beea6bcd11
14 changed files with 560 additions and 62 deletions

View File

@ -1,20 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1316245412(BasicNewsRecipe):
title = u'Cicero Online'
description = u'Magazin f\xfcr politische Kultur'
publisher = 'Ringier Publishing GmbH'
category = 'news, politics, Germany'
language = 'de'
encoding = 'UTF-8'
__author__ = 'Armin Geller' # 2011-09-17
__author__ = 'Armin Geller' # Upd. 2011-09-19
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = False
# remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb", "breadcrumb-title", "meta", "comment-wrapper",
@ -22,6 +23,11 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
dict(name='div', attrs={'title':["Dossier Auswahl"]}),
dict(name='h2', attrs={'class':["title comment-form"]}),
dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
# 2011-09-19 clean-up on first feed historical caricature- and video preview pictures and social icons
dict(name='table', attrs={'class':["mcx-social-horizontal", "page-header"]}), # 2011-09-19
dict(name='div', attrs={'class':["page-header", "view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
"pagination",
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1"]}), # 2011-09-19
]
feeds = [
@ -36,3 +42,6 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
def print_version(self, url):
return url + '?print'
# def get_cover_url(self):
# return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to find a good logo on their home page!

View File

@ -4,16 +4,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IDGse(BasicNewsRecipe):
title = 'IDG'
description = 'IDG.se'
language = 'se'
__author__ = 'zapt0'
language = 'sv'
description = 'IDG.se'
oldest_article = 1
max_articles_per_feed = 40
max_articles_per_feed = 256
no_stylesheets = True
encoding = 'ISO-8859-1'
remove_javascript = True
feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]
feeds = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]
def print_version(self,url):
return url + '?articleRenderMode=print&m=print'
@ -30,4 +30,3 @@ class IDGse(BasicNewsRecipe):
dict(name='div', attrs={'id':['preamble_ad']}),
dict(name='ul', attrs={'class':['share']})
]

View File

@ -4,25 +4,17 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
title = u'Macleans Magazine'
language = 'en_CA'
__author__ = 'sexymax15'
oldest_article = 30
max_articles_per_feed = 12
__author__ = 'Medius'
oldest_article = 7
cover_url = 'http://www.rogersmagazines.com/rms_covers/md/CLE_md.jpg'
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
remove_tags_after = {'class':'postmetadata'}
remove_tags = [dict(id='header'),{'class':'comment'}]
remove_tags_after = {'class':'pagination'}
feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
(u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
(u'Business', u'http://www2.macleans.ca/category/business/feed/'),
(u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
(u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
(u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
(u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
def print_version(self, url):
return url + 'print/'
feeds = [(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
(u'World', u'http://www2.macleans.ca/category/news-politics/world/feed/'), (u'Business', u'http://www2.macleans.ca/category/business/feed/'), (u'Arts & Culture', u'http://www2.macleans.ca/category/arts/feed/'), (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'), (u'Health', u'http://www2.macleans.ca/category/life/health/feed/'), (u'Sports', u'http://www2.macleans.ca/category/life/sports/feed/'), (u'Environment', u'http://www2.macleans.ca/category/life/environment/feed/'), (u'Technology', u'http://www2.macleans.ca/category/life/technology/feed/'), (u'Travel', u'http://www2.macleans.ca/category/life/travel/feed/'), (u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/')]

View File

@ -16,6 +16,7 @@ __UseLife__ = True
'''
Change Log:
2011/09/18: parse "column" section stuff from source text files directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
@ -52,16 +53,19 @@ class MPRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -232,12 +236,18 @@ class MPRecipe(BasicNewsRecipe):
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
#(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_col(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -358,6 +368,24 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# parse from life.mingpao.com
def parse_section2_col(self, url, keystr):
    '''
    Build the article list for the column section of life.mingpao.com.

    Every link on the index page at ``url`` whose address contains both
    ``.txt`` and ``keystr`` is rewritten to point at the plain-text copy
    under ``ftp/Life3/`` and returned as a dict with ``title``, ``url``
    and an empty ``description``. Articles are returned in page order;
    when the same article is linked more than once, the last link on the
    page is the one kept.
    '''
    # Return value is not used here; the call is kept from the original.
    # NOTE(review): presumably it primes the fetch date - confirm.
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    links = soup.findAll('a', href=True)
    current_articles = []
    # Track the *rewritten* URLs: the membership test below must compare
    # like with like, otherwise duplicates are never detected.
    included_urls = set()
    for link in reversed(links):
        article_url = 'http://life.mingpao.com/cfm/' + link.get('href', '')
        if '.txt' not in article_url or keystr not in article_url:
            continue
        # use printed version of the article
        article_url = article_url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/')
        if article_url in included_urls:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(link),
                                 'url': article_url,
                                 'description': ''})
    # Links were walked last-to-first; restore page order.
    current_articles.reverse()
    return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
@ -440,6 +468,39 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt based files
def preprocess_raw_html(self, raw_html, url):
    '''
    Wrap a plain-text article in a minimal HTML skeleton.

    Only URLs containing ``ftp`` are treated as plain text; anything else
    is returned untouched. The text is processed line by line:

    * a line starting with ``=`` names an image (``.jpg`` is appended) and
      the line that follows it is emitted as that image's caption;
    * the first other line seen before the article body opens the
      ``heading`` div, and further such lines are appended to it;
    * a line starting with U+3010 opens the ``content`` div; every
      ordinary line after that is followed by a ``<p>`` separator.

    The ``<p>`` tags are emitted unclosed, exactly as before.
    '''
    if 'ftp' not in url:
        return raw_html
    # Collect fragments and join once instead of repeated concatenation.
    parts = ['<html><head><title>Untitled</title></head><body><div class="images">']
    next_is_img_txt = False
    title_started = False
    met_article_start_char = False
    for item in raw_html.split('\n'):
        if item.startswith(u'\u3010'):
            # Start of the article body (does not reset a pending caption).
            met_article_start_char = True
            parts.append('</div><div class="content"><p>' + item + '<p>\n')
        elif next_is_img_txt:
            # Caption line belonging to the image emitted just before.
            next_is_img_txt = False
            parts.append(item + '\n')
        elif item.startswith('='):
            next_is_img_txt = True
            # Slice the text directly: str() would fail on non-ASCII
            # unicode under Python 2.
            parts.append('<img src="' + item[1:].strip() + '.jpg" /><p>\n')
        elif met_article_start_char:
            parts.append(item + '<p>\n')
        elif not title_started:
            title_started = True
            parts.append('</div><div class="heading">' + item + '\n')
        else:
            parts.append(item + '\n')
    parts.append('</div></body></html>')
    return ''.join(parts)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -593,3 +654,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

30
recipes/taipei.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TN(BasicNewsRecipe):
    # Recipe for the Taipei Times: six RSS sections, articles at most one
    # day old, with automatic clean-up that keeps the "main_ipic" elements.
    __author__ = 'Krittika Goyal'
    title = u'Taipei Times'
    language = 'en_CN'

    oldest_article = 1 #days
    max_articles_per_feed = 25

    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    auto_cleanup_keep = '//*[@class="main_ipic"]'

    # (section title, RSS feed URL)
    feeds = [
        ('Editorials', 'http://www.taipeitimes.com/xml/editorials.rss'),
        ('Taiwan', 'http://www.taipeitimes.com/xml/taiwan.rss'),
        ('Features', 'http://www.taipeitimes.com/xml/feat.rss'),
        ('Business', 'http://www.taipeitimes.com/xml/biz.rss'),
        ('World', 'http://www.taipeitimes.com/xml/world.rss'),
        ('Sports', 'http://www.taipeitimes.com/xml/sport.rss'),
    ]

View File

@ -31,7 +31,7 @@ def metadata_from_formats(formats, force_read_metadata=False, pattern=None):
try:
return _metadata_from_formats(formats, force_read_metadata, pattern)
except:
mi = metadata_from_filename(list(iter(formats), pattern)[0])
mi = metadata_from_filename(list(iter(formats))[0], pat=pattern)
if not mi.authors:
mi.authors = [_('Unknown')]
return mi

View File

@ -110,9 +110,9 @@
<string>Some explanation about this template:
-The fields availables are 'author_sort', 'authors', 'id',
'isbn', 'pubdate', 'publisher', 'series_index', 'series',
'tags', 'timestamp', 'title', 'uuid'
'tags', 'timestamp', 'title', 'uuid', 'title_sort'
-For list types ie authors and tags, only the first element
wil be selected.
will be selected.
-For time field, only the date will be used. </string>
</property>
<property name="scaledContents">

View File

@ -87,7 +87,7 @@ class DeviceJob(BaseJob): # {{{
self.failed = True
ex = as_unicode(err)
self._details = ex + '\n\n' + \
traceback.format_exc()
force_unicode(traceback.format_exc())
self.exception = err
finally:
self.job_done()

View File

@ -32,7 +32,7 @@ FIELDS = ['all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
'rating', 'series_index', 'series', 'size', 'tags', 'timestamp', 'uuid']
#Allowed fields for template
TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate',
TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title', 'uuid' ]
class CSV_XML(CatalogPlugin): # {{{
@ -324,7 +324,7 @@ class BIBTEX(CatalogPlugin): # {{{
def run(self, path_to_output, opts, db, notification=DummyReporter()):
def create_bibtex_entry(entry, fields, mode, template_citation,
bibtexdict, citation_bibtex=True, calibre_files=True):
bibtexdict, db, citation_bibtex=True, calibre_files=True):
#Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string
@ -345,7 +345,13 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry = [u' '.join(bibtex_entry)]
for field in fields:
if field.startswith('#'):
item = db.get_field(entry['id'],field,index_is_id=True)
elif field == 'title_sort':
item = entry['sort']
else:
item = entry[field]
#check if the field should be included (none or empty)
if item is None:
continue
@ -358,10 +364,6 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
elif field == 'id' :
bibtex_entry.append(u'calibreid = "%s"' % int(item))
@ -409,6 +411,14 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry.append(u'year = "%s"' % item.year)
bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
elif field.startswith('#') :
bibtex_entry.append(u'%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item)))
else:
# elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
# 'author_sort', 'series', 'title_sort'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
bibtex_entry = u',\n '.join(bibtex_entry)
bibtex_entry += u' }\n\n'
@ -588,7 +598,7 @@ class BIBTEX(CatalogPlugin): # {{{
for entry in data:
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
bibtexc, citation_bibtex, addfiles_bibtex))
bibtexc, db, citation_bibtex, addfiles_bibtex))
# }}}
class EPUB_MOBI(CatalogPlugin):

View File

@ -154,6 +154,7 @@ class BasicNewsRecipe(Recipe):
#: auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
#: will keep all divs with id="article-image" and spans
#: with class="important"
#:
auto_cleanup_keep = None
#: Specify any extra :term:`CSS` that should be added to downloaded :term:`HTML` files

View File

@ -7,16 +7,22 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, pprint
import os, pprint, time
from PyQt4.Qt import (QObject, QNetworkAccessManager, QNetworkDiskCache,
QNetworkProxy, QNetworkProxyFactory)
from PyQt4.QtWebKit import QWebPage
QNetworkProxy, QNetworkProxyFactory, QEventLoop, QUrl,
QDialog, QVBoxLayout, QSize)
from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView
from calibre import USER_AGENT, prints, get_proxies, get_proxy_info
from calibre.constants import ispy3, config_dir
from calibre.utils.logging import ThreadSafeLog
from calibre.gui2 import must_use_qt
from calibre.web.jsbrowser.forms import FormsMixin
class Timeout(Exception):
    '''Raised when a page load takes longer than the allowed time.'''
class LoadError(Exception):
    '''Raised when a click results in a failed page load.'''
class WebPage(QWebPage): # {{{
@ -24,6 +30,7 @@ class WebPage(QWebPage): # {{{
confirm_callback=None,
prompt_callback=None,
user_agent=USER_AGENT,
enable_developer_tools=False,
parent=None):
QWebPage.__init__(self, parent)
@ -33,6 +40,12 @@ class WebPage(QWebPage): # {{{
self.prompt_callback = prompt_callback
self.setForwardUnsupportedContent(True)
self.unsupportedContent.connect(self.on_unsupported_content)
settings = self.settings()
if enable_developer_tools:
settings.setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
QWebSettings.enablePersistentStorage(os.path.join(config_dir, 'caches',
'webkit-persistence'))
QWebSettings.setMaximumPagesInCache(0)
def userAgentForUrl(self, url):
return self.user_agent
@ -173,7 +186,36 @@ class NetworkAccessManager(QNetworkAccessManager): # {{{
self.log.debug('\n'.join(debug))
# }}}
class Browser(QObject):
class LoadWatcher(QObject): # {{{
    '''
    One-shot observer of a page's ``loadFinished`` signal.

    The instance itself is connected as the slot. Until the signal fires,
    ``is_loading`` is True and ``loaded_ok`` is None; afterwards
    ``loaded_ok`` holds the value delivered by the signal and the watcher
    has disconnected itself and dropped its reference to the page.
    '''

    def __init__(self, page, parent=None):
        QObject.__init__(self, parent)
        self.page = page
        self.loaded_ok = None
        self.is_loading = True
        page.loadFinished.connect(self)

    def __call__(self, ok):
        # Record the outcome first, then detach so later loads are ignored.
        self.loaded_ok = ok
        self.is_loading = False
        self.page.loadFinished.disconnect(self)
        self.page = None
# }}}
class BrowserView(QDialog): # {{{
    '''Dialog (initially 1024x768) holding a single web view that displays ``page``.'''

    def __init__(self, page, parent=None):
        QDialog.__init__(self, parent)
        layout = QVBoxLayout(self)
        self.l = layout
        self.setLayout(layout)
        view = QWebView(self)
        self.webview = view
        layout.addWidget(view)
        self.resize(QSize(1024, 768))
        view.setPage(page)
# }}}
class Browser(QObject, FormsMixin):
'''
Browser (WebKit with no GUI).
@ -202,16 +244,21 @@ class Browser(QObject):
# If True a disk cache is used
use_disk_cache=True,
# Enable Inspect element functionality
enable_developer_tools=False,
# Verbosity
verbosity = 0
):
must_use_qt()
QObject.__init__(self)
FormsMixin.__init__(self)
if log is None:
log = ThreadSafeLog()
if verbosity:
log.filter_level = log.DEBUG
self.log = log
self.jquery_lib = P('content_server/jquery.js', data=True,
allow_user_override=False).decode('utf-8')
@ -220,7 +267,64 @@ class Browser(QObject):
self.page = WebPage(log, confirm_callback=confirm_callback,
prompt_callback=prompt_callback, user_agent=user_agent,
enable_developer_tools=enable_developer_tools,
parent=self)
self.nam = NetworkAccessManager(log, use_disk_cache=use_disk_cache, parent=self)
self.page.setNetworkAccessManager(self.nam)
def _wait_for_load(self, timeout, url=None):
    '''
    Process events until the current page load finishes or ``timeout``
    seconds have elapsed.

    :param timeout: Maximum number of seconds to wait
    :param url: Only used in the Timeout error message

    Returns the value reported by the load-finished signal. Raises
    Timeout if the load is still in progress when the time runs out.
    '''
    loop = QEventLoop(self)
    deadline = time.time() + timeout
    watcher = LoadWatcher(self.page, parent=self)
    while watcher.is_loading and deadline > time.time():
        had_events = loop.processEvents()
        if not had_events:
            # Nothing pending: yield briefly rather than spin.
            time.sleep(0.01)
    if watcher.is_loading:
        message = 'Loading of %r took longer than %d seconds'%(url, timeout)
        raise Timeout(message)
    return watcher.loaded_ok
def visit(self, url, timeout=30.0):
    '''
    Load ``url`` in the main frame and block until loading completes.

    Returning from this method only means the page's loadFinished()
    signal has fired; javascript on the page may still have work left
    to do. Any previously selected form is forgotten.

    :param url: The address to open
    :param timeout: Seconds to wait before raising a Timeout exception

    Returns True if loading was successful, False otherwise.
    '''
    self.current_form = None
    frame = self.page.mainFrame()
    frame.load(QUrl(url))
    return self._wait_for_load(timeout, url)
def click(self, qwe, wait_for_load=True, ajax_replies=0, timeout=30.0):
    '''
    Click the QWebElement pointed to by qwe, by dispatching a synthetic
    javascript ``click`` mouse event on it.

    :param wait_for_load: If you know that the click is going to cause a
                          new page to be loaded, set this to True to have
                          the method block until the new page is loaded
    :param ajax_replies: Number of replies to wait for after clicking a
                         link that triggers some AJAX interaction (any
                         value above zero raises NotImplementedError)
    :param timeout: Seconds to wait for the new page when wait_for_load
                    is set

    Raises LoadError if the load that was waited for failed.
    '''
    js = ("\n"
          "var e = document.createEvent('MouseEvents');\n"
          "e.initEvent( 'click', true, true );\n"
          "this.dispatchEvent(e);\n")
    qwe.evaluateJavaScript(js)
    if ajax_replies > 0:
        raise NotImplementedError('AJAX clicking not implemented')
    if not wait_for_load:
        return
    if not self._wait_for_load(timeout):
        raise LoadError('Clicking resulted in a failed load')
def show_browser(self):
    '''
    Open a modal window displaying the currently loaded web page.
    Intended as a debugging aid.
    '''
    BrowserView(self.page).exec_()

View File

@ -0,0 +1,160 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre import as_unicode
class Control(object):
    '''
    Wrapper around a single ``<input>`` element.

    ``name`` and ``type`` are read once from the element's attributes.
    ``value`` is a boolean for checkbox/radio inputs (whether the
    ``checked`` attribute equals ``checked``) and the ``value`` attribute
    for text/password inputs. For any other type reading yields None and
    writing does nothing.
    '''

    def __init__(self, qwe):
        self.qwe = qwe
        self.name = unicode(qwe.attribute('name'))
        self.type = unicode(qwe.attribute('type'))

    def __repr__(self):
        return unicode(self.qwe.toOuterXml())

    @dynamic_property
    def value(self):
        def fget(self):
            kind = self.type
            if kind in ('checkbox', 'radio'):
                return unicode(self.qwe.attribute('checked')) == 'checked'
            if kind in ('text', 'password'):
                return unicode(self.qwe.attribute('value'))
            return None

        def fset(self, val):
            kind = self.type
            if kind in ('checkbox', 'radio'):
                if not val:
                    self.qwe.removeAttribute('checked')
                else:
                    self.qwe.setAttribute('checked', 'checked')
                return
            if kind in ('text', 'password'):
                self.qwe.setAttribute('value', as_unicode(val))

        return property(fget=fget, fset=fset)
class RadioControl(object):
    '''
    A group of radio inputs that share one name.

    ``values`` maps each input's ``value`` attribute to its element.
    Reading ``value`` returns the value of a checked member (None if no
    member is checked). Assigning one of the known values checks that
    member and unchecks the rest; assigning an unknown value does nothing.
    '''

    def __init__(self, name, controls):
        self.name = name
        self.type = 'radio'
        self.values = dict((unicode(c.attribute('value')), c) for c in controls)

    def __repr__(self):
        return 'RadioControl(%s)'%(', '.join(self.values))

    @dynamic_property
    def value(self):
        def fget(self):
            checked = (val for val, elem in self.values.iteritems()
                       if unicode(elem.attribute('checked')) == 'checked')
            return next(checked, None)

        def fset(self, val):
            matches = (elem for value, elem in self.values.iteritems()
                       if val == value)
            chosen = next(matches, None)
            if chosen is None:
                return
            # Clear the whole group before marking the chosen member.
            for elem in self.values.itervalues():
                elem.removeAttribute('checked')
            chosen.setAttribute('checked', 'checked')

        return property(fget=fget, fset=fset)
class Form(object):
    '''
    Wrapper around a ``<form>`` element.

    ``attributes`` holds the form's own attributes, ``input_controls`` the
    non-radio ``<input>`` elements wrapped as Control objects, and
    ``radio_controls`` maps each radio group name to a RadioControl.
    Controls are looked up by name with ``form[name]``.
    '''

    def __init__(self, qwe):
        self.qwe = qwe
        self.attributes = {unicode(x):unicode(qwe.attribute(x)) for x in
                qwe.attributeNames()}
        controls = list(map(Control, qwe.findAll('input')))
        self.input_controls = [x for x in controls if x.type != 'radio']
        # Group the radio inputs by name in a single pass, keeping the
        # order in which they appear in the form.
        grouped = {}
        for x in controls:
            if x.type == 'radio':
                grouped.setdefault(x.name, []).append(x.qwe)
        self.radio_controls = {name:RadioControl(name, elements)
                for name, elements in grouped.items()}

    def __getitem__(self, key):
        '''
        Return the control named ``key``. Non-radio inputs are searched
        first, then radio groups. Raises KeyError if nothing matches.
        '''
        for x in self.input_controls:
            if key == x.name:
                return x
        # Index rather than .get(): .get() never raises, so an unknown
        # name used to return None instead of raising KeyError.
        try:
            return self.radio_controls[key]
        except KeyError:
            raise KeyError('No control with the name %s in this form'%key)

    def __repr__(self):
        attrs = ['%s=%s'%(k, v) for k, v in self.attributes.items()]
        return '<form %s>'%(' '.join(attrs))

    def submit_control(self, submit_control_selector=None):
        '''
        Find the control used to submit this form.

        If ``submit_control_selector`` is given and matches an element
        inside the form, that raw element is returned. Otherwise the first
        input of type ``submit`` is returned, or failing that the first
        input of type ``image`` (both as Control objects). Returns None
        when nothing suitable is found.
        '''
        if submit_control_selector is not None:
            sc = self.qwe.findFirst(submit_control_selector)
            if not sc.isNull():
                return sc
        for wanted in ('submit', 'image'):
            for c in self.input_controls:
                if c.type == wanted:
                    return c
        return None
class FormsMixin(object):
    '''
    Form handling for a browser object. The class mixing this in must
    provide ``self.page`` (with a ``mainFrame()`` method) and a
    ``click()`` method.
    '''

    def __init__(self):
        self.current_form = None

    def find_form(self, css2_selector=None, nr=None):
        '''
        Return the form matching ``css2_selector`` or, if that is not
        given or matches nothing, the ``nr``'th form in the page. Returns
        None when no form is found.
        '''
        mf = self.page.mainFrame()
        if css2_selector is not None:
            candidate = mf.findFirstElement(css2_selector)
            if not candidate.isNull():
                return Form(candidate)
        if nr is not None and int(nr) > -1:
            nr = int(nr)
            forms = mf.findAllElements('form')
            if nr < forms.count():
                return Form(forms.at(nr))
        return None

    def all_forms(self):
        '''
        Return all forms present in the current page.
        '''
        mf = self.page.mainFrame()
        return list(map(Form, mf.findAllElements('form').toList()))

    def select_form(self, css2_selector=None, nr=None):
        '''
        Select a form for further processing. Specify the form either with
        css2_selector or nr. Raises ValueError if no matching form is found.

        :param css2_selector: A CSS2 selector, for example:
            'form[action="/accounts/login"]' or 'form[id="loginForm"]'
        :param nr: An integer >= 0. Selects the nr'th form in the current page.
        '''
        self.current_form = self.find_form(css2_selector=css2_selector, nr=nr)
        if self.current_form is None:
            raise ValueError('No such form found')
        return self.current_form

    def submit(self, submit_control_selector=None, ajax_replies=0, timeout=30.0):
        '''
        Submit the currently selected form by clicking its submit control.

        :param submit_control_selector: Optional selector identifying the
            element to click; otherwise the form's own submit control is used
        :param ajax_replies: Passed through to click()
        :param timeout: Passed through to click()

        Raises ValueError if no form is selected or the form has no submit
        control. The form selection is cleared before the click happens.
        '''
        if self.current_form is None:
            raise ValueError('No form selected, use select_form() first')
        sc = self.current_form.submit_control(submit_control_selector)
        if sc is None:
            raise ValueError('No submit control found in the current form')
        self.current_form = None
        # submit_control() yields a bare element when the selector matched
        # and a wrapper with a .qwe attribute otherwise; accept both.
        self.click(getattr(sc, 'qwe', sc), ajax_replies=ajax_replies, timeout=timeout)

View File

@ -0,0 +1,131 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import unittest, pprint, threading
import cherrypy
from calibre.web.jsbrowser.browser import Browser
class Server(object):
    '''
    Small web application driven by the browser tests: it serves a page
    with two forms and remembers the fields of the last submission in
    ``form_data``.
    '''

    def __init__(self):
        self.form_data = {}

    @cherrypy.expose
    def index(self):
        page = '''
<html>
<head><title>JS Browser test</title></head>
<body>
<form id="controls_test" method="post" action="controls_test">
<h3>Test controls</h3>
<div><label>Simple Text:</label><input type="text" name="text"/></div>
<div><label>Password:</label><input type="password" name="password"/></div>
<div><label>Checked Checkbox:</label><input type="checkbox" checked="checked" name="checked_checkbox"/></div>
<div><label>UnChecked Checkbox:</label><input type="checkbox" name="unchecked_checkbox"/></div>
<div><input type="radio" name="sex" value="male" checked="checked" /> Male</div>
<div><input type="radio" name="sex" value="female" /> Female</div>
<div><input type="submit" value="Submit" /></div>
</form>
<form id="image_test" method="post" action="controls_test">
<h3>Test Image submit</h3>
<div><label>Simple Text:</label><input type="text" name="text" value="Image Test" /></div>
<input type="image" src="button_image" alt="Submit" />
</form>
</body>
</html>
'''
        return page

    @cherrypy.expose
    def controls_test(self, **kwargs):
        # Keep a private copy of the submitted fields for the assertions.
        submitted = kwargs.copy()
        self.form_data = submitted
        #pprint.pprint(kwargs)
        return pprint.pformat(kwargs)

    @cherrypy.expose
    def button_image(self):
        headers = cherrypy.response.headers
        headers['Content-Type'] = 'image/png'
        return I('next.png', data=True)
class Test(unittest.TestCase):
    '''
    Form tests: a local server is started once for the whole class in a
    daemon thread and a single Browser instance is shared by all tests.
    '''

    @classmethod
    def run_server(cls):
        cherrypy.engine.start()
        try:
            cherrypy.engine.block()
        except:
            pass

    @classmethod
    def setUpClass(cls):
        cls.port = 17983
        cls.server = Server()
        settings = {
            'log.screen'             : False,
            'checker.on'             : False,
            'engine.autoreload_on'   : False,
            'request.show_tracebacks': True,
            'server.socket_host'     : b'127.0.0.1',
            'server.socket_port'     : cls.port,
            'server.socket_timeout'  : 10, #seconds
            'server.thread_pool'     : 1, # number of threads
            'server.shutdown_timeout': 0.1, # minutes
        }
        cherrypy.config.update(settings)
        cherrypy.tree.mount(cls.server, '/', config={'/':{}})

        worker = threading.Thread(target=cls.run_server)
        worker.daemon = True
        cls.server_thread = worker
        worker.start()
        cls.browser = Browser(verbosity=0)

    @classmethod
    def tearDownClass(cls):
        cherrypy.engine.exit()
        cls.browser = None

    def test_control_types(self):
        'Test setting data in the various control types'
        loaded = self.browser.visit('http://127.0.0.1:%d'%self.port)
        self.assertEqual(loaded, True)
        # field name -> (value to set, value expected in the submitted data)
        values = {
            'checked_checkbox' : (False, None),
            'unchecked_checkbox': (True, 'on'),
            'text': ('some text', 'some text'),
            'password': ('some password', 'some password'),
            'sex': ('female', 'female'),
        }
        form = self.browser.select_form('#controls_test')
        for field, (to_set, _) in values.iteritems():
            form[field].value = to_set
        self.browser.submit()
        received = self.server.form_data
        for field, (_, expected) in values.iteritems():
            actual = received.get(field, None)
            self.assertEqual(expected, actual,
                    'Field %s: %r != %r'%(field, expected, received.get(field, None)))

    def test_image_submit(self):
        'Test submitting a form with a image as the submit control'
        loaded = self.browser.visit('http://127.0.0.1:%d'%self.port)
        self.assertEqual(loaded, True)
        self.browser.select_form('#image_test')
        self.browser.submit()
        self.assertEqual(self.server.form_data['text'], 'Image Test')
def tests():
    '''Return a suite with every test method of Test.'''
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(Test)
def run():
    '''Run the suite returned by tests() with a verbose text runner.'''
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(tests())
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    run()