diff --git a/recipes/cicero.recipe b/recipes/cicero.recipe
index de6676ae44..beb323cf31 100644
--- a/recipes/cicero.recipe
+++ b/recipes/cicero.recipe
@@ -1,38 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1316245412(BasicNewsRecipe):
-
title = u'Cicero Online'
description = u'Magazin f\xfcr politische Kultur'
publisher = 'Ringier Publishing GmbH'
category = 'news, politics, Germany'
language = 'de'
encoding = 'UTF-8'
- __author__ = 'Armin Geller' # 2011-09-17
+ __author__ = 'Armin Geller' # Upd. 2011-09-19
- oldest_article = 7
+ oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = False
+# remove_javascript = True
+
remove_tags = [
- dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
- dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb", "breadcrumb-title", "meta", "comment-wrapper",
- "field field-name-field-show-teaser-right field-type-list-boolean field-label-above"]}),
- dict(name='div', attrs={'title':["Dossier Auswahl"]}),
- dict(name='h2', attrs={'class':["title comment-form"]}),
- dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
- ]
+ dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
+ dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb", "breadcrumb-title", "meta", "comment-wrapper",
+ "field field-name-field-show-teaser-right field-type-list-boolean field-label-above"]}),
+ dict(name='div', attrs={'title':["Dossier Auswahl"]}),
+ dict(name='h2', attrs={'class':["title comment-form"]}),
+ dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
+ # 2011-09-19 clean-up on first feed historical caricature- and video preview pictures and social icons
+ dict(name='table', attrs={'class':["mcx-social-horizontal", "page-header"]}), # 2011-09-19
+ dict(name='div', attrs={'class':["page-header", "view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
+ "pagination",
+ "view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1"]}), # 2011-09-19
+ ]
feeds = [
- (u'Das gesamte Portfolio', u'http://www.cicero.de/rss.xml'),
- (u'Berliner Republik', u'http://www.cicero.de/berliner-republik.xml'),
- (u'Weltb\xfchne', u'http://www.cicero.de/weltbuehne.xml'),
- (u'Kapital', u'http://www.cicero.de/kapital.xml'),
- (u'Salon', u'http://www.cicero.de/salon.xml'),
- (u'Blogs', u'http://www.cicero.de/blogs.xml'), #seems not to be in use at the moment
- ]
+ (u'Das gesamte Portfolio', u'http://www.cicero.de/rss.xml'),
+ (u'Berliner Republik', u'http://www.cicero.de/berliner-republik.xml'),
+ (u'Weltb\xfchne', u'http://www.cicero.de/weltbuehne.xml'),
+ (u'Kapital', u'http://www.cicero.de/kapital.xml'),
+ (u'Salon', u'http://www.cicero.de/salon.xml'),
+ (u'Blogs', u'http://www.cicero.de/blogs.xml'), #seems not to be in use at the moment
+ ]
def print_version(self, url):
- return url + '?print'
+ return url + '?print'
+
+# def get_cover_url(self):
+# return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to find a good logo on their home page!
diff --git a/recipes/idg_se.recipe b/recipes/idg_se.recipe
index b4e86f9643..e5f0203e09 100644
--- a/recipes/idg_se.recipe
+++ b/recipes/idg_se.recipe
@@ -4,19 +4,19 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IDGse(BasicNewsRecipe):
title = 'IDG'
- description = 'IDG.se'
- language = 'se'
__author__ = 'zapt0'
+ language = 'sv'
+ description = 'IDG.se'
oldest_article = 1
- max_articles_per_feed = 40
+ max_articles_per_feed = 256
no_stylesheets = True
encoding = 'ISO-8859-1'
remove_javascript = True
- feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]
+ feeds = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]
def print_version(self,url):
- return url + '?articleRenderMode=print&m=print'
+ return url + '?articleRenderMode=print&m=print'
def get_cover_url(this):
return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'
@@ -30,4 +30,3 @@ class IDGse(BasicNewsRecipe):
dict(name='div', attrs={'id':['preamble_ad']}),
dict(name='ul', attrs={'class':['share']})
]
-
diff --git a/recipes/macleans.recipe b/recipes/macleans.recipe
index 22f94638d9..0421a9c560 100644
--- a/recipes/macleans.recipe
+++ b/recipes/macleans.recipe
@@ -4,25 +4,17 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
title = u'Macleans Magazine'
language = 'en_CA'
- __author__ = 'sexymax15'
- oldest_article = 30
- max_articles_per_feed = 12
+ __author__ = 'Medius'
+ oldest_article = 7
+ cover_url = 'http://www.rogersmagazines.com/rms_covers/md/CLE_md.jpg'
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
- remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
- remove_tags_after = {'class':'postmetadata'}
+ remove_tags = [dict(id='header'),{'class':'comment'}]
+ remove_tags_after = {'class':'pagination'}
- feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
- (u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
-(u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
-(u'Business', u'http://www2.macleans.ca/category/business/feed/'),
-(u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
-(u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
-(u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
- (u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
- def print_version(self, url):
- return url + 'print/'
+    feeds = [(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
+             (u'World', u'http://www2.macleans.ca/category/news-politics/world/feed/'),
+             (u'Business', u'http://www2.macleans.ca/category/business/feed/'),
+             (u'Arts & Culture', u'http://www2.macleans.ca/category/arts/feed/'),
+             (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
+             (u'Health', u'http://www2.macleans.ca/category/life/health/feed/'),
+             (u'Sports', u'http://www2.macleans.ca/category/life/sports/feed/'),
+             (u'Environment', u'http://www2.macleans.ca/category/life/environment/feed/'),
+             (u'Technology', u'http://www2.macleans.ca/category/life/technology/feed/'),
+             (u'Travel', u'http://www2.macleans.ca/category/life/travel/feed/'),
+             (u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/')]
diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe
index 7060a7cd3e..ef8ad98bb9 100644
--- a/recipes/ming_pao.recipe
+++ b/recipes/ming_pao.recipe
@@ -16,6 +16,7 @@ __UseLife__ = True
'''
Change Log:
+2011/09/18: parse "column" section stuff from source text files directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
@@ -52,16 +53,19 @@ class MPRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
- extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
+ extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
+ dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
+ dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
- dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
+ dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
+ dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@@ -232,12 +236,18 @@ class MPRecipe(BasicNewsRecipe):
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
- #(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
+ # parse column section articles directly from .txt files
+ for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+ ]:
+ articles = self.parse_section2_col(url, keystr)
+ if articles:
+ feeds.append((title, articles))
+
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@@ -358,6 +368,24 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
+ # parse from life.mingpao.com
+ def parse_section2_col(self, url, keystr):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+ url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
+
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
@@ -440,6 +468,39 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
+ # preprocess those .txt based files
+ def preprocess_raw_html(self, raw_html, url):
+ if url.rfind('ftp') == -1:
+ return raw_html
+ else:
+            splitter = re.compile(r'\n')  # split the source .txt file into lines
+            new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
+            next_is_img_txt = False
+            title_started = False
+            met_article_start_char = False
+            for item in splitter.split(raw_html):
+                if item.startswith(u'\u3010'):
+                    met_article_start_char = True
+                    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                else:
+                    if next_is_img_txt == False:
+                        if item.startswith('='):
+                            next_is_img_txt = True
+                            new_raw_html += '<img src="' + item[1:].strip() + '.jpg" /><p>\n'
+                        else:
+                            if met_article_start_char == False:
+                                if title_started == False:
+                                    new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                    title_started = True
+                                else:
+                                    new_raw_html = new_raw_html + item + '\n'
+                            else:
+                                new_raw_html = new_raw_html + item + '<p>\n'
+                    else:
+                        next_is_img_txt = False
+                        new_raw_html = new_raw_html + item + '\n'
+            return new_raw_html + '</div></body></html>'
+
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@@ -593,3 +654,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
+
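Note on the column handling above: parse_section2_col rewrites each article URL from cfm/dailynews3.cfm?File= to ftp/Life3/ so the plain-text source is fetched, and preprocess_raw_html then wraps that text in minimal HTML whose heading/content/images divs match the new keep_only_tags and extra_css rules. A standalone sketch of the same wrapping idea, with an invented two-line input (not real Ming Pao content):

    # -*- coding: utf-8 -*-
    # Sketch only: mirrors the txt-to-HTML wrapping; the sample text is invented.
    import re

    def wrap_txt(raw):
        parts = [u'<html><body><div class="heading">']
        for line in re.split(r'\n', raw):
            if line.startswith(u'\u3010'):  # article body starts at the first '【'
                parts.append(u'</div><div class="content"><p>' + line + u'<p>')
            else:
                parts.append(line)
        return u'\n'.join(parts) + u'</div></body></html>'

    print(wrap_txt(u'A column title\n\u3010First paragraph of the article ...'))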
diff --git a/recipes/taipei.recipe b/recipes/taipei.recipe
new file mode 100644
index 0000000000..58579bf65c
--- /dev/null
+++ b/recipes/taipei.recipe
@@ -0,0 +1,30 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TN(BasicNewsRecipe):
+ title = u'Taipei Times'
+ language = 'en_CN'
+ __author__ = 'Krittika Goyal'
+ oldest_article = 1 #days
+ max_articles_per_feed = 25
+ use_embedded_content = False
+
+ no_stylesheets = True
+ auto_cleanup = True
+ auto_cleanup_keep = '//*[@class="main_ipic"]'
+
+ feeds = [
+('Editorials',
+ 'http://www.taipeitimes.com/xml/editorials.rss'),
+('Taiwan',
+ 'http://www.taipeitimes.com/xml/taiwan.rss'),
+('Features',
+ 'http://www.taipeitimes.com/xml/feat.rss'),
+('Business',
+ 'http://www.taipeitimes.com/xml/biz.rss'),
+('World',
+ 'http://www.taipeitimes.com/xml/world.rss'),
+('Sports',
+ 'http://www.taipeitimes.com/xml/sport.rss'),
+]
+
+
diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index 3f3cc3e142..d82a2268fa 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -31,7 +31,7 @@ def metadata_from_formats(formats, force_read_metadata=False, pattern=None):
try:
return _metadata_from_formats(formats, force_read_metadata, pattern)
except:
- mi = metadata_from_filename(list(iter(formats), pattern)[0])
+ mi = metadata_from_filename(list(iter(formats))[0], pat=pattern)
if not mi.authors:
mi.authors = [_('Unknown')]
return mi
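For context: the old fallback called list(iter(formats), pattern), which raises TypeError (list() takes a single argument), so the filename-based fallback could never run. The fix takes the first format name and passes the pattern through the keyword argument instead. A small sketch of the corrected call, with an invented filename and assuming the metadata_from_filename(name, pat=None) signature:

    # Sketch only: the filename is invented; pat=None means "use the configured filename pattern".
    from calibre.ebooks.metadata.meta import metadata_from_filename

    formats = ['Some Author - Some Title.epub']
    mi = metadata_from_filename(list(iter(formats))[0], pat=None)
    print(mi.title, mi.authors)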
diff --git a/src/calibre/gui2/catalog/catalog_bibtex.ui b/src/calibre/gui2/catalog/catalog_bibtex.ui
index 5b41e96267..b3d2b56b65 100644
--- a/src/calibre/gui2/catalog/catalog_bibtex.ui
+++ b/src/calibre/gui2/catalog/catalog_bibtex.ui
@@ -110,9 +110,9 @@
Some explanation about this template:
-The fields availables are 'author_sort', 'authors', 'id',
'isbn', 'pubdate', 'publisher', 'series_index', 'series',
- 'tags', 'timestamp', 'title', 'uuid'
+ 'tags', 'timestamp', 'title', 'uuid', 'title_sort'
-For list types ie authors and tags, only the first element
- wil be selected.
+ will be selected.
-For time field, only the date will be used.
diff --git a/src/calibre/gui2/catalog/catalog_csv_xml.py b/src/calibre/gui2/catalog/catalog_csv_xml.py
index 18f2c210dc..a64816cf98 100644
--- a/src/calibre/gui2/catalog/catalog_csv_xml.py
+++ b/src/calibre/gui2/catalog/catalog_csv_xml.py
@@ -29,7 +29,7 @@ class PluginWidget(QWidget, Ui_Form):
QListWidgetItem(x, self.db_fields)
db = db_()
- for x in sorted(db.custom_field_keys()):
+ for x in sorted(db.custom_field_keys()):
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index e929825245..f3ee0e575e 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -87,7 +87,7 @@ class DeviceJob(BaseJob): # {{{
self.failed = True
ex = as_unicode(err)
self._details = ex + '\n\n' + \
- traceback.format_exc()
+ force_unicode(traceback.format_exc())
self.exception = err
finally:
self.job_done()
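Rationale sketch for the force_unicode change: on Python 2, traceback.format_exc() returns a byte string, and concatenating it with the unicode text from as_unicode(err) raises UnicodeDecodeError as soon as the traceback contains non-ASCII bytes (paths, localized error messages). A minimal illustration of the guarded pattern, with an invented error:

    # Sketch only: the error message is invented.
    import traceback
    from calibre import as_unicode, force_unicode

    try:
        raise ValueError('kaput')
    except Exception as err:
        details = as_unicode(err) + '\n\n' + force_unicode(traceback.format_exc())
        print(details)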
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 0f5a31e1d7..1aa114762f 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -32,7 +32,7 @@ FIELDS = ['all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
'rating', 'series_index', 'series', 'size', 'tags', 'timestamp', 'uuid']
#Allowed fields for template
-TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate',
+TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title', 'uuid' ]
class CSV_XML(CatalogPlugin): # {{{
@@ -324,7 +324,7 @@ class BIBTEX(CatalogPlugin): # {{{
def run(self, path_to_output, opts, db, notification=DummyReporter()):
def create_bibtex_entry(entry, fields, mode, template_citation,
- bibtexdict, citation_bibtex=True, calibre_files=True):
+ bibtexdict, db, citation_bibtex=True, calibre_files=True):
#Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string
@@ -345,7 +345,13 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry = [u' '.join(bibtex_entry)]
for field in fields:
- item = entry[field]
+ if field.startswith('#'):
+ item = db.get_field(entry['id'],field,index_is_id=True)
+ elif field == 'title_sort':
+ item = entry['sort']
+ else:
+ item = entry[field]
+
#check if the field should be included (none or empty)
if item is None:
continue
@@ -358,10 +364,6 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
- elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
- 'author_sort', 'series'] :
- bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
-
elif field == 'id' :
bibtex_entry.append(u'calibreid = "%s"' % int(item))
@@ -409,6 +411,14 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry.append(u'year = "%s"' % item.year)
bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
+ elif field.startswith('#') :
+ bibtex_entry.append(u'%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item)))
+
+ else:
+ # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
+ # 'author_sort', 'series', 'title_sort'] :
+ bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
+
bibtex_entry = u',\n '.join(bibtex_entry)
bibtex_entry += u' }\n\n'
@@ -588,7 +598,7 @@ class BIBTEX(CatalogPlugin): # {{{
for entry in data:
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
- bibtexc, citation_bibtex, addfiles_bibtex))
+ bibtexc, db, citation_bibtex, addfiles_bibtex))
# }}}
class EPUB_MOBI(CatalogPlugin):
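Net effect of the BIBTEX changes: custom columns (field keys starting with '#') are read via db.get_field() and written out under the column name without the leading '#', title_sort is taken from the entry's 'sort' key, and every other remaining field now falls through to the generic field = "value" branch instead of the old hard-coded list. A tiny sketch of the key naming, using a hypothetical '#genre' column:

    # Sketch of the BibTeX key naming used above; '#genre' is a hypothetical custom column.
    def bibtex_key(field):
        if field.startswith('#'):
            return field[1:]      # custom column: strip the leading '#'
        return field              # built-in field names are used as-is

    assert bibtex_key('#genre') == 'genre'
    assert bibtex_key('title_sort') == 'title_sort'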
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index b7efd611e0..da037ca43b 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -147,13 +147,14 @@ class BasicNewsRecipe(Recipe):
#: Specify elements that the auto cleanup algorithm should never remove
#: The syntax is a XPath expression. For example::
#:
- #: auto_cleanup_keep = '//div[@id="article-image"]' will keep all divs with
+ #: auto_cleanup_keep = '//div[@id="article-image"]' will keep all divs with
#: id="article-image"
- #: auto_cleanup_keep = '//*[@class="important"]' will keep all elements
+ #: auto_cleanup_keep = '//*[@class="important"]' will keep all elements
#: with class="important"
- #: auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
+ #: auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
#: will keep all divs with id="article-image" and spans
#: with class="important"
+ #:
auto_cleanup_keep = None
#: Specify any extra :term:`CSS` that should be addded to downloaded :term:`HTML` files
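The docstring examples translate directly into recipe code; a minimal sketch combining auto_cleanup with auto_cleanup_keep (the title and feed URL are placeholders, not a real source):

    # Placeholder recipe illustrating the documented auto_cleanup_keep usage.
    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'
        auto_cleanup = True
        auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
        feeds = [('Example feed', 'http://example.com/feed.xml')]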
diff --git a/src/calibre/web/jsbrowser/browser.py b/src/calibre/web/jsbrowser/browser.py
index 133d720aac..5d569a3fac 100644
--- a/src/calibre/web/jsbrowser/browser.py
+++ b/src/calibre/web/jsbrowser/browser.py
@@ -7,16 +7,22 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-import os, pprint
+import os, pprint, time
from PyQt4.Qt import (QObject, QNetworkAccessManager, QNetworkDiskCache,
- QNetworkProxy, QNetworkProxyFactory)
-from PyQt4.QtWebKit import QWebPage
+ QNetworkProxy, QNetworkProxyFactory, QEventLoop, QUrl,
+ QDialog, QVBoxLayout, QSize)
+from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView
from calibre import USER_AGENT, prints, get_proxies, get_proxy_info
from calibre.constants import ispy3, config_dir
from calibre.utils.logging import ThreadSafeLog
from calibre.gui2 import must_use_qt
+from calibre.web.jsbrowser.forms import FormsMixin
+
+class Timeout(Exception): pass
+
+class LoadError(Exception): pass
class WebPage(QWebPage): # {{{
@@ -24,6 +30,7 @@ class WebPage(QWebPage): # {{{
confirm_callback=None,
prompt_callback=None,
user_agent=USER_AGENT,
+ enable_developer_tools=False,
parent=None):
QWebPage.__init__(self, parent)
@@ -33,6 +40,12 @@ class WebPage(QWebPage): # {{{
self.prompt_callback = prompt_callback
self.setForwardUnsupportedContent(True)
self.unsupportedContent.connect(self.on_unsupported_content)
+ settings = self.settings()
+ if enable_developer_tools:
+ settings.setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
+ QWebSettings.enablePersistentStorage(os.path.join(config_dir, 'caches',
+ 'webkit-persistence'))
+ QWebSettings.setMaximumPagesInCache(0)
def userAgentForUrl(self, url):
return self.user_agent
@@ -173,7 +186,36 @@ class NetworkAccessManager(QNetworkAccessManager): # {{{
self.log.debug('\n'.join(debug))
# }}}
-class Browser(QObject):
+class LoadWatcher(QObject): # {{{
+
+ def __init__(self, page, parent=None):
+ QObject.__init__(self, parent)
+ self.is_loading = True
+ self.loaded_ok = None
+ page.loadFinished.connect(self)
+ self.page = page
+
+ def __call__(self, ok):
+ self.loaded_ok = ok
+ self.is_loading = False
+ self.page.loadFinished.disconnect(self)
+ self.page = None
+# }}}
+
+class BrowserView(QDialog): # {{{
+
+ def __init__(self, page, parent=None):
+ QDialog.__init__(self, parent)
+ self.l = l = QVBoxLayout(self)
+ self.setLayout(l)
+ self.webview = QWebView(self)
+ l.addWidget(self.webview)
+ self.resize(QSize(1024, 768))
+ self.webview.setPage(page)
+
+# }}}
+
+class Browser(QObject, FormsMixin):
'''
Browser (WebKit with no GUI).
@@ -202,16 +244,21 @@ class Browser(QObject):
# If True a disk cache is used
use_disk_cache=True,
+ # Enable Inspect element functionality
+ enable_developer_tools=False,
+
# Verbosity
verbosity = 0
):
must_use_qt()
QObject.__init__(self)
+ FormsMixin.__init__(self)
if log is None:
log = ThreadSafeLog()
if verbosity:
log.filter_level = log.DEBUG
+ self.log = log
self.jquery_lib = P('content_server/jquery.js', data=True,
allow_user_override=False).decode('utf-8')
@@ -220,7 +267,64 @@ class Browser(QObject):
self.page = WebPage(log, confirm_callback=confirm_callback,
prompt_callback=prompt_callback, user_agent=user_agent,
+ enable_developer_tools=enable_developer_tools,
parent=self)
self.nam = NetworkAccessManager(log, use_disk_cache=use_disk_cache, parent=self)
self.page.setNetworkAccessManager(self.nam)
+ def _wait_for_load(self, timeout, url=None):
+ loop = QEventLoop(self)
+ start_time = time.time()
+ end_time = start_time + timeout
+ lw = LoadWatcher(self.page, parent=self)
+ while lw.is_loading and end_time > time.time():
+ if not loop.processEvents():
+ time.sleep(0.01)
+ if lw.is_loading:
+ raise Timeout('Loading of %r took longer than %d seconds'%(
+ url, timeout))
+
+ return lw.loaded_ok
+
+ def visit(self, url, timeout=30.0):
+ '''
+ Open the page specified in URL and wait for it to complete loading.
+ Note that when this method returns, there may still be javascript
+ that needs to execute (this method returns when the loadFinished()
+ signal is called on QWebPage). This method will raise a Timeout
+ exception if loading takes more than timeout seconds.
+
+ Returns True if loading was successful, False otherwise.
+ '''
+ self.current_form = None
+ self.page.mainFrame().load(QUrl(url))
+ return self._wait_for_load(timeout, url)
+
+ def click(self, qwe, wait_for_load=True, ajax_replies=0, timeout=30.0):
+ '''
+ Click the QWebElement pointed to by qwe.
+
+ :param wait_for_load: If you know that the click is going to cause a
+ new page to be loaded, set this to True to have
+ the method block until the new page is loaded
+        :param ajax_replies: Number of replies to wait for after clicking a link
+ that triggers some AJAX interaction
+ '''
+ js = '''
+ var e = document.createEvent('MouseEvents');
+ e.initEvent( 'click', true, true );
+ this.dispatchEvent(e);
+ '''
+ qwe.evaluateJavaScript(js)
+ if ajax_replies > 0:
+ raise NotImplementedError('AJAX clicking not implemented')
+ elif wait_for_load and not self._wait_for_load(timeout):
+ raise LoadError('Clicking resulted in a failed load')
+
+ def show_browser(self):
+ '''
+ Show the currently loaded web page in a window. Useful for debugging.
+ '''
+ view = BrowserView(self.page)
+ view.exec_()
+
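A short usage sketch of the new jsbrowser API (the URL is a placeholder; element lookup goes through the standard QWebFrame.findFirstElement call):

    # Sketch only: placeholder URL, selector chosen for illustration.
    from calibre.web.jsbrowser.browser import Browser

    br = Browser(verbosity=1, enable_developer_tools=True)
    if br.visit('http://example.com/', timeout=30.0):
        link = br.page.mainFrame().findFirstElement('a')   # first link on the page
        if not link.isNull():
            br.click(link, wait_for_load=True)             # raises LoadError if the load fails
        br.show_browser()                                  # open a window to inspect the result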
diff --git a/src/calibre/web/jsbrowser/forms.py b/src/calibre/web/jsbrowser/forms.py
new file mode 100644
index 0000000000..9f68e1e003
--- /dev/null
+++ b/src/calibre/web/jsbrowser/forms.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+from future_builtins import map
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre import as_unicode
+
+class Control(object):
+
+ def __init__(self, qwe):
+ self.qwe = qwe
+ self.name = unicode(qwe.attribute('name'))
+ self.type = unicode(qwe.attribute('type'))
+
+ def __repr__(self):
+ return unicode(self.qwe.toOuterXml())
+
+ @dynamic_property
+ def value(self):
+ def fget(self):
+ if self.type in ('checkbox', 'radio'):
+ return unicode(self.qwe.attribute('checked')) == 'checked'
+ if self.type in ('text', 'password'):
+ return unicode(self.qwe.attribute('value'))
+
+ def fset(self, val):
+ if self.type in ('checkbox', 'radio'):
+ if val:
+ self.qwe.setAttribute('checked', 'checked')
+ else:
+ self.qwe.removeAttribute('checked')
+ elif self.type in ('text', 'password'):
+ self.qwe.setAttribute('value', as_unicode(val))
+
+ return property(fget=fget, fset=fset)
+
+class RadioControl(object):
+
+ def __init__(self, name, controls):
+ self.name = name
+ self.type = 'radio'
+ self.values = {unicode(c.attribute('value')):c for c in controls}
+
+ def __repr__(self):
+ return 'RadioControl(%s)'%(', '.join(self.values))
+
+ @dynamic_property
+ def value(self):
+ def fget(self):
+ for val, x in self.values.iteritems():
+ if unicode(x.attribute('checked')) == 'checked':
+ return val
+
+ def fset(self, val):
+ control = None
+ for value, x in self.values.iteritems():
+ if val == value:
+ control = x
+ break
+ if control is not None:
+ for x in self.values.itervalues():
+ x.removeAttribute('checked')
+ control.setAttribute('checked', 'checked')
+
+ return property(fget=fget, fset=fset)
+
+class Form(object):
+
+ def __init__(self, qwe):
+ self.qwe = qwe
+ self.attributes = {unicode(x):unicode(qwe.attribute(x)) for x in
+ qwe.attributeNames()}
+ self.input_controls = list(map(Control, qwe.findAll('input')))
+ rc = [x for x in self.input_controls if x.type == 'radio']
+ self.input_controls = [x for x in self.input_controls if x.type != 'radio']
+ rc_names = {x.name for x in rc}
+ self.radio_controls = {name:RadioControl(name, [x.qwe for x in rc if x.name == name]) for name in rc_names}
+
+ def __getitem__(self, key):
+ for x in self.input_controls:
+ if key == x.name:
+ return x
+ try:
+            return self.radio_controls[key]
+ except KeyError:
+ pass
+ raise KeyError('No control with the name %s in this form'%key)
+
+ def __repr__(self):
+ attrs = ['%s=%s'%(k, v) for k, v in self.attributes.iteritems()]
+        return '<form %s>'%(' '.join(attrs))