Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-06-23 15:30:45 -04:00)

Commit beea6bcd11: Sync to trunk.
@@ -1,38 +1,47 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class BasicUserRecipe1316245412(BasicNewsRecipe):

     title          = u'Cicero Online'
     description    = u'Magazin f\xfcr politische Kultur'
     publisher      = 'Ringier Publishing GmbH'
     category       = 'news, politics, Germany'
     language       = 'de'
     encoding       = 'UTF-8'
-    __author__     = 'Armin Geller' # 2011-09-17
+    __author__     = 'Armin Geller' # Upd. 2011-09-19

     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
     auto_cleanup = False
+    # remove_javascript = True

     remove_tags = [
         dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
         dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb", "breadcrumb-title", "meta", "comment-wrapper",
                                          "field field-name-field-show-teaser-right field-type-list-boolean field-label-above"]}),
         dict(name='div', attrs={'title':["Dossier Auswahl"]}),
         dict(name='h2', attrs={'class':["title comment-form"]}),
         dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
-    ]
+        # 2011-09-19 clean-up on first feed historical caricature- and video preview pictures and social icons
+        dict(name='table', attrs={'class':["mcx-social-horizontal", "page-header"]}), # 2011-09-19
+        dict(name='div', attrs={'class':["page-header", "view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
+                                         "pagination",
+                                         "view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1"]}), # 2011-09-19
+    ]

     feeds = [
         (u'Das gesamte Portfolio', u'http://www.cicero.de/rss.xml'),
         (u'Berliner Republik', u'http://www.cicero.de/berliner-republik.xml'),
         (u'Weltb\xfchne', u'http://www.cicero.de/weltbuehne.xml'),
         (u'Kapital', u'http://www.cicero.de/kapital.xml'),
         (u'Salon', u'http://www.cicero.de/salon.xml'),
         (u'Blogs', u'http://www.cicero.de/blogs.xml'), #seems not to be in use at the moment
     ]

     def print_version(self, url):
         return url + '?print'

+    # def get_cover_url(self):
+    #     return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to find a good logo on their home page!
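Note on the hunk above: the recipe still relies on print_version() to fetch Cicero's printer friendly pages, it only gains extra cleanup selectors. A minimal sketch of that URL rewrite (illustrative only, not part of the commit; the article URL is invented):

    # Illustrative sketch of the effect of this recipe's print_version().
    def print_version(url):
        # Cicero serves a printer friendly page when '?print' is appended.
        return url + '?print'

    print(print_version('http://www.cicero.de/berliner-republik/some-article'))
    # -> http://www.cicero.de/berliner-republik/some-article?print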
@@ -4,19 +4,19 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class IDGse(BasicNewsRecipe):
     title = 'IDG'
-    description = 'IDG.se'
-    language = 'se'
     __author__ = 'zapt0'
+    language = 'sv'
+    description = 'IDG.se'
     oldest_article = 1
-    max_articles_per_feed = 40
+    max_articles_per_feed = 256
     no_stylesheets = True
     encoding = 'ISO-8859-1'
     remove_javascript = True

-    feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]
+    feeds = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]

     def print_version(self,url):
         return url + '?articleRenderMode=print&m=print'

     def get_cover_url(this):
         return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'

@@ -30,4 +30,3 @@ class IDGse(BasicNewsRecipe):
         dict(name='div', attrs={'id':['preamble_ad']}),
         dict(name='ul', attrs={'class':['share']})
     ]
-
@@ -4,25 +4,17 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1308306308(BasicNewsRecipe):
     title          = u'Macleans Magazine'
     language       = 'en_CA'
-    __author__     = 'sexymax15'
-    oldest_article = 30
-    max_articles_per_feed = 12
+    __author__     = 'Medius'
+    oldest_article = 7
+    cover_url = 'http://www.rogersmagazines.com/rms_covers/md/CLE_md.jpg'

     use_embedded_content = False

     remove_empty_feeds = True
     no_stylesheets = True
     remove_javascript = True
-    remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
-    remove_tags_after = {'class':'postmetadata'}
+    remove_tags = [dict(id='header'),{'class':'comment'}]
+    remove_tags_after = {'class':'pagination'}

-    feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
-             (u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
-             (u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
-             (u'Business', u'http://www2.macleans.ca/category/business/feed/'),
-             (u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
-             (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
-             (u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
-             (u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
-    def print_version(self, url):
-        return url + 'print/'
+    feeds = [(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
+             (u'World', u'http://www2.macleans.ca/category/news-politics/world/feed/'), (u'Business', u'http://www2.macleans.ca/category/business/feed/'), (u'Arts & Culture', u'http://www2.macleans.ca/category/arts/feed/'), (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'), (u'Health', u'http://www2.macleans.ca/category/life/health/feed/'), (u'Sports', u'http://www2.macleans.ca/category/life/sports/feed/'), (u'Environment', u'http://www2.macleans.ca/category/life/environment/feed/'), (u'Technology', u'http://www2.macleans.ca/category/life/technology/feed/'), (u'Travel', u'http://www2.macleans.ca/category/life/travel/feed/'), (u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/')]
@@ -16,6 +16,7 @@ __UseLife__ = True

 '''
 Change Log:
+2011/09/18: parse "column" section stuff from source text files directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
             provide options to remove all images in the file
@@ -52,16 +53,19 @@ class MPRecipe(BasicNewsRecipe):
         title = 'Ming Pao - Hong Kong'
         description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
         category = 'Chinese, News, Hong Kong'
-        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
+        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
         masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
         keep_only_tags = [dict(name='h1'),
                           dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                           dict(name='font', attrs={'color':['AA0000']}), # for column articles title
+                          dict(attrs={'class':['heading']}), # for heading from txt
                           dict(attrs={'id':['newscontent']}), # entertainment and column page content
                           dict(attrs={'id':['newscontent01','newscontent02']}),
+                          dict(attrs={'class':['content']}), # for content from txt
                           dict(attrs={'class':['photo']}),
                           dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
-                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
+                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
+                          dict(attrs={'class':['images']}) # for images from txt
                          ]
         if __KeepImages__:
             remove_tags = [dict(name='style'),
@@ -232,12 +236,18 @@ class MPRecipe(BasicNewsRecipe):
                                   (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                                   (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                   (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
-                                  #(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                  ]:
                 articles = self.parse_section2(url, keystr)
                 if articles:
                     feeds.append((title, articles))

+            # parse column section articles directly from .txt files
+            for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                      ]:
+                articles = self.parse_section2_col(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)
@@ -358,6 +368,24 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles

+    # parse from life.mingpao.com
+    def parse_section2_col(self, url, keystr):
+        self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href=True)
+        a.reverse()
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
+                current_articles.append({'title': title, 'url': url, 'description': ''})
+                included_urls.append(url)
+        current_articles.reverse()
+        return current_articles
+
     # parse from www.mingpaovan.com
     def parse_section3(self, url, baseUrl):
         self.get_fetchdate()
@@ -440,6 +468,39 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles

+    # preprocess those .txt based files
+    def preprocess_raw_html(self, raw_html, url):
+        if url.rfind('ftp') == -1:
+            return raw_html
+        else:
+            splitter = re.compile(r'\n') # Match non-digits
+            new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
+            next_is_img_txt = False
+            title_started = False
+            met_article_start_char = False
+            for item in splitter.split(raw_html):
+                if item.startswith(u'\u3010'):
+                    met_article_start_char = True
+                    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                else:
+                    if next_is_img_txt == False:
+                        if item.startswith('='):
+                            next_is_img_txt = True
+                            new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
+                        else:
+                            if met_article_start_char == False:
+                                if title_started == False:
+                                    new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                    title_started = True
+                                else:
+                                    new_raw_html = new_raw_html + item + '\n'
+                            else:
+                                new_raw_html = new_raw_html + item + '<p>\n'
+                    else:
+                        next_is_img_txt = False
+                        new_raw_html = new_raw_html + item + '\n'
+            return new_raw_html + '</div></body></html>'
+
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
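The preprocess_raw_html() added above turns the plain-text column articles (fetched from the ftp/Life3 path) into minimal HTML: a line starting with '=' names an image file and the following line is treated as its caption, the first remaining line before the body becomes the heading div, and the body starts at the first line beginning with u'\u3010' ('【'). A standalone sketch of that transformation on a made-up input (illustrative only; the sample text and image name are invented, and this is not the recipe code itself):

    import re

    def txt_to_html(raw_text):
        # Same line-oriented rules as the recipe's preprocess_raw_html (sketch only).
        html = '<html><head><title>Untitled</title></head><body><div class="images">'
        next_is_img_txt = False
        title_started = False
        in_body = False
        for item in re.split(r'\n', raw_text):
            if item.startswith(u'\u3010'):
                in_body = True                      # body starts at the first '【' line
                html += '</div><div class="content"><p>' + item + '<p>\n'
            elif next_is_img_txt:
                next_is_img_txt = False
                html += item + '\n'                 # caption for the preceding image
            elif item.startswith('='):
                next_is_img_txt = True              # '=NAME' becomes <img src="NAME.jpg">
                html += '<img src="' + item[1:].strip() + '.jpg" /><p>\n'
            elif not in_body and not title_started:
                html += '</div><div class="heading">' + item + '\n'
                title_started = True
            else:
                html += item + ('<p>\n' if in_body else '\n')
        return html + '</div></body></html>'

    sample = u'=col01\nCaption text\nColumn title\n\u3010First paragraph of the article'
    print(txt_to_html(sample))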
@@ -593,3 +654,4 @@ class MPRecipe(BasicNewsRecipe):

         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)
+
recipes/taipei.recipe (new file, 30 lines)
@@ -0,0 +1,30 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TN(BasicNewsRecipe):
+    title = u'Taipei Times'
+    language = 'en_CN'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1 # days
+    max_articles_per_feed = 25
+    use_embedded_content = False
+
+    no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//*[@class="main_ipic"]'
+
+    feeds = [
+        ('Editorials',
+         'http://www.taipeitimes.com/xml/editorials.rss'),
+        ('Taiwan',
+         'http://www.taipeitimes.com/xml/taiwan.rss'),
+        ('Features',
+         'http://www.taipeitimes.com/xml/feat.rss'),
+        ('Business',
+         'http://www.taipeitimes.com/xml/biz.rss'),
+        ('World',
+         'http://www.taipeitimes.com/xml/world.rss'),
+        ('Sports',
+         'http://www.taipeitimes.com/xml/sport.rss'),
+    ]
@@ -31,7 +31,7 @@ def metadata_from_formats(formats, force_read_metadata=False, pattern=None):
     try:
         return _metadata_from_formats(formats, force_read_metadata, pattern)
     except:
-        mi = metadata_from_filename(list(iter(formats), pattern)[0])
+        mi = metadata_from_filename(list(iter(formats))[0], pat=pattern)
         if not mi.authors:
             mi.authors = [_('Unknown')]
         return mi
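The old fallback passed two arguments to list(), which raises a TypeError before metadata_from_filename() is ever reached; the fix takes the first format path and hands the filename pattern to metadata_from_filename() through its pat parameter instead. A minimal sketch of the difference (illustrative only; the file name is invented):

    # Illustrative only: why the old fallback could never work.
    formats = ['Some Author - Some Title.epub']

    try:
        first = list(iter(formats), None)[0]   # old code path: list() takes one argument
    except TypeError as e:
        print('old call fails: %s' % e)

    first = list(iter(formats))[0]             # fixed code path
    print('fixed call returns: %s' % first)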
@@ -110,9 +110,9 @@
        <string>Some explanation about this template:
        -The fields availables are 'author_sort', 'authors', 'id',
         'isbn', 'pubdate', 'publisher', 'series_index', 'series',
-        'tags', 'timestamp', 'title', 'uuid'
+        'tags', 'timestamp', 'title', 'uuid', 'title_sort'
        -For list types ie authors and tags, only the first element
-        wil be selected.
+        will be selected.
        -For time field, only the date will be used. </string>
       </property>
       <property name="scaledContents">
@@ -29,7 +29,7 @@ class PluginWidget(QWidget, Ui_Form):
             QListWidgetItem(x, self.db_fields)

         db = db_()
         for x in sorted(db.custom_field_keys()):
             self.all_fields.append(x)
             QListWidgetItem(x, self.db_fields)
@@ -87,7 +87,7 @@ class DeviceJob(BaseJob): # {{{
             self.failed = True
             ex = as_unicode(err)
             self._details = ex + '\n\n' + \
-                traceback.format_exc()
+                force_unicode(traceback.format_exc())
             self.exception = err
         finally:
             self.job_done()
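The traceback text here can contain non-ASCII bytes (file paths, device names), and concatenating it with the unicode message from as_unicode() can then blow up instead of recording the failure; wrapping it in force_unicode() avoids that. A minimal sketch of the failure mode (illustrative only, plain Python, not calibre code; the message and bytes are invented):

    # Illustrative only: why the raw traceback text needs force_unicode().
    msg = u'Device error'                        # as_unicode(err) yields unicode text
    tb = b'Traceback ...\xe4 non-ascii byte\n'   # stand-in for a byte-string traceback

    try:
        details = msg + u'\n\n' + tb             # fails: implicit decode / type mismatch
    except (UnicodeDecodeError, TypeError) as e:
        print('concatenation failed: %s' % e)

    # Roughly what decoding the traceback first achieves:
    details = msg + u'\n\n' + tb.decode('utf-8', 'replace')
    print(details)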
@@ -32,7 +32,7 @@ FIELDS = ['all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
           'rating', 'series_index', 'series', 'size', 'tags', 'timestamp', 'uuid']

 #Allowed fields for template
-TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate',
+TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
     'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title', 'uuid' ]

 class CSV_XML(CatalogPlugin): # {{{
@@ -324,7 +324,7 @@ class BIBTEX(CatalogPlugin): # {{{
     def run(self, path_to_output, opts, db, notification=DummyReporter()):

         def create_bibtex_entry(entry, fields, mode, template_citation,
-                                bibtexdict, citation_bibtex=True, calibre_files=True):
+                                bibtexdict, db, citation_bibtex=True, calibre_files=True):

             #Bibtex doesn't like UTF-8 but keep unicode until writing
             #Define starting chain or if book valid strict and not book return a Fail string
@@ -345,7 +345,13 @@ class BIBTEX(CatalogPlugin): # {{{
             bibtex_entry = [u' '.join(bibtex_entry)]

             for field in fields:
-                item = entry[field]
+                if field.startswith('#'):
+                    item = db.get_field(entry['id'],field,index_is_id=True)
+                elif field == 'title_sort':
+                    item = entry['sort']
+                else:
+                    item = entry[field]

                 #check if the field should be included (none or empty)
                 if item is None:
                     continue
@@ -358,10 +364,6 @@ class BIBTEX(CatalogPlugin): # {{{
                 if field == 'authors' :
                     bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

-                elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
-                               'author_sort', 'series'] :
-                    bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
-
                 elif field == 'id' :
                     bibtex_entry.append(u'calibreid = "%s"' % int(item))
@@ -409,6 +411,14 @@ class BIBTEX(CatalogPlugin): # {{{
                     bibtex_entry.append(u'year = "%s"' % item.year)
                     bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

+                elif field.startswith('#') :
+                    bibtex_entry.append(u'%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item)))
+
+                else:
+                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
+                    #     'author_sort', 'series', 'title_sort'] :
+                    bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

             bibtex_entry = u',\n '.join(bibtex_entry)
             bibtex_entry += u' }\n\n'
@@ -588,7 +598,7 @@ class BIBTEX(CatalogPlugin): # {{{

             for entry in data:
                 outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
-                                                  bibtexc, citation_bibtex, addfiles_bibtex))
+                                                  bibtexc, db, citation_bibtex, addfiles_bibtex))
 # }}}

 class EPUB_MOBI(CatalogPlugin):
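Taken together, these hunks let the BibTeX catalog emit calibre custom columns: a field whose name starts with '#' is looked up through db.get_field() and written to the .bib entry with the leading '#' stripped, title_sort is read from the entry's 'sort' key, and every other field falls through to the generic formatter. A standalone sketch of that dispatch (illustrative only; the sample entry, the custom_values dict and the escape helper are stand-ins, not calibre's BibTeX helpers):

    # Illustrative sketch of the field dispatch added to create_bibtex_entry().
    def bibtex_escape(value):
        # Stand-in for bibtexdict.utf8ToBibtex(); real escaping is more involved.
        return (u'%s' % value).replace(u'"', u"'")

    def format_field(field, entry, custom_values):
        if field.startswith('#'):
            item = custom_values.get(field)   # stands in for db.get_field(entry['id'], field, index_is_id=True)
            return u'%s = "%s"' % (field[1:], bibtex_escape(item))
        elif field == 'title_sort':
            return u'%s = "%s"' % (field, bibtex_escape(entry['sort']))
        else:
            return u'%s = "%s"' % (field, bibtex_escape(entry[field]))

    entry = {'id': 1, 'title': u'A Book', 'sort': u'Book, A'}
    custom_values = {'#genre': u'Fiction'}
    for f in ('title', 'title_sort', '#genre'):
        print(format_field(f, entry, custom_values))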
@@ -147,13 +147,14 @@ class BasicNewsRecipe(Recipe):
     #: Specify elements that the auto cleanup algorithm should never remove
     #: The syntax is a XPath expression. For example::
     #:
     #:   auto_cleanup_keep = '//div[@id="article-image"]' will keep all divs with
     #:     id="article-image"
     #:   auto_cleanup_keep = '//*[@class="important"]' will keep all elements
     #:     with class="important"
     #:   auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
     #:     will keep all divs with id="article-image" and spans
     #:     with class="important"
+    #:
     auto_cleanup_keep = None

     #: Specify any extra :term:`CSS` that should be addded to downloaded :term:`HTML` files
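The new recipes/taipei.recipe in this commit is a working example of this attribute: it enables auto_cleanup and uses auto_cleanup_keep to protect the main article image from the cleanup heuristics. A minimal recipe sketch along the same lines (illustrative; the class name and feed URL are placeholders):

    from calibre.web.feeds.news import BasicNewsRecipe

    class AutoCleanupExample(BasicNewsRecipe):
        # Placeholder recipe showing auto_cleanup together with auto_cleanup_keep.
        title = u'Auto cleanup example'
        feeds = [('Example feed', 'http://example.com/feed.rss')]  # placeholder URL
        no_stylesheets = True
        auto_cleanup = True
        # Keep any element the cleanup pass would otherwise strip, here
        # everything with class="main_ipic" (as in recipes/taipei.recipe).
        auto_cleanup_keep = '//*[@class="main_ipic"]'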
@@ -7,16 +7,22 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, pprint
+import os, pprint, time

 from PyQt4.Qt import (QObject, QNetworkAccessManager, QNetworkDiskCache,
-        QNetworkProxy, QNetworkProxyFactory)
-from PyQt4.QtWebKit import QWebPage
+        QNetworkProxy, QNetworkProxyFactory, QEventLoop, QUrl,
+        QDialog, QVBoxLayout, QSize)
+from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView

 from calibre import USER_AGENT, prints, get_proxies, get_proxy_info
 from calibre.constants import ispy3, config_dir
 from calibre.utils.logging import ThreadSafeLog
 from calibre.gui2 import must_use_qt
+from calibre.web.jsbrowser.forms import FormsMixin
+
+class Timeout(Exception): pass
+
+class LoadError(Exception): pass

 class WebPage(QWebPage): # {{{

@@ -24,6 +30,7 @@ class WebPage(QWebPage): # {{{
             confirm_callback=None,
             prompt_callback=None,
             user_agent=USER_AGENT,
+            enable_developer_tools=False,
             parent=None):
         QWebPage.__init__(self, parent)

@@ -33,6 +40,12 @@ class WebPage(QWebPage): # {{{
         self.prompt_callback = prompt_callback
         self.setForwardUnsupportedContent(True)
         self.unsupportedContent.connect(self.on_unsupported_content)
+        settings = self.settings()
+        if enable_developer_tools:
+            settings.setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
+        QWebSettings.enablePersistentStorage(os.path.join(config_dir, 'caches',
+                'webkit-persistence'))
+        QWebSettings.setMaximumPagesInCache(0)

     def userAgentForUrl(self, url):
         return self.user_agent
@@ -173,7 +186,36 @@ class NetworkAccessManager(QNetworkAccessManager): # {{{
             self.log.debug('\n'.join(debug))
 # }}}

-class Browser(QObject):
+class LoadWatcher(QObject): # {{{
+
+    def __init__(self, page, parent=None):
+        QObject.__init__(self, parent)
+        self.is_loading = True
+        self.loaded_ok = None
+        page.loadFinished.connect(self)
+        self.page = page
+
+    def __call__(self, ok):
+        self.loaded_ok = ok
+        self.is_loading = False
+        self.page.loadFinished.disconnect(self)
+        self.page = None
+# }}}
+
+class BrowserView(QDialog): # {{{
+
+    def __init__(self, page, parent=None):
+        QDialog.__init__(self, parent)
+        self.l = l = QVBoxLayout(self)
+        self.setLayout(l)
+        self.webview = QWebView(self)
+        l.addWidget(self.webview)
+        self.resize(QSize(1024, 768))
+        self.webview.setPage(page)
+
+# }}}
+
+class Browser(QObject, FormsMixin):

     '''
     Browser (WebKit with no GUI).
@@ -202,16 +244,21 @@ class Browser(QObject):
             # If True a disk cache is used
             use_disk_cache=True,

+            # Enable Inspect element functionality
+            enable_developer_tools=False,
+
             # Verbosity
             verbosity = 0
         ):
         must_use_qt()
         QObject.__init__(self)
+        FormsMixin.__init__(self)

         if log is None:
             log = ThreadSafeLog()
         if verbosity:
             log.filter_level = log.DEBUG
+        self.log = log

         self.jquery_lib = P('content_server/jquery.js', data=True,
                 allow_user_override=False).decode('utf-8')
@@ -220,7 +267,64 @@ class Browser(QObject):

         self.page = WebPage(log, confirm_callback=confirm_callback,
                 prompt_callback=prompt_callback, user_agent=user_agent,
+                enable_developer_tools=enable_developer_tools,
                 parent=self)
         self.nam = NetworkAccessManager(log, use_disk_cache=use_disk_cache, parent=self)
         self.page.setNetworkAccessManager(self.nam)

+    def _wait_for_load(self, timeout, url=None):
+        loop = QEventLoop(self)
+        start_time = time.time()
+        end_time = start_time + timeout
+        lw = LoadWatcher(self.page, parent=self)
+        while lw.is_loading and end_time > time.time():
+            if not loop.processEvents():
+                time.sleep(0.01)
+        if lw.is_loading:
+            raise Timeout('Loading of %r took longer than %d seconds'%(
+                url, timeout))
+
+        return lw.loaded_ok
+
+    def visit(self, url, timeout=30.0):
+        '''
+        Open the page specified in URL and wait for it to complete loading.
+        Note that when this method returns, there may still be javascript
+        that needs to execute (this method returns when the loadFinished()
+        signal is called on QWebPage). This method will raise a Timeout
+        exception if loading takes more than timeout seconds.
+
+        Returns True if loading was successful, False otherwise.
+        '''
+        self.current_form = None
+        self.page.mainFrame().load(QUrl(url))
+        return self._wait_for_load(timeout, url)
+
+    def click(self, qwe, wait_for_load=True, ajax_replies=0, timeout=30.0):
+        '''
+        Click the QWebElement pointed to by qwe.
+
+        :param wait_for_load: If you know that the click is going to cause a
+                              new page to be loaded, set this to True to have
+                              the method block until the new page is loaded
+        :para ajax_replies: Number of replies to wait for after clicking a link
+                            that triggers some AJAX interaction
+        '''
+        js = '''
+            var e = document.createEvent('MouseEvents');
+            e.initEvent( 'click', true, true );
+            this.dispatchEvent(e);
+        '''
+        qwe.evaluateJavaScript(js)
+        if ajax_replies > 0:
+            raise NotImplementedError('AJAX clicking not implemented')
+        elif wait_for_load and not self._wait_for_load(timeout):
+            raise LoadError('Clicking resulted in a failed load')
+
+    def show_browser(self):
+        '''
+        Show the currently loaded web page in a window. Useful for debugging.
+        '''
+        view = BrowserView(self.page)
+        view.exec_()
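With FormsMixin mixed in, the headless browser can now drive login-style forms end to end: visit() a page, select_form() by CSS2 selector or index, assign control values, and submit(), which locates the form's submit control and goes through click() and _wait_for_load(). A usage sketch based on the API above and on the new test in src/calibre/web/jsbrowser/test.py (illustrative only; the URL, selector and field names are placeholders):

    from calibre.web.jsbrowser.browser import Browser

    # Placeholder workflow; the site, form id and field names are invented.
    browser = Browser(verbosity=1)
    if browser.visit('http://example.com/login', timeout=30.0):
        form = browser.select_form('form[id="loginForm"]')
        form['username'].value = 'someone'
        form['password'].value = 'secret'
        browser.submit()        # finds the submit control, clicks it, waits for the load
        browser.show_browser()  # optional: pop up a window to inspect the result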
src/calibre/web/jsbrowser/forms.py (new file, 160 lines)
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+from future_builtins import map
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre import as_unicode
+
+class Control(object):
+
+    def __init__(self, qwe):
+        self.qwe = qwe
+        self.name = unicode(qwe.attribute('name'))
+        self.type = unicode(qwe.attribute('type'))
+
+    def __repr__(self):
+        return unicode(self.qwe.toOuterXml())
+
+    @dynamic_property
+    def value(self):
+        def fget(self):
+            if self.type in ('checkbox', 'radio'):
+                return unicode(self.qwe.attribute('checked')) == 'checked'
+            if self.type in ('text', 'password'):
+                return unicode(self.qwe.attribute('value'))
+
+        def fset(self, val):
+            if self.type in ('checkbox', 'radio'):
+                if val:
+                    self.qwe.setAttribute('checked', 'checked')
+                else:
+                    self.qwe.removeAttribute('checked')
+            elif self.type in ('text', 'password'):
+                self.qwe.setAttribute('value', as_unicode(val))
+
+        return property(fget=fget, fset=fset)
+
+class RadioControl(object):
+
+    def __init__(self, name, controls):
+        self.name = name
+        self.type = 'radio'
+        self.values = {unicode(c.attribute('value')):c for c in controls}
+
+    def __repr__(self):
+        return 'RadioControl(%s)'%(', '.join(self.values))
+
+    @dynamic_property
+    def value(self):
+        def fget(self):
+            for val, x in self.values.iteritems():
+                if unicode(x.attribute('checked')) == 'checked':
+                    return val
+
+        def fset(self, val):
+            control = None
+            for value, x in self.values.iteritems():
+                if val == value:
+                    control = x
+                    break
+            if control is not None:
+                for x in self.values.itervalues():
+                    x.removeAttribute('checked')
+                control.setAttribute('checked', 'checked')
+
+        return property(fget=fget, fset=fset)
+
+class Form(object):
+
+    def __init__(self, qwe):
+        self.qwe = qwe
+        self.attributes = {unicode(x):unicode(qwe.attribute(x)) for x in
+                qwe.attributeNames()}
+        self.input_controls = list(map(Control, qwe.findAll('input')))
+        rc = [x for x in self.input_controls if x.type == 'radio']
+        self.input_controls = [x for x in self.input_controls if x.type != 'radio']
+        rc_names = {x.name for x in rc}
+        self.radio_controls = {name:RadioControl(name, [x.qwe for x in rc if x.name == name]) for name in rc_names}
+
+    def __getitem__(self, key):
+        for x in self.input_controls:
+            if key == x.name:
+                return x
+        try:
+            return self.radio_controls.get(key)
+        except KeyError:
+            pass
+        raise KeyError('No control with the name %s in this form'%key)
+
+    def __repr__(self):
+        attrs = ['%s=%s'%(k, v) for k, v in self.attributes.iteritems()]
+        return '<form %s>'%(' '.join(attrs))
+
+    def submit_control(self, submit_control_selector=None):
+        if submit_control_selector is not None:
+            sc = self.qwe.findFirst(submit_control_selector)
+            if not sc.isNull():
+                return sc
+        for c in self.input_controls:
+            if c.type == 'submit':
+                return c
+        for c in self.input_controls:
+            if c.type == 'image':
+                return c
+
+class FormsMixin(object):
+
+    def __init__(self):
+        self.current_form = None
+
+    def find_form(self, css2_selector=None, nr=None):
+        mf = self.page.mainFrame()
+        if css2_selector is not None:
+            candidate = mf.findFirstElement(css2_selector)
+            if not candidate.isNull():
+                return Form(candidate)
+        if nr is not None and int(nr) > -1:
+            nr = int(nr)
+            forms = mf.findAllElements('form')
+            if nr < forms.count():
+                return Form(forms.at(nr))
+
+    def all_forms(self):
+        '''
+        Return all forms present in the current page.
+        '''
+        mf = self.page.mainFrame()
+        return list(map(Form, mf.findAllElements('form').toList()))
+
+    def select_form(self, css2_selector=None, nr=None):
+        '''
+        Select a form for further processing. Specify the form either with
+        css2_selector or nr. Raises ValueError if no matching form is found.
+
+        :param css2_selector: A CSS2 selector, for example:
+            'form[action="/accounts/login"]' or 'form[id="loginForm"]'
+
+        :param nr: An integer >= 0. Selects the nr'th form in the current page.
+
+        '''
+        self.current_form = self.find_form(css2_selector=css2_selector, nr=nr)
+        if self.current_form is None:
+            raise ValueError('No such form found')
+        return self.current_form
+
+    def submit(self, submit_control_selector=None, ajax_replies=0, timeout=30.0):
+        if self.current_form is None:
+            raise ValueError('No form selected, use select_form() first')
+        sc = self.current_form.submit_control(submit_control_selector)
+        if sc is None:
+            raise ValueError('No submit control found in the current form')
+        self.current_form = None
+        self.click(sc.qwe, ajax_replies=ajax_replies, timeout=timeout)
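The mapping from a form to its controls is name based: text, password and checkbox inputs become Control objects indexed by their name attribute, while all radio inputs sharing a name are folded into a single RadioControl whose value is the checked option's value attribute. A short sketch of manipulating a selected form, mirroring the new test below (illustrative only; the port and control names come from that test page and are not a general API guarantee):

    from calibre.web.jsbrowser.browser import Browser

    # Illustrative: reuses the control names served by the test page in
    # src/calibre/web/jsbrowser/test.py (assumes that test server is running locally).
    browser = Browser()
    browser.visit('http://127.0.0.1:17983')   # the port used by test.py
    f = browser.select_form('#controls_test')
    f['text'].value = 'some text'             # text control: sets the value attribute
    f['checked_checkbox'].value = False       # checkbox: drops the checked attribute
    f['sex'].value = 'female'                 # radio group: checks the matching option
    browser.submit()                          # clicks the form's <input type="submit">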
src/calibre/web/jsbrowser/test.py (new file, 131 lines)
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import unittest, pprint, threading
+
+import cherrypy
+
+from calibre.web.jsbrowser.browser import Browser
+
+class Server(object):
+
+    def __init__(self):
+        self.form_data = {}
+
+    @cherrypy.expose
+    def index(self):
+        return '''
+        <html>
+        <head><title>JS Browser test</title></head>
+        <body>
+            <form id="controls_test" method="post" action="controls_test">
+                <h3>Test controls</h3>
+                <div><label>Simple Text:</label><input type="text" name="text"/></div>
+                <div><label>Password:</label><input type="password" name="password"/></div>
+                <div><label>Checked Checkbox:</label><input type="checkbox" checked="checked" name="checked_checkbox"/></div>
+                <div><label>UnChecked Checkbox:</label><input type="checkbox" name="unchecked_checkbox"/></div>
+                <div><input type="radio" name="sex" value="male" checked="checked" /> Male</div>
+                <div><input type="radio" name="sex" value="female" /> Female</div>
+                <div><input type="submit" value="Submit" /></div>
+            </form>
+            <form id="image_test" method="post" action="controls_test">
+                <h3>Test Image submit</h3>
+                <div><label>Simple Text:</label><input type="text" name="text" value="Image Test" /></div>
+                <input type="image" src="button_image" alt="Submit" />
+            </form>
+        </body>
+        </html>
+        '''
+
+    @cherrypy.expose
+    def controls_test(self, **kwargs):
+        self.form_data = kwargs.copy()
+        #pprint.pprint(kwargs)
+        return pprint.pformat(kwargs)
+
+    @cherrypy.expose
+    def button_image(self):
+        cherrypy.response.headers['Content-Type'] = 'image/png'
+        return I('next.png', data=True)
+
+class Test(unittest.TestCase):
+
+    @classmethod
+    def run_server(cls):
+        cherrypy.engine.start()
+        try:
+            cherrypy.engine.block()
+        except:
+            pass
+
+    @classmethod
+    def setUpClass(cls):
+        cls.port = 17983
+        cls.server = Server()
+        cherrypy.config.update({
+            'log.screen'             : False,
+            'checker.on'             : False,
+            'engine.autoreload_on'   : False,
+            'request.show_tracebacks': True,
+            'server.socket_host'     : b'127.0.0.1',
+            'server.socket_port'     : cls.port,
+            'server.socket_timeout'  : 10, # seconds
+            'server.thread_pool'     : 1, # number of threads
+            'server.shutdown_timeout': 0.1, # minutes
+        })
+        cherrypy.tree.mount(cls.server, '/', config={'/':{}})
+
+        cls.server_thread = threading.Thread(target=cls.run_server)
+        cls.server_thread.daemon = True
+        cls.server_thread.start()
+        cls.browser = Browser(verbosity=0)
+
+    @classmethod
+    def tearDownClass(cls):
+        cherrypy.engine.exit()
+        cls.browser = None
+
+    def test_control_types(self):
+        'Test setting data in the various control types'
+        self.assertEqual(self.browser.visit('http://127.0.0.1:%d'%self.port),
+                True)
+        values = {
+            'checked_checkbox'  : (False, None),
+            'unchecked_checkbox': (True, 'on'),
+            'text'              : ('some text', 'some text'),
+            'password'          : ('some password', 'some password'),
+            'sex'               : ('female', 'female'),
+        }
+        f = self.browser.select_form('#controls_test')
+        for k, vals in values.iteritems():
+            f[k].value = vals[0]
+        self.browser.submit()
+        dat = self.server.form_data
+        for k, vals in values.iteritems():
+            self.assertEqual(vals[1], dat.get(k, None),
+                    'Field %s: %r != %r'%(k, vals[1], dat.get(k, None)))
+
+    def test_image_submit(self):
+        'Test submitting a form with a image as the submit control'
+        self.assertEqual(self.browser.visit('http://127.0.0.1:%d'%self.port),
+                True)
+        self.browser.select_form('#image_test')
+        self.browser.submit()
+        self.assertEqual(self.server.form_data['text'], 'Image Test')
+
+def tests():
+    return unittest.TestLoader().loadTestsFromTestCase(Test)
+
+def run():
+    unittest.TextTestRunner(verbosity=2).run(tests())
+
+if __name__ == '__main__':
+    run()