From 974740539d8ea1acf5657c6a5bfaae75ea00c3e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Apr 2016 14:52:27 +0530 Subject: [PATCH] Remove the jsbrowser as it used the obsolete Qt WebKit There are still a handful of recipes to be ported, but they will require someone to provide login credentials. Also the Woblink store plugin needs to be ported. --- src/calibre/__init__.py | 4 - src/calibre/srv/TODO | 3 +- src/calibre/utils/browser.py | 4 - src/calibre/web/feeds/jsnews.py | 411 ------------ src/calibre/web/feeds/news.py | 50 +- src/calibre/web/feeds/recipes/__init__.py | 3 +- src/calibre/web/fetch/javascript.py | 267 -------- src/calibre/web/jsbrowser/__init__.py | 10 - src/calibre/web/jsbrowser/browser.py | 740 ---------------------- src/calibre/web/jsbrowser/forms.py | 261 -------- src/calibre/web/jsbrowser/login.py | 56 -- src/calibre/web/jsbrowser/test.py | 240 ------- 12 files changed, 5 insertions(+), 2044 deletions(-) delete mode 100644 src/calibre/web/feeds/jsnews.py delete mode 100644 src/calibre/web/fetch/javascript.py delete mode 100644 src/calibre/web/jsbrowser/__init__.py delete mode 100644 src/calibre/web/jsbrowser/browser.py delete mode 100644 src/calibre/web/jsbrowser/forms.py delete mode 100644 src/calibre/web/jsbrowser/login.py delete mode 100644 src/calibre/web/jsbrowser/test.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 0edb673093..4a62a5f565 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -419,10 +419,6 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, return opener -def jsbrowser(*args, **kwargs): - from calibre.web.jsbrowser.browser import Browser - return Browser(*args, **kwargs) - def fit_image(width, height, pwidth, pheight): ''' Fit image in box of width pwidth and height pheight. diff --git a/src/calibre/srv/TODO b/src/calibre/srv/TODO index 10e65b3dac..649d7d3644 100644 --- a/src/calibre/srv/TODO +++ b/src/calibre/srv/TODO @@ -3,8 +3,7 @@ Remove all *content_server_* and server_listen_on tweaks Rewrite server integration with nginx/apache section Remove dependency on cherrypy from download and contribs pages and remove -cherrypy private copy (you will have to re-write jsbrowser.test to not use -cherrypy) +cherrypy private copy Remove the bundled routes package diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 945e210638..4136b0092c 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -60,10 +60,6 @@ class Browser(B): B.set_cookiejar(self, *args, **kwargs) self._clone_actions['set_cookiejar'] = ('set_cookiejar', args, kwargs) - def copy_cookies_from_jsbrowser(self, jsbrowser): - for cookie in jsbrowser.cookies: - self.cookiejar.set_cookie(cookie) - def set_cookie(self, name, value, domain, path='/'): self.cookiejar.set_cookie(Cookie( None, name, value, diff --git a/src/calibre/web/feeds/jsnews.py b/src/calibre/web/feeds/jsnews.py deleted file mode 100644 index c5a6d764d6..0000000000 --- a/src/calibre/web/feeds/jsnews.py +++ /dev/null @@ -1,411 +0,0 @@ -#!/usr/bin/env python2 -# vim:fileencoding=utf-8 -from __future__ import (unicode_literals, division, absolute_import, - print_function) - -__license__ = 'GPL v3' -__copyright__ = '2013, Kovid Goyal ' - -import os, re -from io import BytesIO -from functools import partial - -from calibre import force_unicode, walk -from calibre.constants import __appname__ -from calibre.web.feeds import feeds_from_index -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors -from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations -from calibre.utils.cleantext import clean_xml_chars - -def image_data_to_url(data, base='cover'): - from calibre.utils.imghdr import what - ans = BytesIO(data) - ext = what(None, data) - if not ext: - if data.startswith(b'%PDF-'): - ext = 'pdf' - else: - ext = 'jpg' - ans.name = 'cover.' + ext - return ans - - -class JavascriptRecipe(BasicNewsRecipe): - - ''' - - This recipe class is used to download content from javascript heavy - sites. It uses a full WebKit browser to do the downloading, therefore it - can support sites that use javascript to dynamically fetch content. - - Most of the parameters from :class:`BasicNewsRecipe` still apply, apart - from those noted specifically below. The biggest difference is that you use - CSS selectors to specify tags to keep and remove as well as links to - follow, instead of the BeautifulSoup selectors used in - :class:`BasicNewsRecipe`. Indeed, BeautifulSoup has been completely removed - and replaced by lxml, whereever you previously expected BeautifulSoup to - represent parsed HTML, you will now get lxml trees. See - http://lxml.de/tutorial.html for a tutorial on using lxml. - - The various article pre-processing callbacks such as ``preprocess_html()`` - and ``skip_ad_pages()`` have all been replaced by just two callbacks, - :meth:`preprocess_stage1` and :meth:`preprocess_stage2`. These methods are - a passed the browser instance, and can thus do anything they like. - - An important method that you will often have to implement is - :meth:`load_complete` to tell the download system when a page has finished - loading and is ready to be scraped. - - You can use the builtin recipe for time.com as an example of the usage of - this class. - ''' - - #: Minimum calibre version needed to use this recipe - requires_version = (0, 9, 35) - - #: List of tags to be removed. Specified tags are removed from downloaded HTML. - #: A tag is specified using CSS selectors. - #: A common example:: - #: - #: remove_tags = ['div.advert', 'div.tools'] - #: - #: This will remove all `