py3: port use of urllib in recipes

2025-07-09 03:04:10 -04:00 · 2019-04-01 13:57:21 +05:30 · 2019-04-01 13:57:21 +05:30 · 569c576293
commit 569c576293
parent 930376c036
36 changed files with 178 additions and 119 deletions
--- a/recipes/am730.recipe
+++ b/recipes/am730.recipe
@ -11,7 +11,10 @@ Change Log:
 2013/03/30 -- first version
 '''
-import urllib
+try:
    from urllib.parse import unquote
 except ImportError:
    from urllib import unquote
 from calibre.web.feeds.recipes import BasicNewsRecipe
@ -59,7 +62,7 @@ class AM730(BasicNewsRecipe):
                continue  # not in same section
            title = href.split('/')[-1].split('-')[0]
-            title = urllib.unquote(title.encode('ASCII'))  # .decode('utf-8')
+            title = unquote(title.encode('ASCII'))  # .decode('utf-8')
            if self.debug:
                print(title)
            try:
--- a/recipes/ambito_financiero.recipe
+++ b/recipes/ambito_financiero.recipe
@ -9,7 +9,10 @@ http://www.ambito.com/diario/
 '''
 import time
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -66,7 +69,7 @@ class Ambito_Financiero(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            postdata = urllib.urlencode({
+            postdata = urlencode({
                'txtUser': self.username,
                'txtPassword': self.password
            })
@ -110,7 +113,7 @@ class Ambito_Financiero(BasicNewsRecipe):
        if self.session_id:
            l, s, r = url.rpartition('.html')
            o, s1, artid = l.rpartition('_')
-            postdata = urllib.urlencode({'id': artid, 'id_session': self.session_id})
+            postdata = urlencode({'id': artid, 'id_session': self.session_id})
            response = self.browser.open(
                'http://data.ambito.com/diario/cuerpo_noticia.asp',
                data=postdata,
@ -128,7 +131,7 @@ class Ambito_Financiero(BasicNewsRecipe):
    def cleanup(self):
        if self.session_id is not None:
-            postdata = urllib.urlencode({'session_id': self.session_id})
+            postdata = urlencode({'session_id': self.session_id})
            self.browser.open(
                'http://www.ambito.com/diario/no-cache/login/x_logout.asp', data=postdata, timeout=self.timeout
            )
--- a/recipes/azstarnet.recipe
+++ b/recipes/azstarnet.recipe
@ -4,7 +4,10 @@ __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 azstarnet.com
 '''
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre.web.feeds.news import BasicNewsRecipe
@ -31,7 +34,7 @@ class Azstarnet(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://azstarnet.com/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
+            data = urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
                                     })
            br.open('http://azstarnet.com/app/registration/proxy.php', data)
        return br
--- a/recipes/barrons.recipe
+++ b/recipes/barrons.recipe
@ -7,6 +7,9 @@ from __future__ import (unicode_literals, division, absolute_import,
 import json
 from mechanize import Request
 try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
 from calibre.web.feeds.news import BasicNewsRecipe
@ -26,7 +29,7 @@ class Barrons(BasicNewsRecipe):
    timefmt = ' [%a, %b %d, %Y]'
    use_embedded_content = False
    no_stylesheets = True
-    match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
+    match_regexps = ['http://online.barrons.com/.*?html\\?mod=.*?|file:.*']
    conversion_options = {'linearize_tables': True}
    # Don't grab articles more than 7 days old
--- a/recipes/brecha.recipe
+++ b/recipes/brecha.recipe
@ -6,7 +6,10 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 www.brecha.com.uy
 '''
-import urllib
+try:
    from urllib.parse import urlencode, quote
 except ImportError:
    from urllib import urlencode, quote
 from calibre.web.feeds.news import BasicNewsRecipe
@ -40,7 +43,7 @@ class Brecha(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
+            data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
                                     })
            br.open(
                'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data)
@ -74,5 +77,5 @@ class Brecha(BasicNewsRecipe):
        soup = self.index_to_soup('http://www.brecha.com.uy/index.php')
        for image in soup.findAll('img', alt=True):
            if image['alt'].startswith('Tapa '):
-                return 'http://www.brecha.com.uy' + urllib.quote(image['src'])
+                return 'http://www.brecha.com.uy' + quote(image['src'])
        return None
--- a/recipes/clarin.recipe
+++ b/recipes/clarin.recipe
@ -9,7 +9,10 @@ __copyright__ = '2008-2016, Darko Miletic <darko.miletic at gmail.com>'
 clarin.com
 '''
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -76,7 +79,7 @@ class Clarin(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
+            data = urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
                                     'ingresar_ingresar_email_paseInputComponent': self.username,
                                     'ingresar_ingresar_palabraClave_paseInputComponent': self.password,
                                     'ingresar_ingresar_ingresar_paseButton': 'Ingresar',
--- a/recipes/economia.recipe
+++ b/recipes/economia.recipe
@ -1,4 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
--- a/recipes/estadao.recipe
+++ b/recipes/estadao.recipe
@ -1,9 +1,8 @@
 from __future__ import print_function
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import datetime, timedelta
-from calibre.ebooks.BeautifulSoup import Tag, BeautifulSoup
+from calibre.ebooks.BeautifulSoup import Tag
 from calibre.utils.magick import Image, PixelWand
-from urllib2 import Request, urlopen, URLError
 def new_tag(soup, name, attrs=()):
@ -113,20 +112,14 @@ class Estadao(BasicNewsRecipe):
    def get_cover_url(self):
        if self.THUMBALIZR_API:
            cover_url = self.CAPA
-            pedido = Request(self.CAPA)
-            pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' +
-                              self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)')
-            pedido.add_header('Accept-Charset', self.ENCHTM)
-            pedido.add_header('Referer', self.SCREENSHOT)
            try:
-                resposta = urlopen(pedido)
+                soup = self.index_to_soup(cover_url)
-                soup = BeautifulSoup(resposta)
                cover_item = soup.find('body')
                if cover_item:
                    cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
                        '&url=' + self.SCREENSHOT + '&width=600&quality=90'
                return cover_url
-            except URLError:
+            except Exception:
                cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
                    '&url=' + self.SCREENSHOT + '&width=600&quality=90'
                return cover_url
--- a/recipes/folha.recipe
+++ b/recipes/folha.recipe
@ -5,8 +5,11 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.folha.uol.com.br
 '''
-import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
+try:
+    from urllib.parse import quote_plus
+except ImportError:
+    from urllib import quote_plus
 class Folha_de_s_paulo(BasicNewsRecipe):
@ -69,7 +72,7 @@ class Folha_de_s_paulo(BasicNewsRecipe):
        return curl
    def print_version(self, url):
-        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
+        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + quote_plus(url)
    def get_cover_url(self):
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
--- a/recipes/galaxys_edge.recipe
+++ b/recipes/galaxys_edge.recipe
@ -4,8 +4,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import re
 import shutil
-import urllib
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
 from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe
@ -46,7 +49,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe):
            self.log('\t\tdata-parent-id', parent_id)
            self.log('\t\tdata-cat-id', cat_id)
            self.log('\t\tdata-post-id', post_id)
-            data = urllib.urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
+            data = urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
            r=br.open('http://www.galaxysedge.com/wp-content/themes/galaxyedge/get_content.php', data)
            content_file = PersistentTemporaryFile(suffix='.html', dir=self.ctdir)
            content_file.write(r.read())
--- a/recipes/haaretz_en.recipe
+++ b/recipes/haaretz_en.recipe
@ -4,8 +4,11 @@ __copyright__ = '2010-2015, Darko Miletic <darko.miletic at gmail.com>'
 www.haaretz.com
 '''
-import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
 class Haaretz_en(BasicNewsRecipe):
@ -62,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.PREFIX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password  # noqa
+            data = urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password  # noqa
                                     })
            br.open('https://sso.haaretz.com/sso/sso/signIn', data)
        return br
--- a/recipes/harpers_full.recipe
+++ b/recipes/harpers_full.recipe
@ -15,7 +15,10 @@ anything in username/password fields
 import time
 import re
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -61,7 +64,7 @@ class Harpers_full(BasicNewsRecipe):
        br.open('https://harpers.org/')
        if self.username is not None and self.password is not None:
            tt = time.localtime() * 1000
-            data = urllib.urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
+            data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
                                     })
            br.open(self.LOGIN, data)
        return br
--- a/recipes/hbr.recipe
+++ b/recipes/hbr.recipe
@ -5,8 +5,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from css_selectors import Select
 from mechanize import Request
-from urllib import urlencode
 import json
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
 class HBR(BasicNewsRecipe):
--- a/recipes/houston_chronicle.recipe
+++ b/recipes/houston_chronicle.recipe
@ -8,8 +8,6 @@ chron.com
 '''
 import re
 import time
-import urllib2
-import io
 from datetime import datetime
 import traceback
 import sys
@ -19,8 +17,6 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.ebooks.BeautifulSoup import NavigableString
 from calibre.utils.date import dt_factory, local_tz
-from lxml import html
-from lxml import etree
 regex_date_only = re.compile(r"""(?:January|February|March|April|
 {8}May|June|July|August|September|October|November|
@ -62,12 +58,8 @@ def validate_link(page, link, title):
    return link, title
-def get_article_parsed(this_url):
+def get_article_parsed(index_to_soup, this_url):
-    page = urllib2.urlopen(this_url)
+    return index_to_soup(this_url, as_tree=True)
-    content = page.read()
-    parser = etree.HTMLParser()
-    parsed = html.parse(io.BytesIO(bytes(content)), parser)
-    return parsed
 def sort_subject(element_list):
@ -91,8 +83,8 @@ def sort_subject(element_list):
    return combined_list
-def get_links_from_section_page(page):
+def get_links_from_section_page(index_to_soup, page):
-    page_doc = get_article_parsed(base_url + page[1][0])
+    page_doc = get_article_parsed(index_to_soup, base_url + page[1][0])
    els = page_doc.xpath(xpath_general)
    element_list = []
    for el in els:
@ -110,13 +102,13 @@ def get_links_from_section_page(page):
    return [page[0], sorted_element_list]
-def get_all_links_from_sections():
+def get_all_links_from_sections(index_to_soup):
    all_sections = []
    article_set = set()
    final_dict = OrderedDict()
    for item in pages.items():
        print("getting links from {0}".format(item[0]))
-        all_sections.append(get_links_from_section_page(item))
+        all_sections.append(get_links_from_section_page(index_to_soup, item))
    for section in all_sections:
        section_id = section[0]
        article_list = section[1]
@ -232,7 +224,7 @@ class HoustonChronicle(BasicNewsRecipe):
        self.timefmt = ' [%a, %d %b, %Y]'
        self.log('starting parse_index: ', time.strftime(self.timestampfmt))
        feeds = []
-        sections = get_all_links_from_sections()
+        sections = get_all_links_from_sections(self.index_to_soup)
        for section_id, article_list in sections.items():
            self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list)))
            articles = []
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@ -7,6 +7,9 @@ import urlparse, re
 import json
 from uuid import uuid4
 from mechanize import Request
 try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre.web.feeds.news import BasicNewsRecipe
--- a/recipes/jbpress.recipe
+++ b/recipes/jbpress.recipe
@ -1,4 +1,7 @@
-import urllib2
+try:
    from urllib.request import urlopen
 except ImportError:
    from urllib2 import urlopen
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
@ -39,7 +42,7 @@ class JBPress(BasicNewsRecipe):
        return br
    def print_version(self, url):
-        url = urllib2.urlopen(url).geturl()  # resolve redirect.
+        url = urlopen(url).geturl()  # resolve redirect.
        return url.replace('/-/', '/print/')
    def preprocess_html(self, soup):
--- a/recipes/la_jornada.recipe
+++ b/recipes/la_jornada.recipe
@ -5,6 +5,9 @@ www.jornada.unam.mx
 '''
 import re
 try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from urlparse import urlparse, urlunparse, parse_qs
 from calibre import strftime
--- a/recipes/lanacion_chile.recipe
+++ b/recipes/lanacion_chile.recipe
@ -5,7 +5,10 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lanacion.cl
 '''
-import urllib
+try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
 from calibre.web.feeds.news import BasicNewsRecipe
@ -36,7 +39,7 @@ class LaNacionChile(BasicNewsRecipe):
    feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
    def print_version(self, url):
-        toprint = urllib.quote(url, ':/')
+        toprint = quote(url, ':/')
        return u'http://www.lanacion.cl/cgi-bx/imprimir.cgi?_URL=' + toprint
    def preprocess_html(self, soup):
--- a/recipes/le_monde_sub_paper.recipe
+++ b/recipes/le_monde_sub_paper.recipe
@ -7,7 +7,6 @@ Lemonde.fr: Version abonnée
 '''
 import os, zipfile, re, time
-from urllib2 import HTTPError
 from calibre.constants import preferred_encoding
 from calibre.web.feeds.news import BasicNewsRecipe
@ -97,7 +96,7 @@ class LeMondeAbonne(BasicNewsRecipe):
            try:
                response = browser.open(url)
                continue
-            except HTTPError:
+            except Exception:
                second -= 24 * 60 * 60
        tmp = PersistentTemporaryFile(suffix='.zip')
--- a/recipes/lemonde_dip.recipe
+++ b/recipes/lemonde_dip.recipe
@ -4,7 +4,10 @@ __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 mondediplo.com
 '''
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -43,7 +46,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
+            data = urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
                                     })
            br.open(self.LOGIN, data)
        return br
--- a/recipes/modoros.recipe
+++ b/recipes/modoros.recipe
@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.constants import config_dir, CONFIG_DIR_MODE
 import os
 import os.path
-import urllib
+try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
 from hashlib import md5
@ -62,7 +65,7 @@ class ModorosBlogHu(BasicNewsRecipe):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
-            feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
+            feed_hash = quote(feed.title.encode('utf-8'), safe='')
            feed_fn = os.path.join(feed_dir, feed_hash)
            past_items = set()
--- a/recipes/now_toronto.recipe
+++ b/recipes/now_toronto.recipe
@ -6,7 +6,6 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Starson17'
 import os
-import urllib2
 import zipfile
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
@ -25,10 +24,10 @@ class NowToronto(BasicNewsRecipe):
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name='feedburner:origlink').string
-        f = urllib2.urlopen(url)
+        raw = self.index_to_soup(url, raw=True)
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0, _('downloading epub'))
-        tmp.write(f.read())
+        tmp.write(raw)
        tmp.close()
        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0, _('extracting epub'))
--- a/recipes/nursingtimes.recipe
+++ b/recipes/nursingtimes.recipe
@ -4,7 +4,11 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 www.nursingtimes.net
 '''
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre.web.feeds.recipes import BasicNewsRecipe
@ -31,7 +35,7 @@ class NursingTimes(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13'  # noqa
+            data = urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13'  # noqa
                                     })
            br.open(self.LOGIN, data)
        return br
--- a/recipes/oc_register.recipe
+++ b/recipes/oc_register.recipe
@ -3,7 +3,6 @@
 from __future__ import unicode_literals, division, absolute_import, print_function
 import time
 import json
-import urllib
 from pprint import pprint
 from calibre.web.feeds.news import BasicNewsRecipe
@ -87,9 +86,7 @@ class OrangeCountyRegister(BasicNewsRecipe):
        return cleanedHTML
    def loadURL(self, url):
-        socket = urllib.urlopen(url)
+        return self.index_to_soup(url, raw=True)
-        rawHTML = socket.read()
-        return rawHTML
    def htmlToAttribsDict(self, rawHTML):
        tokenStart = 'dataLayer.push({'
--- a/recipes/office_space.recipe
+++ b/recipes/office_space.recipe
@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.constants import config_dir, CONFIG_DIR_MODE
 import os
 import os.path
-import urllib
+try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
 from hashlib import md5
@ -86,7 +89,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
-            feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
+            feed_hash = quote(feed.title.encode('utf-8'), safe='')
            feed_fn = os.path.join(feed_dir, feed_hash)
            past_items = set()
--- a/recipes/orlando_sentinel.recipe
+++ b/recipes/orlando_sentinel.recipe
@ -1,5 +1,8 @@
-import urllib
 import re
+try:
+    from urllib.parse import unquote
+except ImportError:
+    from urllib import unquote
 from calibre.web.feeds.news import BasicNewsRecipe
@ -37,7 +40,7 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
        ans = None
        try:
            s = article.summary
-            ans = urllib.unquote(
+            ans = unquote(
                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
        except:
            pass
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@ -8,8 +8,14 @@ import json
 import operator
 import re
 import tempfile
-import urllib
+try:
-import urllib2
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
+try:
+    from urllib.error import HTTPError, URLError
+except ImportError:
+    from urllib2 import HTTPError, URLError
 __license__ = 'GPL v3'
@ -99,18 +105,8 @@ class Pocket(BasicNewsRecipe):
            self.get_auth_uri(),
            self.get_pull_articles_uri()
        )
-        try:
+        data = self.index_to_soup(fetch_url, raw=True)
-            request = urllib2.Request(fetch_url)
+        pocket_feed = json.loads(data)['list']
-            response = urllib2.urlopen(request)
-            pocket_feed = json.load(response)['list']
-        except urllib2.HTTPError as e:
-            self.log.exception(
-                "Pocket returned an error: {0}".format(e.info()))
-            return []
-        except urllib2.URLError as e:
-            self.log.exception(
-                "Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
-            return []
        if len(pocket_feed) < self.minimum_articles:
            self.mark_as_read_after_dl = False
@ -143,10 +139,10 @@ class Pocket(BasicNewsRecipe):
        fc_tag = soup.find('script', text=re.compile("formCheck"))
        fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
        article_id = url.split("/")[-1]
-        data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
+        data = urlencode({'itemId': article_id, 'formCheck': fc_id})
        try:
            response = self.browser.open(ajax_url, data)
-        except urllib2.HTTPError as e:
+        except HTTPError as e:
            self.log.exception("unable to get textview {0}".format(e.info()))
            raise e
        return json.load(response)['article']
@ -186,13 +182,12 @@ class Pocket(BasicNewsRecipe):
            self.get_auth_uri()
        )
        try:
-            request = urllib2.Request(mark_read_url)
+            self.browser.open_novisit(mark_read_url)
-            urllib2.urlopen(request)
+        except HTTPError as e:
-        except urllib2.HTTPError as e:
            self.log.exception(
                'Pocket returned an error while archiving articles: {0}'.format(e))
            return []
-        except urllib2.URLError as e:
+        except URLError as e:
            self.log.exception(
                "Unable to connect to getpocket.com's modify api: {0}".format(e))
            return []
--- a/recipes/roger_ebert.recipe
+++ b/recipes/roger_ebert.recipe
@ -1,5 +1,4 @@
 import re
-import urllib2
 from calibre.web.feeds.news import BasicNewsRecipe
@ -54,7 +53,7 @@ class Ebert(BasicNewsRecipe):
            self.report_progress(0, _('Fetching feed') + ' %s...' %
                                 (feedtitle if feedtitle else feedurl))
            articles = []
-            page = urllib2.urlopen(feedurl).read()
+            page = self.index_to_soup(feedurl, raw=True)
            if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
                pattern = self.patternReviews
--- a/recipes/roger_ebert_blog.recipe
+++ b/recipes/roger_ebert_blog.recipe
@ -1,5 +1,4 @@
 import re
-import urllib2
 import time
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre import strftime
@ -68,7 +67,7 @@ class Ebert(BasicNewsRecipe):
            self.report_progress(0, _('Fetching feed') + ' %s...' %
                                 (feedtitle if feedtitle else feedurl))
            articles = []
-            page = urllib2.urlopen(feedurl).read()
+            page = self.index_to_soup(feedurl, raw=True)
            if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
                pattern = self.patternReviews
--- a/recipes/sunday_times_magazine.recipe
+++ b/recipes/sunday_times_magazine.recipe
@ -3,8 +3,11 @@ __copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.thetimes.co.uk/magazine/the-sunday-times-magazine/
 '''
-import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
 def classes(classes):
@ -49,7 +52,7 @@ class TimesOnline(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.thetimes.co.uk/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({
+            data = urlencode({
                'gotoUrl': self.INDEX,
                'username': self.username,
                'password': self.password})
--- a/recipes/taz.recipe
+++ b/recipes/taz.recipe
@ -9,10 +9,14 @@ __docformat__ = 'restructuredtext de'
 www.taz.de/digiabo
 '''
 import os
-import urllib2
 import zipfile
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
+try:
+    from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen
+    from urllib.error import HTTPError
+except ImportError:
+    from urllib2 import HTTPBasicAuthHandler, build_opener, install_opener, urlopen, HTTPError
 class TazDigiabo(BasicNewsRecipe):
@ -34,17 +38,17 @@ class TazDigiabo(BasicNewsRecipe):
        url = domain + "/epub/"
-        auth_handler = urllib2.HTTPBasicAuthHandler()
+        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(realm='TAZ-ABO',
                                  uri=url,
                                  user=self.username,
                                  passwd=self.password)
-        opener = urllib2.build_opener(auth_handler)
+        opener = build_opener(auth_handler)
-        urllib2.install_opener(opener)
+        install_opener(opener)
        try:
-            f = urllib2.urlopen(url)
+            f = urlopen(url)
-        except urllib2.HTTPError:
+        except HTTPError:
            self.report_progress(0, _('Can\'t login to download issue'))
            raise ValueError('Failed to login, check your username and'
                             ' password')
--- a/recipes/thenewcriterion.recipe
+++ b/recipes/thenewcriterion.recipe
@ -9,9 +9,12 @@ __copyright__ = '2019, Darko Miletic <darko.miletic at gmail.com>'
 www.newcriterion.com
 '''
-import urllib
+try:
-import urllib2
+    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 import re
 from mechanize import Request
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
@ -52,7 +55,7 @@ class TheNewCriterion(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open('https://www.newcriterion.com/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'login': self.username, 'password': self.password})
+            data = urlencode({'login': self.username, 'password': self.password})
            header = {
                'X-OCTOBER-REQUEST-HANDLER': 'onSignin',
                'X-Requested-With': 'XMLHttpRequest',
@ -60,7 +63,7 @@ class TheNewCriterion(BasicNewsRecipe):
                'X-OCTOBER-REQUEST-PARTIALS':'',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
            }
-            request = urllib2.Request('https://www.newcriterion.com/', data, header)
+            request = Request('https://www.newcriterion.com/', data, header)
            br.open(request)
        return br
--- a/recipes/times_online.recipe
+++ b/recipes/times_online.recipe
@ -3,8 +3,11 @@ __copyright__ = '2010-2017, Bobby Steel <bob at xdca.com>, Darko Miletic'
 '''
 www.thetimes.co.uk
 '''
-import urllib
 import html5lib
+try:
+    from urllib.parse import urlencode
+except ImportError:
+    from urllib import urlencode
 from lxml import html
 from calibre.web.feeds.news import BasicNewsRecipe
@ -79,7 +82,7 @@ class TimesOnline(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.thetimes.co.uk/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({
+            data = urlencode({
                'gotoUrl': self.INDEX,
                'username': self.username,
                'password': self.password})
--- a/recipes/tomshardware.recipe
+++ b/recipes/tomshardware.recipe
@ -4,7 +4,10 @@ __copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
 tomshardware.com/us
 '''
-import urllib
+try:
    from urllib.parse import urlencode
 except ImportError:
    from urllib import urlencode
 from calibre.web.feeds.recipes import BasicNewsRecipe
@ -30,7 +33,7 @@ class Tomshardware(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX + '/us/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
+            data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
                                     })
            br.open(self.LOGIN, data)
        return br
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@ -5,8 +5,11 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 import json
-from urllib import quote
+try:
+    from urllib.parse import quote
+except ImportError:
+    from urllib import quote
 from mechanize import Request
 from calibre import random_user_agent
--- a/recipes/wsj_free.recipe
+++ b/recipes/wsj_free.recipe
@ -5,6 +5,9 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 import json
 try:
    from urllib.parse import quote
 except ImportError:
    from urllib import quote
 from mechanize import Request