From 569c5762936cb61bc333c051e92b8c76eb2c4cf2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Apr 2019 13:57:21 +0530 Subject: [PATCH] py3: port use of urllib in recipes --- recipes/am730.recipe | 7 ++++-- recipes/ambito_financiero.recipe | 11 +++++---- recipes/azstarnet.recipe | 9 ++++--- recipes/barrons.recipe | 7 ++++-- recipes/brecha.recipe | 9 ++++--- recipes/clarin.recipe | 7 ++++-- recipes/economia.recipe | 5 +++- recipes/estadao.recipe | 13 +++-------- recipes/folha.recipe | 7 ++++-- recipes/galaxys_edge.recipe | 7 ++++-- recipes/haaretz_en.recipe | 7 ++++-- recipes/harpers_full.recipe | 7 ++++-- recipes/hbr.recipe | 5 +++- recipes/houston_chronicle.recipe | 22 ++++++----------- recipes/irish_times.recipe | 5 +++- recipes/jbpress.recipe | 9 ++++--- recipes/la_jornada.recipe | 5 +++- recipes/lanacion_chile.recipe | 7 ++++-- recipes/le_monde_sub_paper.recipe | 3 +-- recipes/lemonde_dip.recipe | 7 ++++-- recipes/modoros.recipe | 7 ++++-- recipes/now_toronto.recipe | 5 ++-- recipes/nursingtimes.recipe | 8 +++++-- recipes/oc_register.recipe | 5 +--- recipes/office_space.recipe | 7 ++++-- recipes/orlando_sentinel.recipe | 7 ++++-- recipes/readitlater.recipe | 35 ++++++++++++---------------- recipes/roger_ebert.recipe | 3 +-- recipes/roger_ebert_blog.recipe | 3 +-- recipes/sunday_times_magazine.recipe | 7 ++++-- recipes/taz.recipe | 16 ++++++++----- recipes/thenewcriterion.recipe | 11 +++++---- recipes/times_online.recipe | 7 ++++-- recipes/tomshardware.recipe | 7 ++++-- recipes/wsj.recipe | 5 +++- recipes/wsj_free.recipe | 5 +++- 36 files changed, 178 insertions(+), 119 deletions(-) diff --git a/recipes/am730.recipe b/recipes/am730.recipe index b670aa6a92..3885a9f585 100644 --- a/recipes/am730.recipe +++ b/recipes/am730.recipe @@ -11,7 +11,10 @@ Change Log: 2013/03/30 -- first version ''' -import urllib +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote from calibre.web.feeds.recipes import BasicNewsRecipe @@ -59,7 +62,7 @@ class AM730(BasicNewsRecipe): continue # not in same section title = href.split('/')[-1].split('-')[0] - title = urllib.unquote(title.encode('ASCII')) # .decode('utf-8') + title = unquote(title.encode('ASCII')) # .decode('utf-8') if self.debug: print(title) try: diff --git a/recipes/ambito_financiero.recipe b/recipes/ambito_financiero.recipe index 12d75bfa21..4e2e968cbd 100644 --- a/recipes/ambito_financiero.recipe +++ b/recipes/ambito_financiero.recipe @@ -9,7 +9,10 @@ http://www.ambito.com/diario/ ''' import time -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -66,7 +69,7 @@ class Ambito_Financiero(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - postdata = urllib.urlencode({ + postdata = urlencode({ 'txtUser': self.username, 'txtPassword': self.password }) @@ -110,7 +113,7 @@ class Ambito_Financiero(BasicNewsRecipe): if self.session_id: l, s, r = url.rpartition('.html') o, s1, artid = l.rpartition('_') - postdata = urllib.urlencode({'id': artid, 'id_session': self.session_id}) + postdata = urlencode({'id': artid, 'id_session': self.session_id}) response = self.browser.open( 'http://data.ambito.com/diario/cuerpo_noticia.asp', data=postdata, @@ -128,7 +131,7 @@ class Ambito_Financiero(BasicNewsRecipe): def cleanup(self): if self.session_id is not None: - postdata = 
urllib.urlencode({'session_id': self.session_id}) + postdata = urlencode({'session_id': self.session_id}) self.browser.open( 'http://www.ambito.com/diario/no-cache/login/x_logout.asp', data=postdata, timeout=self.timeout ) diff --git a/recipes/azstarnet.recipe b/recipes/azstarnet.recipe index 153d0a56e3..81918bdd99 100644 --- a/recipes/azstarnet.recipe +++ b/recipes/azstarnet.recipe @@ -4,12 +4,15 @@ __copyright__ = '2009-2010, Darko Miletic ' ''' azstarnet.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.news import BasicNewsRecipe class Azstarnet(BasicNewsRecipe): - title = 'Arizona Daily Star' + title = 'Arizona Daily Star' __author__ = 'Darko Miletic' description = 'news from Arizona' language = 'en' @@ -31,7 +34,7 @@ class Azstarnet(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://azstarnet.com/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/' + data = urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/' }) br.open('http://azstarnet.com/app/registration/proxy.php', data) return br diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index 7b57fa79a2..c4e72f8358 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -7,7 +7,10 @@ from __future__ import (unicode_literals, division, absolute_import, import json from mechanize import Request -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from calibre.web.feeds.news import BasicNewsRecipe @@ -26,7 +29,7 @@ class Barrons(BasicNewsRecipe): timefmt = ' [%a, %b %d, %Y]' use_embedded_content = False no_stylesheets = True - match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] + match_regexps = ['http://online.barrons.com/.*?html\\?mod=.*?|file:.*'] conversion_options = {'linearize_tables': True} # Don't grab articles more than 7 days old diff --git a/recipes/brecha.recipe b/recipes/brecha.recipe index d5884d2cc6..bb66966598 100644 --- a/recipes/brecha.recipe +++ b/recipes/brecha.recipe @@ -6,7 +6,10 @@ __copyright__ = '2012, Darko Miletic ' www.brecha.com.uy ''' -import urllib +try: + from urllib.parse import urlencode, quote +except ImportError: + from urllib import urlencode, quote from calibre.web.feeds.news import BasicNewsRecipe @@ -40,7 +43,7 @@ class Brecha(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.brecha.com.uy/index.php/acceder-miembros') if self.username is not None and self.password is not None: - data = urllib.urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password + data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password }) br.open( 'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data) @@ -74,5 +77,5 @@ class Brecha(BasicNewsRecipe): soup = self.index_to_soup('http://www.brecha.com.uy/index.php') for image in soup.findAll('img', alt=True): if image['alt'].startswith('Tapa '): - return 'http://www.brecha.com.uy' + urllib.quote(image['src']) + return 'http://www.brecha.com.uy' + quote(image['src']) return None diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe index 616cb7c8cd..ade7bcc4c5 100644 --- a/recipes/clarin.recipe +++ b/recipes/clarin.recipe @@ -9,7 +9,10 @@ __copyright__ = 
'2008-2016, Darko Miletic ' clarin.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -76,7 +79,7 @@ class Clarin(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', + data = urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', 'ingresar_ingresar_email_paseInputComponent': self.username, 'ingresar_ingresar_palabraClave_paseInputComponent': self.password, 'ingresar_ingresar_ingresar_paseButton': 'Ingresar', diff --git a/recipes/economia.recipe b/recipes/economia.recipe index dc18a93cfd..294e9bd452 100644 --- a/recipes/economia.recipe +++ b/recipes/economia.recipe @@ -1,5 +1,8 @@ from calibre.web.feeds.news import BasicNewsRecipe -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote class EconomiaMagazine(BasicNewsRecipe): diff --git a/recipes/estadao.recipe b/recipes/estadao.recipe index 5e7bf6b43f..05661c48ca 100644 --- a/recipes/estadao.recipe +++ b/recipes/estadao.recipe @@ -1,9 +1,8 @@ from __future__ import print_function from calibre.web.feeds.news import BasicNewsRecipe from datetime import datetime, timedelta -from calibre.ebooks.BeautifulSoup import Tag, BeautifulSoup +from calibre.ebooks.BeautifulSoup import Tag from calibre.utils.magick import Image, PixelWand -from urllib2 import Request, urlopen, URLError def new_tag(soup, name, attrs=()): @@ -113,20 +112,14 @@ class Estadao(BasicNewsRecipe): def get_cover_url(self): if self.THUMBALIZR_API: cover_url = self.CAPA - pedido = Request(self.CAPA) - pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' + - self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)') - pedido.add_header('Accept-Charset', self.ENCHTM) - pedido.add_header('Referer', self.SCREENSHOT) try: - resposta = urlopen(pedido) - soup = BeautifulSoup(resposta) + soup = self.index_to_soup(cover_url) cover_item = soup.find('body') if cover_item: cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \ '&url=' + self.SCREENSHOT + '&width=600&quality=90' return cover_url - except URLError: + except Exception: cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \ '&url=' + self.SCREENSHOT + '&width=600&quality=90' return cover_url diff --git a/recipes/folha.recipe b/recipes/folha.recipe index 7fd9407a38..a74e61c585 100644 --- a/recipes/folha.recipe +++ b/recipes/folha.recipe @@ -5,8 +5,11 @@ __copyright__ = '2012, Darko Miletic ' ''' www.folha.uol.com.br ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import quote_plus +except ImportError: + from urllib import quote_plus class Folha_de_s_paulo(BasicNewsRecipe): @@ -69,7 +72,7 @@ class Folha_de_s_paulo(BasicNewsRecipe): return curl def print_version(self, url): - return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url) + return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + quote_plus(url) def get_cover_url(self): soup = self.index_to_soup('http://www.folha.uol.com.br/') diff --git a/recipes/galaxys_edge.recipe b/recipes/galaxys_edge.recipe index 29a25ecd93..01ff26eea9 100644 --- a/recipes/galaxys_edge.recipe +++ b/recipes/galaxys_edge.recipe @@ -4,8 
+4,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera import re import shutil -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe @@ -46,7 +49,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe): self.log('\t\tdata-parent-id', parent_id) self.log('\t\tdata-cat-id', cat_id) self.log('\t\tdata-post-id', post_id) - data = urllib.urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id}) + data = urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id}) r=br.open('http://www.galaxysedge.com/wp-content/themes/galaxyedge/get_content.php', data) content_file = PersistentTemporaryFile(suffix='.html', dir=self.ctdir) content_file.write(r.read()) diff --git a/recipes/haaretz_en.recipe b/recipes/haaretz_en.recipe index 85ac3b513d..41bfa0f166 100644 --- a/recipes/haaretz_en.recipe +++ b/recipes/haaretz_en.recipe @@ -4,8 +4,11 @@ __copyright__ = '2010-2015, Darko Miletic ' www.haaretz.com ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode class Haaretz_en(BasicNewsRecipe): @@ -62,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.PREFIX) if self.username is not None and self.password is not None: - data = urllib.urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa + data = urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa }) br.open('https://sso.haaretz.com/sso/sso/signIn', data) return br diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe index fd7517600c..66bd84e77b 100644 --- a/recipes/harpers_full.recipe +++ b/recipes/harpers_full.recipe @@ -15,7 +15,10 @@ anything in username/password fields import time import re -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -61,7 +64,7 @@ class Harpers_full(BasicNewsRecipe): br.open('https://harpers.org/') if self.username is not None and self.password is not None: tt = time.localtime() * 1000 - data = urllib.urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt + data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt }) br.open(self.LOGIN, data) return br diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 9b1e132f7c..5d27597184 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -5,8 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe from css_selectors import Select from mechanize import Request -from urllib import urlencode import json +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode class HBR(BasicNewsRecipe): diff --git a/recipes/houston_chronicle.recipe b/recipes/houston_chronicle.recipe index 2a2ea05dc9..9808e9cd51 100644 --- a/recipes/houston_chronicle.recipe +++ b/recipes/houston_chronicle.recipe @@ -8,8 +8,6 @@ chron.com ''' 
import re import time -import urllib2 -import io from datetime import datetime import traceback import sys @@ -19,8 +17,6 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.utils.cleantext import clean_ascii_chars from calibre.ebooks.BeautifulSoup import NavigableString from calibre.utils.date import dt_factory, local_tz -from lxml import html -from lxml import etree regex_date_only = re.compile(r"""(?:January|February|March|April| {8}May|June|July|August|September|October|November| @@ -62,12 +58,8 @@ def validate_link(page, link, title): return link, title -def get_article_parsed(this_url): - page = urllib2.urlopen(this_url) - content = page.read() - parser = etree.HTMLParser() - parsed = html.parse(io.BytesIO(bytes(content)), parser) - return parsed +def get_article_parsed(index_to_soup, this_url): + return index_to_soup(this_url, as_tree=True) def sort_subject(element_list): @@ -91,8 +83,8 @@ def sort_subject(element_list): return combined_list -def get_links_from_section_page(page): - page_doc = get_article_parsed(base_url + page[1][0]) +def get_links_from_section_page(index_to_soup, page): + page_doc = get_article_parsed(index_to_soup, base_url + page[1][0]) els = page_doc.xpath(xpath_general) element_list = [] for el in els: @@ -110,13 +102,13 @@ def get_links_from_section_page(page): return [page[0], sorted_element_list] -def get_all_links_from_sections(): +def get_all_links_from_sections(index_to_soup): all_sections = [] article_set = set() final_dict = OrderedDict() for item in pages.items(): print("getting links from {0}".format(item[0])) - all_sections.append(get_links_from_section_page(item)) + all_sections.append(get_links_from_section_page(index_to_soup, item)) for section in all_sections: section_id = section[0] article_list = section[1] @@ -232,7 +224,7 @@ class HoustonChronicle(BasicNewsRecipe): self.timefmt = ' [%a, %d %b, %Y]' self.log('starting parse_index: ', time.strftime(self.timestampfmt)) feeds = [] - sections = get_all_links_from_sections() + sections = get_all_links_from_sections(self.index_to_soup) for section_id, article_list in sections.items(): self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list))) articles = [] diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 74e7d8d828..246473fc3d 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -7,7 +7,10 @@ import urlparse, re import json from uuid import uuid4 from mechanize import Request -from urllib import urlencode +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile diff --git a/recipes/jbpress.recipe b/recipes/jbpress.recipe index 5e7fc5eedd..97f120cc2b 100644 --- a/recipes/jbpress.recipe +++ b/recipes/jbpress.recipe @@ -1,4 +1,7 @@ -import urllib2 +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen import re from calibre.web.feeds.news import BasicNewsRecipe @@ -39,11 +42,11 @@ class JBPress(BasicNewsRecipe): return br def print_version(self, url): - url = urllib2.urlopen(url).geturl() # resolve redirect. + url = urlopen(url).geturl() # resolve redirect. 
return url.replace('/-/', '/print/') def preprocess_html(self, soup): - # remove breadcrumb + # remove breadcrumb h3s = soup.findAll('h3') for h3 in h3s: if re.compile('^JBpress>').match(h3.string): diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index 2b8289e078..ccdf9c347d 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -5,7 +5,10 @@ www.jornada.unam.mx ''' import re -from urllib import urlencode +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from urlparse import urlparse, urlunparse, parse_qs from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe diff --git a/recipes/lanacion_chile.recipe b/recipes/lanacion_chile.recipe index 558964a78a..a158732e81 100644 --- a/recipes/lanacion_chile.recipe +++ b/recipes/lanacion_chile.recipe @@ -5,7 +5,10 @@ __copyright__ = '2009, Darko Miletic ' ''' lanacion.cl ''' -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from calibre.web.feeds.news import BasicNewsRecipe @@ -36,7 +39,7 @@ class LaNacionChile(BasicNewsRecipe): feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')] def print_version(self, url): - toprint = urllib.quote(url, ':/') + toprint = quote(url, ':/') return u'http://www.lanacion.cl/cgi-bx/imprimir.cgi?_URL=' + toprint def preprocess_html(self, soup): diff --git a/recipes/le_monde_sub_paper.recipe b/recipes/le_monde_sub_paper.recipe index 294e91d4d2..c1fe795989 100644 --- a/recipes/le_monde_sub_paper.recipe +++ b/recipes/le_monde_sub_paper.recipe @@ -7,7 +7,6 @@ Lemonde.fr: Version abonnée ''' import os, zipfile, re, time -from urllib2 import HTTPError from calibre.constants import preferred_encoding from calibre.web.feeds.news import BasicNewsRecipe @@ -97,7 +96,7 @@ class LeMondeAbonne(BasicNewsRecipe): try: response = browser.open(url) continue - except HTTPError: + except Exception: second -= 24 * 60 * 60 tmp = PersistentTemporaryFile(suffix='.zip') diff --git a/recipes/lemonde_dip.recipe b/recipes/lemonde_dip.recipe index 716daecc79..8bc0eb4d81 100644 --- a/recipes/lemonde_dip.recipe +++ b/recipes/lemonde_dip.recipe @@ -4,7 +4,10 @@ __copyright__ = '2008-2011, Darko Miletic ' mondediplo.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -43,7 +46,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.LOGIN) if self.username is not None and self.password is not None: - data = urllib.urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter' + data = urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter' }) br.open(self.LOGIN, data) return br diff --git a/recipes/modoros.recipe b/recipes/modoros.recipe index 02af57584a..608afcef28 100644 --- a/recipes/modoros.recipe +++ b/recipes/modoros.recipe @@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.constants import config_dir, CONFIG_DIR_MODE import os import os.path -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from hashlib import md5 @@ -62,7 +65,7 @@ class ModorosBlogHu(BasicNewsRecipe): feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: - feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='') + feed_hash = quote(feed.title.encode('utf-8'), safe='') feed_fn = 
os.path.join(feed_dir, feed_hash) past_items = set() diff --git a/recipes/now_toronto.recipe b/recipes/now_toronto.recipe index 6693a880ae..20bfe03dd7 100644 --- a/recipes/now_toronto.recipe +++ b/recipes/now_toronto.recipe @@ -6,7 +6,6 @@ __license__ = 'GPL v3' __copyright__ = '2010, Starson17' import os -import urllib2 import zipfile from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -25,10 +24,10 @@ class NowToronto(BasicNewsRecipe): epub_feed = "http://feeds.feedburner.com/NowEpubEditions" soup = self.index_to_soup(epub_feed) url = soup.find(name='feedburner:origlink').string - f = urllib2.urlopen(url) + raw = self.index_to_soup(url, raw=True) tmp = PersistentTemporaryFile(suffix='.epub') self.report_progress(0, _('downloading epub')) - tmp.write(f.read()) + tmp.write(raw) tmp.close() zfile = zipfile.ZipFile(tmp.name, 'r') self.report_progress(0, _('extracting epub')) diff --git a/recipes/nursingtimes.recipe b/recipes/nursingtimes.recipe index d3ffcd5010..ca1d90ed59 100644 --- a/recipes/nursingtimes.recipe +++ b/recipes/nursingtimes.recipe @@ -4,7 +4,11 @@ __copyright__ = '2010, Darko Miletic ' www.nursingtimes.net ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode + from calibre.web.feeds.recipes import BasicNewsRecipe @@ -31,7 +35,7 @@ class NursingTimes(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.LOGIN) if self.username is not None and self.password is not None: - data = urllib.urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa + data = urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa }) br.open(self.LOGIN, data) return br diff --git a/recipes/oc_register.recipe b/recipes/oc_register.recipe index 69f44d5ad1..6ce5412818 100644 --- a/recipes/oc_register.recipe +++ b/recipes/oc_register.recipe @@ -3,7 +3,6 @@ from __future__ import unicode_literals, division, absolute_import, print_function import time import json -import urllib from pprint import pprint from calibre.web.feeds.news import BasicNewsRecipe @@ -87,9 +86,7 @@ class OrangeCountyRegister(BasicNewsRecipe): return cleanedHTML def loadURL(self, url): - socket = urllib.urlopen(url) - rawHTML = socket.read() - return rawHTML + return self.index_to_soup(url, raw=True) def htmlToAttribsDict(self, rawHTML): tokenStart = 'dataLayer.push({' diff --git a/recipes/office_space.recipe b/recipes/office_space.recipe index 17d10e848c..4a0977809e 100644 --- a/recipes/office_space.recipe +++ b/recipes/office_space.recipe @@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.constants import config_dir, CONFIG_DIR_MODE import os import os.path -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from hashlib import md5 @@ -86,7 +89,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe): feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: - feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='') + feed_hash = quote(feed.title.encode('utf-8'), safe='') feed_fn = os.path.join(feed_dir, feed_hash) past_items = set() diff --git a/recipes/orlando_sentinel.recipe b/recipes/orlando_sentinel.recipe index cebe4780f7..7962f7e213 100644 --- a/recipes/orlando_sentinel.recipe +++ 
b/recipes/orlando_sentinel.recipe @@ -1,5 +1,8 @@ -import urllib import re +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote from calibre.web.feeds.news import BasicNewsRecipe @@ -37,7 +40,7 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe): ans = None try: s = article.summary - ans = urllib.unquote( + ans = unquote( re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) except: pass diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 7729777f74..c7462be7ff 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -8,8 +8,14 @@ import json import operator import re import tempfile -import urllib -import urllib2 +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode +try: + from urllib.error import HTTPError, URLError +except ImportError: + from urllib2 import HTTPError, URLError __license__ = 'GPL v3' @@ -99,18 +105,8 @@ class Pocket(BasicNewsRecipe): self.get_auth_uri(), self.get_pull_articles_uri() ) - try: - request = urllib2.Request(fetch_url) - response = urllib2.urlopen(request) - pocket_feed = json.load(response)['list'] - except urllib2.HTTPError as e: - self.log.exception( - "Pocket returned an error: {0}".format(e.info())) - return [] - except urllib2.URLError as e: - self.log.exception( - "Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url)) - return [] + data = self.index_to_soup(fetch_url, raw=True) + pocket_feed = json.loads(data)['list'] if len(pocket_feed) < self.minimum_articles: self.mark_as_read_after_dl = False @@ -143,10 +139,10 @@ class Pocket(BasicNewsRecipe): fc_tag = soup.find('script', text=re.compile("formCheck")) fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1) article_id = url.split("/")[-1] - data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id}) + data = urlencode({'itemId': article_id, 'formCheck': fc_id}) try: response = self.browser.open(ajax_url, data) - except urllib2.HTTPError as e: + except HTTPError as e: self.log.exception("unable to get textview {0}".format(e.info())) raise e return json.load(response)['article'] @@ -186,13 +182,12 @@ class Pocket(BasicNewsRecipe): self.get_auth_uri() ) try: - request = urllib2.Request(mark_read_url) - urllib2.urlopen(request) - except urllib2.HTTPError as e: + self.browser.open_novisit(mark_read_url) + except HTTPError as e: self.log.exception( 'Pocket returned an error while archiving articles: {0}'.format(e)) return [] - except urllib2.URLError as e: + except URLError as e: self.log.exception( "Unable to connect to getpocket.com's modify api: {0}".format(e)) return [] diff --git a/recipes/roger_ebert.recipe b/recipes/roger_ebert.recipe index 6fd357caea..834c8dad1a 100644 --- a/recipes/roger_ebert.recipe +++ b/recipes/roger_ebert.recipe @@ -1,5 +1,4 @@ import re -import urllib2 from calibre.web.feeds.news import BasicNewsRecipe @@ -54,7 +53,7 @@ class Ebert(BasicNewsRecipe): self.report_progress(0, _('Fetching feed') + ' %s...' 
% (feedtitle if feedtitle else feedurl)) articles = [] - page = urllib2.urlopen(feedurl).read() + page = self.index_to_soup(feedurl, raw=True) if feedtitle == 'Reviews' or feedtitle == 'Great Movies': pattern = self.patternReviews diff --git a/recipes/roger_ebert_blog.recipe b/recipes/roger_ebert_blog.recipe index 3a50f91e77..7dae6f5d93 100644 --- a/recipes/roger_ebert_blog.recipe +++ b/recipes/roger_ebert_blog.recipe @@ -1,5 +1,4 @@ import re -import urllib2 import time from calibre.web.feeds.news import BasicNewsRecipe from calibre import strftime @@ -68,7 +67,7 @@ class Ebert(BasicNewsRecipe): self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl)) articles = [] - page = urllib2.urlopen(feedurl).read() + page = self.index_to_soup(feedurl, raw=True) if feedtitle == 'Reviews' or feedtitle == 'Great Movies': pattern = self.patternReviews diff --git a/recipes/sunday_times_magazine.recipe b/recipes/sunday_times_magazine.recipe index bf19e25c0e..b7bebff615 100644 --- a/recipes/sunday_times_magazine.recipe +++ b/recipes/sunday_times_magazine.recipe @@ -3,8 +3,11 @@ __copyright__ = '2010-2013, Darko Miletic ' ''' www.thetimes.co.uk/magazine/the-sunday-times-magazine/ ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode def classes(classes): @@ -49,7 +52,7 @@ class TimesOnline(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.thetimes.co.uk/') if self.username is not None and self.password is not None: - data = urllib.urlencode({ + data = urlencode({ 'gotoUrl': self.INDEX, 'username': self.username, 'password': self.password}) diff --git a/recipes/taz.recipe b/recipes/taz.recipe index cda4168beb..a4d97b067c 100644 --- a/recipes/taz.recipe +++ b/recipes/taz.recipe @@ -9,10 +9,14 @@ __docformat__ = 'restructuredtext de' www.taz.de/digiabo ''' import os -import urllib2 import zipfile from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile +try: + from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen + from urllib.error import HTTPError +except ImportError: + from urllib2 import HTTPBasicAuthHandler, build_opener, install_opener, urlopen, HTTPError class TazDigiabo(BasicNewsRecipe): @@ -34,17 +38,17 @@ class TazDigiabo(BasicNewsRecipe): url = domain + "/epub/" - auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler = HTTPBasicAuthHandler() auth_handler.add_password(realm='TAZ-ABO', uri=url, user=self.username, passwd=self.password) - opener = urllib2.build_opener(auth_handler) - urllib2.install_opener(opener) + opener = build_opener(auth_handler) + install_opener(opener) try: - f = urllib2.urlopen(url) - except urllib2.HTTPError: + f = urlopen(url) + except HTTPError: self.report_progress(0, _('Can\'t login to download issue')) raise ValueError('Failed to login, check your username and' ' password') diff --git a/recipes/thenewcriterion.recipe b/recipes/thenewcriterion.recipe index 9bb281aa4f..fa08a1d2b3 100644 --- a/recipes/thenewcriterion.recipe +++ b/recipes/thenewcriterion.recipe @@ -9,9 +9,12 @@ __copyright__ = '2019, Darko Miletic ' www.newcriterion.com ''' -import urllib -import urllib2 +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode import re +from mechanize import Request from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import 
PersistentTemporaryFile @@ -52,7 +55,7 @@ class TheNewCriterion(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('https://www.newcriterion.com/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'login': self.username, 'password': self.password}) + data = urlencode({'login': self.username, 'password': self.password}) header = { 'X-OCTOBER-REQUEST-HANDLER': 'onSignin', 'X-Requested-With': 'XMLHttpRequest', @@ -60,7 +63,7 @@ class TheNewCriterion(BasicNewsRecipe): 'X-OCTOBER-REQUEST-PARTIALS':'', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' } - request = urllib2.Request('https://www.newcriterion.com/', data, header) + request = Request('https://www.newcriterion.com/', data, header) br.open(request) return br diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe index 017b57f66b..720f5ac9dc 100644 --- a/recipes/times_online.recipe +++ b/recipes/times_online.recipe @@ -3,8 +3,11 @@ __copyright__ = '2010-2017, Bobby Steel , Darko Miletic' ''' www.thetimes.co.uk ''' -import urllib import html5lib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from lxml import html from calibre.web.feeds.news import BasicNewsRecipe @@ -79,7 +82,7 @@ class TimesOnline(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.thetimes.co.uk/') if self.username is not None and self.password is not None: - data = urllib.urlencode({ + data = urlencode({ 'gotoUrl': self.INDEX, 'username': self.username, 'password': self.password}) diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe index 27b33d8988..f8e4ef959e 100644 --- a/recipes/tomshardware.recipe +++ b/recipes/tomshardware.recipe @@ -4,7 +4,10 @@ __copyright__ = '2008-2013, Darko Miletic ' tomshardware.com/us ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.recipes import BasicNewsRecipe @@ -30,7 +33,7 @@ class Tomshardware(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX + '/us/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password + data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password }) br.open(self.LOGIN, data) return br diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 8cf03260b7..3b2b9e503b 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -5,8 +5,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from mechanize import Request from calibre import random_user_agent diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index 831c483c41..b140dc072d 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -5,7 +5,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from mechanize import Request
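
Note: the recurring change in this patch is a conditional import so each recipe works under both Python 2 and Python 3. The following is a minimal sketch of that shim, not code from any single recipe; the login field names and values in the usage line are placeholders.

# Compatibility shim applied throughout this patch: on Python 3 the helpers
# live in urllib.parse / urllib.request / urllib.error, on Python 2 they live
# in urllib and urllib2.
try:
    from urllib.parse import quote, quote_plus, unquote, urlencode
    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError
except ImportError:
    from urllib import quote, quote_plus, unquote, urlencode
    from urllib2 import urlopen, HTTPError, URLError

# Typical call site after the port: build POST data for a recipe login form.
# The field names and credentials below are placeholders for illustration.
data = urlencode({'username': 'user@example.com', 'password': 'secret'})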
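
Several recipes (houston_chronicle, now_toronto, oc_register, readitlater, roger_ebert) also drop direct urllib2.urlopen() calls in favour of calibre's own fetch helper, which goes through the recipe's configured browser. A rough sketch of that pattern follows; the recipe class and its method names are hypothetical, only the index_to_soup() keyword arguments mirror the patch.

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    # Hypothetical recipe, used only to illustrate the fetch helper.
    title = 'Example'

    def load_raw(self, url):
        # raw=True returns the undecoded response body, matching what
        # urllib2.urlopen(url).read() used to provide.
        return self.index_to_soup(url, raw=True)

    def load_tree(self, url):
        # as_tree=True returns a parsed lxml tree, replacing the manual
        # etree.HTMLParser() + html.parse() code removed from
        # houston_chronicle above.
        return self.index_to_soup(url, as_tree=True)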
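
Finally, where a recipe built a urllib2.Request by hand (thenewcriterion above), the patch switches to mechanize.Request, which takes the same url/data/headers arguments and can be passed straight to the recipe browser's open(). A minimal sketch, assuming a placeholder URL, form fields and header set:

try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

from mechanize import Request


def post_login(br, username, password):
    # br is the mechanize browser returned by BasicNewsRecipe.get_browser().
    # The URL, form fields and header below are placeholders for illustration.
    data = urlencode({'login': username, 'password': password})
    headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}
    return br.open(Request('https://example.com/login', data, headers))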