From 569c5762936cb61bc333c051e92b8c76eb2c4cf2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Apr 2019 13:57:21 +0530 Subject: [PATCH] py3: port use of urllib in recipes --- recipes/am730.recipe | 7 ++++-- recipes/ambito_financiero.recipe | 11 +++++---- recipes/azstarnet.recipe | 9 ++++--- recipes/barrons.recipe | 7 ++++-- recipes/brecha.recipe | 9 ++++--- recipes/clarin.recipe | 7 ++++-- recipes/economia.recipe | 5 +++- recipes/estadao.recipe | 13 +++-------- recipes/folha.recipe | 7 ++++-- recipes/galaxys_edge.recipe | 7 ++++-- recipes/haaretz_en.recipe | 7 ++++-- recipes/harpers_full.recipe | 7 ++++-- recipes/hbr.recipe | 5 +++- recipes/houston_chronicle.recipe | 22 ++++++----------- recipes/irish_times.recipe | 5 +++- recipes/jbpress.recipe | 9 ++++--- recipes/la_jornada.recipe | 5 +++- recipes/lanacion_chile.recipe | 7 ++++-- recipes/le_monde_sub_paper.recipe | 3 +-- recipes/lemonde_dip.recipe | 7 ++++-- recipes/modoros.recipe | 7 ++++-- recipes/now_toronto.recipe | 5 ++-- recipes/nursingtimes.recipe | 8 +++++-- recipes/oc_register.recipe | 5 +--- recipes/office_space.recipe | 7 ++++-- recipes/orlando_sentinel.recipe | 7 ++++-- recipes/readitlater.recipe | 35 ++++++++++++---------------- recipes/roger_ebert.recipe | 3 +-- recipes/roger_ebert_blog.recipe | 3 +-- recipes/sunday_times_magazine.recipe | 7 ++++-- recipes/taz.recipe | 16 ++++++++----- recipes/thenewcriterion.recipe | 11 +++++---- recipes/times_online.recipe | 7 ++++-- recipes/tomshardware.recipe | 7 ++++-- recipes/wsj.recipe | 5 +++- recipes/wsj_free.recipe | 5 +++- 36 files changed, 178 insertions(+), 119 deletions(-) diff --git a/recipes/am730.recipe b/recipes/am730.recipe index b670aa6a92..3885a9f585 100644 --- a/recipes/am730.recipe +++ b/recipes/am730.recipe @@ -11,7 +11,10 @@ Change Log: 2013/03/30 -- first version ''' -import urllib +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote from calibre.web.feeds.recipes import BasicNewsRecipe @@ -59,7 +62,7 @@ class AM730(BasicNewsRecipe): continue # not in same section title = href.split('/')[-1].split('-')[0] - title = urllib.unquote(title.encode('ASCII')) # .decode('utf-8') + title = unquote(title.encode('ASCII')) # .decode('utf-8') if self.debug: print(title) try: diff --git a/recipes/ambito_financiero.recipe b/recipes/ambito_financiero.recipe index 12d75bfa21..4e2e968cbd 100644 --- a/recipes/ambito_financiero.recipe +++ b/recipes/ambito_financiero.recipe @@ -9,7 +9,10 @@ http://www.ambito.com/diario/ ''' import time -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -66,7 +69,7 @@ class Ambito_Financiero(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - postdata = urllib.urlencode({ + postdata = urlencode({ 'txtUser': self.username, 'txtPassword': self.password }) @@ -110,7 +113,7 @@ class Ambito_Financiero(BasicNewsRecipe): if self.session_id: l, s, r = url.rpartition('.html') o, s1, artid = l.rpartition('_') - postdata = urllib.urlencode({'id': artid, 'id_session': self.session_id}) + postdata = urlencode({'id': artid, 'id_session': self.session_id}) response = self.browser.open( 'http://data.ambito.com/diario/cuerpo_noticia.asp', data=postdata, @@ -128,7 +131,7 @@ class Ambito_Financiero(BasicNewsRecipe): def cleanup(self): if self.session_id is not None: - postdata = 
urllib.urlencode({'session_id': self.session_id}) + postdata = urlencode({'session_id': self.session_id}) self.browser.open( 'http://www.ambito.com/diario/no-cache/login/x_logout.asp', data=postdata, timeout=self.timeout ) diff --git a/recipes/azstarnet.recipe b/recipes/azstarnet.recipe index 153d0a56e3..81918bdd99 100644 --- a/recipes/azstarnet.recipe +++ b/recipes/azstarnet.recipe @@ -4,12 +4,15 @@ __copyright__ = '2009-2010, Darko Miletic ' ''' azstarnet.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.news import BasicNewsRecipe class Azstarnet(BasicNewsRecipe): - title = 'Arizona Daily Star' + title = 'Arizona Daily Star' __author__ = 'Darko Miletic' description = 'news from Arizona' language = 'en' @@ -31,7 +34,7 @@ class Azstarnet(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://azstarnet.com/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/' + data = urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/' }) br.open('http://azstarnet.com/app/registration/proxy.php', data) return br diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index 7b57fa79a2..c4e72f8358 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -7,7 +7,10 @@ from __future__ import (unicode_literals, division, absolute_import, import json from mechanize import Request -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from calibre.web.feeds.news import BasicNewsRecipe @@ -26,7 +29,7 @@ class Barrons(BasicNewsRecipe): timefmt = ' [%a, %b %d, %Y]' use_embedded_content = False no_stylesheets = True - match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*'] + match_regexps = ['http://online.barrons.com/.*?html\\?mod=.*?|file:.*'] conversion_options = {'linearize_tables': True} # Don't grab articles more than 7 days old diff --git a/recipes/brecha.recipe b/recipes/brecha.recipe index d5884d2cc6..bb66966598 100644 --- a/recipes/brecha.recipe +++ b/recipes/brecha.recipe @@ -6,7 +6,10 @@ __copyright__ = '2012, Darko Miletic ' www.brecha.com.uy ''' -import urllib +try: + from urllib.parse import urlencode, quote +except ImportError: + from urllib import urlencode, quote from calibre.web.feeds.news import BasicNewsRecipe @@ -40,7 +43,7 @@ class Brecha(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.brecha.com.uy/index.php/acceder-miembros') if self.username is not None and self.password is not None: - data = urllib.urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password + data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password }) br.open( 'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data) @@ -74,5 +77,5 @@ class Brecha(BasicNewsRecipe): soup = self.index_to_soup('http://www.brecha.com.uy/index.php') for image in soup.findAll('img', alt=True): if image['alt'].startswith('Tapa '): - return 'http://www.brecha.com.uy' + urllib.quote(image['src']) + return 'http://www.brecha.com.uy' + quote(image['src']) return None diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe index 616cb7c8cd..ade7bcc4c5 100644 --- a/recipes/clarin.recipe +++ b/recipes/clarin.recipe @@ -9,7 +9,10 @@ __copyright__ = 
'2008-2016, Darko Miletic ' clarin.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -76,7 +79,7 @@ class Clarin(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', + data = urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', 'ingresar_ingresar_email_paseInputComponent': self.username, 'ingresar_ingresar_palabraClave_paseInputComponent': self.password, 'ingresar_ingresar_ingresar_paseButton': 'Ingresar', diff --git a/recipes/economia.recipe b/recipes/economia.recipe index dc18a93cfd..294e9bd452 100644 --- a/recipes/economia.recipe +++ b/recipes/economia.recipe @@ -1,5 +1,8 @@ from calibre.web.feeds.news import BasicNewsRecipe -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote class EconomiaMagazine(BasicNewsRecipe): diff --git a/recipes/estadao.recipe b/recipes/estadao.recipe index 5e7bf6b43f..05661c48ca 100644 --- a/recipes/estadao.recipe +++ b/recipes/estadao.recipe @@ -1,9 +1,8 @@ from __future__ import print_function from calibre.web.feeds.news import BasicNewsRecipe from datetime import datetime, timedelta -from calibre.ebooks.BeautifulSoup import Tag, BeautifulSoup +from calibre.ebooks.BeautifulSoup import Tag from calibre.utils.magick import Image, PixelWand -from urllib2 import Request, urlopen, URLError def new_tag(soup, name, attrs=()): @@ -113,20 +112,14 @@ class Estadao(BasicNewsRecipe): def get_cover_url(self): if self.THUMBALIZR_API: cover_url = self.CAPA - pedido = Request(self.CAPA) - pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' + - self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)') - pedido.add_header('Accept-Charset', self.ENCHTM) - pedido.add_header('Referer', self.SCREENSHOT) try: - resposta = urlopen(pedido) - soup = BeautifulSoup(resposta) + soup = self.index_to_soup(cover_url) cover_item = soup.find('body') if cover_item: cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \ '&url=' + self.SCREENSHOT + '&width=600&quality=90' return cover_url - except URLError: + except Exception: cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \ '&url=' + self.SCREENSHOT + '&width=600&quality=90' return cover_url diff --git a/recipes/folha.recipe b/recipes/folha.recipe index 7fd9407a38..a74e61c585 100644 --- a/recipes/folha.recipe +++ b/recipes/folha.recipe @@ -5,8 +5,11 @@ __copyright__ = '2012, Darko Miletic ' ''' www.folha.uol.com.br ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import quote_plus +except ImportError: + from urllib import quote_plus class Folha_de_s_paulo(BasicNewsRecipe): @@ -69,7 +72,7 @@ class Folha_de_s_paulo(BasicNewsRecipe): return curl def print_version(self, url): - return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url) + return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + quote_plus(url) def get_cover_url(self): soup = self.index_to_soup('http://www.folha.uol.com.br/') diff --git a/recipes/galaxys_edge.recipe b/recipes/galaxys_edge.recipe index 29a25ecd93..01ff26eea9 100644 --- a/recipes/galaxys_edge.recipe +++ b/recipes/galaxys_edge.recipe @@ -4,8 
+4,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera import re import shutil -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe @@ -46,7 +49,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe): self.log('\t\tdata-parent-id', parent_id) self.log('\t\tdata-cat-id', cat_id) self.log('\t\tdata-post-id', post_id) - data = urllib.urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id}) + data = urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id}) r=br.open('http://www.galaxysedge.com/wp-content/themes/galaxyedge/get_content.php', data) content_file = PersistentTemporaryFile(suffix='.html', dir=self.ctdir) content_file.write(r.read()) diff --git a/recipes/haaretz_en.recipe b/recipes/haaretz_en.recipe index 85ac3b513d..41bfa0f166 100644 --- a/recipes/haaretz_en.recipe +++ b/recipes/haaretz_en.recipe @@ -4,8 +4,11 @@ __copyright__ = '2010-2015, Darko Miletic ' www.haaretz.com ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode class Haaretz_en(BasicNewsRecipe): @@ -62,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.PREFIX) if self.username is not None and self.password is not None: - data = urllib.urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa + data = urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa }) br.open('https://sso.haaretz.com/sso/sso/signIn', data) return br diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe index fd7517600c..66bd84e77b 100644 --- a/recipes/harpers_full.recipe +++ b/recipes/harpers_full.recipe @@ -15,7 +15,10 @@ anything in username/password fields import time import re -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -61,7 +64,7 @@ class Harpers_full(BasicNewsRecipe): br.open('https://harpers.org/') if self.username is not None and self.password is not None: tt = time.localtime() * 1000 - data = urllib.urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt + data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt }) br.open(self.LOGIN, data) return br diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 9b1e132f7c..5d27597184 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -5,8 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe from css_selectors import Select from mechanize import Request -from urllib import urlencode import json +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode class HBR(BasicNewsRecipe): diff --git a/recipes/houston_chronicle.recipe b/recipes/houston_chronicle.recipe index 2a2ea05dc9..9808e9cd51 100644 --- a/recipes/houston_chronicle.recipe +++ b/recipes/houston_chronicle.recipe @@ -8,8 +8,6 @@ chron.com ''' 
import re import time -import urllib2 -import io from datetime import datetime import traceback import sys @@ -19,8 +17,6 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.utils.cleantext import clean_ascii_chars from calibre.ebooks.BeautifulSoup import NavigableString from calibre.utils.date import dt_factory, local_tz -from lxml import html -from lxml import etree regex_date_only = re.compile(r"""(?:January|February|March|April| {8}May|June|July|August|September|October|November| @@ -62,12 +58,8 @@ def validate_link(page, link, title): return link, title -def get_article_parsed(this_url): - page = urllib2.urlopen(this_url) - content = page.read() - parser = etree.HTMLParser() - parsed = html.parse(io.BytesIO(bytes(content)), parser) - return parsed +def get_article_parsed(index_to_soup, this_url): + return index_to_soup(this_url, as_tree=True) def sort_subject(element_list): @@ -91,8 +83,8 @@ def sort_subject(element_list): return combined_list -def get_links_from_section_page(page): - page_doc = get_article_parsed(base_url + page[1][0]) +def get_links_from_section_page(index_to_soup, page): + page_doc = get_article_parsed(index_to_soup, base_url + page[1][0]) els = page_doc.xpath(xpath_general) element_list = [] for el in els: @@ -110,13 +102,13 @@ def get_links_from_section_page(page): return [page[0], sorted_element_list] -def get_all_links_from_sections(): +def get_all_links_from_sections(index_to_soup): all_sections = [] article_set = set() final_dict = OrderedDict() for item in pages.items(): print("getting links from {0}".format(item[0])) - all_sections.append(get_links_from_section_page(item)) + all_sections.append(get_links_from_section_page(index_to_soup, item)) for section in all_sections: section_id = section[0] article_list = section[1] @@ -232,7 +224,7 @@ class HoustonChronicle(BasicNewsRecipe): self.timefmt = ' [%a, %d %b, %Y]' self.log('starting parse_index: ', time.strftime(self.timestampfmt)) feeds = [] - sections = get_all_links_from_sections() + sections = get_all_links_from_sections(self.index_to_soup) for section_id, article_list in sections.items(): self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list))) articles = [] diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 74e7d8d828..246473fc3d 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -7,7 +7,10 @@ import urlparse, re import json from uuid import uuid4 from mechanize import Request -from urllib import urlencode +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile diff --git a/recipes/jbpress.recipe b/recipes/jbpress.recipe index 5e7fc5eedd..97f120cc2b 100644 --- a/recipes/jbpress.recipe +++ b/recipes/jbpress.recipe @@ -1,4 +1,7 @@ -import urllib2 +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen import re from calibre.web.feeds.news import BasicNewsRecipe @@ -39,11 +42,11 @@ class JBPress(BasicNewsRecipe): return br def print_version(self, url): - url = urllib2.urlopen(url).geturl() # resolve redirect. + url = urlopen(url).geturl() # resolve redirect. 
return url.replace('/-/', '/print/') def preprocess_html(self, soup): - # remove breadcrumb + # remove breadcrumb h3s = soup.findAll('h3') for h3 in h3s: if re.compile('^JBpress>').match(h3.string): diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index 2b8289e078..ccdf9c347d 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -5,7 +5,10 @@ www.jornada.unam.mx ''' import re -from urllib import urlencode +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from urlparse import urlparse, urlunparse, parse_qs from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe diff --git a/recipes/lanacion_chile.recipe b/recipes/lanacion_chile.recipe index 558964a78a..a158732e81 100644 --- a/recipes/lanacion_chile.recipe +++ b/recipes/lanacion_chile.recipe @@ -5,7 +5,10 @@ __copyright__ = '2009, Darko Miletic ' ''' lanacion.cl ''' -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from calibre.web.feeds.news import BasicNewsRecipe @@ -36,7 +39,7 @@ class LaNacionChile(BasicNewsRecipe): feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')] def print_version(self, url): - toprint = urllib.quote(url, ':/') + toprint = quote(url, ':/') return u'http://www.lanacion.cl/cgi-bx/imprimir.cgi?_URL=' + toprint def preprocess_html(self, soup): diff --git a/recipes/le_monde_sub_paper.recipe b/recipes/le_monde_sub_paper.recipe index 294e91d4d2..c1fe795989 100644 --- a/recipes/le_monde_sub_paper.recipe +++ b/recipes/le_monde_sub_paper.recipe @@ -7,7 +7,6 @@ Lemonde.fr: Version abonnée ''' import os, zipfile, re, time -from urllib2 import HTTPError from calibre.constants import preferred_encoding from calibre.web.feeds.news import BasicNewsRecipe @@ -97,7 +96,7 @@ class LeMondeAbonne(BasicNewsRecipe): try: response = browser.open(url) continue - except HTTPError: + except Exception: second -= 24 * 60 * 60 tmp = PersistentTemporaryFile(suffix='.zip') diff --git a/recipes/lemonde_dip.recipe b/recipes/lemonde_dip.recipe index 716daecc79..8bc0eb4d81 100644 --- a/recipes/lemonde_dip.recipe +++ b/recipes/lemonde_dip.recipe @@ -4,7 +4,10 @@ __copyright__ = '2008-2011, Darko Miletic ' mondediplo.com ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -43,7 +46,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.LOGIN) if self.username is not None and self.password is not None: - data = urllib.urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter' + data = urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter' }) br.open(self.LOGIN, data) return br diff --git a/recipes/modoros.recipe b/recipes/modoros.recipe index 02af57584a..608afcef28 100644 --- a/recipes/modoros.recipe +++ b/recipes/modoros.recipe @@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.constants import config_dir, CONFIG_DIR_MODE import os import os.path -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from hashlib import md5 @@ -62,7 +65,7 @@ class ModorosBlogHu(BasicNewsRecipe): feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: - feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='') + feed_hash = quote(feed.title.encode('utf-8'), safe='') feed_fn = 
os.path.join(feed_dir, feed_hash) past_items = set() diff --git a/recipes/now_toronto.recipe b/recipes/now_toronto.recipe index 6693a880ae..20bfe03dd7 100644 --- a/recipes/now_toronto.recipe +++ b/recipes/now_toronto.recipe @@ -6,7 +6,6 @@ __license__ = 'GPL v3' __copyright__ = '2010, Starson17' import os -import urllib2 import zipfile from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -25,10 +24,10 @@ class NowToronto(BasicNewsRecipe): epub_feed = "http://feeds.feedburner.com/NowEpubEditions" soup = self.index_to_soup(epub_feed) url = soup.find(name='feedburner:origlink').string - f = urllib2.urlopen(url) + raw = self.index_to_soup(url, raw=True) tmp = PersistentTemporaryFile(suffix='.epub') self.report_progress(0, _('downloading epub')) - tmp.write(f.read()) + tmp.write(raw) tmp.close() zfile = zipfile.ZipFile(tmp.name, 'r') self.report_progress(0, _('extracting epub')) diff --git a/recipes/nursingtimes.recipe b/recipes/nursingtimes.recipe index d3ffcd5010..ca1d90ed59 100644 --- a/recipes/nursingtimes.recipe +++ b/recipes/nursingtimes.recipe @@ -4,7 +4,11 @@ __copyright__ = '2010, Darko Miletic ' www.nursingtimes.net ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode + from calibre.web.feeds.recipes import BasicNewsRecipe @@ -31,7 +35,7 @@ class NursingTimes(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.LOGIN) if self.username is not None and self.password is not None: - data = urllib.urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa + data = urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa }) br.open(self.LOGIN, data) return br diff --git a/recipes/oc_register.recipe b/recipes/oc_register.recipe index 69f44d5ad1..6ce5412818 100644 --- a/recipes/oc_register.recipe +++ b/recipes/oc_register.recipe @@ -3,7 +3,6 @@ from __future__ import unicode_literals, division, absolute_import, print_function import time import json -import urllib from pprint import pprint from calibre.web.feeds.news import BasicNewsRecipe @@ -87,9 +86,7 @@ class OrangeCountyRegister(BasicNewsRecipe): return cleanedHTML def loadURL(self, url): - socket = urllib.urlopen(url) - rawHTML = socket.read() - return rawHTML + return self.index_to_soup(url, raw=True) def htmlToAttribsDict(self, rawHTML): tokenStart = 'dataLayer.push({' diff --git a/recipes/office_space.recipe b/recipes/office_space.recipe index 17d10e848c..4a0977809e 100644 --- a/recipes/office_space.recipe +++ b/recipes/office_space.recipe @@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.constants import config_dir, CONFIG_DIR_MODE import os import os.path -import urllib +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from hashlib import md5 @@ -86,7 +89,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe): feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: - feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='') + feed_hash = quote(feed.title.encode('utf-8'), safe='') feed_fn = os.path.join(feed_dir, feed_hash) past_items = set() diff --git a/recipes/orlando_sentinel.recipe b/recipes/orlando_sentinel.recipe index cebe4780f7..7962f7e213 100644 --- a/recipes/orlando_sentinel.recipe +++ 
b/recipes/orlando_sentinel.recipe @@ -1,5 +1,8 @@ -import urllib import re +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote from calibre.web.feeds.news import BasicNewsRecipe @@ -37,7 +40,7 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe): ans = None try: s = article.summary - ans = urllib.unquote( + ans = unquote( re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) except: pass diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 7729777f74..c7462be7ff 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -8,8 +8,14 @@ import json import operator import re import tempfile -import urllib -import urllib2 +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode +try: + from urllib.error import HTTPError, URLError +except ImportError: + from urllib2 import HTTPError, URLError __license__ = 'GPL v3' @@ -99,18 +105,8 @@ class Pocket(BasicNewsRecipe): self.get_auth_uri(), self.get_pull_articles_uri() ) - try: - request = urllib2.Request(fetch_url) - response = urllib2.urlopen(request) - pocket_feed = json.load(response)['list'] - except urllib2.HTTPError as e: - self.log.exception( - "Pocket returned an error: {0}".format(e.info())) - return [] - except urllib2.URLError as e: - self.log.exception( - "Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url)) - return [] + data = self.index_to_soup(fetch_url, raw=True) + pocket_feed = json.loads(data)['list'] if len(pocket_feed) < self.minimum_articles: self.mark_as_read_after_dl = False @@ -143,10 +139,10 @@ class Pocket(BasicNewsRecipe): fc_tag = soup.find('script', text=re.compile("formCheck")) fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1) article_id = url.split("/")[-1] - data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id}) + data = urlencode({'itemId': article_id, 'formCheck': fc_id}) try: response = self.browser.open(ajax_url, data) - except urllib2.HTTPError as e: + except HTTPError as e: self.log.exception("unable to get textview {0}".format(e.info())) raise e return json.load(response)['article'] @@ -186,13 +182,12 @@ class Pocket(BasicNewsRecipe): self.get_auth_uri() ) try: - request = urllib2.Request(mark_read_url) - urllib2.urlopen(request) - except urllib2.HTTPError as e: + self.browser.open_novisit(mark_read_url) + except HTTPError as e: self.log.exception( 'Pocket returned an error while archiving articles: {0}'.format(e)) return [] - except urllib2.URLError as e: + except URLError as e: self.log.exception( "Unable to connect to getpocket.com's modify api: {0}".format(e)) return [] diff --git a/recipes/roger_ebert.recipe b/recipes/roger_ebert.recipe index 6fd357caea..834c8dad1a 100644 --- a/recipes/roger_ebert.recipe +++ b/recipes/roger_ebert.recipe @@ -1,5 +1,4 @@ import re -import urllib2 from calibre.web.feeds.news import BasicNewsRecipe @@ -54,7 +53,7 @@ class Ebert(BasicNewsRecipe): self.report_progress(0, _('Fetching feed') + ' %s...' 
% (feedtitle if feedtitle else feedurl)) articles = [] - page = urllib2.urlopen(feedurl).read() + page = self.index_to_soup(feedurl, raw=True) if feedtitle == 'Reviews' or feedtitle == 'Great Movies': pattern = self.patternReviews diff --git a/recipes/roger_ebert_blog.recipe b/recipes/roger_ebert_blog.recipe index 3a50f91e77..7dae6f5d93 100644 --- a/recipes/roger_ebert_blog.recipe +++ b/recipes/roger_ebert_blog.recipe @@ -1,5 +1,4 @@ import re -import urllib2 import time from calibre.web.feeds.news import BasicNewsRecipe from calibre import strftime @@ -68,7 +67,7 @@ class Ebert(BasicNewsRecipe): self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl)) articles = [] - page = urllib2.urlopen(feedurl).read() + page = self.index_to_soup(feedurl, raw=True) if feedtitle == 'Reviews' or feedtitle == 'Great Movies': pattern = self.patternReviews diff --git a/recipes/sunday_times_magazine.recipe b/recipes/sunday_times_magazine.recipe index bf19e25c0e..b7bebff615 100644 --- a/recipes/sunday_times_magazine.recipe +++ b/recipes/sunday_times_magazine.recipe @@ -3,8 +3,11 @@ __copyright__ = '2010-2013, Darko Miletic ' ''' www.thetimes.co.uk/magazine/the-sunday-times-magazine/ ''' -import urllib from calibre.web.feeds.news import BasicNewsRecipe +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode def classes(classes): @@ -49,7 +52,7 @@ class TimesOnline(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.thetimes.co.uk/') if self.username is not None and self.password is not None: - data = urllib.urlencode({ + data = urlencode({ 'gotoUrl': self.INDEX, 'username': self.username, 'password': self.password}) diff --git a/recipes/taz.recipe b/recipes/taz.recipe index cda4168beb..a4d97b067c 100644 --- a/recipes/taz.recipe +++ b/recipes/taz.recipe @@ -9,10 +9,14 @@ __docformat__ = 'restructuredtext de' www.taz.de/digiabo ''' import os -import urllib2 import zipfile from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile +try: + from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen + from urllib.error import HTTPError +except ImportError: + from urllib2 import HTTPBasicAuthHandler, build_opener, install_opener, urlopen, HTTPError class TazDigiabo(BasicNewsRecipe): @@ -34,17 +38,17 @@ class TazDigiabo(BasicNewsRecipe): url = domain + "/epub/" - auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler = HTTPBasicAuthHandler() auth_handler.add_password(realm='TAZ-ABO', uri=url, user=self.username, passwd=self.password) - opener = urllib2.build_opener(auth_handler) - urllib2.install_opener(opener) + opener = build_opener(auth_handler) + install_opener(opener) try: - f = urllib2.urlopen(url) - except urllib2.HTTPError: + f = urlopen(url) + except HTTPError: self.report_progress(0, _('Can\'t login to download issue')) raise ValueError('Failed to login, check your username and' ' password') diff --git a/recipes/thenewcriterion.recipe b/recipes/thenewcriterion.recipe index 9bb281aa4f..fa08a1d2b3 100644 --- a/recipes/thenewcriterion.recipe +++ b/recipes/thenewcriterion.recipe @@ -9,9 +9,12 @@ __copyright__ = '2019, Darko Miletic ' www.newcriterion.com ''' -import urllib -import urllib2 +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode import re +from mechanize import Request from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import 
PersistentTemporaryFile @@ -52,7 +55,7 @@ class TheNewCriterion(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('https://www.newcriterion.com/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'login': self.username, 'password': self.password}) + data = urlencode({'login': self.username, 'password': self.password}) header = { 'X-OCTOBER-REQUEST-HANDLER': 'onSignin', 'X-Requested-With': 'XMLHttpRequest', @@ -60,7 +63,7 @@ class TheNewCriterion(BasicNewsRecipe): 'X-OCTOBER-REQUEST-PARTIALS':'', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' } - request = urllib2.Request('https://www.newcriterion.com/', data, header) + request = Request('https://www.newcriterion.com/', data, header) br.open(request) return br diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe index 017b57f66b..720f5ac9dc 100644 --- a/recipes/times_online.recipe +++ b/recipes/times_online.recipe @@ -3,8 +3,11 @@ __copyright__ = '2010-2017, Bobby Steel , Darko Miletic' ''' www.thetimes.co.uk ''' -import urllib import html5lib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from lxml import html from calibre.web.feeds.news import BasicNewsRecipe @@ -79,7 +82,7 @@ class TimesOnline(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open('http://www.thetimes.co.uk/') if self.username is not None and self.password is not None: - data = urllib.urlencode({ + data = urlencode({ 'gotoUrl': self.INDEX, 'username': self.username, 'password': self.password}) diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe index 27b33d8988..f8e4ef959e 100644 --- a/recipes/tomshardware.recipe +++ b/recipes/tomshardware.recipe @@ -4,7 +4,10 @@ __copyright__ = '2008-2013, Darko Miletic ' tomshardware.com/us ''' -import urllib +try: + from urllib.parse import urlencode +except ImportError: + from urllib import urlencode from calibre.web.feeds.recipes import BasicNewsRecipe @@ -30,7 +33,7 @@ class Tomshardware(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX + '/us/') if self.username is not None and self.password is not None: - data = urllib.urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password + data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password }) br.open(self.LOGIN, data) return br diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 8cf03260b7..3b2b9e503b 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -5,8 +5,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from mechanize import Request from calibre import random_user_agent diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index 831c483c41..b140dc072d 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -5,7 +5,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -from urllib import quote +try: + from urllib.parse import quote +except ImportError: + from urllib import quote from mechanize import Request
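
Note: the recurring change in this patch is a conditional import so each recipe works under both Python 2 and Python 3. The following is a minimal sketch of that shim, not code from any single recipe; the login field names and values in the usage line are placeholders.

# Compatibility shim applied throughout this patch: on Python 3 the helpers
# live in urllib.parse / urllib.request / urllib.error, on Python 2 they live
# in urllib and urllib2.
try:
    from urllib.parse import quote, quote_plus, unquote, urlencode
    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError
except ImportError:
    from urllib import quote, quote_plus, unquote, urlencode
    from urllib2 import urlopen, HTTPError, URLError

# Typical call site after the port: build POST data for a recipe login form.
# The field names and credentials below are placeholders for illustration.
data = urlencode({'username': 'user@example.com', 'password': 'secret'})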
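
Several recipes (houston_chronicle, now_toronto, oc_register, readitlater, roger_ebert) also drop direct urllib2.urlopen() calls in favour of calibre's own fetch helper, which goes through the recipe's configured browser. A rough sketch of that pattern follows; the recipe class and its method names are hypothetical, only the index_to_soup() keyword arguments mirror the patch.

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    # Hypothetical recipe, used only to illustrate the fetch helper.
    title = 'Example'

    def load_raw(self, url):
        # raw=True returns the undecoded response body, matching what
        # urllib2.urlopen(url).read() used to provide.
        return self.index_to_soup(url, raw=True)

    def load_tree(self, url):
        # as_tree=True returns a parsed lxml tree, replacing the manual
        # etree.HTMLParser() + html.parse() code removed from
        # houston_chronicle above.
        return self.index_to_soup(url, as_tree=True)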
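
Finally, where a recipe built a urllib2.Request by hand (thenewcriterion above), the patch switches to mechanize.Request, which takes the same url/data/headers arguments and can be passed straight to the recipe browser's open(). A minimal sketch, assuming a placeholder URL, form fields and header set:

try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

from mechanize import Request


def post_login(br, username, password):
    # br is the mechanize browser returned by BasicNewsRecipe.get_browser().
    # The URL, form fields and header below are placeholders for illustration.
    data = urlencode({'login': username, 'password': password})
    headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}
    return br.open(Request('https://example.com/login', data, headers))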