py3: port use of urllib in recipes

This commit is contained in:
Kovid Goyal 2019-04-01 13:57:21 +05:30
parent 930376c036
commit 569c576293
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
36 changed files with 178 additions and 119 deletions

View File

@ -11,7 +11,10 @@ Change Log:
2013/03/30 -- first version
'''
import urllib
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -59,7 +62,7 @@ class AM730(BasicNewsRecipe):
continue # not in same section
title = href.split('/')[-1].split('-')[0]
title = urllib.unquote(title.encode('ASCII')) # .decode('utf-8')
title = unquote(title.encode('ASCII')) # .decode('utf-8')
if self.debug:
print(title)
try:

View File

@ -9,7 +9,10 @@ http://www.ambito.com/diario/
'''
import time
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -66,7 +69,7 @@ class Ambito_Financiero(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX)
if self.username is not None and self.password is not None:
postdata = urllib.urlencode({
postdata = urlencode({
'txtUser': self.username,
'txtPassword': self.password
})
@ -110,7 +113,7 @@ class Ambito_Financiero(BasicNewsRecipe):
if self.session_id:
l, s, r = url.rpartition('.html')
o, s1, artid = l.rpartition('_')
postdata = urllib.urlencode({'id': artid, 'id_session': self.session_id})
postdata = urlencode({'id': artid, 'id_session': self.session_id})
response = self.browser.open(
'http://data.ambito.com/diario/cuerpo_noticia.asp',
data=postdata,
@ -128,7 +131,7 @@ class Ambito_Financiero(BasicNewsRecipe):
def cleanup(self):
if self.session_id is not None:
postdata = urllib.urlencode({'session_id': self.session_id})
postdata = urlencode({'session_id': self.session_id})
self.browser.open(
'http://www.ambito.com/diario/no-cache/login/x_logout.asp', data=postdata, timeout=self.timeout
)

View File

@ -4,7 +4,10 @@ __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
azstarnet.com
'''
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre.web.feeds.news import BasicNewsRecipe
@ -31,7 +34,7 @@ class Azstarnet(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open('http://azstarnet.com/')
if self.username is not None and self.password is not None:
data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
data = urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
})
br.open('http://azstarnet.com/app/registration/proxy.php', data)
return br

View File

@ -7,6 +7,9 @@ from __future__ import (unicode_literals, division, absolute_import,
import json
from mechanize import Request
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from calibre.web.feeds.news import BasicNewsRecipe
@ -26,7 +29,7 @@ class Barrons(BasicNewsRecipe):
timefmt = ' [%a, %b %d, %Y]'
use_embedded_content = False
no_stylesheets = True
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
match_regexps = ['http://online.barrons.com/.*?html\\?mod=.*?|file:.*']
conversion_options = {'linearize_tables': True}
# Don't grab articles more than 7 days old

View File

@ -6,7 +6,10 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
www.brecha.com.uy
'''
import urllib
try:
from urllib.parse import urlencode, quote
except ImportError:
from urllib import urlencode, quote
from calibre.web.feeds.news import BasicNewsRecipe
@ -40,7 +43,7 @@ class Brecha(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
if self.username is not None and self.password is not None:
data = urllib.urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
})
br.open(
'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data)
@ -74,5 +77,5 @@ class Brecha(BasicNewsRecipe):
soup = self.index_to_soup('http://www.brecha.com.uy/index.php')
for image in soup.findAll('img', alt=True):
if image['alt'].startswith('Tapa '):
return 'http://www.brecha.com.uy' + urllib.quote(image['src'])
return 'http://www.brecha.com.uy' + quote(image['src'])
return None

View File

@ -9,7 +9,10 @@ __copyright__ = '2008-2016, Darko Miletic <darko.miletic at gmail.com>'
clarin.com
'''
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -76,7 +79,7 @@ class Clarin(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
data = urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
'ingresar_ingresar_email_paseInputComponent': self.username,
'ingresar_ingresar_palabraClave_paseInputComponent': self.password,
'ingresar_ingresar_ingresar_paseButton': 'Ingresar',

View File

@ -1,4 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe
try:
from urllib.parse import quote
except ImportError:
from urllib import quote

View File

@ -1,9 +1,8 @@
from __future__ import print_function
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag, BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
def new_tag(soup, name, attrs=()):
@ -113,20 +112,14 @@ class Estadao(BasicNewsRecipe):
def get_cover_url(self):
if self.THUMBALIZR_API:
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' +
self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset', self.ENCHTM)
pedido.add_header('Referer', self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
soup = self.index_to_soup(cover_url)
cover_item = soup.find('body')
if cover_item:
cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
'&url=' + self.SCREENSHOT + '&width=600&quality=90'
return cover_url
except URLError:
except Exception:
cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
'&url=' + self.SCREENSHOT + '&width=600&quality=90'
return cover_url

View File

@ -5,8 +5,11 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
try:
from urllib.parse import quote_plus
except ImportError:
from urllib import quote_plus
class Folha_de_s_paulo(BasicNewsRecipe):
@ -69,7 +72,7 @@ class Folha_de_s_paulo(BasicNewsRecipe):
return curl
def print_version(self, url):
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + quote_plus(url)
def get_cover_url(self):
soup = self.index_to_soup('http://www.folha.uol.com.br/')

View File

@ -4,8 +4,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import re
import shutil
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
@ -46,7 +49,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe):
self.log('\t\tdata-parent-id', parent_id)
self.log('\t\tdata-cat-id', cat_id)
self.log('\t\tdata-post-id', post_id)
data = urllib.urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
data = urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
r=br.open('http://www.galaxysedge.com/wp-content/themes/galaxyedge/get_content.php', data)
content_file = PersistentTemporaryFile(suffix='.html', dir=self.ctdir)
content_file.write(r.read())

View File

@ -4,8 +4,11 @@ __copyright__ = '2010-2015, Darko Miletic <darko.miletic at gmail.com>'
www.haaretz.com
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
class Haaretz_en(BasicNewsRecipe):
@ -62,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.PREFIX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa
data = urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa
})
br.open('https://sso.haaretz.com/sso/sso/signIn', data)
return br

View File

@ -15,7 +15,10 @@ anything in username/password fields
import time
import re
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -61,7 +64,7 @@ class Harpers_full(BasicNewsRecipe):
br.open('https://harpers.org/')
if self.username is not None and self.password is not None:
tt = time.localtime() * 1000
data = urllib.urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
})
br.open(self.LOGIN, data)
return br

View File

@ -5,8 +5,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
from css_selectors import Select
from mechanize import Request
from urllib import urlencode
import json
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
class HBR(BasicNewsRecipe):

View File

@ -8,8 +8,6 @@ chron.com
'''
import re
import time
import urllib2
import io
from datetime import datetime
import traceback
import sys
@ -19,8 +17,6 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.utils.date import dt_factory, local_tz
from lxml import html
from lxml import etree
regex_date_only = re.compile(r"""(?:January|February|March|April|
{8}May|June|July|August|September|October|November|
@ -62,12 +58,8 @@ def validate_link(page, link, title):
return link, title
def get_article_parsed(this_url):
page = urllib2.urlopen(this_url)
content = page.read()
parser = etree.HTMLParser()
parsed = html.parse(io.BytesIO(bytes(content)), parser)
return parsed
def get_article_parsed(index_to_soup, this_url):
return index_to_soup(this_url, as_tree=True)
def sort_subject(element_list):
@ -91,8 +83,8 @@ def sort_subject(element_list):
return combined_list
def get_links_from_section_page(page):
page_doc = get_article_parsed(base_url + page[1][0])
def get_links_from_section_page(index_to_soup, page):
page_doc = get_article_parsed(index_to_soup, base_url + page[1][0])
els = page_doc.xpath(xpath_general)
element_list = []
for el in els:
@ -110,13 +102,13 @@ def get_links_from_section_page(page):
return [page[0], sorted_element_list]
def get_all_links_from_sections():
def get_all_links_from_sections(index_to_soup):
all_sections = []
article_set = set()
final_dict = OrderedDict()
for item in pages.items():
print("getting links from {0}".format(item[0]))
all_sections.append(get_links_from_section_page(item))
all_sections.append(get_links_from_section_page(index_to_soup, item))
for section in all_sections:
section_id = section[0]
article_list = section[1]
@ -232,7 +224,7 @@ class HoustonChronicle(BasicNewsRecipe):
self.timefmt = ' [%a, %d %b, %Y]'
self.log('starting parse_index: ', time.strftime(self.timestampfmt))
feeds = []
sections = get_all_links_from_sections()
sections = get_all_links_from_sections(self.index_to_soup)
for section_id, article_list in sections.items():
self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list)))
articles = []

View File

@ -7,6 +7,9 @@ import urlparse, re
import json
from uuid import uuid4
from mechanize import Request
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -1,4 +1,7 @@
import urllib2
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -39,7 +42,7 @@ class JBPress(BasicNewsRecipe):
return br
def print_version(self, url):
url = urllib2.urlopen(url).geturl() # resolve redirect.
url = urlopen(url).geturl() # resolve redirect.
return url.replace('/-/', '/print/')
def preprocess_html(self, soup):

View File

@ -5,6 +5,9 @@ www.jornada.unam.mx
'''
import re
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from urlparse import urlparse, urlunparse, parse_qs
from calibre import strftime

View File

@ -5,7 +5,10 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.cl
'''
import urllib
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from calibre.web.feeds.news import BasicNewsRecipe
@ -36,7 +39,7 @@ class LaNacionChile(BasicNewsRecipe):
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
def print_version(self, url):
toprint = urllib.quote(url, ':/')
toprint = quote(url, ':/')
return u'http://www.lanacion.cl/cgi-bx/imprimir.cgi?_URL=' + toprint
def preprocess_html(self, soup):

View File

@ -7,7 +7,6 @@ Lemonde.fr: Version abonnée
'''
import os, zipfile, re, time
from urllib2 import HTTPError
from calibre.constants import preferred_encoding
from calibre.web.feeds.news import BasicNewsRecipe
@ -97,7 +96,7 @@ class LeMondeAbonne(BasicNewsRecipe):
try:
response = browser.open(url)
continue
except HTTPError:
except Exception:
second -= 24 * 60 * 60
tmp = PersistentTemporaryFile(suffix='.zip')

View File

@ -4,7 +4,10 @@ __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
mondediplo.com
'''
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -43,7 +46,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN)
if self.username is not None and self.password is not None:
data = urllib.urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
data = urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
})
br.open(self.LOGIN, data)
return br

View File

@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.constants import config_dir, CONFIG_DIR_MODE
import os
import os.path
import urllib
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from hashlib import md5
@ -62,7 +65,7 @@ class ModorosBlogHu(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
feed_hash = quote(feed.title.encode('utf-8'), safe='')
feed_fn = os.path.join(feed_dir, feed_hash)
past_items = set()

View File

@ -6,7 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
import os
import urllib2
import zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@ -25,10 +24,10 @@ class NowToronto(BasicNewsRecipe):
epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
soup = self.index_to_soup(epub_feed)
url = soup.find(name='feedburner:origlink').string
f = urllib2.urlopen(url)
raw = self.index_to_soup(url, raw=True)
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0, _('downloading epub'))
tmp.write(f.read())
tmp.write(raw)
tmp.close()
zfile = zipfile.ZipFile(tmp.name, 'r')
self.report_progress(0, _('extracting epub'))

View File

@ -4,7 +4,11 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
www.nursingtimes.net
'''
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -31,7 +35,7 @@ class NursingTimes(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN)
if self.username is not None and self.password is not None:
data = urllib.urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa
data = urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa
})
br.open(self.LOGIN, data)
return br

View File

@ -3,7 +3,6 @@
from __future__ import unicode_literals, division, absolute_import, print_function
import time
import json
import urllib
from pprint import pprint
from calibre.web.feeds.news import BasicNewsRecipe
@ -87,9 +86,7 @@ class OrangeCountyRegister(BasicNewsRecipe):
return cleanedHTML
def loadURL(self, url):
socket = urllib.urlopen(url)
rawHTML = socket.read()
return rawHTML
return self.index_to_soup(url, raw=True)
def htmlToAttribsDict(self, rawHTML):
tokenStart = 'dataLayer.push({'

View File

@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.constants import config_dir, CONFIG_DIR_MODE
import os
import os.path
import urllib
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from hashlib import md5
@ -86,7 +89,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
feed_hash = quote(feed.title.encode('utf-8'), safe='')
feed_fn = os.path.join(feed_dir, feed_hash)
past_items = set()

View File

@ -1,5 +1,8 @@
import urllib
import re
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
from calibre.web.feeds.news import BasicNewsRecipe
@ -37,7 +40,7 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
ans = None
try:
s = article.summary
ans = urllib.unquote(
ans = unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass

View File

@ -8,8 +8,14 @@ import json
import operator
import re
import tempfile
import urllib
import urllib2
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
try:
from urllib.error import HTTPError, URLError
except ImportError:
from urllib2 import HTTPError, URLError
__license__ = 'GPL v3'
@ -99,18 +105,8 @@ class Pocket(BasicNewsRecipe):
self.get_auth_uri(),
self.get_pull_articles_uri()
)
try:
request = urllib2.Request(fetch_url)
response = urllib2.urlopen(request)
pocket_feed = json.load(response)['list']
except urllib2.HTTPError as e:
self.log.exception(
"Pocket returned an error: {0}".format(e.info()))
return []
except urllib2.URLError as e:
self.log.exception(
"Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
return []
data = self.index_to_soup(fetch_url, raw=True)
pocket_feed = json.loads(data)['list']
if len(pocket_feed) < self.minimum_articles:
self.mark_as_read_after_dl = False
@ -143,10 +139,10 @@ class Pocket(BasicNewsRecipe):
fc_tag = soup.find('script', text=re.compile("formCheck"))
fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
article_id = url.split("/")[-1]
data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
data = urlencode({'itemId': article_id, 'formCheck': fc_id})
try:
response = self.browser.open(ajax_url, data)
except urllib2.HTTPError as e:
except HTTPError as e:
self.log.exception("unable to get textview {0}".format(e.info()))
raise e
return json.load(response)['article']
@ -186,13 +182,12 @@ class Pocket(BasicNewsRecipe):
self.get_auth_uri()
)
try:
request = urllib2.Request(mark_read_url)
urllib2.urlopen(request)
except urllib2.HTTPError as e:
self.browser.open_novisit(mark_read_url)
except HTTPError as e:
self.log.exception(
'Pocket returned an error while archiving articles: {0}'.format(e))
return []
except urllib2.URLError as e:
except URLError as e:
self.log.exception(
"Unable to connect to getpocket.com's modify api: {0}".format(e))
return []

View File

@ -1,5 +1,4 @@
import re
import urllib2
from calibre.web.feeds.news import BasicNewsRecipe
@ -54,7 +53,7 @@ class Ebert(BasicNewsRecipe):
self.report_progress(0, _('Fetching feed') + ' %s...' %
(feedtitle if feedtitle else feedurl))
articles = []
page = urllib2.urlopen(feedurl).read()
page = self.index_to_soup(feedurl, raw=True)
if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
pattern = self.patternReviews

View File

@ -1,5 +1,4 @@
import re
import urllib2
import time
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
@ -68,7 +67,7 @@ class Ebert(BasicNewsRecipe):
self.report_progress(0, _('Fetching feed') + ' %s...' %
(feedtitle if feedtitle else feedurl))
articles = []
page = urllib2.urlopen(feedurl).read()
page = self.index_to_soup(feedurl, raw=True)
if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
pattern = self.patternReviews

View File

@ -3,8 +3,11 @@ __copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thetimes.co.uk/magazine/the-sunday-times-magazine/
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
def classes(classes):
@ -49,7 +52,7 @@ class TimesOnline(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open('http://www.thetimes.co.uk/')
if self.username is not None and self.password is not None:
data = urllib.urlencode({
data = urlencode({
'gotoUrl': self.INDEX,
'username': self.username,
'password': self.password})

View File

@ -9,10 +9,14 @@ __docformat__ = 'restructuredtext de'
www.taz.de/digiabo
'''
import os
import urllib2
import zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
try:
from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen
from urllib.error import HTTPError
except ImportError:
from urllib2 import HTTPBasicAuthHandler, build_opener, install_opener, urlopen, HTTPError
class TazDigiabo(BasicNewsRecipe):
@ -34,17 +38,17 @@ class TazDigiabo(BasicNewsRecipe):
url = domain + "/epub/"
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler = HTTPBasicAuthHandler()
auth_handler.add_password(realm='TAZ-ABO',
uri=url,
user=self.username,
passwd=self.password)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
opener = build_opener(auth_handler)
install_opener(opener)
try:
f = urllib2.urlopen(url)
except urllib2.HTTPError:
f = urlopen(url)
except HTTPError:
self.report_progress(0, _('Can\'t login to download issue'))
raise ValueError('Failed to login, check your username and'
' password')

View File

@ -9,9 +9,12 @@ __copyright__ = '2019, Darko Miletic <darko.miletic at gmail.com>'
www.newcriterion.com
'''
import urllib
import urllib2
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
import re
from mechanize import Request
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@ -52,7 +55,7 @@ class TheNewCriterion(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open('https://www.newcriterion.com/')
if self.username is not None and self.password is not None:
data = urllib.urlencode({'login': self.username, 'password': self.password})
data = urlencode({'login': self.username, 'password': self.password})
header = {
'X-OCTOBER-REQUEST-HANDLER': 'onSignin',
'X-Requested-With': 'XMLHttpRequest',
@ -60,7 +63,7 @@ class TheNewCriterion(BasicNewsRecipe):
'X-OCTOBER-REQUEST-PARTIALS':'',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}
request = urllib2.Request('https://www.newcriterion.com/', data, header)
request = Request('https://www.newcriterion.com/', data, header)
br.open(request)
return br

View File

@ -3,8 +3,11 @@ __copyright__ = '2010-2017, Bobby Steel <bob at xdca.com>, Darko Miletic'
'''
www.thetimes.co.uk
'''
import urllib
import html5lib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from lxml import html
from calibre.web.feeds.news import BasicNewsRecipe
@ -79,7 +82,7 @@ class TimesOnline(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open('http://www.thetimes.co.uk/')
if self.username is not None and self.password is not None:
data = urllib.urlencode({
data = urlencode({
'gotoUrl': self.INDEX,
'username': self.username,
'password': self.password})

View File

@ -4,7 +4,10 @@ __copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
tomshardware.com/us
'''
import urllib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -30,7 +33,7 @@ class Tomshardware(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX + '/us/')
if self.username is not None and self.password is not None:
data = urllib.urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
})
br.open(self.LOGIN, data)
return br

View File

@ -5,8 +5,11 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
from urllib import quote
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from mechanize import Request
from calibre import random_user_agent

View File

@ -5,6 +5,9 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from mechanize import Request