mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: port use of urllib in recipes
This commit is contained in:
parent
930376c036
commit
569c576293
@ -11,7 +11,10 @@ Change Log:
|
||||
2013/03/30 -- first version
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import unquote
|
||||
except ImportError:
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
@ -59,7 +62,7 @@ class AM730(BasicNewsRecipe):
|
||||
continue # not in same section
|
||||
|
||||
title = href.split('/')[-1].split('-')[0]
|
||||
title = urllib.unquote(title.encode('ASCII')) # .decode('utf-8')
|
||||
title = unquote(title.encode('ASCII')) # .decode('utf-8')
|
||||
if self.debug:
|
||||
print(title)
|
||||
try:
|
||||
|
@ -9,7 +9,10 @@ http://www.ambito.com/diario/
|
||||
'''
|
||||
|
||||
import time
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -66,7 +69,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
postdata = urllib.urlencode({
|
||||
postdata = urlencode({
|
||||
'txtUser': self.username,
|
||||
'txtPassword': self.password
|
||||
})
|
||||
@ -110,7 +113,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
if self.session_id:
|
||||
l, s, r = url.rpartition('.html')
|
||||
o, s1, artid = l.rpartition('_')
|
||||
postdata = urllib.urlencode({'id': artid, 'id_session': self.session_id})
|
||||
postdata = urlencode({'id': artid, 'id_session': self.session_id})
|
||||
response = self.browser.open(
|
||||
'http://data.ambito.com/diario/cuerpo_noticia.asp',
|
||||
data=postdata,
|
||||
@ -128,7 +131,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
|
||||
def cleanup(self):
|
||||
if self.session_id is not None:
|
||||
postdata = urllib.urlencode({'session_id': self.session_id})
|
||||
postdata = urlencode({'session_id': self.session_id})
|
||||
self.browser.open(
|
||||
'http://www.ambito.com/diario/no-cache/login/x_logout.asp', data=postdata, timeout=self.timeout
|
||||
)
|
||||
|
@ -4,7 +4,10 @@ __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
azstarnet.com
|
||||
'''
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -31,7 +34,7 @@ class Azstarnet(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://azstarnet.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
|
||||
data = urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
|
||||
})
|
||||
br.open('http://azstarnet.com/app/registration/proxy.php', data)
|
||||
return br
|
||||
|
@ -7,7 +7,10 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
|
||||
import json
|
||||
from mechanize import Request
|
||||
from urllib import quote
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -26,7 +29,7 @@ class Barrons(BasicNewsRecipe):
|
||||
timefmt = ' [%a, %b %d, %Y]'
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
|
||||
match_regexps = ['http://online.barrons.com/.*?html\\?mod=.*?|file:.*']
|
||||
conversion_options = {'linearize_tables': True}
|
||||
|
||||
# Don't grab articles more than 7 days old
|
||||
|
@ -6,7 +6,10 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.brecha.com.uy
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode, quote
|
||||
except ImportError:
|
||||
from urllib import urlencode, quote
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -40,7 +43,7 @@ class Brecha(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
|
||||
data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
|
||||
})
|
||||
br.open(
|
||||
'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data)
|
||||
@ -74,5 +77,5 @@ class Brecha(BasicNewsRecipe):
|
||||
soup = self.index_to_soup('http://www.brecha.com.uy/index.php')
|
||||
for image in soup.findAll('img', alt=True):
|
||||
if image['alt'].startswith('Tapa '):
|
||||
return 'http://www.brecha.com.uy' + urllib.quote(image['src'])
|
||||
return 'http://www.brecha.com.uy' + quote(image['src'])
|
||||
return None
|
||||
|
@ -9,7 +9,10 @@ __copyright__ = '2008-2016, Darko Miletic <darko.miletic at gmail.com>'
|
||||
clarin.com
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -76,7 +79,7 @@ class Clarin(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
|
||||
data = urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm',
|
||||
'ingresar_ingresar_email_paseInputComponent': self.username,
|
||||
'ingresar_ingresar_palabraClave_paseInputComponent': self.password,
|
||||
'ingresar_ingresar_ingresar_paseButton': 'Ingresar',
|
||||
|
@ -1,5 +1,8 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from urllib import quote
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
|
||||
|
||||
class EconomiaMagazine(BasicNewsRecipe):
|
||||
|
@ -1,9 +1,8 @@
|
||||
from __future__ import print_function
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag, BeautifulSoup
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
|
||||
@ -113,20 +112,14 @@ class Estadao(BasicNewsRecipe):
|
||||
def get_cover_url(self):
|
||||
if self.THUMBALIZR_API:
|
||||
cover_url = self.CAPA
|
||||
pedido = Request(self.CAPA)
|
||||
pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' +
|
||||
self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)')
|
||||
pedido.add_header('Accept-Charset', self.ENCHTM)
|
||||
pedido.add_header('Referer', self.SCREENSHOT)
|
||||
try:
|
||||
resposta = urlopen(pedido)
|
||||
soup = BeautifulSoup(resposta)
|
||||
soup = self.index_to_soup(cover_url)
|
||||
cover_item = soup.find('body')
|
||||
if cover_item:
|
||||
cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
|
||||
'&url=' + self.SCREENSHOT + '&width=600&quality=90'
|
||||
return cover_url
|
||||
except URLError:
|
||||
except Exception:
|
||||
cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
|
||||
'&url=' + self.SCREENSHOT + '&width=600&quality=90'
|
||||
return cover_url
|
||||
|
@ -5,8 +5,11 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.folha.uol.com.br
|
||||
'''
|
||||
import urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
try:
|
||||
from urllib.parse import quote_plus
|
||||
except ImportError:
|
||||
from urllib import quote_plus
|
||||
|
||||
|
||||
class Folha_de_s_paulo(BasicNewsRecipe):
|
||||
@ -69,7 +72,7 @@ class Folha_de_s_paulo(BasicNewsRecipe):
|
||||
return curl
|
||||
|
||||
def print_version(self, url):
|
||||
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
|
||||
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + quote_plus(url)
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.folha.uol.com.br/')
|
||||
|
@ -4,8 +4,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import urllib
|
||||
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -46,7 +49,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe):
|
||||
self.log('\t\tdata-parent-id', parent_id)
|
||||
self.log('\t\tdata-cat-id', cat_id)
|
||||
self.log('\t\tdata-post-id', post_id)
|
||||
data = urllib.urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
|
||||
data = urlencode({'action':'get_content', 'cat_id':cat_id, 'parent_id':parent_id, 'post_id':post_id})
|
||||
r=br.open('http://www.galaxysedge.com/wp-content/themes/galaxyedge/get_content.php', data)
|
||||
content_file = PersistentTemporaryFile(suffix='.html', dir=self.ctdir)
|
||||
content_file.write(r.read())
|
||||
|
@ -4,8 +4,11 @@ __copyright__ = '2010-2015, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.haaretz.com
|
||||
'''
|
||||
|
||||
import urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
|
||||
|
||||
class Haaretz_en(BasicNewsRecipe):
|
||||
@ -62,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.PREFIX)
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa
|
||||
data = urlencode({'cb': 'parseEngReply', 'newsso': 'true', 'fromlogin': 'true', 'layer': 'eng_login', 'userName': self.username, 'password': self.password # noqa
|
||||
})
|
||||
br.open('https://sso.haaretz.com/sso/sso/signIn', data)
|
||||
return br
|
||||
|
@ -15,7 +15,10 @@ anything in username/password fields
|
||||
|
||||
import time
|
||||
import re
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -61,7 +64,7 @@ class Harpers_full(BasicNewsRecipe):
|
||||
br.open('https://harpers.org/')
|
||||
if self.username is not None and self.password is not None:
|
||||
tt = time.localtime() * 1000
|
||||
data = urllib.urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
|
||||
data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt
|
||||
})
|
||||
br.open(self.LOGIN, data)
|
||||
return br
|
||||
|
@ -5,8 +5,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from css_selectors import Select
|
||||
from mechanize import Request
|
||||
from urllib import urlencode
|
||||
import json
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
|
||||
|
||||
class HBR(BasicNewsRecipe):
|
||||
|
@ -8,8 +8,6 @@ chron.com
|
||||
'''
|
||||
import re
|
||||
import time
|
||||
import urllib2
|
||||
import io
|
||||
from datetime import datetime
|
||||
import traceback
|
||||
import sys
|
||||
@ -19,8 +17,6 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ebooks.BeautifulSoup import NavigableString
|
||||
from calibre.utils.date import dt_factory, local_tz
|
||||
from lxml import html
|
||||
from lxml import etree
|
||||
|
||||
regex_date_only = re.compile(r"""(?:January|February|March|April|
|
||||
{8}May|June|July|August|September|October|November|
|
||||
@ -62,12 +58,8 @@ def validate_link(page, link, title):
|
||||
return link, title
|
||||
|
||||
|
||||
def get_article_parsed(this_url):
|
||||
page = urllib2.urlopen(this_url)
|
||||
content = page.read()
|
||||
parser = etree.HTMLParser()
|
||||
parsed = html.parse(io.BytesIO(bytes(content)), parser)
|
||||
return parsed
|
||||
def get_article_parsed(index_to_soup, this_url):
|
||||
return index_to_soup(this_url, as_tree=True)
|
||||
|
||||
|
||||
def sort_subject(element_list):
|
||||
@ -91,8 +83,8 @@ def sort_subject(element_list):
|
||||
return combined_list
|
||||
|
||||
|
||||
def get_links_from_section_page(page):
|
||||
page_doc = get_article_parsed(base_url + page[1][0])
|
||||
def get_links_from_section_page(index_to_soup, page):
|
||||
page_doc = get_article_parsed(index_to_soup, base_url + page[1][0])
|
||||
els = page_doc.xpath(xpath_general)
|
||||
element_list = []
|
||||
for el in els:
|
||||
@ -110,13 +102,13 @@ def get_links_from_section_page(page):
|
||||
return [page[0], sorted_element_list]
|
||||
|
||||
|
||||
def get_all_links_from_sections():
|
||||
def get_all_links_from_sections(index_to_soup):
|
||||
all_sections = []
|
||||
article_set = set()
|
||||
final_dict = OrderedDict()
|
||||
for item in pages.items():
|
||||
print("getting links from {0}".format(item[0]))
|
||||
all_sections.append(get_links_from_section_page(item))
|
||||
all_sections.append(get_links_from_section_page(index_to_soup, item))
|
||||
for section in all_sections:
|
||||
section_id = section[0]
|
||||
article_list = section[1]
|
||||
@ -232,7 +224,7 @@ class HoustonChronicle(BasicNewsRecipe):
|
||||
self.timefmt = ' [%a, %d %b, %Y]'
|
||||
self.log('starting parse_index: ', time.strftime(self.timestampfmt))
|
||||
feeds = []
|
||||
sections = get_all_links_from_sections()
|
||||
sections = get_all_links_from_sections(self.index_to_soup)
|
||||
for section_id, article_list in sections.items():
|
||||
self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list)))
|
||||
articles = []
|
||||
|
@ -7,7 +7,10 @@ import urlparse, re
|
||||
import json
|
||||
from uuid import uuid4
|
||||
from mechanize import Request
|
||||
from urllib import urlencode
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
@ -1,4 +1,7 @@
|
||||
import urllib2
|
||||
try:
|
||||
from urllib.request import urlopen
|
||||
except ImportError:
|
||||
from urllib2 import urlopen
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -39,7 +42,7 @@ class JBPress(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
def print_version(self, url):
|
||||
url = urllib2.urlopen(url).geturl() # resolve redirect.
|
||||
url = urlopen(url).geturl() # resolve redirect.
|
||||
return url.replace('/-/', '/print/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -5,7 +5,10 @@ www.jornada.unam.mx
|
||||
'''
|
||||
|
||||
import re
|
||||
from urllib import urlencode
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from urlparse import urlparse, urlunparse, parse_qs
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
@ -5,7 +5,10 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.cl
|
||||
'''
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -36,7 +39,7 @@ class LaNacionChile(BasicNewsRecipe):
|
||||
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
|
||||
|
||||
def print_version(self, url):
|
||||
toprint = urllib.quote(url, ':/')
|
||||
toprint = quote(url, ':/')
|
||||
return u'http://www.lanacion.cl/cgi-bx/imprimir.cgi?_URL=' + toprint
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -7,7 +7,6 @@ Lemonde.fr: Version abonnée
|
||||
'''
|
||||
|
||||
import os, zipfile, re, time
|
||||
from urllib2 import HTTPError
|
||||
from calibre.constants import preferred_encoding
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -97,7 +96,7 @@ class LeMondeAbonne(BasicNewsRecipe):
|
||||
try:
|
||||
response = browser.open(url)
|
||||
continue
|
||||
except HTTPError:
|
||||
except Exception:
|
||||
second -= 24 * 60 * 60
|
||||
|
||||
tmp = PersistentTemporaryFile(suffix='.zip')
|
||||
|
@ -4,7 +4,10 @@ __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
mondediplo.com
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -43,7 +46,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.LOGIN)
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
|
||||
data = urlencode({'login': self.username, 'pass': self.password, 'enter': 'enter'
|
||||
})
|
||||
br.open(self.LOGIN, data)
|
||||
return br
|
||||
|
@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.constants import config_dir, CONFIG_DIR_MODE
|
||||
import os
|
||||
import os.path
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
from hashlib import md5
|
||||
|
||||
|
||||
@ -62,7 +65,7 @@ class ModorosBlogHu(BasicNewsRecipe):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
for feed in feeds:
|
||||
feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
|
||||
feed_hash = quote(feed.title.encode('utf-8'), safe='')
|
||||
feed_fn = os.path.join(feed_dir, feed_hash)
|
||||
|
||||
past_items = set()
|
||||
|
@ -6,7 +6,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Starson17'
|
||||
|
||||
import os
|
||||
import urllib2
|
||||
import zipfile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -25,10 +24,10 @@ class NowToronto(BasicNewsRecipe):
|
||||
epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
|
||||
soup = self.index_to_soup(epub_feed)
|
||||
url = soup.find(name='feedburner:origlink').string
|
||||
f = urllib2.urlopen(url)
|
||||
raw = self.index_to_soup(url, raw=True)
|
||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||
self.report_progress(0, _('downloading epub'))
|
||||
tmp.write(f.read())
|
||||
tmp.write(raw)
|
||||
tmp.close()
|
||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||
self.report_progress(0, _('extracting epub'))
|
||||
|
@ -4,7 +4,11 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.nursingtimes.net
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -31,7 +35,7 @@ class NursingTimes(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.LOGIN)
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa
|
||||
data = urlencode({'campaigncode': '0', 'referrer': '', 'security_text': '', 'SIemail': self.username, 'passWord': self.password, 'LoginButton.x': '27', 'LoginButton.y': '13' # noqa
|
||||
})
|
||||
br.open(self.LOGIN, data)
|
||||
return br
|
||||
|
@ -3,7 +3,6 @@
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
import time
|
||||
import json
|
||||
import urllib
|
||||
from pprint import pprint
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -87,9 +86,7 @@ class OrangeCountyRegister(BasicNewsRecipe):
|
||||
return cleanedHTML
|
||||
|
||||
def loadURL(self, url):
|
||||
socket = urllib.urlopen(url)
|
||||
rawHTML = socket.read()
|
||||
return rawHTML
|
||||
return self.index_to_soup(url, raw=True)
|
||||
|
||||
def htmlToAttribsDict(self, rawHTML):
|
||||
tokenStart = 'dataLayer.push({'
|
||||
|
@ -3,7 +3,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.constants import config_dir, CONFIG_DIR_MODE
|
||||
import os
|
||||
import os.path
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
from hashlib import md5
|
||||
|
||||
|
||||
@ -86,7 +89,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
for feed in feeds:
|
||||
feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
|
||||
feed_hash = quote(feed.title.encode('utf-8'), safe='')
|
||||
feed_fn = os.path.join(feed_dir, feed_hash)
|
||||
|
||||
past_items = set()
|
||||
|
@ -1,5 +1,8 @@
|
||||
import urllib
|
||||
import re
|
||||
try:
|
||||
from urllib.parse import unquote
|
||||
except ImportError:
|
||||
from urllib import unquote
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -37,7 +40,7 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
|
||||
ans = None
|
||||
try:
|
||||
s = article.summary
|
||||
ans = urllib.unquote(
|
||||
ans = unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
|
@ -8,8 +8,14 @@ import json
|
||||
import operator
|
||||
import re
|
||||
import tempfile
|
||||
import urllib
|
||||
import urllib2
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
try:
|
||||
from urllib.error import HTTPError, URLError
|
||||
except ImportError:
|
||||
from urllib2 import HTTPError, URLError
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -99,18 +105,8 @@ class Pocket(BasicNewsRecipe):
|
||||
self.get_auth_uri(),
|
||||
self.get_pull_articles_uri()
|
||||
)
|
||||
try:
|
||||
request = urllib2.Request(fetch_url)
|
||||
response = urllib2.urlopen(request)
|
||||
pocket_feed = json.load(response)['list']
|
||||
except urllib2.HTTPError as e:
|
||||
self.log.exception(
|
||||
"Pocket returned an error: {0}".format(e.info()))
|
||||
return []
|
||||
except urllib2.URLError as e:
|
||||
self.log.exception(
|
||||
"Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
|
||||
return []
|
||||
data = self.index_to_soup(fetch_url, raw=True)
|
||||
pocket_feed = json.loads(data)['list']
|
||||
|
||||
if len(pocket_feed) < self.minimum_articles:
|
||||
self.mark_as_read_after_dl = False
|
||||
@ -143,10 +139,10 @@ class Pocket(BasicNewsRecipe):
|
||||
fc_tag = soup.find('script', text=re.compile("formCheck"))
|
||||
fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
|
||||
article_id = url.split("/")[-1]
|
||||
data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
|
||||
data = urlencode({'itemId': article_id, 'formCheck': fc_id})
|
||||
try:
|
||||
response = self.browser.open(ajax_url, data)
|
||||
except urllib2.HTTPError as e:
|
||||
except HTTPError as e:
|
||||
self.log.exception("unable to get textview {0}".format(e.info()))
|
||||
raise e
|
||||
return json.load(response)['article']
|
||||
@ -186,13 +182,12 @@ class Pocket(BasicNewsRecipe):
|
||||
self.get_auth_uri()
|
||||
)
|
||||
try:
|
||||
request = urllib2.Request(mark_read_url)
|
||||
urllib2.urlopen(request)
|
||||
except urllib2.HTTPError as e:
|
||||
self.browser.open_novisit(mark_read_url)
|
||||
except HTTPError as e:
|
||||
self.log.exception(
|
||||
'Pocket returned an error while archiving articles: {0}'.format(e))
|
||||
return []
|
||||
except urllib2.URLError as e:
|
||||
except URLError as e:
|
||||
self.log.exception(
|
||||
"Unable to connect to getpocket.com's modify api: {0}".format(e))
|
||||
return []
|
||||
|
@ -1,5 +1,4 @@
|
||||
import re
|
||||
import urllib2
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -54,7 +53,7 @@ class Ebert(BasicNewsRecipe):
|
||||
self.report_progress(0, _('Fetching feed') + ' %s...' %
|
||||
(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
page = urllib2.urlopen(feedurl).read()
|
||||
page = self.index_to_soup(feedurl, raw=True)
|
||||
|
||||
if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
|
||||
pattern = self.patternReviews
|
||||
|
@ -1,5 +1,4 @@
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre import strftime
|
||||
@ -68,7 +67,7 @@ class Ebert(BasicNewsRecipe):
|
||||
self.report_progress(0, _('Fetching feed') + ' %s...' %
|
||||
(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
page = urllib2.urlopen(feedurl).read()
|
||||
page = self.index_to_soup(feedurl, raw=True)
|
||||
|
||||
if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
|
||||
pattern = self.patternReviews
|
||||
|
@ -3,8 +3,11 @@ __copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.thetimes.co.uk/magazine/the-sunday-times-magazine/
|
||||
'''
|
||||
import urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
|
||||
|
||||
def classes(classes):
|
||||
@ -49,7 +52,7 @@ class TimesOnline(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://www.thetimes.co.uk/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({
|
||||
data = urlencode({
|
||||
'gotoUrl': self.INDEX,
|
||||
'username': self.username,
|
||||
'password': self.password})
|
||||
|
@ -9,10 +9,14 @@ __docformat__ = 'restructuredtext de'
|
||||
www.taz.de/digiabo
|
||||
'''
|
||||
import os
|
||||
import urllib2
|
||||
import zipfile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
try:
|
||||
from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen
|
||||
from urllib.error import HTTPError
|
||||
except ImportError:
|
||||
from urllib2 import HTTPBasicAuthHandler, build_opener, install_opener, urlopen, HTTPError
|
||||
|
||||
|
||||
class TazDigiabo(BasicNewsRecipe):
|
||||
@ -34,17 +38,17 @@ class TazDigiabo(BasicNewsRecipe):
|
||||
|
||||
url = domain + "/epub/"
|
||||
|
||||
auth_handler = urllib2.HTTPBasicAuthHandler()
|
||||
auth_handler = HTTPBasicAuthHandler()
|
||||
auth_handler.add_password(realm='TAZ-ABO',
|
||||
uri=url,
|
||||
user=self.username,
|
||||
passwd=self.password)
|
||||
opener = urllib2.build_opener(auth_handler)
|
||||
urllib2.install_opener(opener)
|
||||
opener = build_opener(auth_handler)
|
||||
install_opener(opener)
|
||||
|
||||
try:
|
||||
f = urllib2.urlopen(url)
|
||||
except urllib2.HTTPError:
|
||||
f = urlopen(url)
|
||||
except HTTPError:
|
||||
self.report_progress(0, _('Can\'t login to download issue'))
|
||||
raise ValueError('Failed to login, check your username and'
|
||||
' password')
|
||||
|
@ -9,9 +9,12 @@ __copyright__ = '2019, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.newcriterion.com
|
||||
'''
|
||||
|
||||
import urllib
|
||||
import urllib2
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
import re
|
||||
from mechanize import Request
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -52,7 +55,7 @@ class TheNewCriterion(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('https://www.newcriterion.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'login': self.username, 'password': self.password})
|
||||
data = urlencode({'login': self.username, 'password': self.password})
|
||||
header = {
|
||||
'X-OCTOBER-REQUEST-HANDLER': 'onSignin',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
@ -60,7 +63,7 @@ class TheNewCriterion(BasicNewsRecipe):
|
||||
'X-OCTOBER-REQUEST-PARTIALS':'',
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
|
||||
}
|
||||
request = urllib2.Request('https://www.newcriterion.com/', data, header)
|
||||
request = Request('https://www.newcriterion.com/', data, header)
|
||||
br.open(request)
|
||||
return br
|
||||
|
||||
|
@ -3,8 +3,11 @@ __copyright__ = '2010-2017, Bobby Steel <bob at xdca.com>, Darko Miletic'
|
||||
'''
|
||||
www.thetimes.co.uk
|
||||
'''
|
||||
import urllib
|
||||
import html5lib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -79,7 +82,7 @@ class TimesOnline(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://www.thetimes.co.uk/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({
|
||||
data = urlencode({
|
||||
'gotoUrl': self.INDEX,
|
||||
'username': self.username,
|
||||
'password': self.password})
|
||||
|
@ -4,7 +4,10 @@ __copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
tomshardware.com/us
|
||||
'''
|
||||
|
||||
import urllib
|
||||
try:
|
||||
from urllib.parse import urlencode
|
||||
except ImportError:
|
||||
from urllib import urlencode
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -30,7 +33,7 @@ class Tomshardware(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX + '/us/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
|
||||
data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
|
||||
})
|
||||
br.open(self.LOGIN, data)
|
||||
return br
|
||||
|
@ -5,8 +5,11 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import json
|
||||
from urllib import quote
|
||||
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
from mechanize import Request
|
||||
|
||||
from calibre import random_user_agent
|
||||
|
@ -5,7 +5,10 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import json
|
||||
from urllib import quote
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
|
||||
from mechanize import Request
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user