uniform string quote (auto-fix)

ruff 'Q'
un-pogaz 2025-01-24 11:14:14 +01:00
parent 2357c1fc48
commit 37771022ce
750 changed files with 8704 additions and 8698 deletions
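
The change corresponds to enabling ruff's flake8-quotes ('Q') rules with single quotes preferred and letting ruff auto-fix the violations. A minimal sketch of the configuration and command that would produce a fix like this (illustrative only; the actual settings in calibre's pyproject.toml are not shown in this commit):

    # pyproject.toml (assumed settings, shown for illustration)
    [tool.ruff.lint]
    extend-select = ["Q"]

    [tool.ruff.lint.flake8-quotes]
    inline-quotes = "single"
    multiline-quotes = "single"
    docstring-quotes = "single"

Running "ruff check --select Q --fix ." then rewrites double-quoted strings to single quotes, as in the diffs below.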

View File

@@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
 parser, plumber = create_option_parser(['ebook-convert',
 'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
 groups = [(pl.name+ ' Options', '', g.option_list) for g in
-parser.option_groups if g.title == "INPUT OPTIONS"]
+parser.option_groups if g.title == 'INPUT OPTIONS']
 prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
 raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
 for pl in sorted(output_format_plugins(), key=lambda x: x.name):
 parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
 'dummyi.'+pl.file_type, '-h'], default_log)
 groups = [(pl.name+ ' Options', '', g.option_list) for g in
-parser.option_groups if g.title == "OUTPUT OPTIONS"]
+parser.option_groups if g.title == 'OUTPUT OPTIONS']
 prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
 raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))

View File

@@ -55,7 +55,7 @@ class DemoDialog(QDialog):
 self.l.addWidget(self.view_button)
 self.update_metadata_button = QPushButton(
-'Update metadata in a book\'s files', self)
+"Update metadata in a book's files", self)
 self.update_metadata_button.clicked.connect(self.update_metadata)
 self.l.addWidget(self.update_metadata_button)

View File

@@ -61,7 +61,7 @@ if use_archive:
 body = root.xpath('//body')[0]
 article = E(body, 'article')
 E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
-E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
+E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
 E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
 try:
 date = data['dateModified']
@@ -157,7 +157,7 @@ class Economist(BasicNewsRecipe):
 encoding = 'utf-8'
 masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
-__author__ = "Kovid Goyal"
+__author__ = 'Kovid Goyal'
 description = (
 'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
 'an unworthy, timid ignorance obstructing our progress.”'
@@ -170,7 +170,7 @@ class Economist(BasicNewsRecipe):
 resolve_internal_links = True
 remove_tags = [
 dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
-dict(attrs={'aria-label': "Article Teaser"}),
+dict(attrs={'aria-label': 'Article Teaser'}),
 dict(attrs={
 'class': [
 'dblClkTrk', 'ec-article-info', 'share_inline_header',
@@ -224,7 +224,7 @@ class Economist(BasicNewsRecipe):
 def parse_index(self):
 # return self.economist_test_article()
 soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
-script_tag = soup.find("script", id="__NEXT_DATA__")
+script_tag = soup.find('script', id='__NEXT_DATA__')
 if script_tag is None:
 raise ValueError('No script tag with JSON data found in the weeklyedition archive')
 data = json.loads(script_tag.string)
@@ -247,20 +247,20 @@ class Economist(BasicNewsRecipe):
 self.description = data['description']
 feeds_dict = defaultdict(list)
-for part in safe_dict(data, "hasPart", "parts"):
+for part in safe_dict(data, 'hasPart', 'parts'):
 section = part['title']
 self.log(section)
-for art in safe_dict(part, "hasPart", "parts"):
+for art in safe_dict(part, 'hasPart', 'parts'):
-title = safe_dict(art, "title")
+title = safe_dict(art, 'title')
-desc = safe_dict(art, "rubric") or ''
+desc = safe_dict(art, 'rubric') or ''
-sub = safe_dict(art, "flyTitle") or ''
+sub = safe_dict(art, 'flyTitle') or ''
 if sub and section != sub:
 desc = sub + ' :: ' + desc
 pt = PersistentTemporaryFile('.html')
 pt.write(json.dumps(art).encode('utf-8'))
 pt.close()
 url = 'file:///' + pt.name
-feeds_dict[section].append({"title": title, "url": url, "description": desc})
+feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
 self.log('\t', title, '\n\t\t', desc)
 return [(section, articles) for section, articles in feeds_dict.items()]
@@ -311,26 +311,26 @@ class Economist(BasicNewsRecipe):
 return ans
 def economist_parse_index(self, soup):
-script_tag = soup.find("script", id="__NEXT_DATA__")
+script_tag = soup.find('script', id='__NEXT_DATA__')
 if script_tag is not None:
 data = json.loads(script_tag.string)
 # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
-self.title = safe_dict(data, "props", "pageProps", "content", "headline")
+self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
 # self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
 feeds = []
-for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
+for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
-section = safe_dict(coll, "headline") or ''
+section = safe_dict(coll, 'headline') or ''
 self.log(section)
 articles = []
-for part in safe_dict(coll, "hasPart", "parts"):
+for part in safe_dict(coll, 'hasPart', 'parts'):
-title = safe_dict(part, "headline") or ''
+title = safe_dict(part, 'headline') or ''
-url = safe_dict(part, "url", "canonical") or ''
+url = safe_dict(part, 'url', 'canonical') or ''
 if not title or not url:
 continue
-desc = safe_dict(part, "description") or ''
+desc = safe_dict(part, 'description') or ''
-sub = safe_dict(part, "subheadline") or ''
+sub = safe_dict(part, 'subheadline') or ''
 if sub:
 desc = sub + ' :: ' + desc
 self.log('\t', title, '\n\t', desc, '\n\t\t', url)

View File

@@ -47,11 +47,11 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
 dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
 ]
-extra_css = """
+extra_css = '''
 p{text-align: justify; font-size: 100%}
 body{ text-align: left; font-size:100% }
 h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
-"""
+'''
 preprocess_regexps = [(re.compile(
 r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]

View File

@@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
 def parse_index(self):
 feeds = []
 for title, url in [
-("They Draw and Cook", "http://www.theydrawandcook.com/")
+('They Draw and Cook', 'http://www.theydrawandcook.com/')
 ]:
 articles = self.make_links(url)
 if articles:

View File

@@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheMITPressReader(BasicNewsRecipe):
-title = "The MIT Press Reader"
+title = 'The MIT Press Reader'
 __author__ = 'yodha8'
 language = 'en'
-description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
+description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
-" This recipe pulls articles from the past 7 days.")
+' This recipe pulls articles from the past 7 days.')
 oldest_article = 7
 max_articles_per_feed = 100
 auto_cleanup = True

View File

@@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
 if d and isinstance(d, str):
 self.oldest_article = float(d)
-extra_css = """
+extra_css = '''
 p{text-align: justify; font-size: 100%}
 body{ text-align: left; font-size:100% }
 h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
 h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
 h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
-"""
+'''
 feeds = [

View File

@@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
 lambda m: '<title>' + m.group(1) + '</title>'),
 (re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
-extra_css = """
+extra_css = '''
 .chapo{font-style:italic; margin: 1em 0 0.5em}
-"""
+'''

View File

@@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
 remove_javascript = True
 use_embedded_content = False
 INDEX = u'http://www.adventuregamers.com'
-extra_css = """
+extra_css = '''
 .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
 .pageheader_title,.page_title{font-size: xx-large; color: #394128}
 .pageheader_byline{font-size: small; font-weight: bold; color: #394128}
@@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
 .score_header{font-size: large; color: #50544A}
 img{margin-bottom: 1em;}
 body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
-"""
+'''
 conversion_options = {
 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -14,7 +14,7 @@ class afr(BasicNewsRecipe):
 description = (
 'For more than 65 years The Australian Financial Review has been the authority on business,'
 ' finance and investment news in Australia. It has a reputation for independent, award-winning '
-'journalism and is essential reading for Australia\'s business and investor community.'
+"journalism and is essential reading for Australia's business and investor community."
 )
 masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
 encoding = 'utf-8'

View File

@@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
 '''
 def default_cover(self, cover_file):
-"""
+'''
 Crée une couverture personnalisée avec le logo
-"""
+'''
 from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
 from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
 weekday = french_weekday[wkd]
 month = french_month[today.month]
-date_str = f"{weekday} {today.day} {month} {today.year}"
+date_str = f'{weekday} {today.day} {month} {today.year}'
 edition = today.strftime('Édition de %Hh')
 # Image de base

View File

@@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
 max_articles_per_feed = 100
 no_stylesheets = True
 use_embedded_content = False
-extra_css = """
+extra_css = '''
 body{font-family: Arial,sans-serif}
-"""
+'''
 conversion_options = {
 'comment': description, 'tags': category,
 'publisher': publisher, 'language': language

View File

@@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
 title = title[0:120] + '...'
 href = link.get('href')
 if not href:
-self._p("BAD HREF: " + str(link))
+self._p('BAD HREF: ' + str(link))
 return
 self.queue_article_link(section, href, title)
@@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
 age = (datetime.datetime.now() - date).days
 if (age > self.oldest_article):
-return "too old"
+return 'too old'
 return False
 def scrape_article_date(self, soup):
@@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
 def date_from_string(self, datestring):
 try:
 # eg: Posted September 17, 2014
-dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
+dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
 except:
 dt = None

View File

@@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AlbertMohlersBlog(BasicNewsRecipe):
-title = u'Albert Mohler\'s Blog'
+title = u"Albert Mohler's Blog"
 __author__ = 'Peter Grungi'
 language = 'en'
 oldest_article = 90
@@ -16,5 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
 language = 'en'
 author = 'Albert Mohler'
-feeds = [(u'Albert Mohler\'s Blog',
+feeds = [(u"Albert Mohler's Blog",
 u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]

View File

@@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
 # Extract a list of dates from the page.
 # Subset this out to the list of target dates for extraction.
 date_list = []
-for div in soup.findAll('div', attrs={'id': "dayheader"}):
+for div in soup.findAll('div', attrs={'id': 'dayheader'}):
 date_list.append(self.tag_to_string(div))
 date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
 date_list_bool = [
@@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
 # Process each paragraph one by one.
 # Stop when the text of the previous div is not in the target date list.
-for div in soup.findAll('div', attrs={'class': "mobile-front"}):
+for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
 for p in div.findAll('p'):
 if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
 if p.find('a'):
 title = self.tag_to_string(p)
 link = p.find('a')['href']
 if self.tag_to_string(p.findPreviousSibling('h3')
-) == "Articles of Note":
+) == 'Articles of Note':
 articles_note.append({
 'title': title,
 'url': link,
@@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
 'date': ''
 })
 elif self.tag_to_string(p.findPreviousSibling('h3')
-) == "New Books":
+) == 'New Books':
 new_books.append({
 'title': title,
 'url': link,

View File

@@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
 self.log('Cover URL found:', cover_url)
 return cover_url
-self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
+self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
 return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
 except Exception as e:

View File

@@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
 articles = []
 for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
 href = aTag.get('href',False)
-if not href.encode("utf-8").startswith(url.encode("utf-8")) :
+if not href.encode('utf-8').startswith(url.encode('utf-8')) :
 continue # not in same section
 title = href.split('/')[-1].split('-')[0]

View File

@@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
 language = 'es_AR'
 publication_type = 'newsportal'
 masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
-extra_css = """
+extra_css = '''
 body{font-family: Roboto, sans-serif}
-"""
+'''
 conversion_options = {
 'comment': description,

View File

@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AmericanThinker(BasicNewsRecipe):
 title = u'American Thinker'
-description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
+description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
 __author__ = 'Walt Anthony'
 publisher = 'Thomas Lifson'
 category = 'news, politics, USA'

View File

@@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
 def print_version(self, url):
 # return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
-return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
+return url.replace('/show/', '/print/') # 2014-02-27 AGE: update

View File

@@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
 language = 'en'
 __author__ = 'unkn0wn'
 description = (
-'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
+"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
 'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
 'more than 5000 years of Egyptian history. Published bimonthly.'
 )

View File

@@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
 url = str(snaps['OrgId'])
 if snaps['ObjectType'] == 4:
 continue
-feeds_dict[section].append({"title": '', "url": url})
+feeds_dict[section].append({'title': '', 'url': url})
 return [(section, articles) for section, articles in feeds_dict.items()]
 def preprocess_raw_html(self, raw, *a):

View File

@@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
 url = str(snaps['OrgId'])
 if snaps['ObjectType'] == 4:
 continue
-feeds_dict[section].append({"title": '', "url": url})
+feeds_dict[section].append({'title': '', 'url': url})
 return [(section, articles) for section, articles in feeds_dict.items()]
 def preprocess_raw_html(self, raw, *a):

View File

@@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
 # (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
 # (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
 # (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
-(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
+(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
 # (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
 # (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
-(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
+(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
 # u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
 # (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
 # (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
 # (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
-(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
+(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
 # (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
 # (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
-(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
+(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
-(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
+(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
 # (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
 # (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
 # (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
@@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
 # (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
 # (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
 # (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
-(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
+(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
 # (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
 # (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
 # (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
 # (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
 # (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
 # (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
-(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
+(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
-(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
+(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
-(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
+(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
 ]:
 self.log('Finding strips for:', title)
 articles = self.make_links(url, title)

View File

@@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ArretSurImages(BasicNewsRecipe):
 title = 'Arrêt sur Images'
-description = 'Site français d\'analyse des médias'
+description = "Site français d'analyse des médias"
 language = 'fr'
 encoding = 'utf-8'
 needs_subscription = True
@@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
 ]
 def default_cover(self, cover_file):
-"""
+'''
 Crée une couverture personnalisée avec le logo ASI
-"""
+'''
 from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
 from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
 weekday = french_weekday[wkd]
 month = french_month[today.month]
-date_str = f"{weekday} {today.day} {month} {today.year}"
+date_str = f'{weekday} {today.day} {month} {today.year}'
 edition = today.strftime('Édition de %Hh')
 img = QImage(1400, 1920, QImage.Format_RGB888)
@@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
 br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
 print('Authentification réussie')
 else:
-print('Échec de l\'authentification - Vérifiez vos identifiants')
+print("Échec de l'authentification - Vérifiez vos identifiants")
 except Exception as e:
-print(f'Erreur lors de l\'authentification: {str(e)}')
+print(f"Erreur lors de l'authentification: {str(e)}")
 return br
 def get_article_url(self, article):

View File

@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-__license__ = "GPL v3"
+__license__ = 'GPL v3'
-__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
+__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
-"""
+'''
 https://www.asahi.com/ajw/
-"""
+'''
 from datetime import datetime
@@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AsahiShimbunEnglishNews(BasicNewsRecipe):
-title = "The Asahi Shimbun"
+title = 'The Asahi Shimbun'
-__author__ = "Albert Aparicio Isarn"
+__author__ = 'Albert Aparicio Isarn'
-description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
+description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
-" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
+' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
-" coverage of cool Japan,focusing on manga, travel and other timely news.")
+' coverage of cool Japan,focusing on manga, travel and other timely news.')
-publisher = "The Asahi Shimbun Company"
+publisher = 'The Asahi Shimbun Company'
-publication_type = "newspaper"
+publication_type = 'newspaper'
-category = "news, japan"
+category = 'news, japan'
-language = "en_JP"
+language = 'en_JP'
-index = "https://www.asahi.com"
+index = 'https://www.asahi.com'
-masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
+masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
 oldest_article = 3
 max_articles_per_feed = 40
 no_stylesheets = True
 remove_javascript = True
-remove_tags_before = {"id": "MainInner"}
+remove_tags_before = {'id': 'MainInner'}
-remove_tags_after = {"class": "ArticleText"}
+remove_tags_after = {'class': 'ArticleText'}
-remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
+remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
 def get_whats_new(self):
-soup = self.index_to_soup(self.index + "/ajw/new")
+soup = self.index_to_soup(self.index + '/ajw/new')
-news_section = soup.find("div", attrs={"class": "specialList"})
+news_section = soup.find('div', attrs={'class': 'specialList'})
 new_news = []
-for item in news_section.findAll("li"):
+for item in news_section.findAll('li'):
-title = item.find("p", attrs={"class": "title"}).string
+title = item.find('p', attrs={'class': 'title'}).string
-date_string = item.find("p", attrs={"class": "date"}).next
+date_string = item.find('p', attrs={'class': 'date'}).next
 date = date_string.strip()
-url = self.index + item.find("a")["href"]
+url = self.index + item.find('a')['href']
 new_news.append(
 {
-"title": title,
+'title': title,
-"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
+'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
-"url": url,
+'url': url,
-"description": "",
+'description': '',
 }
 )
 return new_news
 def get_top6(self, soup):
-top = soup.find("ul", attrs={"class": "top6"})
+top = soup.find('ul', attrs={'class': 'top6'})
 top6_news = []
-for item in top.findAll("li"):
+for item in top.findAll('li'):
-title = item.find("p", attrs={"class": "title"}).string
+title = item.find('p', attrs={'class': 'title'}).string
-date_string = item.find("p", attrs={"class": "date"}).next
+date_string = item.find('p', attrs={'class': 'date'}).next
 date = date_string.strip()
-url = self.index + item.find("a")["href"]
+url = self.index + item.find('a')['href']
 top6_news.append(
 {
-"title": title,
+'title': title,
-"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
+'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
-"url": url,
+'url': url,
-"description": "",
+'description': '',
 }
 )
 return top6_news
 def get_section_news(self, soup):
-news_grid = soup.find("ul", attrs={"class": "default"})
+news_grid = soup.find('ul', attrs={'class': 'default'})
 news = []
-for item in news_grid.findAll("li"):
+for item in news_grid.findAll('li'):
-title = item.find("p", attrs={"class": "title"}).string
+title = item.find('p', attrs={'class': 'title'}).string
-date_string = item.find("p", attrs={"class": "date"}).next
+date_string = item.find('p', attrs={'class': 'date'}).next
 date = date_string.strip()
-url = self.index + item.find("a")["href"]
+url = self.index + item.find('a')['href']
 news.append(
 {
-"title": title,
+'title': title,
-"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
+'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
-"url": url,
+'url': url,
-"description": "",
+'description': '',
 }
 )
 return news
 def get_section(self, section):
-soup = self.index_to_soup(self.index + "/ajw/" + section)
+soup = self.index_to_soup(self.index + '/ajw/' + section)
 section_news_items = self.get_top6(soup)
 section_news_items.extend(self.get_section_news(soup))
@@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
 return section_news_items
 def get_special_section(self, section):
-soup = self.index_to_soup(self.index + "/ajw/" + section)
+soup = self.index_to_soup(self.index + '/ajw/' + section)
-top = soup.find("div", attrs={"class": "Section"})
+top = soup.find('div', attrs={'class': 'Section'})
 special_news = []
-for item in top.findAll("li"):
+for item in top.findAll('li'):
-item_a = item.find("a")
+item_a = item.find('a')
-text_split = item_a.text.strip().split("\n")
+text_split = item_a.text.strip().split('\n')
 title = text_split[0]
 description = text_split[1].strip()
-url = self.index + item_a["href"]
+url = self.index + item_a['href']
 special_news.append(
 {
-"title": title,
+'title': title,
-"date": "",
+'date': '',
-"url": url,
+'url': url,
-"description": description,
+'description': description,
 }
 )
@@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
 feeds = [
 ("What's New", self.get_whats_new()),
-("National Report", self.get_section("national_report")),
+('National Report', self.get_section('national_report')),
-("Politics", self.get_section("politics")),
+('Politics', self.get_section('politics')),
-("Business", self.get_section("business")),
+('Business', self.get_section('business')),
-("Asia & World - China", self.get_section("asia_world/china")),
+('Asia & World - China', self.get_section('asia_world/china')),
-("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
+('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
-("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
+('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
-("Asia & World - World", self.get_section("asia_world/world")),
+('Asia & World - World', self.get_section('asia_world/world')),
-("Sci & Tech", self.get_section("sci_tech")),
+('Sci & Tech', self.get_section('sci_tech')),
-("Culture - Style", self.get_section("culture/style")),
+('Culture - Style', self.get_section('culture/style')),
 # ("Culture - Cooking", self.get_section("culture/cooking")),
-("Culture - Movies", self.get_section("culture/movies")),
+('Culture - Movies', self.get_section('culture/movies')),
-("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
+('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
-("Travel", self.get_section("travel")),
+('Travel', self.get_section('travel')),
-("Sports", self.get_section("sports")),
+('Sports', self.get_section('sports')),
-("Opinion - Editorial", self.get_section("opinion/editorial")),
+('Opinion - Editorial', self.get_section('opinion/editorial')),
-("Opinion - Vox Populi", self.get_section("opinion/vox")),
+('Opinion - Vox Populi', self.get_section('opinion/vox')),
-("Opinion - Views", self.get_section("opinion/views")),
+('Opinion - Views', self.get_section('opinion/views')),
-("Special", self.get_special_section("special")),
+('Special', self.get_special_section('special')),
 ]
 return feeds

View File

@@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
 publication_type = 'magazine'
 auto_cleanup = True
 masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
-extra_css = """
+extra_css = '''
 body{font-family: "Droid Serif", serif}
 .entry-title {font-family: "Playfair Display", serif}
 img {display: block}
-"""
+'''
 recipe_specific_options = {
 'days': {

View File

@@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
 publication_type = 'newspaper'
 masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
 INDEX = 'http://www.buenosairesherald.com'
-extra_css = """
+extra_css = '''
 body{font-family: Arial,Helvetica,sans-serif }
 img{margin-bottom: 0.4em; display:block}
 h1{font-family: Georgia,serif}
 #fecha{text-align: right; font-size: small}
-"""
+'''
 conversion_options = {
 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
 title = u'Bangkok Post'
 publisher = u'Post Publishing PCL'
 category = u'News'
-description = u'The world\'s window to Thailand'
+description = u"The world's window to Thailand"
 oldest_article = 7
 max_articles_per_feed = 100

View File

@@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
 class barrons(BasicNewsRecipe):
-title = 'Barron\'s Magazine'
+title = "Barron's Magazine"
 __author__ = 'unkn0wn'
 description = (
-'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
+"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
-'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
+"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
 'relevant statistics.'
 )
 language = 'en_US'
@@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
 recipe_specific_options = {
 'date': {
 'short': 'The date of the edition to download (YYYYMMDD format)',
-'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
+'long': "For example, 20240722.\nIf it didn't work, try again later."
 }
 }

View File

@@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
 # Select / de-select the feeds you want in your ebook.
 feeds = [
-("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
+('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
-("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
+('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
-("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
+('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
 # ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
 # ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
 # ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
@@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
 # ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
 # ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
 # ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
-("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
+('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
-("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
+('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
-("Science/Environment",
+('Science/Environment',
-"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
+'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
-("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
+('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
-("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
+('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
-("Entertainment/Arts",
+('Entertainment/Arts',
-"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
+'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
 # ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
 # ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
-("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
+('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
-("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
+('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
-("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
+('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
 # ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
 # ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
 # ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
 # ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
 # ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
-("Sport Front Page",
+('Sport Front Page',
-"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
+'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
 # ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
 # ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
 # ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),

View File

@@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
 def print_version(self, url):
 # Handle sports page urls type 01:
-if (url.find("go/rss/-/sport1/") != -1):
+if (url.find('go/rss/-/sport1/') != -1):
-temp_url = url.replace("go/rss/-/", "")
+temp_url = url.replace('go/rss/-/', '')
 # Handle sports page urls type 02:
-elif (url.find("go/rss/int/news/-/sport1/") != -1):
+elif (url.find('go/rss/int/news/-/sport1/') != -1):
-temp_url = url.replace("go/rss/int/news/-/", "")
+temp_url = url.replace('go/rss/int/news/-/', '')
 # Handle regular news page urls:
 else:
-temp_url = url.replace("go/rss/int/news/-/", "")
+temp_url = url.replace('go/rss/int/news/-/', '')
 # Always add "?print=true" to the end of the url.
-print_url = temp_url + "?print=true"
+print_url = temp_url + '?print=true'
 return print_url

View File

@@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
 feeds.append(("O'Reilly Factor", articles_shows))
 if articles_columns:
-feeds.append(("Newspaper Column", articles_columns))
+feeds.append(('Newspaper Column', articles_columns))
 return feeds

View File

@@ -27,8 +27,8 @@ class bleskRecipe(BasicNewsRecipe):
 cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
 remove_javascript = True
 no_stylesheets = True
-extra_css = """
+extra_css = '''
-"""
+'''
 remove_attributes = []
 remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})

View File

@@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
 masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
 language = 'sr'
 publication_type = 'newspaper'
-extra_css = """
+extra_css = '''
 @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
 @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
 body{font-family: Georgia, serif1, serif}
@@ -35,7 +35,7 @@ class Blic(BasicNewsRecipe):
 .potpis{font-size: x-small; color: gray}
 .article_info{font-size: small}
 img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
-"""
+'''
 conversion_options = {
 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True

View File

@@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
 masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
 description = (
 'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
-' companies, events, and trends shaping today\'s complex, global economy.'
+" companies, events, and trends shaping today's complex, global economy."
 )
 remove_empty_feeds = True

View File

@@ -2,29 +2,29 @@ from urllib.parse import urljoin
 from calibre.web.feeds.news import BasicNewsRecipe
-_issue_url = ""
+_issue_url = ''
 class BookforumMagazine(BasicNewsRecipe):
-title = "Bookforum"
+title = 'Bookforum'
 description = (
-"Bookforum is an American book review magazine devoted to books and "
+'Bookforum is an American book review magazine devoted to books and '
-"the discussion of literature. https://www.bookforum.com/print"
+'the discussion of literature. https://www.bookforum.com/print'
 )
-language = "en"
+language = 'en'
-__author__ = "ping"
+__author__ = 'ping'
-publication_type = "magazine"
+publication_type = 'magazine'
-encoding = "utf-8"
+encoding = 'utf-8'
 remove_javascript = True
 no_stylesheets = True
 auto_cleanup = False
 compress_news_images = True
 compress_news_images_auto_size = 8
-keep_only_tags = [dict(class_="blog-article")]
+keep_only_tags = [dict(class_='blog-article')]
-remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
+remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
-extra_css = """
+extra_css = '''
 .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
 .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
 .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
@@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
 display: block; max-width: 100%; height: auto;
 }
 .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
-"""
+'''
 def preprocess_html(self, soup):
 # strip away links that's not needed
-for ele in soup.select(".blog-article__header a"):
+for ele in soup.select('.blog-article__header a'):
 ele.unwrap()
 return soup
 def parse_index(self):
 soup = self.index_to_soup(
-_issue_url if _issue_url else "https://www.bookforum.com/print"
+_issue_url if _issue_url else 'https://www.bookforum.com/print'
 )
-meta_ele = soup.find("meta", property="og:title")
+meta_ele = soup.find('meta', property='og:title')
 if meta_ele:
 self.timefmt = f' [{meta_ele["content"]}]'
-cover_ele = soup.find("img", class_="toc-issue__cover")
+cover_ele = soup.find('img', class_='toc-issue__cover')
 if cover_ele:
 self.cover_url = urljoin(
-"https://www.bookforum.com",
+'https://www.bookforum.com',
-soup.find("img", class_="toc-issue__cover")["src"],
+soup.find('img', class_='toc-issue__cover')['src'],
 )
 articles = {}
-for sect_ele in soup.find_all("div", class_="toc-articles__section"):
+for sect_ele in soup.find_all('div', class_='toc-articles__section'):
 section_name = self.tag_to_string(
-sect_ele.find("a", class_="toc__anchor-links__link")
+sect_ele.find('a', class_='toc__anchor-links__link')
 )
-for article_ele in sect_ele.find_all("article"):
+for article_ele in sect_ele.find_all('article'):
-title_ele = article_ele.find("h1")
+title_ele = article_ele.find('h1')
-sub_title_ele = article_ele.find(class_="toc-article__subtitle")
+sub_title_ele = article_ele.find(class_='toc-article__subtitle')
 articles.setdefault(section_name, []).append(
 {
-"title": self.tag_to_string(title_ele),
+'title': self.tag_to_string(title_ele),
-"url": article_ele.find("a", class_="toc-article__link")[
+'url': article_ele.find('a', class_='toc-article__link')[
-"href"
+'href'
 ],
-"description": self.tag_to_string(sub_title_ele)
+'description': self.tag_to_string(sub_title_ele)
 if sub_title_ele
-else "",
+else '',
 }
 )
 return articles.items()

View File

@@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
language = 'da' language = 'da'
keep_only_tags = [ keep_only_tags = [
dict(name="h1", attrs={'itemprop': 'headline'}), dict(name='h1', attrs={'itemprop': 'headline'}),
dict(name="div", attrs={'itemprob': 'datePublished'}), dict(name='div', attrs={'itemprob': 'datePublished'}),
dict(name="div", attrs={'itemprop': 'articleBody'}), dict(name='div', attrs={'itemprop': 'articleBody'}),
] ]
# Feeds are found here: # Feeds are found here:

View File

@@ -42,24 +42,24 @@ def class_startswith(*prefixes):
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true # From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
comics_to_fetch = { comics_to_fetch = {
"ADAM@HOME": 'ad', 'ADAM@HOME': 'ad',
"ARLO & JANIS": 'aj', 'ARLO & JANIS': 'aj',
# "CUL DE SAC": 'cds', # "CUL DE SAC": 'cds',
# "CURTIS": 'kfcrt', # "CURTIS": 'kfcrt',
"DILBERT": 'dt', 'DILBERT': 'dt',
"DOONESBURY": 'db', 'DOONESBURY': 'db',
"DUSTIN": 'kfdus', 'DUSTIN': 'kfdus',
"F MINUS": 'fm', 'F MINUS': 'fm',
"FOR BETTER OR WORSE": 'fb', 'FOR BETTER OR WORSE': 'fb',
# "GET FUZZY": 'gz', # "GET FUZZY": 'gz',
# "MOTHER GOOSE & GRIMM": 'tmmgg', # "MOTHER GOOSE & GRIMM": 'tmmgg',
# "JUMPSTART": 'jt', # "JUMPSTART": 'jt',
"MONTY": 'mt', 'MONTY': 'mt',
# "POOCH CAFE", # "POOCH CAFE",
"RHYMES WITH ORANGE": 'kfrwo', 'RHYMES WITH ORANGE': 'kfrwo',
# "ROSE IS ROSE": 'rr', # "ROSE IS ROSE": 'rr',
# "ZIPPY THE PINHEAD": 'kfzpy', # "ZIPPY THE PINHEAD": 'kfzpy',
"ZITS": 'kfzt' 'ZITS': 'kfzt'
} }
@@ -77,10 +77,10 @@ def extract_json(raw_html):
def absolutize_url(url): def absolutize_url(url):
if url.startswith("//"): if url.startswith('//'):
return "https:" + url return 'https:' + url
if url.startswith('/'): if url.startswith('/'):
url = "https://www.bostonglobe.com" + url url = 'https://www.bostonglobe.com' + url
return url return url
@@ -120,7 +120,7 @@ def main():
class BostonGlobeSubscription(BasicNewsRecipe): class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe" title = 'Boston Globe'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
description = 'The Boston Globe' description = 'The Boston Globe'
language = 'en_US' language = 'en_US'

View File

@@ -25,17 +25,17 @@ def class_startswith(*prefixes):
return dict(attrs={'class': q}) return dict(attrs={'class': q})
def absolutize_url(url): def absolutize_url(url):
if url.startswith("//"): if url.startswith('//'):
return "https:" + url return 'https:' + url
if url.startswith('/'): if url.startswith('/'):
url = "https://www.bostonglobe.com" + url url = 'https://www.bostonglobe.com' + url
return url return url
class BostonGlobePrint(BasicNewsRecipe): class BostonGlobePrint(BasicNewsRecipe):
title = "Boston Globe | Print Edition" title = 'Boston Globe | Print Edition'
__author__ = 'Kovid Goyal, unkn0wn' __author__ = 'Kovid Goyal, unkn0wn'
description = 'The Boston Globe - Today\'s Paper' description = "The Boston Globe - Today's Paper"
language = 'en_US' language = 'en_US'
keep_only_tags = [ keep_only_tags = [
@@ -70,7 +70,7 @@ class BostonGlobePrint(BasicNewsRecipe):
for image in soup.findAll('img', src=True): for image in soup.findAll('img', src=True):
if image['src'].endswith('750.jpg'): if image['src'].endswith('750.jpg'):
return 'https:' + image['src'] return 'https:' + image['src']
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
@@ -94,7 +94,7 @@ class BostonGlobePrint(BasicNewsRecipe):
desc = self.tag_to_string(d) desc = self.tag_to_string(d)
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url) self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
feeds_dict[section].append({"title": title, "url": url, "description": desc}) feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in feeds_dict.items()] return [(section, articles) for section, articles in feeds_dict.items()]
def preprocess_raw_html(self, raw_html, url): def preprocess_raw_html(self, raw_html, url):

View File

@@ -23,40 +23,40 @@ class brewiarz(BasicNewsRecipe):
next_days = 1 next_days = 1
def parse_index(self): def parse_index(self):
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv", dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
"05": "v", "06": "vi", "07": "vii", "08": "viii", '05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
"09": "ix", "10": "x", "11": "xi", "12": "xii"} '09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek", weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"} 'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
now = datetime.datetime.now() now = datetime.datetime.now()
feeds = [] feeds = []
for i in range(0, self.next_days): for i in range(0, self.next_days):
url_date = now + datetime.timedelta(days=i) url_date = now + datetime.timedelta(days=i)
url_date_month = url_date.strftime("%m") url_date_month = url_date.strftime('%m')
url_date_month_roman = dec2rom_dict[url_date_month] url_date_month_roman = dec2rom_dict[url_date_month]
url_date_day = url_date.strftime("%d") url_date_day = url_date.strftime('%d')
url_date_year = url_date.strftime("%Y")[2:] url_date_year = url_date.strftime('%Y')[2:]
url_date_weekday = url_date.strftime("%A") url_date_weekday = url_date.strftime('%A')
url_date_weekday_pl = weekday_dict[url_date_weekday] url_date_weekday_pl = weekday_dict[url_date_weekday]
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \ url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + \
url_date_year + "/" + url_date_day + url_date_month + "/index.php3" url_date_year + '/' + url_date_day + url_date_month + '/index.php3'
articles = self.parse_pages(url) articles = self.parse_pages(url)
if articles: if articles:
title = url_date_weekday_pl + " " + url_date_day + \ title = url_date_weekday_pl + ' ' + url_date_day + \
"." + url_date_month + "." + url_date_year '.' + url_date_month + '.' + url_date_year
feeds.append((title, articles)) feeds.append((title, articles))
else: else:
sectors = self.get_sectors(url) sectors = self.get_sectors(url)
for subpage in sectors: for subpage in sectors:
title = url_date_weekday_pl + " " + url_date_day + "." + \ title = url_date_weekday_pl + ' ' + url_date_day + '.' + \
url_date_month + "." + url_date_year + " - " + subpage.string url_date_month + '.' + url_date_year + ' - ' + subpage.string
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \ url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year + \
"/" + url_date_day + url_date_month + \ '/' + url_date_day + url_date_month + \
"/" + subpage['href'] '/' + subpage['href']
print(url) print(url)
articles = self.parse_pages(url) articles = self.parse_pages(url)
if articles: if articles:
@@ -91,7 +91,7 @@ class brewiarz(BasicNewsRecipe):
sublinks = ol.findAll(name='a') sublinks = ol.findAll(name='a')
for sublink in sublinks: for sublink in sublinks:
link_title = self.tag_to_string( link_title = self.tag_to_string(
link) + " - " + self.tag_to_string(sublink) link) + ' - ' + self.tag_to_string(sublink)
link_url_print = re.sub( link_url_print = re.sub(
'php3', 'php3?kr=_druk&wr=lg&', sublink['href']) 'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
link_url = url[:-10] + link_url_print link_url = url[:-10] + link_url_print
@@ -145,7 +145,7 @@ class brewiarz(BasicNewsRecipe):
if x == tag: if x == tag:
break break
else: else:
print("Can't find", tag, "in", tag.parent) print("Can't find", tag, 'in', tag.parent)
continue continue
for r in reversed(tag.contents): for r in reversed(tag.contents):
tag.parent.insert(i, r) tag.parent.insert(i, r)

View File

@@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newsportal' publication_type = 'newsportal'
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg' masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
extra_css = """ extra_css = '''
body{font-family: Arial,Helvetica,sans-serif } body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -64,7 +64,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
if dt.weekday() == 6: if dt.weekday() == 6:
self.log.warn( self.log.warn(
'Business Standard Does Not Have A Print Publication On Sunday. The Reports' 'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.' " And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
) )
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
raw = self.index_to_soup(url, raw=True) raw = self.index_to_soup(url, raw=True)

View File

@@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
# Insert feeds in specified order, if available # Insert feeds in specified order, if available
feedSort = ['Editor\'s Note', 'Editors note'] feedSort = ["Editor's Note", 'Editors note']
for i in feedSort: for i in feedSort:
if i in sections: if i in sections:
feeds.append((i, sections[i])) feeds.append((i, sections[i]))

View File

@@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CACM(BasicNewsRecipe): class CACM(BasicNewsRecipe):
title = "ACM CACM Magazine" title = 'ACM CACM Magazine'
description = "Published on day 1 of every month." description = 'Published on day 1 of every month.'
language = 'en' language = 'en'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100
@@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
] ]
def get_cover_url(self): def get_cover_url(self):
""" '''
Parse out cover URL from cover page. Parse out cover URL from cover page.
Example: Example:
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668 From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
""" '''
soup = self.index_to_soup("https://cacm.acm.org/") soup = self.index_to_soup('https://cacm.acm.org/')
a_img = soup.find("a", class_="menuCover") a_img = soup.find('a', class_='menuCover')
img_url = a_img.img["src"] img_url = a_img.img['src']
img_url = img_url.split("?")[0] img_url = img_url.split('?')[0]
img_url = img_url.replace(".large", "") img_url = img_url.replace('.large', '')
return img_url return img_url
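For illustration, the cover-URL cleanup described in the docstring above amounts to two string operations. A minimal sketch, using the example URL from that docstring:

    src = 'https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668'
    cover = src.split('?')[0].replace('.large', '')
    # cover == 'https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg'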

View File

@@ -29,28 +29,28 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
] ]
feeds = [ feeds = [
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"), (u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"), (u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"), (u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"), (u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"), (u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"), (u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"), (u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"), (u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"), (u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"), (u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"), (u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"), (u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"), (u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"), (u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"), (u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"), (u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"), (u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"), (u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"), (u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"), (u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"), (u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"), (u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"), (u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml") (u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
] ]

View File

@@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
continue continue
break break
if daysback == 7: if daysback == 7:
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
def fixChars(self, string): def fixChars(self, string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91", "", string) fixed = re.sub('\x91', '', string)
# Replace rsquo (\x92) # Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed) fixed = re.sub('\x92', '', fixed)
# Replace ldquo (\x93) # Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed) fixed = re.sub('\x93', '', fixed)
# Replace rdquo (\x94) # Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed) fixed = re.sub('\x94', '', fixed)
# Replace ndash (\x96) # Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed) fixed = re.sub('\x96', '', fixed)
# Replace mdash (\x97) # Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed) fixed = re.sub('\x97', '', fixed)
fixed = re.sub("&#x2019;", "", fixed) fixed = re.sub('&#x2019;', '', fixed)
return fixed return fixed
def massageNCXText(self, description): def massageNCXText(self, description):
@@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'): if url.startswith('/'):
url = self.url_prefix + url url = self.url_prefix + url
if not url.startswith(self.url_prefix): if not url.startswith(self.url_prefix):
print("Rejected " + url) print('Rejected ' + url)
return return
if url in self.url_list: if url in self.url_list:
print("Rejected dup " + url) print('Rejected dup ' + url)
return return
self.url_list.append(url) self.url_list.append(url)
title = self.tag_to_string(atag, False) title = self.tag_to_string(atag, False)
@@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
return return
dtag = adiv.find('div', 'content') dtag = adiv.find('div', 'content')
description = '' description = ''
print("URL " + url) print('URL ' + url)
print("TITLE " + title) print('TITLE ' + title)
if dtag is not None: if dtag is not None:
stag = dtag.span stag = dtag.span
if stag is not None: if stag is not None:
@@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False) description = self.tag_to_string(stag, False)
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print('DESCRIPTION: ' + description)
if key not in articles: if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl): def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl) print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try: try:
soup = self.index_to_soup(self.url_prefix + keyurl) soup = self.index_to_soup(self.url_prefix + keyurl)
except: except:
print("Section: " + key + ' NOT FOUND') print('Section: ' + key + ' NOT FOUND')
return return
ans.append(key) ans.append(key)
mainsoup = soup.find('div', 'bodywrapper') mainsoup = soup.find('div', 'bodywrapper')

View File

@@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='h1'),
dict(name='p'), dict(name='p'),
dict(name='span', attrs={'id': ["textbody"]}) dict(name='span', attrs={'id': ['textbody']})
] ]
# 3 posts seemed to have utf8 encoding # 3 posts seemed to have utf8 encoding

View File

@@ -96,7 +96,7 @@ class CaravanMagazine(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *args, **kw) br = BasicNewsRecipe.get_browser(self, *args, **kw)
if not self.username or not self.password: if not self.username or not self.password:
return br return br
data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}}) data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
if not isinstance(data, bytes): if not isinstance(data, bytes):
data = data.encode('utf-8') data = data.encode('utf-8')
rq = Request( rq = Request(
@@ -138,7 +138,7 @@ class CaravanMagazine(BasicNewsRecipe):
d = self.recipe_specific_options.get('date') d = self.recipe_specific_options.get('date')
if d and isinstance(d, str): if d and isinstance(d, str):
x = d.split('-') x = d.split('-')
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}}) inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='') api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
raw = json.loads(self.index_to_soup(api, raw=True)) raw = json.loads(self.index_to_soup(api, raw=True))
@@ -174,7 +174,7 @@ class CaravanMagazine(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
slug = urlparse(url).path slug = urlparse(url).path
inp = json.dumps({"0":{"json":{"slug":slug}}}) inp = json.dumps({'0':{'json':{'slug':slug}}})
return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='') return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):

View File

@@ -5,9 +5,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CATOInstitute(BasicNewsRecipe): class CATOInstitute(BasicNewsRecipe):
title = u'The CATO Institute' title = u'The CATO Institute'
description = "The Cato Institute is a public policy research organization — a think tank — \ description = 'The Cato Institute is a public policy research organization — a think tank — \
dedicated to the principles of individual liberty, limited government, free markets and peace.\ dedicated to the principles of individual liberty, limited government, free markets and peace.\
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues." Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues.'
__author__ = '_reader' __author__ = '_reader'
__date__ = '05 July 2012' __date__ = '05 July 2012'
__version__ = '1.0' __version__ = '1.0'

View File

@@ -24,7 +24,7 @@ class CSMonitor(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
extra_css = """ extra_css = '''
body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif } body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
.head {font-family: Georgia,"Times New Roman",Times,serif} .head {font-family: Georgia,"Times New Roman",Times,serif}
@@ -32,7 +32,7 @@ class CSMonitor(BasicNewsRecipe):
.hide{display: none} .hide{display: none}
.sLoc{font-weight: bold} .sLoc{font-weight: bold}
ul{list-style-type: none} ul{list-style-type: none}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -39,7 +39,7 @@ class Chronicle(BasicNewsRecipe):
# Go to the issue # Go to the issue
soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/') soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
issue = soup0.find('ul', attrs={'class': 'feature-promo-list'}).li issue = soup0.find('ul', attrs={'class': 'feature-promo-list'}).li
issueurl = "http://chronicle.com" + issue.a['href'] issueurl = 'http://chronicle.com' + issue.a['href']
# Find date # Find date
dates = self.tag_to_string(issue.a).split(': ')[-1] dates = self.tag_to_string(issue.a).split(': ')[-1]
@@ -47,12 +47,12 @@ class Chronicle(BasicNewsRecipe):
# Find cover # Find cover
cover = soup0.find('div', attrs={ cover = soup0.find('div', attrs={
'class': 'side-content'}).find(attrs={'src': re.compile("photos/biz/Current")}) 'class': 'side-content'}).find(attrs={'src': re.compile('photos/biz/Current')})
if cover is not None: if cover is not None:
if "chronicle.com" in cover['src']: if 'chronicle.com' in cover['src']:
self.cover_url = cover['src'] self.cover_url = cover['src']
else: else:
self.cover_url = "http://chronicle.com" + cover['src'] self.cover_url = 'http://chronicle.com' + cover['src']
# Go to the main body # Go to the main body
soup = self.index_to_soup(issueurl) soup = self.index_to_soup(issueurl)
div = soup.find('div', attrs={'id': 'article-body'}) div = soup.find('div', attrs={'id': 'article-body'})
@@ -64,7 +64,7 @@ class Chronicle(BasicNewsRecipe):
a = post.find('a', href=True) a = post.find('a', href=True)
if a is not None: if a is not None:
title = self.tag_to_string(a) title = self.tag_to_string(a)
url = "http://chronicle.com" + a['href'].strip() url = 'http://chronicle.com' + a['href'].strip()
sectiontitle = post.findPrevious('h3') sectiontitle = post.findPrevious('h3')
if sectiontitle is None: if sectiontitle is None:
sectiontitle = post.findPrevious('h4') sectiontitle = post.findPrevious('h4')

View File

@@ -18,24 +18,24 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
# remove_javascript = True # remove_javascript = True
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id': ["header", "navigation", "skip-link", dict(name='div', attrs={'id': ['header', 'navigation', 'skip-link',
"header-print", "header-print-url", "meta-toolbar", "footer"]}), 'header-print', 'header-print-url', 'meta-toolbar', 'footer']}),
dict(name='div', attrs={'class': ["region region-sidebar-first column sidebar", "breadcrumb", dict(name='div', attrs={'class': ['region region-sidebar-first column sidebar', 'breadcrumb',
"breadcrumb-title", "meta", "comment-wrapper", 'breadcrumb-title', 'meta', 'comment-wrapper',
"field field-name-field-show-teaser-right field-type-list-boolean field-label-above", 'field field-name-field-show-teaser-right field-type-list-boolean field-label-above',
"page-header", 'page-header',
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1", 'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1',
"pagination", 'pagination',
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1", 'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1',
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2", # 2011-09-23 'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2', # 2011-09-23
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2", # 2011-09-23 'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2', # 2011-09-23
]}), ]}),
dict(name='div', attrs={'title': ["Dossier Auswahl"]}), dict(name='div', attrs={'title': ['Dossier Auswahl']}),
dict(name='h2', attrs={'class': ["title comment-form"]}), dict(name='h2', attrs={'class': ['title comment-form']}),
dict(name='form', attrs={ dict(name='form', attrs={
'class': ["comment-form user-info-from-cookie"]}), 'class': ['comment-form user-info-from-cookie']}),
dict(name='table', attrs={ dict(name='table', attrs={
'class': ["mcx-social-horizontal", "page-header"]}), 'class': ['mcx-social-horizontal', 'page-header']}),
] ]
feeds = [ feeds = [

View File

@@ -34,7 +34,7 @@ class AdvancedUserRecipe1234144423(BasicNewsRecipe):
dict(name='div', attrs={'class': ['padding', 'sidebar-photo', 'blog caitlin']})] dict(name='div', attrs={'class': ['padding', 'sidebar-photo', 'blog caitlin']})]
remove_tags = [ remove_tags = [
dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ["pluckcomments", "StoryChat"]}), dict( dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ['pluckcomments', 'StoryChat']}), dict(
name='div', attrs={'class': ['articleflex-container', ]}), dict(name='p', attrs={'class': ['posted', 'tags']}) name='div', attrs={'class': ['articleflex-container', ]}), dict(name='p', attrs={'class': ['posted', 'tags']})
] ]

View File

@@ -23,14 +23,14 @@ class CiperChile(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'blog' publication_type = 'blog'
masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png' masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png'
extra_css = """ extra_css = '''
body{font-family: Arial,sans-serif} body{font-family: Arial,sans-serif}
.excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em} .excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em}
.author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small} .author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small}
.date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey} .date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey}
.epigrafe{font-size: small; color: grey} .epigrafe{font-size: small; color: grey}
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -44,7 +44,7 @@ class Clarin(BasicNewsRecipe):
# To get all the data (images) # To get all the data (images)
auto_cleanup = False auto_cleanup = False
extra_css = """ extra_css = '''
h1#title { h1#title {
line-height: 1em; line-height: 1em;
margin: 0 0 .5em 0; margin: 0 0 .5em 0;
@@ -64,7 +64,7 @@ class Clarin(BasicNewsRecipe):
font-size: .9em; font-size: .9em;
margin-bottom: .5em; margin-bottom: .5em;
} }
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@@ -25,16 +25,16 @@ class CNetJapan(BasicNewsRecipe):
lambda match: '<!-- removed -->'), lambda match: '<!-- removed -->'),
] ]
remove_tags_before = dict(id="contents_l") remove_tags_before = dict(id='contents_l')
remove_tags = [ remove_tags = [
{'class': "social_bkm_share"}, {'class': 'social_bkm_share'},
{'class': "social_bkm_print"}, {'class': 'social_bkm_print'},
{'class': "block20 clearfix"}, {'class': 'block20 clearfix'},
dict(name="div", attrs={'id': 'bookreview'}), dict(name='div', attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"}, {'class': 'tag_left_ttl'},
{'class': "tag_right"} {'class': 'tag_right'}
] ]
remove_tags_after = {'class': "block20"} remove_tags_after = {'class': 'block20'}
def parse_feeds(self): def parse_feeds(self):

View File

@@ -25,16 +25,16 @@ class CNetJapanDigital(BasicNewsRecipe):
lambda match: '<!-- removed -->'), lambda match: '<!-- removed -->'),
] ]
remove_tags_before = dict(id="contents_l") remove_tags_before = dict(id='contents_l')
remove_tags = [ remove_tags = [
{'class': "social_bkm_share"}, {'class': 'social_bkm_share'},
{'class': "social_bkm_print"}, {'class': 'social_bkm_print'},
{'class': "block20 clearfix"}, {'class': 'block20 clearfix'},
dict(name="div", attrs={'id': 'bookreview'}), dict(name='div', attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"}, {'class': 'tag_left_ttl'},
{'class': "tag_right"} {'class': 'tag_right'}
] ]
remove_tags_after = {'class': "block20"} remove_tags_after = {'class': 'block20'}
def parse_feeds(self): def parse_feeds(self):

View File

@@ -25,15 +25,15 @@ class CNetJapanRelease(BasicNewsRecipe):
lambda match: '<!-- removed -->'), lambda match: '<!-- removed -->'),
] ]
remove_tags_before = dict(id="contents_l") remove_tags_before = dict(id='contents_l')
remove_tags = [ remove_tags = [
{'class': "social_bkm_share"}, {'class': 'social_bkm_share'},
{'class': "social_bkm_print"}, {'class': 'social_bkm_print'},
{'class': "block20 clearfix"}, {'class': 'block20 clearfix'},
dict(name="div", attrs={'id': 'bookreview'}), dict(name='div', attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"} {'class': 'tag_left_ttl'}
] ]
remove_tags_after = {'class': "block20"} remove_tags_after = {'class': 'block20'}
def parse_feeds(self): def parse_feeds(self):

View File

@@ -56,7 +56,7 @@ class CnetNews(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='h1'),
dict(section='author'), dict(section='author'),
dict(id=["article-body", 'cnetReview']), dict(id=['article-body', 'cnetReview']),
dict(attrs={'class': 'deal-content'}), dict(attrs={'class': 'deal-content'}),
] ]

View File

@@ -72,7 +72,7 @@ class CNN(BasicNewsRecipe):
try: try:
br.open(masthead) br.open(masthead)
except: except:
self.log("\nCover unavailable") self.log('\nCover unavailable')
masthead = None masthead = None
return masthead return masthead

View File

@@ -36,9 +36,9 @@ class ContretempsRecipe(BasicNewsRecipe):
return None return None
def default_cover(self, cover_file): def default_cover(self, cover_file):
""" '''
Crée une couverture personnalisée pour Contretemps Crée une couverture personnalisée pour Contretemps
""" '''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@@ -56,7 +56,7 @@ class ContretempsRecipe(BasicNewsRecipe):
weekday = french_weekday[wkd] weekday = french_weekday[wkd]
month = french_month[today.month] month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}" date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh%M') edition = today.strftime('Édition de %Hh%M')
# Création de l'image de base (ratio ~1.6 pour format livre) # Création de l'image de base (ratio ~1.6 pour format livre)

View File

@@ -5,10 +5,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CosmosMagazine(BasicNewsRecipe): class CosmosMagazine(BasicNewsRecipe):
title = "Cosmos Magazine" title = 'Cosmos Magazine'
description = ( description = (
"Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec)." 'Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec).'
"It is produced by The Royal Institution of Australia Inc (RiAus)." 'It is produced by The Royal Institution of Australia Inc (RiAus).'
) )
language = 'en_AU' language = 'en_AU'
__author__ = 'yodha8' __author__ = 'yodha8'

View File

@@ -70,12 +70,12 @@ class CourrierInternational(BasicNewsRecipe):
} }
''' '''
needs_subscription = "optional" needs_subscription = 'optional'
login_url = 'http://www.courrierinternational.com/login' login_url = 'http://www.courrierinternational.com/login'
def get_browser(self): def get_browser(self):
def is_form_login(form): def is_form_login(form):
return "id" in form.attrs and form.attrs['id'] == "user-login-form" return 'id' in form.attrs and form.attrs['id'] == 'user-login-form'
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
if self.username: if self.username:
br.open(self.login_url) br.open(self.login_url)
@@ -86,8 +86,8 @@ class CourrierInternational(BasicNewsRecipe):
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):
for link in soup.findAll("a", href=re.compile('^/')): for link in soup.findAll('a', href=re.compile('^/')):
link["href"] = 'http://www.courrierinternational.com' + link["href"] link['href'] = 'http://www.courrierinternational.com' + link['href']
return soup return soup
feeds = [ feeds = [

View File

@@ -21,10 +21,10 @@ class CubaDebate(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif' masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
publication_type = 'newsportal' publication_type = 'newsportal'
extra_css = """ extra_css = '''
#BlogTitle{font-size: xx-large; font-weight: bold} #BlogTitle{font-size: xx-large; font-weight: bold}
body{font-family: Verdana, Arial, Tahoma, sans-serif} body{font-family: Verdana, Arial, Tahoma, sans-serif}
""" '''
conversion_options = { conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher 'comments': description, 'tags': category, 'language': language, 'publisher': publisher

View File

@@ -23,7 +23,7 @@ class DainikBhaskar(BasicNewsRecipe):
soup = self.index_to_soup('https://epaper.bhaskar.com/') soup = self.index_to_soup('https://epaper.bhaskar.com/')
tag = soup.find(attrs={'class': 'scaleDiv'}) tag = soup.find(attrs={'class': 'scaleDiv'})
if tag: if tag:
self.cover_url = tag.find('img')['src'].replace("_ss.jpg", "_l.jpg") self.cover_url = tag.find('img')['src'].replace('_ss.jpg', '_l.jpg')
return super().get_cover_url() return super().get_cover_url()
keep_only_tags = [ keep_only_tags = [

View File

@@ -31,11 +31,11 @@ class Danas(BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
auto_cleanup_keep = '//div[@class="post-intro-above"] //h1[@class="post-title"] | //div[@class="post-intro-title"] | //div[@class="post-meta-wrapper"]' auto_cleanup_keep = '//div[@class="post-intro-above"] //h1[@class="post-title"] | //div[@class="post-intro-title"] | //div[@class="post-meta-wrapper"]'
resolve_internal_links = True resolve_internal_links = True
extra_css = """ extra_css = '''
.author{font-size: small} .author{font-size: small}
.published {font-size: small} .published {font-size: small}
img{margin-bottom: 0.8em} img{margin-bottom: 0.8em}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'comment': description,
@@ -66,7 +66,7 @@ class Danas(BasicNewsRecipe):
'avgust', 'septembar', 'oktobar', 'novembar', 'decembar'] 'avgust', 'septembar', 'oktobar', 'novembar', 'decembar']
td = date.today() td = date.today()
monthname = months[td.month - 1] monthname = months[td.month - 1]
lurl = td.strftime("https://www.danas.rs/naslovna/naslovna-strana-za-%d-" + monthname + "-%Y/") lurl = td.strftime('https://www.danas.rs/naslovna/naslovna-strana-za-%d-' + monthname + '-%Y/')
soup = self.index_to_soup(lurl) soup = self.index_to_soup(lurl)
al = soup.find('div', attrs={'class':'corax-image'}) al = soup.find('div', attrs={'class':'corax-image'})
if al and al.img: if al and al.img:

View File

@@ -77,9 +77,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = new_tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ('http-equiv', 'Content-Language'), ('content', self.lang)])
mcharset = new_tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)
return soup return soup

View File

@@ -16,8 +16,8 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
articles = [] articles = []
feeds = [] feeds = []
soup = self.index_to_soup("http://www.democracyjournal.org") soup = self.index_to_soup('http://www.democracyjournal.org')
for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")): for x in soup.findAll(href=re.compile(r'http://www\.democracyjournal\.org/\d*/.*php$')):
url = x.get('href') url = x.get('href')
title = self.tag_to_string(x) title = self.tag_to_string(x)
articles.append({'title': title, 'url': url, articles.append({'title': title, 'url': url,

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
""" '''
demorgen.be demorgen.be
""" '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@@ -13,7 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
description = 'News from Belgium in Dutch' description = 'News from Belgium in Dutch'
oldest_article = 1 oldest_article = 1
language = 'nl_BE' language = 'nl_BE'
encoding = "utf-8" encoding = 'utf-8'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_attributes = ['style', 'height', 'width'] remove_attributes = ['style', 'height', 'width']
@@ -23,10 +23,10 @@ class DeMorganBe(BasicNewsRecipe):
masthead_url = 'https://www.demorgen.be/_next/static/media/demorgen_logo.dce579e2.svg' masthead_url = 'https://www.demorgen.be/_next/static/media/demorgen_logo.dce579e2.svg'
cover_url = 'https://usercontent.one/wp/www.insidejazz.be/wp-content/uploads/2018/11/pic0143.png' cover_url = 'https://usercontent.one/wp/www.insidejazz.be/wp-content/uploads/2018/11/pic0143.png'
extra_css = """ extra_css = '''
time, [data-test-id:"article-label"], [data-test-id:"article-sublabel"], [[data-test-id:"article-author"]] { font-size:small; } time, [data-test-id:"article-label"], [data-test-id:"article-sublabel"], [[data-test-id:"article-author"]] { font-size:small; }
[data-test-id:"header-intro"] { font-style: italic; } [data-test-id:"header-intro"] { font-style: italic; }
""" '''
keep_only_tags = [ keep_only_tags = [
dict(name='article', attrs={'id': 'article-content'}), dict(name='article', attrs={'id': 'article-content'}),

View File

@@ -23,8 +23,8 @@ class ceskyDenikRecipe(BasicNewsRecipe):
cover_url = 'http://g.denik.cz/images/loga/denik.png' cover_url = 'http://g.denik.cz/images/loga/denik.png'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
extra_css = """ extra_css = '''
""" '''
remove_tags = [] remove_tags = []
keep_only_tags = [dict(name='div', attrs={'class': 'content'})] keep_only_tags = [dict(name='div', attrs={'class': 'content'})]

View File

@@ -11,11 +11,11 @@ CZ_MONTHS = ['led', 'úno', 'bře', 'dub', 'kvě', 'čen', 'čec', 'srp', 'zář
def cz_title_time(): def cz_title_time():
""" '''
Helper function to return date with czech locale. Helper function to return date with czech locale.
Uses hardcoded lookup table of day and month names as strftime requires Uses hardcoded lookup table of day and month names as strftime requires
locale change that is not thread safe. locale change that is not thread safe.
""" '''
today = datetime.today() today = datetime.today()
weekday = CZ_DAYS[today.weekday()] weekday = CZ_DAYS[today.weekday()]
month = CZ_MONTHS[today.month-1] month = CZ_MONTHS[today.month-1]
@@ -26,9 +26,9 @@ def cz_title_time():
class DenikNRecipe(BasicNewsRecipe): class DenikNRecipe(BasicNewsRecipe):
""" '''
Recipe for the RSS feed of https://denikn.cz/ Recipe for the RSS feed of https://denikn.cz/
""" '''
title = u'Deník N' title = u'Deník N'
__author__ = 'Robert Mihaly' __author__ = 'Robert Mihaly'

View File

@@ -31,13 +31,13 @@ class deredactie(BasicNewsRecipe):
catnames = {} catnames = {}
soup = self.index_to_soup( soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch') 'http://www.deredactie.be/cm/vrtnieuws.deutsch')
for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}): for elem in soup.findAll('li', attrs={'id': re.compile('^navItem[2-9]')}):
a = elem.find('a', href=True) a = elem.find('a', href=True)
m = re.search('(?<=/)[^/]*$', a['href']) m = re.search('(?<=/)[^/]*$', a['href'])
cat = str(m.group(0)) cat = str(m.group(0))
categories.append(cat) categories.append(cat)
catnames[cat] = a['title'] catnames[cat] = a['title']
self.log("found cat %s\n" % catnames[cat]) self.log('found cat %s\n' % catnames[cat])
feeds = [] feeds = []
@@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe):
articles = [] articles = []
soup = self.index_to_soup( soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat) 'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}): for a in soup.findAll('a', attrs={'href': re.compile('deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
skip_this_article = False skip_this_article = False
url = a['href'].strip() url = a['href'].strip()
if url.startswith('/'): if url.startswith('/'):
@@ -55,12 +55,12 @@ class deredactie(BasicNewsRecipe):
for article in articles: for article in articles:
if article['url'] == url: if article['url'] == url:
skip_this_article = True skip_this_article = True
self.log("SKIPPING DUP %s" % url) self.log('SKIPPING DUP %s' % url)
break break
if skip_this_article: if skip_this_article:
continue continue
articles.append(myarticle) articles.append(myarticle)
self.log("Adding URL %s\n" % url) self.log('Adding URL %s\n' % url)
if articles: if articles:
feeds.append((catnames[cat], articles)) feeds.append((catnames[cat], articles))
return feeds return feeds

View File

@@ -34,7 +34,7 @@ class Volkskrant(BasicNewsRecipe):
dict(id=['like', 'dlik']), dict(id=['like', 'dlik']),
dict(name=['script', 'noscript', 'style']), dict(name=['script', 'noscript', 'style']),
] ]
remove_attributes = ["class", "id", "name", "style"] remove_attributes = ['class', 'id', 'name', 'style']
encoding = 'utf-8' encoding = 'utf-8'
no_stylesheets = True no_stylesheets = True
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
@@ -88,7 +88,7 @@ class Volkskrant(BasicNewsRecipe):
) )
) )
sections = [("Numărul curent", articles)] sections = [('Numărul curent', articles)]
return sections return sections
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = "GPL v3" __license__ = 'GPL v3'
"""DistroWatch Weekly""" '''DistroWatch Weekly'''
import datetime import datetime
@@ -10,28 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DistroWatchWeekly(BasicNewsRecipe): class DistroWatchWeekly(BasicNewsRecipe):
title = "DistroWatch Weekly" title = 'DistroWatch Weekly'
description = "Weekly news about Linux distributions" description = 'Weekly news about Linux distributions'
category = "Linux, Technology, News" category = 'Linux, Technology, News'
oldest_article = 14 oldest_article = 14
language = "en" language = 'en'
max_articles_per_feed = 50 max_articles_per_feed = 50
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = " [%A, %d %B, %Y]" timefmt = ' [%A, %d %B, %Y]'
auto_cleanup = False auto_cleanup = False
keep_only_tags = [ keep_only_tags = [
dict( dict(
attrs={ attrs={
"class": 'class':
lambda x: x and ("News1" in x) lambda x: x and ('News1' in x)
} }
) )
] ]
def _get_mag_date(self): def _get_mag_date(self):
"""Return date of latest weekly issue.""" '''Return date of latest weekly issue.'''
d = datetime.date(2022, 6, 20) d = datetime.date(2022, 6, 20)
t = datetime.date.today() t = datetime.date.today()
@@ -45,17 +45,17 @@ class DistroWatchWeekly(BasicNewsRecipe):
# Get URL of latest mag page # Get URL of latest mag page
ld = self._get_mag_date() ld = self._get_mag_date()
url = ld.strftime("https://distrowatch.com/weekly.php?issue=%Y%m%d") url = ld.strftime('https://distrowatch.com/weekly.php?issue=%Y%m%d')
url = url.lower() url = url.lower()
title = ld.strftime("DistroWatch Weekly for %Y-%m-%d") title = ld.strftime('DistroWatch Weekly for %Y-%m-%d')
# Get articles # Get articles
stories = [{ stories = [{
"url": url, 'url': url,
"title": title, 'title': title,
},] },]
index = [ index = [
("Articles", stories), ('Articles', stories),
] ]
return index return index
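As a worked example, for an issue date of 2022-06-20 (the anchor date used in _get_mag_date above), the strftime calls in parse_index produce:

    url = 'https://distrowatch.com/weekly.php?issue=20220620'
    title = 'DistroWatch Weekly for 2022-06-20'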

View File

@@ -23,7 +23,7 @@ def new_tag(soup, name, attrs=()):
class DnevnikCro(BasicNewsRecipe): class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr' title = 'Dnevnik - Hr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "Vijesti iz Hrvatske" description = 'Vijesti iz Hrvatske'
publisher = 'Dnevnik.hr' publisher = 'Dnevnik.hr'
category = 'news, politics, Croatia' category = 'news, politics, Croatia'
oldest_article = 2 oldest_article = 2
@@ -67,9 +67,9 @@ class DnevnikCro(BasicNewsRecipe):
del item[attrib] del item[attrib]
mlang = new_tag(soup, 'meta', [ mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ('http-equiv', 'Content-Language'), ('content', self.lang)])
mcharset = new_tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@@ -4,15 +4,15 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
# Comment out sections you are not interested in # Comment out sections you are not interested in
sections = [ sections = [
("정치", "politics"), ('정치', 'politics'),
("사회", "national"), ('사회', 'national'),
("경제", "economy"), ('경제', 'economy'),
("국제", "international"), ('국제', 'international'),
("사설칼럼", "editorials"), ('사설칼럼', 'editorials'),
("의학과학", "science"), ('의학과학', 'science'),
("문화연예", "culture"), ('문화연예', 'culture'),
("스포츠", "sports"), ('스포츠', 'sports'),
("사람속으로", "inmul") ('사람속으로', 'inmul')
# Following sections are marked as optional # Following sections are marked as optional
# by default. Uncomment to enable. # by default. Uncomment to enable.
# , (u'건강', 'health') # , (u'건강', 'health')
@@ -26,24 +26,24 @@ sections = [
class Donga(BasicNewsRecipe): class Donga(BasicNewsRecipe):
language = "ko" language = 'ko'
title = "동아일보" title = '동아일보'
description = "동아일보 기사" description = '동아일보 기사'
__author__ = "Minsik Cho" __author__ = 'Minsik Cho'
ignore_duplicate_articles = {"title", "url"} ignore_duplicate_articles = {'title', 'url'}
compress_news_images = True compress_news_images = True
no_stylesheets = True no_stylesheets = True
oldest_article = 2 oldest_article = 2
encoding = "utf-8" encoding = 'utf-8'
# RSS Feed in syntax: # RSS Feed in syntax:
# https://rss.donga.com/[sections].xml # https://rss.donga.com/[sections].xml
feeds = [(title, "https://rss.donga.com/" + section + ".xml") for (title, section) in sections] feeds = [(title, 'https://rss.donga.com/' + section + '.xml') for (title, section) in sections]
# Remove logo and print buttons # Remove logo and print buttons
remove_tags = [ remove_tags = [
dict(name="div", attrs={"class": "popHeaderWrap"}), dict(name='div', attrs={'class': 'popHeaderWrap'}),
dict(name="div", attrs={"class": "etc"}), dict(name='div', attrs={'class': 'etc'}),
] ]
def print_version(self, url): def print_version(self, url):
@@ -51,8 +51,8 @@ class Donga(BasicNewsRecipe):
# https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1 # https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1
# Return print version url with syntax: # Return print version url with syntax:
# https://www.donga.com/news/View?gid=[gid]&date=[date] # https://www.donga.com/news/View?gid=[gid]&date=[date]
reobject = re.search("(?<=/all/)([0-9]*)/([0-9]*)", url) reobject = re.search('(?<=/all/)([0-9]*)/([0-9]*)', url)
date = reobject.group(1) date = reobject.group(1)
gid = reobject.group(2) gid = reobject.group(2)
return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date

View File

@@ -107,11 +107,11 @@ class DRNyheder(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(name="h1", attrs={'class': 'dre-article-title__heading'}), # Title dict(name='h1', attrs={'class': 'dre-article-title__heading'}), # Title
dict(name="div", attrs={'class': 'dre-article-byline'}), # Author dict(name='div', attrs={'class': 'dre-article-byline'}), # Author
dict(name="figure", attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images dict(name='figure', attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images
dict(name="p", attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article dict(name='p', attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article
dict(name="article", attrs={'itemtype': 'http://schema.org/NewsArticle'}), dict(name='article', attrs={'itemtype': 'http://schema.org/NewsArticle'}),
#dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}), #dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}),
#dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}), #dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}),
#dict(name="div", attrs={'class': 'dre-speech'}), #dict(name="div", attrs={'class': 'dre-speech'}),
@@ -123,7 +123,7 @@ class DRNyheder(BasicNewsRecipe):
dict(name='div', attrs={'class': [ dict(name='div', attrs={'class': [
'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container', 'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container',
'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}), 'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}),
dict(name="source"), dict(name='source'),
#dict(name='menu', attrs={'class': 'share'}), #dict(name='menu', attrs={'class': 'share'}),
#dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}), #dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}),
] ]

View File

@@ -63,20 +63,20 @@ class Dzieje(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
feeds = [] feeds = []
feeds.append((u"Wiadomości", self.find_articles( feeds.append((u'Wiadomości', self.find_articles(
'http://dzieje.pl/wiadomosci'))) 'http://dzieje.pl/wiadomosci')))
feeds.append((u"Kultura i sztuka", self.find_articles( feeds.append((u'Kultura i sztuka', self.find_articles(
'http://dzieje.pl/kulturaisztuka'))) 'http://dzieje.pl/kulturaisztuka')))
feeds.append((u"Film", self.find_articles('http://dzieje.pl/kino'))) feeds.append((u'Film', self.find_articles('http://dzieje.pl/kino')))
feeds.append((u"Rozmaitości historyczne", feeds.append((u'Rozmaitości historyczne',
self.find_articles('http://dzieje.pl/rozmaitości'))) self.find_articles('http://dzieje.pl/rozmaitości')))
feeds.append( feeds.append(
(u"Książka", self.find_articles('http://dzieje.pl/ksiazka'))) (u'Książka', self.find_articles('http://dzieje.pl/ksiazka')))
feeds.append( feeds.append(
(u"Wystawa", self.find_articles('http://dzieje.pl/wystawa'))) (u'Wystawa', self.find_articles('http://dzieje.pl/wystawa')))
feeds.append((u"Edukacja", self.find_articles( feeds.append((u'Edukacja', self.find_articles(
'http://dzieje.pl/edukacja'))) 'http://dzieje.pl/edukacja')))
feeds.append((u"Dzieje się", self.find_articles( feeds.append((u'Dzieje się', self.find_articles(
'http://dzieje.pl/wydarzenia'))) 'http://dzieje.pl/wydarzenia')))
return feeds return feeds

View File

@@ -21,7 +21,7 @@ class Dziennik_pl(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}' extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile( preprocess_regexps = [(re.compile('Komentarze:'), lambda m: ''), (re.compile(
'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')] '<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags = [dict(id='article')] keep_only_tags = [dict(id='article')]
remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501 remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501

View File

@@ -120,7 +120,7 @@ class DziennikPolski24(BasicNewsRecipe):
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html') br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html')
br.select_form(nr=1) br.select_form(nr=1)
br["user_login[login]"] = self.username br['user_login[login]'] = self.username
br['user_login[pass]'] = self.password br['user_login[pass]'] = self.password
br.submit() br.submit()
return br return br


@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0] body = root.xpath('//body')[0]
article = E(body, 'article') article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try: try:
date = data['dateModified'] date = data['dateModified']
@ -97,8 +97,8 @@ def process_web_node(node):
return f'<p>{node.get("textHtml")}</p>' return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>' return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE': elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText") alt = '' if node.get('altText') is None else node.get('altText')
cap = "" cap = ''
if node.get('caption'): if node.get('caption'):
if node['caption'].get('textHtml') is not None: if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml'] cap = node['caption']['textHtml']
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
data = json.loads(raw)['props']['pageProps']['cp2Content'] data = json.loads(raw)['props']['pageProps']['cp2Content']
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>' body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
body += f'<h1>{data["headline"]}</h1>' body += f'<h1>{data["headline"]}</h1>'
if data.get("rubric") and data.get("rubric") is not None: if data.get('rubric') and data.get('rubric') is not None:
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>' body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
try: try:
date = data['dateModified'] date = data['dateModified']
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal" __author__ = 'Kovid Goyal'
description = ( description = (
'Global news and current affairs from a European' 'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)' ' perspective. Best downloaded on Friday mornings (GMT)'
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}), dict(attrs={'id': 'player'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
if edition_date and isinstance(edition_date, str): if edition_date and isinstance(edition_date, str):
return parse_only_date(edition_date, as_utc=False) return parse_only_date(edition_date, as_utc=False)
try: try:
url = self.browser.open("https://www.economist.com/printedition").geturl() url = self.browser.open('https://www.economist.com/printedition').geturl()
except Exception as e: except Exception as e:
self.log('Failed to fetch publication date with error: ' + str(e)) self.log('Failed to fetch publication date with error: ' + str(e))
return super().publication_date() return super().publication_date()
return parse_only_date(url.split("/")[-1], as_utc=False) return parse_only_date(url.split('/')[-1], as_utc=False)
def economist_test_article(self): def economist_test_article(self):
return [('Articles', [{'title':'test', return [('Articles', [{'title':'test',
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
self.log('Got cover:', self.cover_url, '\n', self.description) self.log('Got cover:', self.cover_url, '\n', self.description)
feeds_dict = defaultdict(list) feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"): for part in safe_dict(data, 'hasPart', 'parts'):
try: try:
section = part['articleSection']['internal'][0]['title'] section = part['articleSection']['internal'][0]['title']
except Exception: except Exception:
section = safe_dict(part, 'print', 'section', 'title') or 'section' section = safe_dict(part, 'print', 'section', 'title') or 'section'
if section not in feeds_dict: if section not in feeds_dict:
self.log(section) self.log(section)
title = safe_dict(part, "title") title = safe_dict(part, 'title')
desc = safe_dict(part, "rubric") or '' desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, "flyTitle") or '' sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html') pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8')) pt.write(json.dumps(part).encode('utf-8'))
pt.close() pt.close()
url = 'file:///' + pt.name url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc}) feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc) self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()] return [(section, articles) for section, articles in feeds_dict.items()]
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
return self.economist_return_index(ans) return self.economist_return_index(ans)
def economist_parse_web_index(self, soup): def economist_parse_web_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__") script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None: if script_tag is not None:
data = json.loads(script_tag.string) data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.description = safe_dict(data, "props", "pageProps", "content", "headline") self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']' self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace( self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '') 'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
self.log('Got cover:', self.cover_url) self.log('Got cover:', self.cover_url)
feeds = [] feeds = []
for part in safe_dict( for part in safe_dict(
data, "props", "pageProps", "content", "headerSections" data, 'props', 'pageProps', 'content', 'headerSections'
) + safe_dict(data, "props", "pageProps", "content", "sections"): ) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
section = safe_dict(part, "name") or '' section = safe_dict(part, 'name') or ''
if not section: if not section:
continue continue
self.log(section) self.log(section)
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
articles = [] articles = []
for ar in part['articles']: for ar in part['articles']:
title = safe_dict(ar, "headline") or '' title = safe_dict(ar, 'headline') or ''
url = process_url(safe_dict(ar, "url") or '') url = process_url(safe_dict(ar, 'url') or '')
if not title or not url: if not title or not url:
continue continue
desc = safe_dict(ar, "rubric") or '' desc = safe_dict(ar, 'rubric') or ''
sub = safe_dict(ar, "flyTitle") or '' sub = safe_dict(ar, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url) self.log('\t', title, '\n\t', desc, '\n\t\t', url)


@ -58,7 +58,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0] body = root.xpath('//body')[0]
article = E(body, 'article') article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
E(article, 'div', data['byline'], style='font-style: italic; color:#202020;') E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')
main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical')
@ -130,7 +130,7 @@ class Espresso(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [
'dblClkTrk', 'ec-article-info', 'share_inline_header', 'dblClkTrk', 'ec-article-info', 'share_inline_header',
@ -189,13 +189,13 @@ class Espresso(BasicNewsRecipe):
self.description = data['rubric'] self.description = data['rubric']
ans = [] ans = []
for part in safe_dict(data, "hasPart", "parts"): for part in safe_dict(data, 'hasPart', 'parts'):
title = safe_dict(part, "title") title = safe_dict(part, 'title')
pt = PersistentTemporaryFile('.html') pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8')) pt.write(json.dumps(part).encode('utf-8'))
pt.close() pt.close()
url = 'file:///' + pt.name url = 'file:///' + pt.name
ans.append({"title": title, "url": url}) ans.append({'title': title, 'url': url})
return [('Espresso', ans)] return [('Espresso', ans)]
def preprocess_html(self, soup): def preprocess_html(self, soup):


@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0] body = root.xpath('//body')[0]
article = E(body, 'article') article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try: try:
date = data['dateModified'] date = data['dateModified']
@ -97,8 +97,8 @@ def process_web_node(node):
return f'<p>{node.get("textHtml")}</p>' return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>' return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE': elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText") alt = '' if node.get('altText') is None else node.get('altText')
cap = "" cap = ''
if node.get('caption'): if node.get('caption'):
if node['caption'].get('textHtml') is not None: if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml'] cap = node['caption']['textHtml']
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
data = json.loads(raw)['props']['pageProps']['cp2Content'] data = json.loads(raw)['props']['pageProps']['cp2Content']
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>' body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
body += f'<h1>{data["headline"]}</h1>' body += f'<h1>{data["headline"]}</h1>'
if data.get("rubric") and data.get("rubric") is not None: if data.get('rubric') and data.get('rubric') is not None:
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>' body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
try: try:
date = data['dateModified'] date = data['dateModified']
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal" __author__ = 'Kovid Goyal'
description = ( description = (
'Global news and current affairs from a European' 'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)' ' perspective. Best downloaded on Friday mornings (GMT)'
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}), dict(attrs={'id': 'player'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
if edition_date and isinstance(edition_date, str): if edition_date and isinstance(edition_date, str):
return parse_only_date(edition_date, as_utc=False) return parse_only_date(edition_date, as_utc=False)
try: try:
url = self.browser.open("https://www.economist.com/printedition").geturl() url = self.browser.open('https://www.economist.com/printedition').geturl()
except Exception as e: except Exception as e:
self.log('Failed to fetch publication date with error: ' + str(e)) self.log('Failed to fetch publication date with error: ' + str(e))
return super().publication_date() return super().publication_date()
return parse_only_date(url.split("/")[-1], as_utc=False) return parse_only_date(url.split('/')[-1], as_utc=False)
def economist_test_article(self): def economist_test_article(self):
return [('Articles', [{'title':'test', return [('Articles', [{'title':'test',
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
self.log('Got cover:', self.cover_url, '\n', self.description) self.log('Got cover:', self.cover_url, '\n', self.description)
feeds_dict = defaultdict(list) feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"): for part in safe_dict(data, 'hasPart', 'parts'):
try: try:
section = part['articleSection']['internal'][0]['title'] section = part['articleSection']['internal'][0]['title']
except Exception: except Exception:
section = safe_dict(part, 'print', 'section', 'title') or 'section' section = safe_dict(part, 'print', 'section', 'title') or 'section'
if section not in feeds_dict: if section not in feeds_dict:
self.log(section) self.log(section)
title = safe_dict(part, "title") title = safe_dict(part, 'title')
desc = safe_dict(part, "rubric") or '' desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, "flyTitle") or '' sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html') pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8')) pt.write(json.dumps(part).encode('utf-8'))
pt.close() pt.close()
url = 'file:///' + pt.name url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc}) feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc) self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()] return [(section, articles) for section, articles in feeds_dict.items()]
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
return self.economist_return_index(ans) return self.economist_return_index(ans)
def economist_parse_web_index(self, soup): def economist_parse_web_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__") script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None: if script_tag is not None:
data = json.loads(script_tag.string) data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.description = safe_dict(data, "props", "pageProps", "content", "headline") self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']' self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace( self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '') 'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
self.log('Got cover:', self.cover_url) self.log('Got cover:', self.cover_url)
feeds = [] feeds = []
for part in safe_dict( for part in safe_dict(
data, "props", "pageProps", "content", "headerSections" data, 'props', 'pageProps', 'content', 'headerSections'
) + safe_dict(data, "props", "pageProps", "content", "sections"): ) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
section = safe_dict(part, "name") or '' section = safe_dict(part, 'name') or ''
if not section: if not section:
continue continue
self.log(section) self.log(section)
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
articles = [] articles = []
for ar in part['articles']: for ar in part['articles']:
title = safe_dict(ar, "headline") or '' title = safe_dict(ar, 'headline') or ''
url = process_url(safe_dict(ar, "url") or '') url = process_url(safe_dict(ar, 'url') or '')
if not title or not url: if not title or not url:
continue continue
desc = safe_dict(ar, "rubric") or '' desc = safe_dict(ar, 'rubric') or ''
sub = safe_dict(ar, "flyTitle") or '' sub = safe_dict(ar, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url) self.log('\t', title, '\n\t', desc, '\n\t\t', url)


@ -59,7 +59,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0] body = root.xpath('//body')[0]
article = E(body, 'article') article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;') E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try: try:
date = data['dateModified'] date = data['dateModified']
@ -125,7 +125,7 @@ class EconomistNews(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal" __author__ = 'Kovid Goyal'
description = ( description = (
'Global news and current affairs from a European' 'Global news and current affairs from a European'
' perspective. Get the latest articles here.' ' perspective. Get the latest articles here.'
@ -140,7 +140,7 @@ class EconomistNews(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}), dict(attrs={'id': 'player'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [
@ -234,9 +234,9 @@ class EconomistNews(BasicNewsRecipe):
articles = [] articles = []
for art in part['hasPart']['parts']: for art in part['hasPart']['parts']:
title = safe_dict(art, "title") title = safe_dict(art, 'title')
desc = safe_dict(art, "rubric") or '' desc = safe_dict(art, 'rubric') or ''
sub = safe_dict(art, "flyTitle") or '' sub = safe_dict(art, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
if not art.get('text'): if not art.get('text'):
@ -249,7 +249,7 @@ class EconomistNews(BasicNewsRecipe):
pt.write(json.dumps(art).encode('utf-8')) pt.write(json.dumps(art).encode('utf-8'))
pt.close() pt.close()
url = 'file:///' + pt.name url = 'file:///' + pt.name
articles.append({"title": title, "url": url, "description": desc}) articles.append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc) self.log('\t', title, '\n\t\t', desc)
if articles: if articles:
feeds.append((section, articles)) feeds.append((section, articles))


@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>' return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>' return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE': elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText") alt = '' if node.get('altText') is None else node.get('altText')
cap = "" cap = ''
if node.get('caption'): if node.get('caption'):
if node['caption'].get('textHtml') is not None: if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml'] cap = node['caption']['textHtml']
@ -112,7 +112,7 @@ class econ_search(BasicNewsRecipe):
title = 'The Economist - Search' title = 'The Economist - Search'
language = 'en' language = 'en'
encoding = 'utf-8' encoding = 'utf-8'
__author__ = "unkn0wn" __author__ = 'unkn0wn'
description = ( description = (
'Use the Advanced section of the recipe to search.' 'Use the Advanced section of the recipe to search.'
) )
@ -128,7 +128,7 @@ class econ_search(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id':'player'}), dict(attrs={'id':'player'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [


@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>' return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>' return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE': elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText") alt = '' if node.get('altText') is None else node.get('altText')
cap = "" cap = ''
if node.get('caption'): if node.get('caption'):
if node['caption'].get('textHtml') is not None: if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml'] cap = node['caption']['textHtml']
@ -122,7 +122,7 @@ class EconomistWorld(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "unkn0wn" __author__ = 'unkn0wn'
description = ( description = (
'The World Ahead is The Economists future-gazing publication. It prepares audiences for what is to ' 'The World Ahead is The Economists future-gazing publication. It prepares audiences for what is to '
'come with mind-stretching insights and expert analysis—all in The Economists clear, elegant style.' 'come with mind-stretching insights and expert analysis—all in The Economists clear, elegant style.'
@ -136,7 +136,7 @@ class EconomistWorld(BasicNewsRecipe):
resolve_internal_links = True resolve_internal_links = True
remove_tags = [ remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']), dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}), dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}), dict(attrs={'id': 'player'}),
dict(attrs={ dict(attrs={
'class': [ 'class': [
@ -205,24 +205,24 @@ class EconomistWorld(BasicNewsRecipe):
return self.economist_return_index(ans) return self.economist_return_index(ans)
def economist_parse_index(self, soup): def economist_parse_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__") script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None: if script_tag is not None:
data = json.loads(script_tag.string) data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.title = safe_dict(data, "props", "pageProps", "content", "headline") self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.cover_url = 'https://mma.prnewswire.com/media/2561745/The_Economist_World_Ahead_2025_cover.jpg?w=600' self.cover_url = 'https://mma.prnewswire.com/media/2561745/The_Economist_World_Ahead_2025_cover.jpg?w=600'
feeds = [] feeds = []
for coll in safe_dict(data, "props", "pageProps", "content", "components"): for coll in safe_dict(data, 'props', 'pageProps', 'content', 'components'):
section = safe_dict(coll, "headline") or '' section = safe_dict(coll, 'headline') or ''
self.log(section) self.log(section)
articles = [] articles = []
for part in safe_dict(coll, "items"): for part in safe_dict(coll, 'items'):
title = safe_dict(part, "headline") or '' title = safe_dict(part, 'headline') or ''
url = process_url(safe_dict(part, "url") or '') url = process_url(safe_dict(part, 'url') or '')
desc = safe_dict(part, "rubric") or '' desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, "flyTitle") or '' sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub: if sub and section != sub:
desc = sub + ' :: ' + desc desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url) self.log('\t', title, '\n\t', desc, '\n\t\t', url)


@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
continue continue
break break
if daysback == 7: if daysback == 7:
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
def fixChars(self, string): def fixChars(self, string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91", "", string) fixed = re.sub('\x91', '', string)
# Replace rsquo (\x92) # Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed) fixed = re.sub('\x92', '', fixed)
# Replace ldquo (\x93) # Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed) fixed = re.sub('\x93', '', fixed)
# Replace rdquo (\x94) # Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed) fixed = re.sub('\x94', '', fixed)
# Replace ndash (\x96) # Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed) fixed = re.sub('\x96', '', fixed)
# Replace mdash (\x97) # Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed) fixed = re.sub('\x97', '', fixed)
fixed = re.sub("&#x2019;", "", fixed) fixed = re.sub('&#x2019;', '', fixed)
return fixed return fixed
def massageNCXText(self, description): def massageNCXText(self, description):
@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'): if url.startswith('/'):
url = self.url_prefix + url url = self.url_prefix + url
if not url.startswith(self.url_prefix): if not url.startswith(self.url_prefix):
print("Rejected " + url) print('Rejected ' + url)
return return
if url in self.url_list: if url in self.url_list:
print("Rejected dup " + url) print('Rejected dup ' + url)
return return
self.url_list.append(url) self.url_list.append(url)
title = self.tag_to_string(atag, False) title = self.tag_to_string(atag, False)
@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
return return
dtag = adiv.find('div', 'content') dtag = adiv.find('div', 'content')
description = '' description = ''
print("URL " + url) print('URL ' + url)
print("TITLE " + title) print('TITLE ' + title)
if dtag is not None: if dtag is not None:
stag = dtag.span stag = dtag.span
if stag is not None: if stag is not None:
@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False) description = self.tag_to_string(stag, False)
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print('DESCRIPTION: ' + description)
if key not in articles: if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl): def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl) print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try: try:
soup = self.index_to_soup(self.url_prefix + keyurl) soup = self.index_to_soup(self.url_prefix + keyurl)
except: except:
print("Section: " + key + ' NOT FOUND') print('Section: ' + key + ' NOT FOUND')
return return
ans.append(key) ans.append(key)
mainsoup = soup.find('div', 'bodywrapper') mainsoup = soup.find('div', 'bodywrapper')


@ -20,12 +20,12 @@ class AdvancedUserRecipe1311790237(BasicNewsRecipe):
masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif' masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
publication_type = 'newspaper' publication_type = 'newspaper'
extra_css = """ extra_css = '''
p{text-align: justify; font-size: 100%} p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% } body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
""" '''
feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'), feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
(u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'), (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),


@ -55,9 +55,9 @@ class RevistaElCultural(BasicNewsRecipe):
if url.startswith('/version_papel/' + titleSection + '/'): if url.startswith('/version_papel/' + titleSection + '/'):
url = 'http://www.elcultural.es' + url url = 'http://www.elcultural.es' + url
self.log('\t\tFound article:', title[0:title.find("|") - 1]) self.log('\t\tFound article:', title[0:title.find('|') - 1])
self.log('\t\t\t', url) self.log('\t\t\t', url)
current_articles.append({'title': title[0:title.find("|") - 1], 'url': url, current_articles.append({'title': title[0:title.find('|') - 1], 'url': url,
'description': '', 'date': ''}) 'description': '', 'date': ''})
return current_articles return current_articles


@ -1,51 +1,51 @@
# -*- mode: python; coding: utf-8; -*- # -*- mode: python; coding: utf-8; -*-
# vim: set syntax=python fileencoding=utf-8 # vim: set syntax=python fileencoding=utf-8
__license__ = "GPL v3" __license__ = 'GPL v3'
__copyright__ = "2023, Tomás Di Domenico <tdido at tdido.eu>" __copyright__ = '2023, Tomás Di Domenico <tdido at tdido.eu>'
""" '''
www.eldiplo.org www.eldiplo.org
""" '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElDiplo2023(BasicNewsRecipe): class ElDiplo2023(BasicNewsRecipe):
title = "Le Monde Diplomatique - cono sur" title = 'Le Monde Diplomatique - cono sur'
__author__ = "Tomás Di Domenico" __author__ = 'Tomás Di Domenico'
description = "Publicación de Le Monde Diplomatique para el cono sur." description = 'Publicación de Le Monde Diplomatique para el cono sur.'
publisher = "Capital Intelectual" publisher = 'Capital Intelectual'
category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World" category = 'News, Politics, Argentina, Uruguay, Paraguay, South America, World'
oldest_article = 31 oldest_article = 31
no_stylesheets = True no_stylesheets = True
encoding = "utf8" encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = "es_AR" language = 'es_AR'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = "magazine" publication_type = 'magazine'
delay = 1 delay = 1
simultaneous_downloads = 1 simultaneous_downloads = 1
timeout = 8 timeout = 8
needs_subscription = True needs_subscription = True
ignore_duplicate_articles = {"url"} ignore_duplicate_articles = {'url'}
temp_files = [] temp_files = []
fetch_retries = 10 fetch_retries = 10
handle_gzip = True handle_gzip = True
compress_news_images = True compress_news_images = True
scale_news_images_to_device = True scale_news_images_to_device = True
masthead_url = ( masthead_url = (
"https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png" 'https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png'
) )
INDEX = "https://www.eldiplo.org/" INDEX = 'https://www.eldiplo.org/'
conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category} conversion_options = {'series': 'El Dipló', 'publisher': publisher, 'base_font_size': 8, 'tags': category}
keep_only_tags = [dict(name=["article"])] keep_only_tags = [dict(name=['article'])]
remove_tags = [dict(name=["button"])] remove_tags = [dict(name=['button'])]
extra_css = """ extra_css = '''
.entry-title { .entry-title {
text-align: center; text-align: center;
} }
@ -67,59 +67,59 @@ class ElDiplo2023(BasicNewsRecipe):
padding-left: 10%; padding-left: 10%;
padding-right: 10%; padding-right: 10%;
} }
""" '''
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX) br.open(self.INDEX)
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.select_form(id="loginform") br.select_form(id='loginform')
br["log"] = self.username br['log'] = self.username
br["pwd"] = self.password br['pwd'] = self.password
br.submit() br.submit()
return br return br
def get_cover_url(self): def get_cover_url(self):
soup_index = self.index_to_soup(self.INDEX) soup_index = self.index_to_soup(self.INDEX)
tag_sumario = soup_index.find("span", text="Sumario") tag_sumario = soup_index.find('span', text='Sumario')
url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']
soup = self.index_to_soup(url_sumario) soup = self.index_to_soup(url_sumario)
container = soup.find("div", class_="px-16") container = soup.find('div', class_='px-16')
url = container.find("img")["src"] url = container.find('img')['src']
return getattr(self, "cover_url", url) return getattr(self, 'cover_url', url)
def _process_article(self, article): def _process_article(self, article):
url = article.find("a", href=True, attrs={"class": "title"})["href"] url = article.find('a', href=True, attrs={'class': 'title'})['href']
title = self.tag_to_string(article).replace("Editorial", "Editorial: ") title = self.tag_to_string(article).replace('Editorial', 'Editorial: ')
try: try:
title, authors = title.split(", por") title, authors = title.split(', por')
authors = f"por {authors}" authors = f'por {authors}'
except ValueError: except ValueError:
authors = "" authors = ''
self.log("title: ", title, " url: ", url) self.log('title: ', title, ' url: ', url)
return {"title": title, "url": url, "description": authors, "date": ""} return {'title': title, 'url': url, 'description': authors, 'date': ''}
def preprocess_html(self, soup): def preprocess_html(self, soup):
font_size = "90%" font_size = '90%'
# make the footnotes smaller # make the footnotes smaller
for p in soup.find("div", id="nota_pie").findChildren("p", recursive=False): for p in soup.find('div', id='nota_pie').findChildren('p', recursive=False):
p["style"] = f"font-size: {font_size};" p['style'] = f'font-size: {font_size};'
return soup return soup
def parse_index(self): def parse_index(self):
soup_index = self.index_to_soup(self.INDEX) soup_index = self.index_to_soup(self.INDEX)
tag_sumario = soup_index.find("span", text="Sumario") tag_sumario = soup_index.find('span', text='Sumario')
if tag_sumario is None: if tag_sumario is None:
return None return None
url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']
self.log(url_sumario) self.log(url_sumario)
soup_sumario = self.index_to_soup(url_sumario) soup_sumario = self.index_to_soup(url_sumario)
@ -128,20 +128,20 @@ class ElDiplo2023(BasicNewsRecipe):
articles = [] articles = []
dossiers = [] dossiers = []
sumario = soup_sumario.find("div", class_="sumario") sumario = soup_sumario.find('div', class_='sumario')
for section in sumario.find_all("div", recursive=False): for section in sumario.find_all('div', recursive=False):
classes = section.attrs["class"] classes = section.attrs['class']
if "dossier" in classes: if 'dossier' in classes:
dtitle = self.tag_to_string(section.find("h3")) dtitle = self.tag_to_string(section.find('h3'))
darticles = [] darticles = []
for article in section.find_all("div", recursive=False): for article in section.find_all('div', recursive=False):
darticles.append(self._process_article(article)) darticles.append(self._process_article(article))
dossiers.append((dtitle, darticles)) dossiers.append((dtitle, darticles))
else: else:
articles.append(self._process_article(section)) articles.append(self._process_article(section))
feeds.append(("Artículos", articles)) feeds.append(('Artículos', articles))
feeds += dossiers feeds += dossiers
return feeds return feeds


@ -119,11 +119,11 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;}
try: try:
br.open(cover) br.open(cover)
except: except:
self.log("\nCover unavailable") self.log('\nCover unavailable')
cover = None cover = None
return cover return cover
def image_url_processor(cls, baseurl, url): def image_url_processor(cls, baseurl, url):
splitUrl = url.split("cloudfront-") splitUrl = url.split('cloudfront-')
parsedUrl = 'https://cloudfront-' + splitUrl[1] parsedUrl = 'https://cloudfront-' + splitUrl[1]
return parsedUrl return parsedUrl


@ -36,7 +36,7 @@ class ElPaisBabelia(BasicNewsRecipe):
title = self.tag_to_string(post) title = self.tag_to_string(post)
if str(post).find('class=') > 0: if str(post).find('class=') > 0:
klass = post['class'] klass = post['class']
if klass != "": if klass != '':
self.log() self.log()
self.log('--> post: ', post) self.log('--> post: ', post)
self.log('--> url: ', url) self.log('--> url: ', url)


@ -28,12 +28,12 @@ class elcohetealaluna(BasicNewsRecipe):
compress_news_images = True compress_news_images = True
masthead_url = 'https://www.elcohetealaluna.com/wp-content/uploads/2018/06/logo-menu.png' masthead_url = 'https://www.elcohetealaluna.com/wp-content/uploads/2018/06/logo-menu.png'
extra_css = """ extra_css = '''
body{font-family: Georgia, Times, "Times New Roman", serif} body{font-family: Georgia, Times, "Times New Roman", serif}
h1,h2,.post-author-name{font-family: Oswald, sans-serif} h1,h2,.post-author-name{font-family: Oswald, sans-serif}
h2{color: gray} h2{color: gray}
img{margin-top:1em; margin-bottom: 1em; display:block} img{margin-top:1em; margin-bottom: 1em; display:block}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language


@ -28,10 +28,10 @@ class ElCronistaArg(BasicNewsRecipe):
auto_cleanup_keep = '//div[@class="header-bottom"] | //h1 | //h2' auto_cleanup_keep = '//div[@class="header-bottom"] | //h1 | //h2'
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
masthead_url = 'https://www.cronista.com/export/sites/diarioelcronista/arte/v2/lg_cronista_footer.png_665574830.png' masthead_url = 'https://www.cronista.com/export/sites/diarioelcronista/arte/v2/lg_cronista_footer.png_665574830.png'
extra_css = """ extra_css = '''
body{font-family: 'Source Sans Pro', sans-serif} body{font-family: 'Source Sans Pro', sans-serif}
h1,h2,h3,h4{font-family: 'Libre Baskerville', serif} h1,h2,h3,h4{font-family: 'Libre Baskerville', serif}
""" '''
conversion_options = { conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language 'comment': description, 'tags': category, 'publisher': publisher, 'language': language


@ -29,5 +29,5 @@ class Elektroda(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self) feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds: for feed in feeds:
for article in feed.articles[:]: for article in feed.articles[:]:
article.title = article.title[article.title.find("::") + 3:] article.title = article.title[article.title.find('::') + 3:]
return feeds return feeds


@ -35,14 +35,14 @@ class ElMundo(BasicNewsRecipe):
articles_are_obfuscated = True articles_are_obfuscated = True
auto_cleanup = True auto_cleanup = True
temp_files = [] temp_files = []
extra_css = """ extra_css = '''
body{font-family: "PT serif",Georgia,serif,times} body{font-family: "PT serif",Georgia,serif,times}
.metadata_noticia{font-size: small} .metadata_noticia{font-size: small}
.pestana_GDP{font-size: small; font-weight:bold} .pestana_GDP{font-size: small; font-weight:bold}
h1 {color: #333333; font-family: "Clear Sans Bold",Arial,sans-serif,helvetica} h1 {color: #333333; font-family: "Clear Sans Bold",Arial,sans-serif,helvetica}
.hora{color: red} .hora{color: red}
.update{color: gray} .update{color: gray}
""" '''
conversion_options = { conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
@ -83,14 +83,14 @@ class ElMundo(BasicNewsRecipe):
cover = self.masthead_url cover = self.masthead_url
st = time.localtime() st = time.localtime()
year = str(st.tm_year) year = str(st.tm_year)
month = "%.2d" % st.tm_mon month = '%.2d' % st.tm_mon
day = "%.2d" % st.tm_mday day = '%.2d' % st.tm_mday
cover = 'http://img.kiosko.net/' + year + '/' + \ cover = 'http://img.kiosko.net/' + year + '/' + \
month + '/' + day + '/es/elmundo.750.jpg' month + '/' + day + '/es/elmundo.750.jpg'
try: try:
self.browser.open(cover) self.browser.open(cover)
except: except:
self.log("\nPortada no disponible") self.log('\nPortada no disponible')
return cover return cover
def get_obfuscated_article(self, url): def get_obfuscated_article(self, url):
@ -103,7 +103,7 @@ class ElMundo(BasicNewsRecipe):
html = response.read() html = response.read()
count = tries count = tries
except: except:
print("Retrying download...") print('Retrying download...')
count += 1 count += 1
if html is not None: if html is not None:
tfile = PersistentTemporaryFile('_fa.html') tfile = PersistentTemporaryFile('_fa.html')


@ -66,7 +66,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
mcharset = new_tag(soup, 'meta', [ mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
soup.head.insert(0, mcharset) soup.head.insert(0, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']


@ -18,18 +18,18 @@ class En_Globes_Recipe(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [ feeds = [
(u"Main Headlines", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942"), (u'Main Headlines', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942'),
(u"Israeli stocks on Wall Street", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392"), (u'Israeli stocks on Wall Street', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392'),
(u"All news", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725"), (u'All news', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725'),
(u"Macro economics", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389"), (u'Macro economics', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389'),
(u"Aerospace and defense", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380"), (u'Aerospace and defense', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380'),
(u"Real estate", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385"), (u'Real estate', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385'),
(u"Energy and water", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382"), (u'Energy and water', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382'),
(u"Start-ups and venture capital", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397"), (u'Start-ups and venture capital', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397'),
(u"Financial services", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383"), (u'Financial services', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383'),
(u"Tel Aviv markets", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404"), (u'Tel Aviv markets', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404'),
(u"Healthcare", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377"), (u'Healthcare', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377'),
(u"Telecommunications", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386"), (u'Telecommunications', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386'),
(u"Information technology", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376"), (u'Information technology', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376'),
(u"Transport and infrastructure", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388"), (u'Transport and infrastructure', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388'),
] ]


@ -87,8 +87,8 @@ class Engadget(BasicNewsRecipe):
except KeyError: except KeyError:
continue continue
# Reorder the "title" and "content" elements # Reorder the "title" and "content" elements
title_div = soup.find("div", {"class": "caas-title-wrapper"}) title_div = soup.find('div', {'class': 'caas-title-wrapper'})
content_div = soup.find("div", {"class": "caas-content-wrapper"}) content_div = soup.find('div', {'class': 'caas-content-wrapper'})
if title_div and content_div: if title_div and content_div:
soup.body.clear() soup.body.clear()
soup.body.append(title_div) soup.body.append(title_div)

Some files were not shown because too many files have changed in this diff.