uniform string quote (auto-fix)

ruff 'Q'
un-pogaz 2025-01-24 11:14:14 +01:00
parent 2357c1fc48
commit 37771022ce
750 changed files with 8704 additions and 8698 deletions
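Note on the change: ruff's Q rules are the flake8-quotes checks, and a repository-wide quote normalization like this is produced by selecting them and letting ruff rewrite the files in place. A minimal sketch of the configuration and invocation, assuming a pyproject.toml-based setup (the exact settings used for this commit are not shown here):

    # pyproject.toml (hypothetical minimal configuration)
    [tool.ruff.lint]
    select = ["Q"]               # flake8-quotes rules Q000-Q004
    [tool.ruff.lint.flake8-quotes]
    inline-quotes = "single"     # prefer 'text' over "text"
    multiline-quotes = "single"
    docstring-quotes = "single"

    # apply the auto-fix across the whole tree
    ruff check --select Q --fix .

The hunks below show the resulting fix shapes: double-quoted strings and triple-quoted docstrings rewritten with single quotes, and strings that previously escaped an apostrophe (e.g. 'a book\'s files') flipped to double quotes instead, which is the avoid-escape (Q003) behaviour.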


@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
parser, plumber = create_option_parser(['ebook-convert',
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
groups = [(pl.name+ ' Options', '', g.option_list) for g in
parser.option_groups if g.title == "INPUT OPTIONS"]
parser.option_groups if g.title == 'INPUT OPTIONS']
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
'dummyi.'+pl.file_type, '-h'], default_log)
groups = [(pl.name+ ' Options', '', g.option_list) for g in
parser.option_groups if g.title == "OUTPUT OPTIONS"]
parser.option_groups if g.title == 'OUTPUT OPTIONS']
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))


@ -55,7 +55,7 @@ class DemoDialog(QDialog):
self.l.addWidget(self.view_button)
self.update_metadata_button = QPushButton(
'Update metadata in a book\'s files', self)
"Update metadata in a book's files", self)
self.update_metadata_button.clicked.connect(self.update_metadata)
self.l.addWidget(self.update_metadata_button)


@ -61,7 +61,7 @@ if use_archive:
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -157,7 +157,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
'an unworthy, timid ignorance obstructing our progress.”'
@ -170,7 +170,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={
'class': [
'dblClkTrk', 'ec-article-info', 'share_inline_header',
@ -224,7 +224,7 @@ class Economist(BasicNewsRecipe):
def parse_index(self):
# return self.economist_test_article()
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is None:
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
data = json.loads(script_tag.string)
@ -247,20 +247,20 @@ class Economist(BasicNewsRecipe):
self.description = data['description']
feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"):
for part in safe_dict(data, 'hasPart', 'parts'):
section = part['title']
self.log(section)
for art in safe_dict(part, "hasPart", "parts"):
title = safe_dict(art, "title")
desc = safe_dict(art, "rubric") or ''
sub = safe_dict(art, "flyTitle") or ''
for art in safe_dict(part, 'hasPart', 'parts'):
title = safe_dict(art, 'title')
desc = safe_dict(art, 'rubric') or ''
sub = safe_dict(art, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(art).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc})
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()]
@ -311,26 +311,26 @@ class Economist(BasicNewsRecipe):
return ans
def economist_parse_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
feeds = []
for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
section = safe_dict(coll, "headline") or ''
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
section = safe_dict(coll, 'headline') or ''
self.log(section)
articles = []
for part in safe_dict(coll, "hasPart", "parts"):
title = safe_dict(part, "headline") or ''
url = safe_dict(part, "url", "canonical") or ''
for part in safe_dict(coll, 'hasPart', 'parts'):
title = safe_dict(part, 'headline') or ''
url = safe_dict(part, 'url', 'canonical') or ''
if not title or not url:
continue
desc = safe_dict(part, "description") or ''
sub = safe_dict(part, "subheadline") or ''
desc = safe_dict(part, 'description') or ''
sub = safe_dict(part, 'subheadline') or ''
if sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)


@ -47,11 +47,11 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
]
extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
'''
preprocess_regexps = [(re.compile(
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]


@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
def parse_index(self):
feeds = []
for title, url in [
("They Draw and Cook", "http://www.theydrawandcook.com/")
('They Draw and Cook', 'http://www.theydrawandcook.com/')
]:
articles = self.make_links(url)
if articles:


@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheMITPressReader(BasicNewsRecipe):
title = "The MIT Press Reader"
title = 'The MIT Press Reader'
__author__ = 'yodha8'
language = 'en'
description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
" This recipe pulls articles from the past 7 days.")
description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
' This recipe pulls articles from the past 7 days.')
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True


@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
'''
feeds = [


@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
lambda m: '<title>' + m.group(1) + '</title>'),
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
extra_css = """
extra_css = '''
.chapo{font-style:italic; margin: 1em 0 0.5em}
"""
'''


@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
INDEX = u'http://www.adventuregamers.com'
extra_css = """
extra_css = '''
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
.score_header{font-size: large; color: #50544A}
img{margin-bottom: 1em;}
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language


@ -14,7 +14,7 @@ class afr(BasicNewsRecipe):
description = (
'For more than 65 years The Australian Financial Review has been the authority on business,'
' finance and investment news in Australia. It has a reputation for independent, award-winning '
'journalism and is essential reading for Australia\'s business and investor community.'
"journalism and is essential reading for Australia's business and investor community."
)
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
encoding = 'utf-8'


@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
'''
def default_cover(self, cover_file):
"""
'''
Crée une couverture personnalisée avec le logo
"""
'''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
weekday = french_weekday[wkd]
month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}"
date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh')
# Image de base


@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
extra_css = """
extra_css = '''
body{font-family: Arial,sans-serif}
"""
'''
conversion_options = {
'comment': description, 'tags': category,
'publisher': publisher, 'language': language


@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
title = title[0:120] + '...'
href = link.get('href')
if not href:
self._p("BAD HREF: " + str(link))
self._p('BAD HREF: ' + str(link))
return
self.queue_article_link(section, href, title)
@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
age = (datetime.datetime.now() - date).days
if (age > self.oldest_article):
return "too old"
return 'too old'
return False
def scrape_article_date(self, soup):
@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
def date_from_string(self, datestring):
try:
# eg: Posted September 17, 2014
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
except:
dt = None


@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AlbertMohlersBlog(BasicNewsRecipe):
title = u'Albert Mohler\'s Blog'
title = u"Albert Mohler's Blog"
__author__ = 'Peter Grungi'
language = 'en'
oldest_article = 90
@ -16,5 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
language = 'en'
author = 'Albert Mohler'
feeds = [(u'Albert Mohler\'s Blog',
feeds = [(u"Albert Mohler's Blog",
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]


@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
# Extract a list of dates from the page.
# Subset this out to the list of target dates for extraction.
date_list = []
for div in soup.findAll('div', attrs={'id': "dayheader"}):
for div in soup.findAll('div', attrs={'id': 'dayheader'}):
date_list.append(self.tag_to_string(div))
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
date_list_bool = [
@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
# Process each paragraph one by one.
# Stop when the text of the previous div is not in the target date list.
for div in soup.findAll('div', attrs={'class': "mobile-front"}):
for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
for p in div.findAll('p'):
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
if p.find('a'):
title = self.tag_to_string(p)
link = p.find('a')['href']
if self.tag_to_string(p.findPreviousSibling('h3')
) == "Articles of Note":
) == 'Articles of Note':
articles_note.append({
'title': title,
'url': link,
@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
'date': ''
})
elif self.tag_to_string(p.findPreviousSibling('h3')
) == "New Books":
) == 'New Books':
new_books.append({
'title': title,
'url': link,


@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
self.log('Cover URL found:', cover_url)
return cover_url
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
except Exception as e:


@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
articles = []
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
href = aTag.get('href',False)
if not href.encode("utf-8").startswith(url.encode("utf-8")) :
if not href.encode('utf-8').startswith(url.encode('utf-8')) :
continue # not in same section
title = href.split('/')[-1].split('-')[0]


@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
language = 'es_AR'
publication_type = 'newsportal'
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
extra_css = """
extra_css = '''
body{font-family: Roboto, sans-serif}
"""
'''
conversion_options = {
'comment': description,


@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AmericanThinker(BasicNewsRecipe):
title = u'American Thinker'
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
__author__ = 'Walt Anthony'
publisher = 'Thomas Lifson'
category = 'news, politics, USA'


@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
def print_version(self, url):
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
return url.replace('/show/', '/print/') # 2014-02-27 AGE: update


@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
language = 'en'
__author__ = 'unkn0wn'
description = (
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
'more than 5000 years of Egyptian history. Published bimonthly.'
)


@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4:
continue
feeds_dict[section].append({"title": '', "url": url})
feeds_dict[section].append({'title': '', 'url': url})
return [(section, articles) for section, articles in feeds_dict.items()]
def preprocess_raw_html(self, raw, *a):


@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
url = str(snaps['OrgId'])
if snaps['ObjectType'] == 4:
continue
feeds_dict[section].append({"title": '', "url": url})
feeds_dict[section].append({'title': '', 'url': url})
return [(section, articles) for section, articles in feeds_dict.items()]
def preprocess_raw_html(self, raw, *a):


@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
]:
self.log('Finding strips for:', title)
articles = self.make_links(url, title)


@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ArretSurImages(BasicNewsRecipe):
title = 'Arrêt sur Images'
description = 'Site français d\'analyse des médias'
description = "Site français d'analyse des médias"
language = 'fr'
encoding = 'utf-8'
needs_subscription = True
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
]
def default_cover(self, cover_file):
"""
'''
Crée une couverture personnalisée avec le logo ASI
"""
'''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
weekday = french_weekday[wkd]
month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}"
date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh')
img = QImage(1400, 1920, QImage.Format_RGB888)
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
print('Authentification réussie')
else:
print('Échec de l\'authentification - Vérifiez vos identifiants')
print("Échec de l'authentification - Vérifiez vos identifiants")
except Exception as e:
print(f'Erreur lors de l\'authentification: {str(e)}')
print(f"Erreur lors de l'authentification: {str(e)}")
return br
def get_article_url(self, article):


@ -1,12 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = "GPL v3"
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
__license__ = 'GPL v3'
__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
"""
'''
https://www.asahi.com/ajw/
"""
'''
from datetime import datetime
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AsahiShimbunEnglishNews(BasicNewsRecipe):
title = "The Asahi Shimbun"
__author__ = "Albert Aparicio Isarn"
title = 'The Asahi Shimbun'
__author__ = 'Albert Aparicio Isarn'
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
" coverage of cool Japan,focusing on manga, travel and other timely news.")
publisher = "The Asahi Shimbun Company"
publication_type = "newspaper"
category = "news, japan"
language = "en_JP"
description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
' coverage of cool Japan,focusing on manga, travel and other timely news.')
publisher = 'The Asahi Shimbun Company'
publication_type = 'newspaper'
category = 'news, japan'
language = 'en_JP'
index = "https://www.asahi.com"
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
index = 'https://www.asahi.com'
masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
oldest_article = 3
max_articles_per_feed = 40
no_stylesheets = True
remove_javascript = True
remove_tags_before = {"id": "MainInner"}
remove_tags_after = {"class": "ArticleText"}
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
remove_tags_before = {'id': 'MainInner'}
remove_tags_after = {'class': 'ArticleText'}
remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
def get_whats_new(self):
soup = self.index_to_soup(self.index + "/ajw/new")
news_section = soup.find("div", attrs={"class": "specialList"})
soup = self.index_to_soup(self.index + '/ajw/new')
news_section = soup.find('div', attrs={'class': 'specialList'})
new_news = []
for item in news_section.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in news_section.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
new_news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return new_news
def get_top6(self, soup):
top = soup.find("ul", attrs={"class": "top6"})
top = soup.find('ul', attrs={'class': 'top6'})
top6_news = []
for item in top.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in top.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
top6_news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return top6_news
def get_section_news(self, soup):
news_grid = soup.find("ul", attrs={"class": "default"})
news_grid = soup.find('ul', attrs={'class': 'default'})
news = []
for item in news_grid.findAll("li"):
title = item.find("p", attrs={"class": "title"}).string
date_string = item.find("p", attrs={"class": "date"}).next
for item in news_grid.findAll('li'):
title = item.find('p', attrs={'class': 'title'}).string
date_string = item.find('p', attrs={'class': 'date'}).next
date = date_string.strip()
url = self.index + item.find("a")["href"]
url = self.index + item.find('a')['href']
news.append(
{
"title": title,
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
"url": url,
"description": "",
'title': title,
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
'url': url,
'description': '',
}
)
return news
def get_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section)
soup = self.index_to_soup(self.index + '/ajw/' + section)
section_news_items = self.get_top6(soup)
section_news_items.extend(self.get_section_news(soup))
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
return section_news_items
def get_special_section(self, section):
soup = self.index_to_soup(self.index + "/ajw/" + section)
top = soup.find("div", attrs={"class": "Section"})
soup = self.index_to_soup(self.index + '/ajw/' + section)
top = soup.find('div', attrs={'class': 'Section'})
special_news = []
for item in top.findAll("li"):
item_a = item.find("a")
for item in top.findAll('li'):
item_a = item.find('a')
text_split = item_a.text.strip().split("\n")
text_split = item_a.text.strip().split('\n')
title = text_split[0]
description = text_split[1].strip()
url = self.index + item_a["href"]
url = self.index + item_a['href']
special_news.append(
{
"title": title,
"date": "",
"url": url,
"description": description,
'title': title,
'date': '',
'url': url,
'description': description,
}
)
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
feeds = [
("What's New", self.get_whats_new()),
("National Report", self.get_section("national_report")),
("Politics", self.get_section("politics")),
("Business", self.get_section("business")),
("Asia & World - China", self.get_section("asia_world/china")),
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
("Asia & World - World", self.get_section("asia_world/world")),
("Sci & Tech", self.get_section("sci_tech")),
("Culture - Style", self.get_section("culture/style")),
('National Report', self.get_section('national_report')),
('Politics', self.get_section('politics')),
('Business', self.get_section('business')),
('Asia & World - China', self.get_section('asia_world/china')),
('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
('Asia & World - World', self.get_section('asia_world/world')),
('Sci & Tech', self.get_section('sci_tech')),
('Culture - Style', self.get_section('culture/style')),
# ("Culture - Cooking", self.get_section("culture/cooking")),
("Culture - Movies", self.get_section("culture/movies")),
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
("Travel", self.get_section("travel")),
("Sports", self.get_section("sports")),
("Opinion - Editorial", self.get_section("opinion/editorial")),
("Opinion - Vox Populi", self.get_section("opinion/vox")),
("Opinion - Views", self.get_section("opinion/views")),
("Special", self.get_special_section("special")),
('Culture - Movies', self.get_section('culture/movies')),
('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
('Travel', self.get_section('travel')),
('Sports', self.get_section('sports')),
('Opinion - Editorial', self.get_section('opinion/editorial')),
('Opinion - Vox Populi', self.get_section('opinion/vox')),
('Opinion - Views', self.get_section('opinion/views')),
('Special', self.get_special_section('special')),
]
return feeds


@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
publication_type = 'magazine'
auto_cleanup = True
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
extra_css = """
extra_css = '''
body{font-family: "Droid Serif", serif}
.entry-title {font-family: "Playfair Display", serif}
img {display: block}
"""
'''
recipe_specific_options = {
'days': {


@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
publication_type = 'newspaper'
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
INDEX = 'http://www.buenosairesherald.com'
extra_css = """
extra_css = '''
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
h1{font-family: Georgia,serif}
#fecha{text-align: right; font-size: small}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language


@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
title = u'Bangkok Post'
publisher = u'Post Publishing PCL'
category = u'News'
description = u'The world\'s window to Thailand'
description = u"The world's window to Thailand"
oldest_article = 7
max_articles_per_feed = 100


@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
class barrons(BasicNewsRecipe):
title = 'Barron\'s Magazine'
title = "Barron's Magazine"
__author__ = 'unkn0wn'
description = (
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
'relevant statistics.'
)
language = 'en_US'
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYYMMDD format)',
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
'long': "For example, 20240722.\nIf it didn't work, try again later."
}
}


@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
# Select / de-select the feeds you want in your ebook.
feeds = [
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
("Science/Environment",
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
("Entertainment/Arts",
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
('Science/Environment',
'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
('Entertainment/Arts',
'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
("Sport Front Page",
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
('Sport Front Page',
'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),


@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
def print_version(self, url):
# Handle sports page urls type 01:
if (url.find("go/rss/-/sport1/") != -1):
temp_url = url.replace("go/rss/-/", "")
if (url.find('go/rss/-/sport1/') != -1):
temp_url = url.replace('go/rss/-/', '')
# Handle sports page urls type 02:
elif (url.find("go/rss/int/news/-/sport1/") != -1):
temp_url = url.replace("go/rss/int/news/-/", "")
elif (url.find('go/rss/int/news/-/sport1/') != -1):
temp_url = url.replace('go/rss/int/news/-/', '')
# Handle regular news page urls:
else:
temp_url = url.replace("go/rss/int/news/-/", "")
temp_url = url.replace('go/rss/int/news/-/', '')
# Always add "?print=true" to the end of the url.
print_url = temp_url + "?print=true"
print_url = temp_url + '?print=true'
return print_url


@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
feeds.append(("O'Reilly Factor", articles_shows))
if articles_columns:
feeds.append(("Newspaper Column", articles_columns))
feeds.append(('Newspaper Column', articles_columns))
return feeds


@ -27,8 +27,8 @@ class bleskRecipe(BasicNewsRecipe):
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
extra_css = '''
'''
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})


@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
language = 'sr'
publication_type = 'newspaper'
extra_css = """
extra_css = '''
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Georgia, serif1, serif}
@ -35,7 +35,7 @@ class Blic(BasicNewsRecipe):
.potpis{font-size: x-small; color: gray}
.article_info{font-size: small}
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True


@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
description = (
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
' companies, events, and trends shaping today\'s complex, global economy.'
" companies, events, and trends shaping today's complex, global economy."
)
remove_empty_feeds = True


@ -2,29 +2,29 @@ from urllib.parse import urljoin
from calibre.web.feeds.news import BasicNewsRecipe
_issue_url = ""
_issue_url = ''
class BookforumMagazine(BasicNewsRecipe):
title = "Bookforum"
title = 'Bookforum'
description = (
"Bookforum is an American book review magazine devoted to books and "
"the discussion of literature. https://www.bookforum.com/print"
'Bookforum is an American book review magazine devoted to books and '
'the discussion of literature. https://www.bookforum.com/print'
)
language = "en"
__author__ = "ping"
publication_type = "magazine"
encoding = "utf-8"
language = 'en'
__author__ = 'ping'
publication_type = 'magazine'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
auto_cleanup = False
compress_news_images = True
compress_news_images_auto_size = 8
keep_only_tags = [dict(class_="blog-article")]
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
keep_only_tags = [dict(class_='blog-article')]
remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
extra_css = """
extra_css = '''
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
display: block; max-width: 100%; height: auto;
}
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
"""
'''
def preprocess_html(self, soup):
# strip away links that's not needed
for ele in soup.select(".blog-article__header a"):
for ele in soup.select('.blog-article__header a'):
ele.unwrap()
return soup
def parse_index(self):
soup = self.index_to_soup(
_issue_url if _issue_url else "https://www.bookforum.com/print"
_issue_url if _issue_url else 'https://www.bookforum.com/print'
)
meta_ele = soup.find("meta", property="og:title")
meta_ele = soup.find('meta', property='og:title')
if meta_ele:
self.timefmt = f' [{meta_ele["content"]}]'
cover_ele = soup.find("img", class_="toc-issue__cover")
cover_ele = soup.find('img', class_='toc-issue__cover')
if cover_ele:
self.cover_url = urljoin(
"https://www.bookforum.com",
soup.find("img", class_="toc-issue__cover")["src"],
'https://www.bookforum.com',
soup.find('img', class_='toc-issue__cover')['src'],
)
articles = {}
for sect_ele in soup.find_all("div", class_="toc-articles__section"):
for sect_ele in soup.find_all('div', class_='toc-articles__section'):
section_name = self.tag_to_string(
sect_ele.find("a", class_="toc__anchor-links__link")
sect_ele.find('a', class_='toc__anchor-links__link')
)
for article_ele in sect_ele.find_all("article"):
title_ele = article_ele.find("h1")
sub_title_ele = article_ele.find(class_="toc-article__subtitle")
for article_ele in sect_ele.find_all('article'):
title_ele = article_ele.find('h1')
sub_title_ele = article_ele.find(class_='toc-article__subtitle')
articles.setdefault(section_name, []).append(
{
"title": self.tag_to_string(title_ele),
"url": article_ele.find("a", class_="toc-article__link")[
"href"
'title': self.tag_to_string(title_ele),
'url': article_ele.find('a', class_='toc-article__link')[
'href'
],
"description": self.tag_to_string(sub_title_ele)
'description': self.tag_to_string(sub_title_ele)
if sub_title_ele
else "",
else '',
}
)
return articles.items()


@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
language = 'da'
keep_only_tags = [
dict(name="h1", attrs={'itemprop': 'headline'}),
dict(name="div", attrs={'itemprob': 'datePublished'}),
dict(name="div", attrs={'itemprop': 'articleBody'}),
dict(name='h1', attrs={'itemprop': 'headline'}),
dict(name='div', attrs={'itemprob': 'datePublished'}),
dict(name='div', attrs={'itemprop': 'articleBody'}),
]
# Feed are found here:


@ -42,24 +42,24 @@ def class_startswith(*prefixes):
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
comics_to_fetch = {
"ADAM@HOME": 'ad',
"ARLO & JANIS": 'aj',
'ADAM@HOME': 'ad',
'ARLO & JANIS': 'aj',
# "CUL DE SAC": 'cds',
# "CURTIS": 'kfcrt',
"DILBERT": 'dt',
"DOONESBURY": 'db',
"DUSTIN": 'kfdus',
"F MINUS": 'fm',
"FOR BETTER OR WORSE": 'fb',
'DILBERT': 'dt',
'DOONESBURY': 'db',
'DUSTIN': 'kfdus',
'F MINUS': 'fm',
'FOR BETTER OR WORSE': 'fb',
# "GET FUZZY": 'gz',
# "MOTHER GOOSE & GRIMM": 'tmmgg',
# "JUMPSTART": 'jt',
"MONTY": 'mt',
'MONTY': 'mt',
# "POOCH CAFE",
"RHYMES WITH ORANGE": 'kfrwo',
'RHYMES WITH ORANGE': 'kfrwo',
# "ROSE IS ROSE": 'rr',
# "ZIPPY THE PINHEAD": 'kfzpy',
"ZITS": 'kfzt'
'ZITS': 'kfzt'
}
@ -77,10 +77,10 @@ def extract_json(raw_html):
def absolutize_url(url):
if url.startswith("//"):
return "https:" + url
if url.startswith('//'):
return 'https:' + url
if url.startswith('/'):
url = "https://www.bostonglobe.com" + url
url = 'https://www.bostonglobe.com' + url
return url
@ -120,7 +120,7 @@ def main():
class BostonGlobeSubscription(BasicNewsRecipe):
title = "Boston Globe"
title = 'Boston Globe'
__author__ = 'Kovid Goyal'
description = 'The Boston Globe'
language = 'en_US'


@ -25,17 +25,17 @@ def class_startswith(*prefixes):
return dict(attrs={'class': q})
def absolutize_url(url):
if url.startswith("//"):
return "https:" + url
if url.startswith('//'):
return 'https:' + url
if url.startswith('/'):
url = "https://www.bostonglobe.com" + url
url = 'https://www.bostonglobe.com' + url
return url
class BostonGlobePrint(BasicNewsRecipe):
title = "Boston Globe | Print Edition"
title = 'Boston Globe | Print Edition'
__author__ = 'Kovid Goyal, unkn0wn'
description = 'The Boston Globe - Today\'s Paper'
description = "The Boston Globe - Today's Paper"
language = 'en_US'
keep_only_tags = [
@ -70,7 +70,7 @@ class BostonGlobePrint(BasicNewsRecipe):
for image in soup.findAll('img', src=True):
if image['src'].endswith('750.jpg'):
return 'https:' + image['src']
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
@ -94,7 +94,7 @@ class BostonGlobePrint(BasicNewsRecipe):
desc = self.tag_to_string(d)
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
feeds_dict[section].append({"title": title, "url": url, "description": desc})
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in feeds_dict.items()]
def preprocess_raw_html(self, raw_html, url):


@ -23,40 +23,40 @@ class brewiarz(BasicNewsRecipe):
next_days = 1
def parse_index(self):
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
"05": "v", "06": "vi", "07": "vii", "08": "viii",
"09": "ix", "10": "x", "11": "xi", "12": "xii"}
dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
'05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
'09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
now = datetime.datetime.now()
feeds = []
for i in range(0, self.next_days):
url_date = now + datetime.timedelta(days=i)
url_date_month = url_date.strftime("%m")
url_date_month = url_date.strftime('%m')
url_date_month_roman = dec2rom_dict[url_date_month]
url_date_day = url_date.strftime("%d")
url_date_year = url_date.strftime("%Y")[2:]
url_date_weekday = url_date.strftime("%A")
url_date_day = url_date.strftime('%d')
url_date_year = url_date.strftime('%Y')[2:]
url_date_weekday = url_date.strftime('%A')
url_date_weekday_pl = weekday_dict[url_date_weekday]
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + \
url_date_year + '/' + url_date_day + url_date_month + '/index.php3'
articles = self.parse_pages(url)
if articles:
title = url_date_weekday_pl + " " + url_date_day + \
"." + url_date_month + "." + url_date_year
title = url_date_weekday_pl + ' ' + url_date_day + \
'.' + url_date_month + '.' + url_date_year
feeds.append((title, articles))
else:
sectors = self.get_sectors(url)
for subpage in sectors:
title = url_date_weekday_pl + " " + url_date_day + "." + \
url_date_month + "." + url_date_year + " - " + subpage.string
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
"/" + url_date_day + url_date_month + \
"/" + subpage['href']
title = url_date_weekday_pl + ' ' + url_date_day + '.' + \
url_date_month + '.' + url_date_year + ' - ' + subpage.string
url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year + \
'/' + url_date_day + url_date_month + \
'/' + subpage['href']
print(url)
articles = self.parse_pages(url)
if articles:
@ -91,7 +91,7 @@ class brewiarz(BasicNewsRecipe):
sublinks = ol.findAll(name='a')
for sublink in sublinks:
link_title = self.tag_to_string(
link) + " - " + self.tag_to_string(sublink)
link) + ' - ' + self.tag_to_string(sublink)
link_url_print = re.sub(
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
link_url = url[:-10] + link_url_print
@ -145,7 +145,7 @@ class brewiarz(BasicNewsRecipe):
if x == tag:
break
else:
print("Can't find", tag, "in", tag.parent)
print("Can't find", tag, 'in', tag.parent)
continue
for r in reversed(tag.contents):
tag.parent.insert(i, r)


@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
remove_empty_feeds = True
publication_type = 'newsportal'
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
extra_css = """
extra_css = '''
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language


@ -64,7 +64,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
if dt.weekday() == 6:
self.log.warn(
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
" And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
)
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
raw = self.index_to_soup(url, raw=True)


@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
# Insert feeds in specified order, if available
feedSort = ['Editor\'s Note', 'Editors note']
feedSort = ["Editor's Note", 'Editors note']
for i in feedSort:
if i in sections:
feeds.append((i, sections[i]))


@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CACM(BasicNewsRecipe):
title = "ACM CACM Magazine"
description = "Published on day 1 of every month."
title = 'ACM CACM Magazine'
description = 'Published on day 1 of every month.'
language = 'en'
oldest_article = 30
max_articles_per_feed = 100
@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
]
def get_cover_url(self):
"""
'''
Parse out cover URL from cover page.
Example:
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
"""
'''
soup = self.index_to_soup("https://cacm.acm.org/")
a_img = soup.find("a", class_="menuCover")
img_url = a_img.img["src"]
img_url = img_url.split("?")[0]
img_url = img_url.replace(".large", "")
soup = self.index_to_soup('https://cacm.acm.org/')
a_img = soup.find('a', class_='menuCover')
img_url = a_img.img['src']
img_url = img_url.split('?')[0]
img_url = img_url.replace('.large', '')
return img_url


@ -29,28 +29,28 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
]
feeds = [
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
(u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
(u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
(u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
(u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
(u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
(u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
(u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
(u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
(u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
(u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
(u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
(u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
(u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
(u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
(u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
(u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
(u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
(u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
(u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
(u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
(u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
(u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
(u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
(u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
]


@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
continue
break
if daysback == 7:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
def fixChars(self, string):
# Replace lsquo (\x91)
fixed = re.sub("\x91", "", string)
fixed = re.sub('\x91', '', string)
# Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed)
fixed = re.sub('\x92', '', fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed)
fixed = re.sub('\x93', '', fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed)
fixed = re.sub('\x94', '', fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed)
fixed = re.sub('\x96', '', fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed)
fixed = re.sub("&#x2019;", "", fixed)
fixed = re.sub('\x97', '', fixed)
fixed = re.sub('&#x2019;', '', fixed)
return fixed
def massageNCXText(self, description):
@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'):
url = self.url_prefix + url
if not url.startswith(self.url_prefix):
print("Rejected " + url)
print('Rejected ' + url)
return
if url in self.url_list:
print("Rejected dup " + url)
print('Rejected dup ' + url)
return
self.url_list.append(url)
title = self.tag_to_string(atag, False)
@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
return
dtag = adiv.find('div', 'content')
description = ''
print("URL " + url)
print("TITLE " + title)
print('URL ' + url)
print('TITLE ' + title)
if dtag is not None:
stag = dtag.span
if stag is not None:
@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
print('DESCRIPTION: ' + description)
if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl)
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try:
soup = self.index_to_soup(self.url_prefix + keyurl)
except:
print("Section: " + key + ' NOT FOUND')
print('Section: ' + key + ' NOT FOUND')
return
ans.append(key)
mainsoup = soup.find('div', 'bodywrapper')


@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),
dict(name='p'),
dict(name='span', attrs={'id': ["textbody"]})
dict(name='span', attrs={'id': ['textbody']})
]
# 3 posts seemed to have utf8 encoding


@ -96,7 +96,7 @@ class CaravanMagazine(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *args, **kw)
if not self.username or not self.password:
return br
data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
if not isinstance(data, bytes):
data = data.encode('utf-8')
rq = Request(
@ -138,7 +138,7 @@ class CaravanMagazine(BasicNewsRecipe):
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
x = d.split('-')
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
raw = json.loads(self.index_to_soup(api, raw=True))
@ -174,7 +174,7 @@ class CaravanMagazine(BasicNewsRecipe):
def print_version(self, url):
slug = urlparse(url).path
inp = json.dumps({"0":{"json":{"slug":slug}}})
inp = json.dumps({'0':{'json':{'slug':slug}}})
return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
def preprocess_raw_html(self, raw, url):


@ -5,9 +5,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CATOInstitute(BasicNewsRecipe):
title = u'The CATO Institute'
description = "The Cato Institute is a public policy research organization — a think tank — \
description = 'The Cato Institute is a public policy research organization — a think tank — \
dedicated to the principles of individual liberty, limited government, free markets and peace.\
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues."
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues.'
__author__ = '_reader'
__date__ = '05 July 2012'
__version__ = '1.0'

View File

@ -24,7 +24,7 @@ class CSMonitor(BasicNewsRecipe):
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
extra_css = """
extra_css = '''
body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.head {font-family: Georgia,"Times New Roman",Times,serif}
@ -32,7 +32,7 @@ class CSMonitor(BasicNewsRecipe):
.hide{display: none}
.sLoc{font-weight: bold}
ul{list-style-type: none}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -39,7 +39,7 @@ class Chronicle(BasicNewsRecipe):
# Go to the issue
soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
issue = soup0.find('ul', attrs={'class': 'feature-promo-list'}).li
issueurl = "http://chronicle.com" + issue.a['href']
issueurl = 'http://chronicle.com' + issue.a['href']
# Find date
dates = self.tag_to_string(issue.a).split(': ')[-1]
@ -47,12 +47,12 @@ class Chronicle(BasicNewsRecipe):
# Find cover
cover = soup0.find('div', attrs={
'class': 'side-content'}).find(attrs={'src': re.compile("photos/biz/Current")})
'class': 'side-content'}).find(attrs={'src': re.compile('photos/biz/Current')})
if cover is not None:
if "chronicle.com" in cover['src']:
if 'chronicle.com' in cover['src']:
self.cover_url = cover['src']
else:
self.cover_url = "http://chronicle.com" + cover['src']
self.cover_url = 'http://chronicle.com' + cover['src']
# Go to the main body
soup = self.index_to_soup(issueurl)
div = soup.find('div', attrs={'id': 'article-body'})
@ -64,7 +64,7 @@ class Chronicle(BasicNewsRecipe):
a = post.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = "http://chronicle.com" + a['href'].strip()
url = 'http://chronicle.com' + a['href'].strip()
sectiontitle = post.findPrevious('h3')
if sectiontitle is None:
sectiontitle = post.findPrevious('h4')

View File

@ -18,24 +18,24 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
# remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id': ["header", "navigation", "skip-link",
"header-print", "header-print-url", "meta-toolbar", "footer"]}),
dict(name='div', attrs={'class': ["region region-sidebar-first column sidebar", "breadcrumb",
"breadcrumb-title", "meta", "comment-wrapper",
"field field-name-field-show-teaser-right field-type-list-boolean field-label-above",
"page-header",
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
"pagination",
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1",
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2", # 2011-09-23
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2", # 2011-09-23
dict(name='div', attrs={'id': ['header', 'navigation', 'skip-link',
'header-print', 'header-print-url', 'meta-toolbar', 'footer']}),
dict(name='div', attrs={'class': ['region region-sidebar-first column sidebar', 'breadcrumb',
'breadcrumb-title', 'meta', 'comment-wrapper',
'field field-name-field-show-teaser-right field-type-list-boolean field-label-above',
'page-header',
'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1',
'pagination',
'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1',
'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2', # 2011-09-23
'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2', # 2011-09-23
]}),
dict(name='div', attrs={'title': ["Dossier Auswahl"]}),
dict(name='h2', attrs={'class': ["title comment-form"]}),
dict(name='div', attrs={'title': ['Dossier Auswahl']}),
dict(name='h2', attrs={'class': ['title comment-form']}),
dict(name='form', attrs={
'class': ["comment-form user-info-from-cookie"]}),
'class': ['comment-form user-info-from-cookie']}),
dict(name='table', attrs={
'class': ["mcx-social-horizontal", "page-header"]}),
'class': ['mcx-social-horizontal', 'page-header']}),
]
feeds = [

View File

@ -34,7 +34,7 @@ class AdvancedUserRecipe1234144423(BasicNewsRecipe):
dict(name='div', attrs={'class': ['padding', 'sidebar-photo', 'blog caitlin']})]
remove_tags = [
dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ["pluckcomments", "StoryChat"]}), dict(
dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ['pluckcomments', 'StoryChat']}), dict(
name='div', attrs={'class': ['articleflex-container', ]}), dict(name='p', attrs={'class': ['posted', 'tags']})
]

View File

@ -23,14 +23,14 @@ class CiperChile(BasicNewsRecipe):
remove_empty_feeds = True
publication_type = 'blog'
masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png'
extra_css = """
extra_css = '''
body{font-family: Arial,sans-serif}
.excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em}
.author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small}
.date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey}
.epigrafe{font-size: small; color: grey}
img{margin-bottom: 0.4em; display:block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -44,7 +44,7 @@ class Clarin(BasicNewsRecipe):
# To get all the data (images)
auto_cleanup = False
extra_css = """
extra_css = '''
h1#title {
line-height: 1em;
margin: 0 0 .5em 0;
@ -64,7 +64,7 @@ class Clarin(BasicNewsRecipe):
font-size: .9em;
margin-bottom: .5em;
}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -25,16 +25,16 @@ class CNetJapan(BasicNewsRecipe):
lambda match: '<!-- removed -->'),
]
remove_tags_before = dict(id="contents_l")
remove_tags_before = dict(id='contents_l')
remove_tags = [
{'class': "social_bkm_share"},
{'class': "social_bkm_print"},
{'class': "block20 clearfix"},
dict(name="div", attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"},
{'class': "tag_right"}
{'class': 'social_bkm_share'},
{'class': 'social_bkm_print'},
{'class': 'block20 clearfix'},
dict(name='div', attrs={'id': 'bookreview'}),
{'class': 'tag_left_ttl'},
{'class': 'tag_right'}
]
remove_tags_after = {'class': "block20"}
remove_tags_after = {'class': 'block20'}
def parse_feeds(self):

View File

@ -25,16 +25,16 @@ class CNetJapanDigital(BasicNewsRecipe):
lambda match: '<!-- removed -->'),
]
remove_tags_before = dict(id="contents_l")
remove_tags_before = dict(id='contents_l')
remove_tags = [
{'class': "social_bkm_share"},
{'class': "social_bkm_print"},
{'class': "block20 clearfix"},
dict(name="div", attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"},
{'class': "tag_right"}
{'class': 'social_bkm_share'},
{'class': 'social_bkm_print'},
{'class': 'block20 clearfix'},
dict(name='div', attrs={'id': 'bookreview'}),
{'class': 'tag_left_ttl'},
{'class': 'tag_right'}
]
remove_tags_after = {'class': "block20"}
remove_tags_after = {'class': 'block20'}
def parse_feeds(self):

View File

@ -25,15 +25,15 @@ class CNetJapanRelease(BasicNewsRecipe):
lambda match: '<!-- removed -->'),
]
remove_tags_before = dict(id="contents_l")
remove_tags_before = dict(id='contents_l')
remove_tags = [
{'class': "social_bkm_share"},
{'class': "social_bkm_print"},
{'class': "block20 clearfix"},
dict(name="div", attrs={'id': 'bookreview'}),
{'class': "tag_left_ttl"}
{'class': 'social_bkm_share'},
{'class': 'social_bkm_print'},
{'class': 'block20 clearfix'},
dict(name='div', attrs={'id': 'bookreview'}),
{'class': 'tag_left_ttl'}
]
remove_tags_after = {'class': "block20"}
remove_tags_after = {'class': 'block20'}
def parse_feeds(self):

View File

@ -56,7 +56,7 @@ class CnetNews(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),
dict(section='author'),
dict(id=["article-body", 'cnetReview']),
dict(id=['article-body', 'cnetReview']),
dict(attrs={'class': 'deal-content'}),
]

View File

@ -72,7 +72,7 @@ class CNN(BasicNewsRecipe):
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
masthead = None
return masthead

View File

@ -36,9 +36,9 @@ class ContretempsRecipe(BasicNewsRecipe):
return None
def default_cover(self, cover_file):
"""
'''
Crée une couverture personnalisée pour Contretemps
"""
'''
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
@ -56,7 +56,7 @@ class ContretempsRecipe(BasicNewsRecipe):
weekday = french_weekday[wkd]
month = french_month[today.month]
date_str = f"{weekday} {today.day} {month} {today.year}"
date_str = f'{weekday} {today.day} {month} {today.year}'
edition = today.strftime('Édition de %Hh%M')
# Création de l'image de base (ratio ~1.6 pour format livre)

View File

@ -5,10 +5,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
class CosmosMagazine(BasicNewsRecipe):
title = "Cosmos Magazine"
title = 'Cosmos Magazine'
description = (
"Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec)."
"It is produced by The Royal Institution of Australia Inc (RiAus)."
'Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec).'
'It is produced by The Royal Institution of Australia Inc (RiAus).'
)
language = 'en_AU'
__author__ = 'yodha8'

View File

@ -70,12 +70,12 @@ class CourrierInternational(BasicNewsRecipe):
}
'''
needs_subscription = "optional"
needs_subscription = 'optional'
login_url = 'http://www.courrierinternational.com/login'
def get_browser(self):
def is_form_login(form):
return "id" in form.attrs and form.attrs['id'] == "user-login-form"
return 'id' in form.attrs and form.attrs['id'] == 'user-login-form'
br = BasicNewsRecipe.get_browser(self)
if self.username:
br.open(self.login_url)
@ -86,8 +86,8 @@ class CourrierInternational(BasicNewsRecipe):
return br
def preprocess_html(self, soup):
for link in soup.findAll("a", href=re.compile('^/')):
link["href"] = 'http://www.courrierinternational.com' + link["href"]
for link in soup.findAll('a', href=re.compile('^/')):
link['href'] = 'http://www.courrierinternational.com' + link['href']
return soup
feeds = [

View File

@ -21,10 +21,10 @@ class CubaDebate(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
publication_type = 'newsportal'
extra_css = """
extra_css = '''
#BlogTitle{font-size: xx-large; font-weight: bold}
body{font-family: Verdana, Arial, Tahoma, sans-serif}
"""
'''
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher

View File

@ -23,7 +23,7 @@ class DainikBhaskar(BasicNewsRecipe):
soup = self.index_to_soup('https://epaper.bhaskar.com/')
tag = soup.find(attrs={'class': 'scaleDiv'})
if tag:
self.cover_url = tag.find('img')['src'].replace("_ss.jpg", "_l.jpg")
self.cover_url = tag.find('img')['src'].replace('_ss.jpg', '_l.jpg')
return super().get_cover_url()
keep_only_tags = [

View File

@ -31,11 +31,11 @@ class Danas(BasicNewsRecipe):
auto_cleanup = True
auto_cleanup_keep = '//div[@class="post-intro-above"] //h1[@class="post-title"] | //div[@class="post-intro-title"] | //div[@class="post-meta-wrapper"]'
resolve_internal_links = True
extra_css = """
extra_css = '''
.author{font-size: small}
.published {font-size: small}
img{margin-bottom: 0.8em}
"""
'''
conversion_options = {
'comment': description,
@ -66,7 +66,7 @@ class Danas(BasicNewsRecipe):
'avgust', 'septembar', 'oktobar', 'novembar', 'decembar']
td = date.today()
monthname = months[td.month - 1]
lurl = td.strftime("https://www.danas.rs/naslovna/naslovna-strana-za-%d-" + monthname + "-%Y/")
lurl = td.strftime('https://www.danas.rs/naslovna/naslovna-strana-za-%d-' + monthname + '-%Y/')
soup = self.index_to_soup(lurl)
al = soup.find('div', attrs={'class':'corax-image'})
if al and al.img:

View File

@ -77,9 +77,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
soup.html['lang'] = self.lang
soup.html['dir'] = self.direction
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
('http-equiv', 'Content-Language'), ('content', self.lang)])
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)
return soup

View File

@ -16,8 +16,8 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe):
def parse_index(self):
articles = []
feeds = []
soup = self.index_to_soup("http://www.democracyjournal.org")
for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")):
soup = self.index_to_soup('http://www.democracyjournal.org')
for x in soup.findAll(href=re.compile(r'http://www\.democracyjournal\.org/\d*/.*php$')):
url = x.get('href')
title = self.tag_to_string(x)
articles.append({'title': title, 'url': url,

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python
"""
'''
demorgen.be
"""
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -13,7 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
description = 'News from Belgium in Dutch'
oldest_article = 1
language = 'nl_BE'
encoding = "utf-8"
encoding = 'utf-8'
max_articles_per_feed = 100
no_stylesheets = True
remove_attributes = ['style', 'height', 'width']
@ -23,10 +23,10 @@ class DeMorganBe(BasicNewsRecipe):
masthead_url = 'https://www.demorgen.be/_next/static/media/demorgen_logo.dce579e2.svg'
cover_url = 'https://usercontent.one/wp/www.insidejazz.be/wp-content/uploads/2018/11/pic0143.png'
extra_css = """
extra_css = '''
time, [data-test-id:"article-label"], [data-test-id:"article-sublabel"], [[data-test-id:"article-author"]] { font-size:small; }
[data-test-id:"header-intro"] { font-style: italic; }
"""
'''
keep_only_tags = [
dict(name='article', attrs={'id': 'article-content'}),

View File

@ -23,8 +23,8 @@ class ceskyDenikRecipe(BasicNewsRecipe):
cover_url = 'http://g.denik.cz/images/loga/denik.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
extra_css = '''
'''
remove_tags = []
keep_only_tags = [dict(name='div', attrs={'class': 'content'})]

View File

@ -11,11 +11,11 @@ CZ_MONTHS = ['led', 'úno', 'bře', 'dub', 'kvě', 'čen', 'čec', 'srp', 'zář
def cz_title_time():
"""
'''
Helper function to return date with czech locale.
Uses hardcoded lookup table of day and month names as strftime requires
locale change that is not thread safe.
"""
'''
today = datetime.today()
weekday = CZ_DAYS[today.weekday()]
month = CZ_MONTHS[today.month-1]
@ -26,9 +26,9 @@ def cz_title_time():
class DenikNRecipe(BasicNewsRecipe):
"""
'''
Recipe for the RSS feed of https://denikn.cz/
"""
'''
title = u'Deník N'
__author__ = 'Robert Mihaly'

View File

@ -31,13 +31,13 @@ class deredactie(BasicNewsRecipe):
catnames = {}
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch')
for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}):
for elem in soup.findAll('li', attrs={'id': re.compile('^navItem[2-9]')}):
a = elem.find('a', href=True)
m = re.search('(?<=/)[^/]*$', a['href'])
cat = str(m.group(0))
categories.append(cat)
catnames[cat] = a['title']
self.log("found cat %s\n" % catnames[cat])
self.log('found cat %s\n' % catnames[cat])
feeds = []
@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
for a in soup.findAll('a', attrs={'href': re.compile('deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
skip_this_article = False
url = a['href'].strip()
if url.startswith('/'):
@ -55,12 +55,12 @@ class deredactie(BasicNewsRecipe):
for article in articles:
if article['url'] == url:
skip_this_article = True
self.log("SKIPPING DUP %s" % url)
self.log('SKIPPING DUP %s' % url)
break
if skip_this_article:
continue
articles.append(myarticle)
self.log("Adding URL %s\n" % url)
self.log('Adding URL %s\n' % url)
if articles:
feeds.append((catnames[cat], articles))
return feeds

View File

@ -34,7 +34,7 @@ class Volkskrant(BasicNewsRecipe):
dict(id=['like', 'dlik']),
dict(name=['script', 'noscript', 'style']),
]
remove_attributes = ["class", "id", "name", "style"]
remove_attributes = ['class', 'id', 'name', 'style']
encoding = 'utf-8'
no_stylesheets = True
ignore_duplicate_articles = {'url'}
@ -88,7 +88,7 @@ class Volkskrant(BasicNewsRecipe):
)
)
sections = [("Numărul curent", articles)]
sections = [('Numărul curent', articles)]
return sections
def preprocess_html(self, soup):

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python
__license__ = "GPL v3"
__license__ = 'GPL v3'
"""DistroWatch Weekly"""
'''DistroWatch Weekly'''
import datetime
@ -10,28 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DistroWatchWeekly(BasicNewsRecipe):
title = "DistroWatch Weekly"
description = "Weekly news about Linux distributions"
category = "Linux, Technology, News"
title = 'DistroWatch Weekly'
description = 'Weekly news about Linux distributions'
category = 'Linux, Technology, News'
oldest_article = 14
language = "en"
language = 'en'
max_articles_per_feed = 50
no_stylesheets = True
use_embedded_content = False
timefmt = " [%A, %d %B, %Y]"
timefmt = ' [%A, %d %B, %Y]'
auto_cleanup = False
keep_only_tags = [
dict(
attrs={
"class":
lambda x: x and ("News1" in x)
'class':
lambda x: x and ('News1' in x)
}
)
]
def _get_mag_date(self):
"""Return date of latest weekly issue."""
'''Return date of latest weekly issue.'''
d = datetime.date(2022, 6, 20)
t = datetime.date.today()
@ -45,17 +45,17 @@ class DistroWatchWeekly(BasicNewsRecipe):
# Get URL of latest mag page
ld = self._get_mag_date()
url = ld.strftime("https://distrowatch.com/weekly.php?issue=%Y%m%d")
url = ld.strftime('https://distrowatch.com/weekly.php?issue=%Y%m%d')
url = url.lower()
title = ld.strftime("DistroWatch Weekly for %Y-%m-%d")
title = ld.strftime('DistroWatch Weekly for %Y-%m-%d')
# Get articles
stories = [{
"url": url,
"title": title,
'url': url,
'title': title,
},]
index = [
("Articles", stories),
('Articles', stories),
]
return index

View File

@ -23,7 +23,7 @@ def new_tag(soup, name, attrs=()):
class DnevnikCro(BasicNewsRecipe):
title = 'Dnevnik - Hr'
__author__ = 'Darko Miletic'
description = "Vijesti iz Hrvatske"
description = 'Vijesti iz Hrvatske'
publisher = 'Dnevnik.hr'
category = 'news, politics, Croatia'
oldest_article = 2
@ -67,9 +67,9 @@ class DnevnikCro(BasicNewsRecipe):
del item[attrib]
mlang = new_tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
('http-equiv', 'Content-Language'), ('content', self.lang)])
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')])
soup.head.insert(0, mlang)
soup.head.insert(1, mcharset)
return self.adeify_images(soup)

View File

@ -4,15 +4,15 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
# Comment out sections you are not interested in
sections = [
("정치", "politics"),
("사회", "national"),
("경제", "economy"),
("국제", "international"),
("사설칼럼", "editorials"),
("의학과학", "science"),
("문화연예", "culture"),
("스포츠", "sports"),
("사람속으로", "inmul")
('정치', 'politics'),
('사회', 'national'),
('경제', 'economy'),
('국제', 'international'),
('사설칼럼', 'editorials'),
('의학과학', 'science'),
('문화연예', 'culture'),
('스포츠', 'sports'),
('사람속으로', 'inmul')
# Following sections are marked as marked optional
# as default. Uncomment to enable.
# , (u'건강', 'health')
@ -26,24 +26,24 @@ sections = [
class Donga(BasicNewsRecipe):
language = "ko"
title = "동아일보"
description = "동아일보 기사"
__author__ = "Minsik Cho"
ignore_duplicate_articles = {"title", "url"}
language = 'ko'
title = '동아일보'
description = '동아일보 기사'
__author__ = 'Minsik Cho'
ignore_duplicate_articles = {'title', 'url'}
compress_news_images = True
no_stylesheets = True
oldest_article = 2
encoding = "utf-8"
encoding = 'utf-8'
# RSS Feed in syntax:
# https://rss.donga.com/[sections].xml
feeds = [(title, "https://rss.donga.com/" + section + ".xml") for (title, section) in sections]
feeds = [(title, 'https://rss.donga.com/' + section + '.xml') for (title, section) in sections]
# Remove logo and print buttons
remove_tags = [
dict(name="div", attrs={"class": "popHeaderWrap"}),
dict(name="div", attrs={"class": "etc"}),
dict(name='div', attrs={'class': 'popHeaderWrap'}),
dict(name='div', attrs={'class': 'etc'}),
]
def print_version(self, url):
@ -51,8 +51,8 @@ class Donga(BasicNewsRecipe):
# https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1
# Return print version url with syntax:
# https://www.donga.com/news/View?gid=[gid]&date=[date]
reobject = re.search("(?<=/all/)([0-9]*)/([0-9]*)", url)
reobject = re.search('(?<=/all/)([0-9]*)/([0-9]*)', url)
date = reobject.group(1)
gid = reobject.group(2)
return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date
return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date

View File

@ -107,11 +107,11 @@ class DRNyheder(BasicNewsRecipe):
keep_only_tags = [
dict(name="h1", attrs={'class': 'dre-article-title__heading'}), # Title
dict(name="div", attrs={'class': 'dre-article-byline'}), # Author
dict(name="figure", attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images
dict(name="p", attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article
dict(name="article", attrs={'itemtype': 'http://schema.org/NewsArticle'}),
dict(name='h1', attrs={'class': 'dre-article-title__heading'}), # Title
dict(name='div', attrs={'class': 'dre-article-byline'}), # Author
dict(name='figure', attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images
dict(name='p', attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article
dict(name='article', attrs={'itemtype': 'http://schema.org/NewsArticle'}),
#dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}),
#dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}),
#dict(name="div", attrs={'class': 'dre-speech'}),
@ -123,7 +123,7 @@ class DRNyheder(BasicNewsRecipe):
dict(name='div', attrs={'class': [
'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container',
'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}),
dict(name="source"),
dict(name='source'),
#dict(name='menu', attrs={'class': 'share'}),
#dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}),
]

View File

@ -63,20 +63,20 @@ class Dzieje(BasicNewsRecipe):
def parse_index(self):
feeds = []
feeds.append((u"Wiadomości", self.find_articles(
feeds.append((u'Wiadomości', self.find_articles(
'http://dzieje.pl/wiadomosci')))
feeds.append((u"Kultura i sztuka", self.find_articles(
feeds.append((u'Kultura i sztuka', self.find_articles(
'http://dzieje.pl/kulturaisztuka')))
feeds.append((u"Film", self.find_articles('http://dzieje.pl/kino')))
feeds.append((u"Rozmaitości historyczne",
feeds.append((u'Film', self.find_articles('http://dzieje.pl/kino')))
feeds.append((u'Rozmaitości historyczne',
self.find_articles('http://dzieje.pl/rozmaitości')))
feeds.append(
(u"Książka", self.find_articles('http://dzieje.pl/ksiazka')))
(u'Książka', self.find_articles('http://dzieje.pl/ksiazka')))
feeds.append(
(u"Wystawa", self.find_articles('http://dzieje.pl/wystawa')))
feeds.append((u"Edukacja", self.find_articles(
(u'Wystawa', self.find_articles('http://dzieje.pl/wystawa')))
feeds.append((u'Edukacja', self.find_articles(
'http://dzieje.pl/edukacja')))
feeds.append((u"Dzieje się", self.find_articles(
feeds.append((u'Dzieje się', self.find_articles(
'http://dzieje.pl/wydarzenia')))
return feeds

View File

@ -21,7 +21,7 @@ class Dziennik_pl(BasicNewsRecipe):
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile(
preprocess_regexps = [(re.compile('Komentarze:'), lambda m: ''), (re.compile(
'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags = [dict(id='article')]
remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501

View File

@ -120,7 +120,7 @@ class DziennikPolski24(BasicNewsRecipe):
if self.username is not None and self.password is not None:
br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html')
br.select_form(nr=1)
br["user_login[login]"] = self.username
br['user_login[login]'] = self.username
br['user_login[pass]'] = self.password
br.submit()
return br

View File

@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -97,8 +97,8 @@ def process_web_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
data = json.loads(raw)['props']['pageProps']['cp2Content']
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
body += f'<h1>{data["headline"]}</h1>'
if data.get("rubric") and data.get("rubric") is not None:
if data.get('rubric') and data.get('rubric') is not None:
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
try:
date = data['dateModified']
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)'
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
if edition_date and isinstance(edition_date, str):
return parse_only_date(edition_date, as_utc=False)
try:
url = self.browser.open("https://www.economist.com/printedition").geturl()
url = self.browser.open('https://www.economist.com/printedition').geturl()
except Exception as e:
self.log('Failed to fetch publication date with error: ' + str(e))
return super().publication_date()
return parse_only_date(url.split("/")[-1], as_utc=False)
return parse_only_date(url.split('/')[-1], as_utc=False)
def economist_test_article(self):
return [('Articles', [{'title':'test',
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
self.log('Got cover:', self.cover_url, '\n', self.description)
feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"):
for part in safe_dict(data, 'hasPart', 'parts'):
try:
section = part['articleSection']['internal'][0]['title']
except Exception:
section = safe_dict(part, 'print', 'section', 'title') or 'section'
if section not in feeds_dict:
self.log(section)
title = safe_dict(part, "title")
desc = safe_dict(part, "rubric") or ''
sub = safe_dict(part, "flyTitle") or ''
title = safe_dict(part, 'title')
desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc})
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()]
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
return self.economist_return_index(ans)
def economist_parse_web_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.description = safe_dict(data, "props", "pageProps", "content", "headline")
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']'
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace(
self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
self.log('Got cover:', self.cover_url)
feeds = []
for part in safe_dict(
data, "props", "pageProps", "content", "headerSections"
) + safe_dict(data, "props", "pageProps", "content", "sections"):
section = safe_dict(part, "name") or ''
data, 'props', 'pageProps', 'content', 'headerSections'
) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
section = safe_dict(part, 'name') or ''
if not section:
continue
self.log(section)
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
articles = []
for ar in part['articles']:
title = safe_dict(ar, "headline") or ''
url = process_url(safe_dict(ar, "url") or '')
title = safe_dict(ar, 'headline') or ''
url = process_url(safe_dict(ar, 'url') or '')
if not title or not url:
continue
desc = safe_dict(ar, "rubric") or ''
sub = safe_dict(ar, "flyTitle") or ''
desc = safe_dict(ar, 'rubric') or ''
sub = safe_dict(ar, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)

View File

@ -58,7 +58,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')
main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical')
@ -130,7 +130,7 @@ class Espresso(BasicNewsRecipe):
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={
'class': [
'dblClkTrk', 'ec-article-info', 'share_inline_header',
@ -189,13 +189,13 @@ class Espresso(BasicNewsRecipe):
self.description = data['rubric']
ans = []
for part in safe_dict(data, "hasPart", "parts"):
title = safe_dict(part, "title")
for part in safe_dict(data, 'hasPart', 'parts'):
title = safe_dict(part, 'title')
pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
ans.append({"title": title, "url": url})
ans.append({'title': title, 'url': url})
return [('Espresso', ans)]
def preprocess_html(self, soup):

View File

@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -97,8 +97,8 @@ def process_web_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
data = json.loads(raw)['props']['pageProps']['cp2Content']
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
body += f'<h1>{data["headline"]}</h1>'
if data.get("rubric") and data.get("rubric") is not None:
if data.get('rubric') and data.get('rubric') is not None:
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
try:
date = data['dateModified']
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)'
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
if edition_date and isinstance(edition_date, str):
return parse_only_date(edition_date, as_utc=False)
try:
url = self.browser.open("https://www.economist.com/printedition").geturl()
url = self.browser.open('https://www.economist.com/printedition').geturl()
except Exception as e:
self.log('Failed to fetch publication date with error: ' + str(e))
return super().publication_date()
return parse_only_date(url.split("/")[-1], as_utc=False)
return parse_only_date(url.split('/')[-1], as_utc=False)
def economist_test_article(self):
return [('Articles', [{'title':'test',
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
self.log('Got cover:', self.cover_url, '\n', self.description)
feeds_dict = defaultdict(list)
for part in safe_dict(data, "hasPart", "parts"):
for part in safe_dict(data, 'hasPart', 'parts'):
try:
section = part['articleSection']['internal'][0]['title']
except Exception:
section = safe_dict(part, 'print', 'section', 'title') or 'section'
if section not in feeds_dict:
self.log(section)
title = safe_dict(part, "title")
desc = safe_dict(part, "rubric") or ''
sub = safe_dict(part, "flyTitle") or ''
title = safe_dict(part, 'title')
desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
pt = PersistentTemporaryFile('.html')
pt.write(json.dumps(part).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
feeds_dict[section].append({"title": title, "url": url, "description": desc})
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
return [(section, articles) for section, articles in feeds_dict.items()]
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
return self.economist_return_index(ans)
def economist_parse_web_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.description = safe_dict(data, "props", "pageProps", "content", "headline")
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']'
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace(
self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
self.log('Got cover:', self.cover_url)
feeds = []
for part in safe_dict(
data, "props", "pageProps", "content", "headerSections"
) + safe_dict(data, "props", "pageProps", "content", "sections"):
section = safe_dict(part, "name") or ''
data, 'props', 'pageProps', 'content', 'headerSections'
) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
section = safe_dict(part, 'name') or ''
if not section:
continue
self.log(section)
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
articles = []
for ar in part['articles']:
title = safe_dict(ar, "headline") or ''
url = process_url(safe_dict(ar, "url") or '')
title = safe_dict(ar, 'headline') or ''
url = process_url(safe_dict(ar, 'url') or '')
if not title or not url:
continue
desc = safe_dict(ar, "rubric") or ''
sub = safe_dict(ar, "flyTitle") or ''
desc = safe_dict(ar, 'rubric') or ''
sub = safe_dict(ar, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)

View File

@ -59,7 +59,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -125,7 +125,7 @@ class EconomistNews(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Global news and current affairs from a European'
' perspective. Get the latest articles here.'
@ -140,7 +140,7 @@ class EconomistNews(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -234,9 +234,9 @@ class EconomistNews(BasicNewsRecipe):
articles = []
for art in part['hasPart']['parts']:
title = safe_dict(art, "title")
desc = safe_dict(art, "rubric") or ''
sub = safe_dict(art, "flyTitle") or ''
title = safe_dict(art, 'title')
desc = safe_dict(art, 'rubric') or ''
sub = safe_dict(art, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
if not art.get('text'):
@ -249,7 +249,7 @@ class EconomistNews(BasicNewsRecipe):
pt.write(json.dumps(art).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
articles.append({"title": title, "url": url, "description": desc})
articles.append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
if articles:
feeds.append((section, articles))

View File

@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -112,7 +112,7 @@ class econ_search(BasicNewsRecipe):
title = 'The Economist - Search'
language = 'en'
encoding = 'utf-8'
__author__ = "unkn0wn"
__author__ = 'unkn0wn'
description = (
'Use the Advanced section of the recipe to search.'
)
@ -128,7 +128,7 @@ class econ_search(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id':'player'}),
dict(attrs={
'class': [

View File

@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -122,7 +122,7 @@ class EconomistWorld(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
__author__ = "unkn0wn"
__author__ = 'unkn0wn'
description = (
'The World Ahead is The Economist’s future-gazing publication. It prepares audiences for what is to '
'come with mind-stretching insights and expert analysis—all in The Economist’s clear, elegant style.'
@ -136,7 +136,7 @@ class EconomistWorld(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -205,24 +205,24 @@ class EconomistWorld(BasicNewsRecipe):
return self.economist_return_index(ans)
def economist_parse_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.cover_url = 'https://mma.prnewswire.com/media/2561745/The_Economist_World_Ahead_2025_cover.jpg?w=600'
feeds = []
for coll in safe_dict(data, "props", "pageProps", "content", "components"):
section = safe_dict(coll, "headline") or ''
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'components'):
section = safe_dict(coll, 'headline') or ''
self.log(section)
articles = []
for part in safe_dict(coll, "items"):
title = safe_dict(part, "headline") or ''
url = process_url(safe_dict(part, "url") or '')
desc = safe_dict(part, "rubric") or ''
sub = safe_dict(part, "flyTitle") or ''
for part in safe_dict(coll, 'items'):
title = safe_dict(part, 'headline') or ''
url = process_url(safe_dict(part, 'url') or '')
desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)

View File

@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
continue
break
if daysback == 7:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
def fixChars(self, string):
# Replace lsquo (\x91)
fixed = re.sub("\x91", "", string)
fixed = re.sub('\x91', '', string)
# Replace rsquo (\x92)
fixed = re.sub("\x92", "", fixed)
fixed = re.sub('\x92', '', fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93", "", fixed)
fixed = re.sub('\x93', '', fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94", "", fixed)
fixed = re.sub('\x94', '', fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96", "", fixed)
fixed = re.sub('\x96', '', fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97", "", fixed)
fixed = re.sub("&#x2019;", "", fixed)
fixed = re.sub('\x97', '', fixed)
fixed = re.sub('&#x2019;', '', fixed)
return fixed
def massageNCXText(self, description):
@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'):
url = self.url_prefix + url
if not url.startswith(self.url_prefix):
print("Rejected " + url)
print('Rejected ' + url)
return
if url in self.url_list:
print("Rejected dup " + url)
print('Rejected dup ' + url)
return
self.url_list.append(url)
title = self.tag_to_string(atag, False)
@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
return
dtag = adiv.find('div', 'content')
description = ''
print("URL " + url)
print("TITLE " + title)
print('URL ' + url)
print('TITLE ' + title)
if dtag is not None:
stag = dtag.span
if stag is not None:
@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
print('DESCRIPTION: ' + description)
if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl)
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try:
soup = self.index_to_soup(self.url_prefix + keyurl)
except:
print("Section: " + key + ' NOT FOUND')
print('Section: ' + key + ' NOT FOUND')
return
ans.append(key)
mainsoup = soup.find('div', 'bodywrapper')

View File

@ -20,12 +20,12 @@ class AdvancedUserRecipe1311790237(BasicNewsRecipe):
masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
publication_type = 'newspaper'
extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""
'''
feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
(u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),

View File

@ -55,9 +55,9 @@ class RevistaElCultural(BasicNewsRecipe):
if url.startswith('/version_papel/' + titleSection + '/'):
url = 'http://www.elcultural.es' + url
self.log('\t\tFound article:', title[0:title.find("|") - 1])
self.log('\t\tFound article:', title[0:title.find('|') - 1])
self.log('\t\t\t', url)
current_articles.append({'title': title[0:title.find("|") - 1], 'url': url,
current_articles.append({'title': title[0:title.find('|') - 1], 'url': url,
'description': '', 'date': ''})
return current_articles

View File

@ -1,51 +1,51 @@
# -*- mode: python; coding: utf-8; -*-
# vim: set syntax=python fileencoding=utf-8
__license__ = "GPL v3"
__copyright__ = "2023, Tomás Di Domenico <tdido at tdido.eu>"
__license__ = 'GPL v3'
__copyright__ = '2023, Tomás Di Domenico <tdido at tdido.eu>'
"""
'''
www.eldiplo.org
"""
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElDiplo2023(BasicNewsRecipe):
title = "Le Monde Diplomatique - cono sur"
__author__ = "Tomás Di Domenico"
description = "Publicación de Le Monde Diplomatique para el cono sur."
publisher = "Capital Intelectual"
category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World"
title = 'Le Monde Diplomatique - cono sur'
__author__ = 'Tomás Di Domenico'
description = 'Publicación de Le Monde Diplomatique para el cono sur.'
publisher = 'Capital Intelectual'
category = 'News, Politics, Argentina, Uruguay, Paraguay, South America, World'
oldest_article = 31
no_stylesheets = True
encoding = "utf8"
encoding = 'utf8'
use_embedded_content = False
language = "es_AR"
language = 'es_AR'
remove_empty_feeds = True
publication_type = "magazine"
publication_type = 'magazine'
delay = 1
simultaneous_downloads = 1
timeout = 8
needs_subscription = True
ignore_duplicate_articles = {"url"}
ignore_duplicate_articles = {'url'}
temp_files = []
fetch_retries = 10
handle_gzip = True
compress_news_images = True
scale_news_images_to_device = True
masthead_url = (
"https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png"
'https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png'
)
INDEX = "https://www.eldiplo.org/"
INDEX = 'https://www.eldiplo.org/'
conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category}
conversion_options = {'series': 'El Dipló', 'publisher': publisher, 'base_font_size': 8, 'tags': category}
keep_only_tags = [dict(name=["article"])]
keep_only_tags = [dict(name=['article'])]
remove_tags = [dict(name=["button"])]
remove_tags = [dict(name=['button'])]
extra_css = """
extra_css = '''
.entry-title {
text-align: center;
}
@ -67,59 +67,59 @@ class ElDiplo2023(BasicNewsRecipe):
padding-left: 10%;
padding-right: 10%;
}
"""
'''
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.select_form(id="loginform")
br["log"] = self.username
br["pwd"] = self.password
br.select_form(id='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br
def get_cover_url(self):
soup_index = self.index_to_soup(self.INDEX)
tag_sumario = soup_index.find("span", text="Sumario")
url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]
tag_sumario = soup_index.find('span', text='Sumario')
url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']
soup = self.index_to_soup(url_sumario)
container = soup.find("div", class_="px-16")
url = container.find("img")["src"]
container = soup.find('div', class_='px-16')
url = container.find('img')['src']
return getattr(self, "cover_url", url)
return getattr(self, 'cover_url', url)
def _process_article(self, article):
url = article.find("a", href=True, attrs={"class": "title"})["href"]
title = self.tag_to_string(article).replace("Editorial", "Editorial: ")
url = article.find('a', href=True, attrs={'class': 'title'})['href']
title = self.tag_to_string(article).replace('Editorial', 'Editorial: ')
try:
title, authors = title.split(", por")
authors = f"por {authors}"
title, authors = title.split(', por')
authors = f'por {authors}'
except ValueError:
authors = ""
self.log("title: ", title, " url: ", url)
return {"title": title, "url": url, "description": authors, "date": ""}
authors = ''
self.log('title: ', title, ' url: ', url)
return {'title': title, 'url': url, 'description': authors, 'date': ''}
def preprocess_html(self, soup):
font_size = "90%"
font_size = '90%'
# make the footnotes smaller
for p in soup.find("div", id="nota_pie").findChildren("p", recursive=False):
p["style"] = f"font-size: {font_size};"
for p in soup.find('div', id='nota_pie').findChildren('p', recursive=False):
p['style'] = f'font-size: {font_size};'
return soup
def parse_index(self):
soup_index = self.index_to_soup(self.INDEX)
tag_sumario = soup_index.find("span", text="Sumario")
tag_sumario = soup_index.find('span', text='Sumario')
if tag_sumario is None:
return None
url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]
url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']
self.log(url_sumario)
soup_sumario = self.index_to_soup(url_sumario)
@ -128,20 +128,20 @@ class ElDiplo2023(BasicNewsRecipe):
articles = []
dossiers = []
sumario = soup_sumario.find("div", class_="sumario")
sumario = soup_sumario.find('div', class_='sumario')
for section in sumario.find_all("div", recursive=False):
classes = section.attrs["class"]
for section in sumario.find_all('div', recursive=False):
classes = section.attrs['class']
if "dossier" in classes:
dtitle = self.tag_to_string(section.find("h3"))
if 'dossier' in classes:
dtitle = self.tag_to_string(section.find('h3'))
darticles = []
for article in section.find_all("div", recursive=False):
for article in section.find_all('div', recursive=False):
darticles.append(self._process_article(article))
dossiers.append((dtitle, darticles))
else:
articles.append(self._process_article(section))
feeds.append(("Artículos", articles))
feeds.append(('Artículos', articles))
feeds += dossiers
return feeds

View File

@ -119,11 +119,11 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;}
try:
br.open(cover)
except:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover
def image_url_processor(cls, baseurl, url):
splitUrl = url.split("cloudfront-")
splitUrl = url.split('cloudfront-')
parsedUrl = 'https://cloudfront-' + splitUrl[1]
return parsedUrl

View File

@ -36,7 +36,7 @@ class ElPaisBabelia(BasicNewsRecipe):
title = self.tag_to_string(post)
if str(post).find('class=') > 0:
klass = post['class']
if klass != "":
if klass != '':
self.log()
self.log('--> post: ', post)
self.log('--> url: ', url)

View File

@ -28,12 +28,12 @@ class elcohetealaluna(BasicNewsRecipe):
compress_news_images = True
masthead_url = 'https://www.elcohetealaluna.com/wp-content/uploads/2018/06/logo-menu.png'
extra_css = """
extra_css = '''
body{font-family: Georgia, Times, "Times New Roman", serif}
h1,h2,.post-author-name{font-family: Oswald, sans-serif}
h2{color: gray}
img{margin-top:1em; margin-bottom: 1em; display:block}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -28,10 +28,10 @@ class ElCronistaArg(BasicNewsRecipe):
auto_cleanup_keep = '//div[@class="header-bottom"] | //h1 | //h2'
ignore_duplicate_articles = {'url'}
masthead_url = 'https://www.cronista.com/export/sites/diarioelcronista/arte/v2/lg_cronista_footer.png_665574830.png'
extra_css = """
extra_css = '''
body{font-family: 'Source Sans Pro', sans-serif}
h1,h2,h3,h4{font-family: 'Libre Baskerville', serif}
"""
'''
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

View File

@ -29,5 +29,5 @@ class Elektroda(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
article.title = article.title[article.title.find("::") + 3:]
article.title = article.title[article.title.find('::') + 3:]
return feeds

View File

@ -35,14 +35,14 @@ class ElMundo(BasicNewsRecipe):
articles_are_obfuscated = True
auto_cleanup = True
temp_files = []
extra_css = """
extra_css = '''
body{font-family: "PT serif",Georgia,serif,times}
.metadata_noticia{font-size: small}
.pestana_GDP{font-size: small; font-weight:bold}
h1 {color: #333333; font-family: "Clear Sans Bold",Arial,sans-serif,helvetica}
.hora{color: red}
.update{color: gray}
"""
'''
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
@ -83,14 +83,14 @@ class ElMundo(BasicNewsRecipe):
cover = self.masthead_url
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
month = '%.2d' % st.tm_mon
day = '%.2d' % st.tm_mday
cover = 'http://img.kiosko.net/' + year + '/' + \
month + '/' + day + '/es/elmundo.750.jpg'
try:
self.browser.open(cover)
except:
self.log("\nPortada no disponible")
self.log('\nPortada no disponible')
return cover
def get_obfuscated_article(self, url):
@ -103,7 +103,7 @@ class ElMundo(BasicNewsRecipe):
html = response.read()
count = tries
except:
print("Retrying download...")
print('Retrying download...')
count += 1
if html is not None:
tfile = PersistentTemporaryFile('_fa.html')

View File

@ -66,7 +66,7 @@ class ElPeriodico_cat(BasicNewsRecipe):
def preprocess_html(self, soup):
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):
del item['style']

View File

@ -18,18 +18,18 @@ class En_Globes_Recipe(BasicNewsRecipe):
max_articles_per_feed = 100
feeds = [
(u"Main Headlines", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942"),
(u"Israeli stocks on Wall Street", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392"),
(u"All news", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725"),
(u"Macro economics", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389"),
(u"Aerospace and defense", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380"),
(u"Real estate", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385"),
(u"Energy and water", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382"),
(u"Start-ups and venture capital", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397"),
(u"Financial services", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383"),
(u"Tel Aviv markets", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404"),
(u"Healthcare", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377"),
(u"Telecommunications", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386"),
(u"Information technology", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376"),
(u"Transport and infrastructure", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388"),
(u'Main Headlines', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942'),
(u'Israeli stocks on Wall Street', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392'),
(u'All news', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725'),
(u'Macro economics', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389'),
(u'Aerospace and defense', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380'),
(u'Real estate', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385'),
(u'Energy and water', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382'),
(u'Start-ups and venture capital', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397'),
(u'Financial services', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383'),
(u'Tel Aviv markets', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404'),
(u'Healthcare', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377'),
(u'Telecommunications', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386'),
(u'Information technology', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376'),
(u'Transport and infrastructure', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388'),
]

View File

@ -87,8 +87,8 @@ class Engadget(BasicNewsRecipe):
except KeyError:
continue
# Reorder the "title" and "content" elements
title_div = soup.find("div", {"class": "caas-title-wrapper"})
content_div = soup.find("div", {"class": "caas-content-wrapper"})
title_div = soup.find('div', {'class': 'caas-title-wrapper'})
content_div = soup.find('div', {'class': 'caas-content-wrapper'})
if title_div and content_div:
soup.body.clear()
soup.body.append(title_div)

Some files were not shown because too many files have changed in this diff.
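
The change repeated across every hunk above is purely mechanical: plain double-quoted string literals are rewritten with single-quote delimiters while their contents stay untouched. The snippet below is an illustrative toy sketch of that rewrite using only the standard-library tokenize module; it is not the linter auto-fix that actually produced this commit, and it deliberately skips triple-quoted, prefixed, and apostrophe-containing strings.

# Illustrative sketch only: a toy version of the rewrite shown in the hunks
# above. Plain "..." literals become '...'; triple-quoted, prefixed and
# apostrophe-containing strings are left alone.
import io
import tokenize


def prefer_single_quotes(source):
    out = []
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if (tok.type == tokenize.STRING
                and tok.string.startswith('"')
                and not tok.string.startswith('"""')
                and "'" not in tok.string):
            # Same length, same positions: only the delimiters change.
            tok = tok._replace(string="'" + tok.string[1:-1] + "'")
        out.append(tok)
    return tokenize.untokenize(out)


print(prefer_single_quotes('print("Rejected " + url)\n'))
# -> print('Rejected ' + url)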