mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-05 08:40:13 -04:00
uniform string quote (auto-fix)
ruff 'Q'
This commit is contained in:
parent
2357c1fc48
commit
37771022ce
@ -240,14 +240,14 @@ def generate_ebook_convert_help(preamble, app):
|
||||
parser, plumber = create_option_parser(['ebook-convert',
|
||||
'dummyi.'+sorted(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
|
||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||
parser.option_groups if g.title == "INPUT OPTIONS"]
|
||||
parser.option_groups if g.title == 'INPUT OPTIONS']
|
||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
|
||||
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
|
||||
'dummyi.'+pl.file_type, '-h'], default_log)
|
||||
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||
parser.option_groups if g.title == "OUTPUT OPTIONS"]
|
||||
parser.option_groups if g.title == 'OUTPUT OPTIONS']
|
||||
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||
|
||||
|
@ -55,7 +55,7 @@ class DemoDialog(QDialog):
|
||||
self.l.addWidget(self.view_button)
|
||||
|
||||
self.update_metadata_button = QPushButton(
|
||||
'Update metadata in a book\'s files', self)
|
||||
"Update metadata in a book's files", self)
|
||||
self.update_metadata_button.clicked.connect(self.update_metadata)
|
||||
self.l.addWidget(self.update_metadata_button)
|
||||
|
||||
|
@ -61,7 +61,7 @@ if use_archive:
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -157,7 +157,7 @@ class Economist(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
__author__ = "Kovid Goyal"
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = (
|
||||
'Published since September 1843 to take part in “a severe contest between intelligence, which presses forward, and '
|
||||
'an unworthy, timid ignorance obstructing our progress.”'
|
||||
@ -170,7 +170,7 @@ class Economist(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
|
||||
dict(attrs={'aria-label': "Article Teaser"}),
|
||||
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||
dict(attrs={
|
||||
'class': [
|
||||
'dblClkTrk', 'ec-article-info', 'share_inline_header',
|
||||
@ -224,7 +224,7 @@ class Economist(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
# return self.economist_test_article()
|
||||
soup = self.index_to_soup('https://www.economist.com/hidden-content/1843magazine-hub')
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is None:
|
||||
raise ValueError('No script tag with JSON data found in the weeklyedition archive')
|
||||
data = json.loads(script_tag.string)
|
||||
@ -247,20 +247,20 @@ class Economist(BasicNewsRecipe):
|
||||
self.description = data['description']
|
||||
|
||||
feeds_dict = defaultdict(list)
|
||||
for part in safe_dict(data, "hasPart", "parts"):
|
||||
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||
section = part['title']
|
||||
self.log(section)
|
||||
for art in safe_dict(part, "hasPart", "parts"):
|
||||
title = safe_dict(art, "title")
|
||||
desc = safe_dict(art, "rubric") or ''
|
||||
sub = safe_dict(art, "flyTitle") or ''
|
||||
for art in safe_dict(part, 'hasPart', 'parts'):
|
||||
title = safe_dict(art, 'title')
|
||||
desc = safe_dict(art, 'rubric') or ''
|
||||
sub = safe_dict(art, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
pt.write(json.dumps(art).encode('utf-8'))
|
||||
pt.close()
|
||||
url = 'file:///' + pt.name
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
self.log('\t', title, '\n\t\t', desc)
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
@ -311,26 +311,26 @@ class Economist(BasicNewsRecipe):
|
||||
return ans
|
||||
|
||||
def economist_parse_index(self, soup):
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is not None:
|
||||
data = json.loads(script_tag.string)
|
||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
||||
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
|
||||
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
|
||||
# self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600'
|
||||
|
||||
feeds = []
|
||||
|
||||
for coll in safe_dict(data, "props", "pageProps", "content", "collections"):
|
||||
section = safe_dict(coll, "headline") or ''
|
||||
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'collections'):
|
||||
section = safe_dict(coll, 'headline') or ''
|
||||
self.log(section)
|
||||
articles = []
|
||||
for part in safe_dict(coll, "hasPart", "parts"):
|
||||
title = safe_dict(part, "headline") or ''
|
||||
url = safe_dict(part, "url", "canonical") or ''
|
||||
for part in safe_dict(coll, 'hasPart', 'parts'):
|
||||
title = safe_dict(part, 'headline') or ''
|
||||
url = safe_dict(part, 'url', 'canonical') or ''
|
||||
if not title or not url:
|
||||
continue
|
||||
desc = safe_dict(part, "description") or ''
|
||||
sub = safe_dict(part, "subheadline") or ''
|
||||
desc = safe_dict(part, 'description') or ''
|
||||
sub = safe_dict(part, 'subheadline') or ''
|
||||
if sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
|
@ -47,11 +47,11 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa: E501
|
||||
]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
p{text-align: justify; font-size: 100%}
|
||||
body{ text-align: left; font-size:100% }
|
||||
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||
"""
|
||||
'''
|
||||
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||
|
@ -28,7 +28,7 @@ class DrawAndCook(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
("They Draw and Cook", "http://www.theydrawandcook.com/")
|
||||
('They Draw and Cook', 'http://www.theydrawandcook.com/')
|
||||
]:
|
||||
articles = self.make_links(url)
|
||||
if articles:
|
||||
|
@ -5,11 +5,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheMITPressReader(BasicNewsRecipe):
|
||||
title = "The MIT Press Reader"
|
||||
title = 'The MIT Press Reader'
|
||||
__author__ = 'yodha8'
|
||||
language = 'en'
|
||||
description = ("Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors."
|
||||
" This recipe pulls articles from the past 7 days.")
|
||||
description = ('Thought-provoking excerpts, interviews and essays backed by academic rigor written by MIT Press authors.'
|
||||
' This recipe pulls articles from the past 7 days.')
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
@ -47,13 +47,13 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
p{text-align: justify; font-size: 100%}
|
||||
body{ text-align: left; font-size:100% }
|
||||
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
|
||||
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
|
||||
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||
"""
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
|
||||
|
@ -28,6 +28,6 @@ class Acrimed(BasicNewsRecipe):
|
||||
lambda m: '<title>' + m.group(1) + '</title>'),
|
||||
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.chapo{font-style:italic; margin: 1em 0 0.5em}
|
||||
"""
|
||||
'''
|
||||
|
@ -21,7 +21,7 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
INDEX = u'http://www.adventuregamers.com'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
||||
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
||||
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
||||
@ -32,7 +32,7 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
.score_header{font-size: large; color: #50544A}
|
||||
img{margin-bottom: 1em;}
|
||||
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -14,7 +14,7 @@ class afr(BasicNewsRecipe):
|
||||
description = (
|
||||
'For more than 65 years The Australian Financial Review has been the authority on business,'
|
||||
' finance and investment news in Australia. It has a reputation for independent, award-winning '
|
||||
'journalism and is essential reading for Australia\'s business and investor community.'
|
||||
"journalism and is essential reading for Australia's business and investor community."
|
||||
)
|
||||
masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png'
|
||||
encoding = 'utf-8'
|
||||
|
@ -36,9 +36,9 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
"""
|
||||
'''
|
||||
Crée une couverture personnalisée avec le logo
|
||||
"""
|
||||
'''
|
||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||
|
||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||
@ -54,7 +54,7 @@ class AfriqueXXIRecipe(BasicNewsRecipe):
|
||||
|
||||
weekday = french_weekday[wkd]
|
||||
month = french_month[today.month]
|
||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
||||
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||
edition = today.strftime('Édition de %Hh')
|
||||
|
||||
# Image de base
|
||||
|
@ -21,9 +21,9 @@ class AlJazeera(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,sans-serif}
|
||||
"""
|
||||
'''
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category,
|
||||
'publisher': publisher, 'language': language
|
||||
|
@ -110,7 +110,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
title = title[0:120] + '...'
|
||||
href = link.get('href')
|
||||
if not href:
|
||||
self._p("BAD HREF: " + str(link))
|
||||
self._p('BAD HREF: ' + str(link))
|
||||
return
|
||||
self.queue_article_link(section, href, title)
|
||||
|
||||
@ -158,7 +158,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
|
||||
age = (datetime.datetime.now() - date).days
|
||||
if (age > self.oldest_article):
|
||||
return "too old"
|
||||
return 'too old'
|
||||
return False
|
||||
|
||||
def scrape_article_date(self, soup):
|
||||
@ -174,7 +174,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
def date_from_string(self, datestring):
|
||||
try:
|
||||
# eg: Posted September 17, 2014
|
||||
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
|
||||
dt = datetime.datetime.strptime(datestring, 'Posted %B %d, %Y')
|
||||
except:
|
||||
dt = None
|
||||
|
||||
|
@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
title = u'Albert Mohler\'s Blog'
|
||||
title = u"Albert Mohler's Blog"
|
||||
__author__ = 'Peter Grungi'
|
||||
language = 'en'
|
||||
oldest_article = 90
|
||||
@ -16,5 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
author = 'Albert Mohler'
|
||||
|
||||
feeds = [(u'Albert Mohler\'s Blog',
|
||||
feeds = [(u"Albert Mohler's Blog",
|
||||
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||
|
@ -43,7 +43,7 @@ class ALD(BasicNewsRecipe):
|
||||
# Extract a list of dates from the page.
|
||||
# Subset this out to the list of target dates for extraction.
|
||||
date_list = []
|
||||
for div in soup.findAll('div', attrs={'id': "dayheader"}):
|
||||
for div in soup.findAll('div', attrs={'id': 'dayheader'}):
|
||||
date_list.append(self.tag_to_string(div))
|
||||
date_list_clean = [re.sub(r'[^\w]', ' ', date) for date in date_list]
|
||||
date_list_bool = [
|
||||
@ -54,14 +54,14 @@ class ALD(BasicNewsRecipe):
|
||||
|
||||
# Process each paragraph one by one.
|
||||
# Stop when the text of the previous div is not in the target date list.
|
||||
for div in soup.findAll('div', attrs={'class': "mobile-front"}):
|
||||
for div in soup.findAll('div', attrs={'class': 'mobile-front'}):
|
||||
for p in div.findAll('p'):
|
||||
if self.tag_to_string(p.findPreviousSibling('div')) in compress_date:
|
||||
if p.find('a'):
|
||||
title = self.tag_to_string(p)
|
||||
link = p.find('a')['href']
|
||||
if self.tag_to_string(p.findPreviousSibling('h3')
|
||||
) == "Articles of Note":
|
||||
) == 'Articles of Note':
|
||||
articles_note.append({
|
||||
'title': title,
|
||||
'url': link,
|
||||
@ -69,7 +69,7 @@ class ALD(BasicNewsRecipe):
|
||||
'date': ''
|
||||
})
|
||||
elif self.tag_to_string(p.findPreviousSibling('h3')
|
||||
) == "New Books":
|
||||
) == 'New Books':
|
||||
new_books.append({
|
||||
'title': title,
|
||||
'url': link,
|
||||
|
@ -38,7 +38,7 @@ class AlternativesEconomiques(BasicNewsRecipe):
|
||||
self.log('Cover URL found:', cover_url)
|
||||
return cover_url
|
||||
|
||||
self.log('Aucune couverture trouvée, utilisation de l\'image par défaut')
|
||||
self.log("Aucune couverture trouvée, utilisation de l'image par défaut")
|
||||
return 'https://www.alternatives-economiques.fr/sites/all/themes/alternatives-economiques-main/assets/logo-alternatives-economiques.svg'
|
||||
|
||||
except Exception as e:
|
||||
|
@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
|
||||
articles = []
|
||||
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
|
||||
href = aTag.get('href',False)
|
||||
if not href.encode("utf-8").startswith(url.encode("utf-8")) :
|
||||
if not href.encode('utf-8').startswith(url.encode('utf-8')) :
|
||||
continue # not in same section
|
||||
|
||||
title = href.split('/')[-1].split('-')[0]
|
||||
|
@ -28,9 +28,9 @@ class Ambito(BasicNewsRecipe):
|
||||
language = 'es_AR'
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Roboto, sans-serif}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description,
|
||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AmericanThinker(BasicNewsRecipe):
|
||||
title = u'American Thinker'
|
||||
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
||||
description = 'American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans.'
|
||||
__author__ = 'Walt Anthony'
|
||||
publisher = 'Thomas Lifson'
|
||||
category = 'news, politics, USA'
|
||||
|
@ -39,4 +39,4 @@ class anan(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
||||
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
||||
return url.replace('/show/', '/print/') # 2014-02-27 AGE: update
|
||||
|
@ -12,7 +12,7 @@ class ancientegypt(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
__author__ = 'unkn0wn'
|
||||
description = (
|
||||
'Ancient Egypt is the world\'s leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. '
|
||||
"Ancient Egypt is the world's leading Egyptology magazine, exploring the history, people and culture of the Nile Valley. "
|
||||
'Now in a larger format with a fresh new design, AE brings you the latest news and discoveries, and feature articles covering '
|
||||
'more than 5000 years of Egyptian history. Published bimonthly.'
|
||||
)
|
||||
|
@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
|
||||
url = str(snaps['OrgId'])
|
||||
if snaps['ObjectType'] == 4:
|
||||
continue
|
||||
feeds_dict[section].append({"title": '', "url": url})
|
||||
feeds_dict[section].append({'title': '', 'url': url})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
|
@ -75,7 +75,7 @@ class andhra(BasicNewsRecipe):
|
||||
url = str(snaps['OrgId'])
|
||||
if snaps['ObjectType'] == 4:
|
||||
continue
|
||||
feeds_dict[section].append({"title": '', "url": url})
|
||||
feeds_dict[section].append({'title': '', 'url': url})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
|
@ -66,19 +66,19 @@ class Arcamax(BasicNewsRecipe):
|
||||
# (u"9 Chickweed Lane", u"https://www.arcamax.com/thefunnies/ninechickweedlane"),
|
||||
# (u"Agnes", u"https://www.arcamax.com/thefunnies/agnes"),
|
||||
# (u"Andy Capp", u"https://www.arcamax.com/thefunnies/andycapp"),
|
||||
(u"BC", u"https://www.arcamax.com/thefunnies/bc"),
|
||||
(u'BC', u'https://www.arcamax.com/thefunnies/bc'),
|
||||
# (u"Baby Blues", u"https://www.arcamax.com/thefunnies/babyblues"),
|
||||
# (u"Beetle Bailey", u"https://www.arcamax.com/thefunnies/beetlebailey"),
|
||||
(u"Blondie", u"https://www.arcamax.com/thefunnies/blondie"),
|
||||
(u'Blondie', u'https://www.arcamax.com/thefunnies/blondie'),
|
||||
# u"Boondocks", u"https://www.arcamax.com/thefunnies/boondocks"),
|
||||
# (u"Cathy", u"https://www.arcamax.com/thefunnies/cathy"),
|
||||
# (u"Daddys Home", u"https://www.arcamax.com/thefunnies/daddyshome"),
|
||||
# (u"Dinette Set", u"https://www.arcamax.com/thefunnies/thedinetteset"),
|
||||
(u"Dog Eat Doug", u"https://www.arcamax.com/thefunnies/dogeatdoug"),
|
||||
(u'Dog Eat Doug', u'https://www.arcamax.com/thefunnies/dogeatdoug'),
|
||||
# (u"Doonesbury", u"https://www.arcamax.com/thefunnies/doonesbury"),
|
||||
# (u"Dustin", u"https://www.arcamax.com/thefunnies/dustin"),
|
||||
(u"Family Circus", u"https://www.arcamax.com/thefunnies/familycircus"),
|
||||
(u"Garfield", u"https://www.arcamax.com/thefunnies/garfield"),
|
||||
(u'Family Circus', u'https://www.arcamax.com/thefunnies/familycircus'),
|
||||
(u'Garfield', u'https://www.arcamax.com/thefunnies/garfield'),
|
||||
# (u"Get Fuzzy", u"https://www.arcamax.com/thefunnies/getfuzzy"),
|
||||
# (u"Girls and Sports", u"https://www.arcamax.com/thefunnies/girlsandsports"),
|
||||
# (u"Hagar the Horrible", u"https://www.arcamax.com/thefunnies/hagarthehorrible"),
|
||||
@ -87,16 +87,16 @@ class Arcamax(BasicNewsRecipe):
|
||||
# (u"Luann", u"https://www.arcamax.com/thefunnies/luann"),
|
||||
# (u"Momma", u"https://www.arcamax.com/thefunnies/momma"),
|
||||
# (u"Mother Goose and Grimm", u"https://www.arcamax.com/thefunnies/mothergooseandgrimm"),
|
||||
(u"Mutts", u"https://www.arcamax.com/thefunnies/mutts"),
|
||||
(u'Mutts', u'https://www.arcamax.com/thefunnies/mutts'),
|
||||
# (u"Non Sequitur", u"https://www.arcamax.com/thefunnies/nonsequitur"),
|
||||
# (u"Pearls Before Swine", u"https://www.arcamax.com/thefunnies/pearlsbeforeswine"),
|
||||
# (u"Pickles", u"https://www.arcamax.com/thefunnies/pickles"),
|
||||
# (u"Red and Rover", u"https://www.arcamax.com/thefunnies/redandrover"),
|
||||
# (u"Rubes", u"https://www.arcamax.com/thefunnies/rubes"),
|
||||
# (u"Rugrats", u"https://www.arcamax.com/thefunnies/rugrats"),
|
||||
(u"Speed Bump", u"https://www.arcamax.com/thefunnies/speedbump"),
|
||||
(u"Wizard of Id", u"https://www.arcamax.com/thefunnies/wizardofid"),
|
||||
(u"Zits", u"https://www.arcamax.com/thefunnies/zits"),
|
||||
(u'Speed Bump', u'https://www.arcamax.com/thefunnies/speedbump'),
|
||||
(u'Wizard of Id', u'https://www.arcamax.com/thefunnies/wizardofid'),
|
||||
(u'Zits', u'https://www.arcamax.com/thefunnies/zits'),
|
||||
]:
|
||||
self.log('Finding strips for:', title)
|
||||
articles = self.make_links(url, title)
|
||||
|
@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ArretSurImages(BasicNewsRecipe):
|
||||
title = 'Arrêt sur Images'
|
||||
description = 'Site français d\'analyse des médias'
|
||||
description = "Site français d'analyse des médias"
|
||||
language = 'fr'
|
||||
encoding = 'utf-8'
|
||||
needs_subscription = True
|
||||
@ -27,9 +27,9 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
"""
|
||||
'''
|
||||
Crée une couverture personnalisée avec le logo ASI
|
||||
"""
|
||||
'''
|
||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||
|
||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||
@ -45,7 +45,7 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
|
||||
weekday = french_weekday[wkd]
|
||||
month = french_month[today.month]
|
||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
||||
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||
edition = today.strftime('Édition de %Hh')
|
||||
|
||||
img = QImage(1400, 1920, QImage.Format_RGB888)
|
||||
@ -123,9 +123,9 @@ class ArretSurImages(BasicNewsRecipe):
|
||||
br.addheaders += [('Authorization', f'Bearer {auth_response["access_token"]}')]
|
||||
print('Authentification réussie')
|
||||
else:
|
||||
print('Échec de l\'authentification - Vérifiez vos identifiants')
|
||||
print("Échec de l'authentification - Vérifiez vos identifiants")
|
||||
except Exception as e:
|
||||
print(f'Erreur lors de l\'authentification: {str(e)}')
|
||||
print(f"Erreur lors de l'authentification: {str(e)}")
|
||||
return br
|
||||
|
||||
def get_article_url(self, article):
|
||||
|
@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = "GPL v3"
|
||||
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Albert Aparicio Isarn <aaparicio at posteo.net>'
|
||||
|
||||
"""
|
||||
'''
|
||||
https://www.asahi.com/ajw/
|
||||
"""
|
||||
'''
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
@ -14,99 +14,99 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
title = "The Asahi Shimbun"
|
||||
__author__ = "Albert Aparicio Isarn"
|
||||
title = 'The Asahi Shimbun'
|
||||
__author__ = 'Albert Aparicio Isarn'
|
||||
|
||||
description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
|
||||
" The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
|
||||
" coverage of cool Japan,focusing on manga, travel and other timely news.")
|
||||
publisher = "The Asahi Shimbun Company"
|
||||
publication_type = "newspaper"
|
||||
category = "news, japan"
|
||||
language = "en_JP"
|
||||
description = ('The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan.'
|
||||
' The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive'
|
||||
' coverage of cool Japan,focusing on manga, travel and other timely news.')
|
||||
publisher = 'The Asahi Shimbun Company'
|
||||
publication_type = 'newspaper'
|
||||
category = 'news, japan'
|
||||
language = 'en_JP'
|
||||
|
||||
index = "https://www.asahi.com"
|
||||
masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"
|
||||
index = 'https://www.asahi.com'
|
||||
masthead_url = 'https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png'
|
||||
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
remove_tags_before = {"id": "MainInner"}
|
||||
remove_tags_after = {"class": "ArticleText"}
|
||||
remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]
|
||||
remove_tags_before = {'id': 'MainInner'}
|
||||
remove_tags_after = {'class': 'ArticleText'}
|
||||
remove_tags = [{'name': 'div', 'class': 'SnsUtilityArea'}]
|
||||
|
||||
def get_whats_new(self):
|
||||
soup = self.index_to_soup(self.index + "/ajw/new")
|
||||
news_section = soup.find("div", attrs={"class": "specialList"})
|
||||
soup = self.index_to_soup(self.index + '/ajw/new')
|
||||
news_section = soup.find('div', attrs={'class': 'specialList'})
|
||||
|
||||
new_news = []
|
||||
|
||||
for item in news_section.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in news_section.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
new_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return new_news
|
||||
|
||||
def get_top6(self, soup):
|
||||
top = soup.find("ul", attrs={"class": "top6"})
|
||||
top = soup.find('ul', attrs={'class': 'top6'})
|
||||
|
||||
top6_news = []
|
||||
|
||||
for item in top.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in top.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
top6_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return top6_news
|
||||
|
||||
def get_section_news(self, soup):
|
||||
news_grid = soup.find("ul", attrs={"class": "default"})
|
||||
news_grid = soup.find('ul', attrs={'class': 'default'})
|
||||
|
||||
news = []
|
||||
|
||||
for item in news_grid.findAll("li"):
|
||||
title = item.find("p", attrs={"class": "title"}).string
|
||||
date_string = item.find("p", attrs={"class": "date"}).next
|
||||
for item in news_grid.findAll('li'):
|
||||
title = item.find('p', attrs={'class': 'title'}).string
|
||||
date_string = item.find('p', attrs={'class': 'date'}).next
|
||||
date = date_string.strip()
|
||||
|
||||
url = self.index + item.find("a")["href"]
|
||||
url = self.index + item.find('a')['href']
|
||||
|
||||
news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
|
||||
"url": url,
|
||||
"description": "",
|
||||
'title': title,
|
||||
'date': datetime.strptime(date, '%B %d, %Y').strftime('%Y/%m/%d'),
|
||||
'url': url,
|
||||
'description': '',
|
||||
}
|
||||
)
|
||||
|
||||
return news
|
||||
|
||||
def get_section(self, section):
|
||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
||||
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||
|
||||
section_news_items = self.get_top6(soup)
|
||||
section_news_items.extend(self.get_section_news(soup))
|
||||
@ -114,26 +114,26 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
return section_news_items
|
||||
|
||||
def get_special_section(self, section):
|
||||
soup = self.index_to_soup(self.index + "/ajw/" + section)
|
||||
top = soup.find("div", attrs={"class": "Section"})
|
||||
soup = self.index_to_soup(self.index + '/ajw/' + section)
|
||||
top = soup.find('div', attrs={'class': 'Section'})
|
||||
|
||||
special_news = []
|
||||
|
||||
for item in top.findAll("li"):
|
||||
item_a = item.find("a")
|
||||
for item in top.findAll('li'):
|
||||
item_a = item.find('a')
|
||||
|
||||
text_split = item_a.text.strip().split("\n")
|
||||
text_split = item_a.text.strip().split('\n')
|
||||
title = text_split[0]
|
||||
description = text_split[1].strip()
|
||||
|
||||
url = self.index + item_a["href"]
|
||||
url = self.index + item_a['href']
|
||||
|
||||
special_news.append(
|
||||
{
|
||||
"title": title,
|
||||
"date": "",
|
||||
"url": url,
|
||||
"description": description,
|
||||
'title': title,
|
||||
'date': '',
|
||||
'url': url,
|
||||
'description': description,
|
||||
}
|
||||
)
|
||||
|
||||
@ -144,24 +144,24 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
("What's New", self.get_whats_new()),
|
||||
("National Report", self.get_section("national_report")),
|
||||
("Politics", self.get_section("politics")),
|
||||
("Business", self.get_section("business")),
|
||||
("Asia & World - China", self.get_section("asia_world/china")),
|
||||
("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
|
||||
("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
|
||||
("Asia & World - World", self.get_section("asia_world/world")),
|
||||
("Sci & Tech", self.get_section("sci_tech")),
|
||||
("Culture - Style", self.get_section("culture/style")),
|
||||
('National Report', self.get_section('national_report')),
|
||||
('Politics', self.get_section('politics')),
|
||||
('Business', self.get_section('business')),
|
||||
('Asia & World - China', self.get_section('asia_world/china')),
|
||||
('Asia & World - Korean Peninsula', self.get_section('asia_world/korean_peninsula')),
|
||||
('Asia & World - Around Asia', self.get_section('asia_world/around_asia')),
|
||||
('Asia & World - World', self.get_section('asia_world/world')),
|
||||
('Sci & Tech', self.get_section('sci_tech')),
|
||||
('Culture - Style', self.get_section('culture/style')),
|
||||
# ("Culture - Cooking", self.get_section("culture/cooking")),
|
||||
("Culture - Movies", self.get_section("culture/movies")),
|
||||
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
|
||||
("Travel", self.get_section("travel")),
|
||||
("Sports", self.get_section("sports")),
|
||||
("Opinion - Editorial", self.get_section("opinion/editorial")),
|
||||
("Opinion - Vox Populi", self.get_section("opinion/vox")),
|
||||
("Opinion - Views", self.get_section("opinion/views")),
|
||||
("Special", self.get_special_section("special")),
|
||||
('Culture - Movies', self.get_section('culture/movies')),
|
||||
('Culture - Manga & Anime', self.get_section('culture/manga_anime')),
|
||||
('Travel', self.get_section('travel')),
|
||||
('Sports', self.get_section('sports')),
|
||||
('Opinion - Editorial', self.get_section('opinion/editorial')),
|
||||
('Opinion - Vox Populi', self.get_section('opinion/vox')),
|
||||
('Opinion - Views', self.get_section('opinion/views')),
|
||||
('Special', self.get_special_section('special')),
|
||||
]
|
||||
|
||||
return feeds
|
||||
|
@ -26,11 +26,11 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
publication_type = 'magazine'
|
||||
auto_cleanup = True
|
||||
masthead_url = 'https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: "Droid Serif", serif}
|
||||
.entry-title {font-family: "Playfair Display", serif}
|
||||
img {display: block}
|
||||
"""
|
||||
'''
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
|
@ -24,12 +24,12 @@ class BuenosAiresHerald(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
|
||||
INDEX = 'http://www.buenosairesherald.com'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
h1{font-family: Georgia,serif}
|
||||
#fecha{text-align: right; font-size: small}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -16,7 +16,7 @@ class BangkokPostRecipe(BasicNewsRecipe):
|
||||
title = u'Bangkok Post'
|
||||
publisher = u'Post Publishing PCL'
|
||||
category = u'News'
|
||||
description = u'The world\'s window to Thailand'
|
||||
description = u"The world's window to Thailand"
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
@ -8,11 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
|
||||
|
||||
|
||||
class barrons(BasicNewsRecipe):
|
||||
title = 'Barron\'s Magazine'
|
||||
title = "Barron's Magazine"
|
||||
__author__ = 'unkn0wn'
|
||||
description = (
|
||||
'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister '
|
||||
'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and '
|
||||
"Barron's is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister "
|
||||
"publication to The Wall Street Journal, Barron's covers U.S. financial information, market developments, and "
|
||||
'relevant statistics.'
|
||||
)
|
||||
language = 'en_US'
|
||||
@ -82,7 +82,7 @@ class barrons(BasicNewsRecipe):
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (YYYYMMDD format)',
|
||||
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
|
||||
'long': "For example, 20240722.\nIf it didn't work, try again later."
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,9 +135,9 @@ class BBCNews(BasicNewsRecipe):
|
||||
|
||||
# Select / de-select the feeds you want in your ebook.
|
||||
feeds = [
|
||||
("News Home", "https://feeds.bbci.co.uk/news/rss.xml"),
|
||||
("UK", "https://feeds.bbci.co.uk/news/uk/rss.xml"),
|
||||
("World", "https://feeds.bbci.co.uk/news/world/rss.xml"),
|
||||
('News Home', 'https://feeds.bbci.co.uk/news/rss.xml'),
|
||||
('UK', 'https://feeds.bbci.co.uk/news/uk/rss.xml'),
|
||||
('World', 'https://feeds.bbci.co.uk/news/world/rss.xml'),
|
||||
# ("England", "https://feeds.bbci.co.uk/news/england/rss.xml"),
|
||||
# ("Scotland", "https://feeds.bbci.co.uk/news/scotland/rss.xml"),
|
||||
# ("Wales", "https://feeds.bbci.co.uk/news/wales/rss.xml"),
|
||||
@ -147,26 +147,26 @@ class BBCNews(BasicNewsRecipe):
|
||||
# ("Europe", "https://feeds.bbci.co.uk/news/world/europe/rss.xml"),
|
||||
# ("Latin America", "https://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
|
||||
# ("Middle East", "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
|
||||
("US & Canada", "https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
|
||||
("Politics", "https://feeds.bbci.co.uk/news/politics/rss.xml"),
|
||||
("Science/Environment",
|
||||
"https://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
|
||||
("Technology", "https://feeds.bbci.co.uk/news/technology/rss.xml"),
|
||||
("Magazine", "https://feeds.bbci.co.uk/news/magazine/rss.xml"),
|
||||
("Entertainment/Arts",
|
||||
"https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
|
||||
('US & Canada', 'https://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml'),
|
||||
('Politics', 'https://feeds.bbci.co.uk/news/politics/rss.xml'),
|
||||
('Science/Environment',
|
||||
'https://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
|
||||
('Technology', 'https://feeds.bbci.co.uk/news/technology/rss.xml'),
|
||||
('Magazine', 'https://feeds.bbci.co.uk/news/magazine/rss.xml'),
|
||||
('Entertainment/Arts',
|
||||
'https://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml'),
|
||||
# ("Health", "https://feeds.bbci.co.uk/news/health/rss.xml"),
|
||||
# ("Education/Family", "https://feeds.bbci.co.uk/news/education/rss.xml"),
|
||||
("Business", "https://feeds.bbci.co.uk/news/business/rss.xml"),
|
||||
("Special Reports", "https://feeds.bbci.co.uk/news/special_reports/rss.xml"),
|
||||
("Also in the News", "https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
|
||||
('Business', 'https://feeds.bbci.co.uk/news/business/rss.xml'),
|
||||
('Special Reports', 'https://feeds.bbci.co.uk/news/special_reports/rss.xml'),
|
||||
('Also in the News', 'https://feeds.bbci.co.uk/news/also_in_the_news/rss.xml'),
|
||||
# ("Newsbeat", "https://www.bbc.co.uk/newsbeat/rss.xml"),
|
||||
# ("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
|
||||
# ("Blog: Mark D'Arcy (Parliamentary Correspondent)", "https://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
|
||||
# ("Blog: Robert Peston (Business Editor)", "https://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
|
||||
# ("Blog: Stephanie Flanders (Economics Editor)", "https://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
|
||||
("Sport Front Page",
|
||||
"http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
|
||||
('Sport Front Page',
|
||||
'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
|
||||
# ("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
|
||||
# ("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
|
||||
# ("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
|
||||
|
@ -556,19 +556,19 @@ class BBCBrasilRecipe(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
|
||||
# Handle sports page urls type 01:
|
||||
if (url.find("go/rss/-/sport1/") != -1):
|
||||
temp_url = url.replace("go/rss/-/", "")
|
||||
if (url.find('go/rss/-/sport1/') != -1):
|
||||
temp_url = url.replace('go/rss/-/', '')
|
||||
|
||||
# Handle sports page urls type 02:
|
||||
elif (url.find("go/rss/int/news/-/sport1/") != -1):
|
||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
||||
elif (url.find('go/rss/int/news/-/sport1/') != -1):
|
||||
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||
|
||||
# Handle regular news page urls:
|
||||
else:
|
||||
temp_url = url.replace("go/rss/int/news/-/", "")
|
||||
temp_url = url.replace('go/rss/int/news/-/', '')
|
||||
|
||||
# Always add "?print=true" to the end of the url.
|
||||
print_url = temp_url + "?print=true"
|
||||
print_url = temp_url + '?print=true'
|
||||
|
||||
return print_url
|
||||
|
||||
|
@ -30,7 +30,7 @@ class BillOReilly(BasicNewsRecipe):
|
||||
feeds.append(("O'Reilly Factor", articles_shows))
|
||||
|
||||
if articles_columns:
|
||||
feeds.append(("Newspaper Column", articles_columns))
|
||||
feeds.append(('Newspaper Column', articles_columns))
|
||||
|
||||
return feeds
|
||||
|
||||
|
@ -27,8 +27,8 @@ class bleskRecipe(BasicNewsRecipe):
|
||||
cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
extra_css = """
|
||||
"""
|
||||
extra_css = '''
|
||||
'''
|
||||
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
|
||||
|
@ -23,7 +23,7 @@ class Blic(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
|
||||
language = 'sr'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Georgia, serif1, serif}
|
||||
@ -35,7 +35,7 @@ class Blic(BasicNewsRecipe):
|
||||
.potpis{font-size: x-small; color: gray}
|
||||
.article_info{font-size: small}
|
||||
img{margin-bottom: 0.8em; margin-top: 0.8em; display: block}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||
|
@ -56,7 +56,7 @@ class Bloomberg(BasicNewsRecipe):
|
||||
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
|
||||
description = (
|
||||
'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,'
|
||||
' companies, events, and trends shaping today\'s complex, global economy.'
|
||||
" companies, events, and trends shaping today's complex, global economy."
|
||||
)
|
||||
remove_empty_feeds = True
|
||||
|
||||
|
@ -2,29 +2,29 @@ from urllib.parse import urljoin
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
_issue_url = ""
|
||||
_issue_url = ''
|
||||
|
||||
|
||||
class BookforumMagazine(BasicNewsRecipe):
|
||||
title = "Bookforum"
|
||||
title = 'Bookforum'
|
||||
description = (
|
||||
"Bookforum is an American book review magazine devoted to books and "
|
||||
"the discussion of literature. https://www.bookforum.com/print"
|
||||
'Bookforum is an American book review magazine devoted to books and '
|
||||
'the discussion of literature. https://www.bookforum.com/print'
|
||||
)
|
||||
language = "en"
|
||||
__author__ = "ping"
|
||||
publication_type = "magazine"
|
||||
encoding = "utf-8"
|
||||
language = 'en'
|
||||
__author__ = 'ping'
|
||||
publication_type = 'magazine'
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = False
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 8
|
||||
|
||||
keep_only_tags = [dict(class_="blog-article")]
|
||||
remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]
|
||||
keep_only_tags = [dict(class_='blog-article')]
|
||||
remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
|
||||
.blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
|
||||
.blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
|
||||
@ -33,46 +33,46 @@ class BookforumMagazine(BasicNewsRecipe):
|
||||
display: block; max-width: 100%; height: auto;
|
||||
}
|
||||
.blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
|
||||
"""
|
||||
'''
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
# strip away links that's not needed
|
||||
for ele in soup.select(".blog-article__header a"):
|
||||
for ele in soup.select('.blog-article__header a'):
|
||||
ele.unwrap()
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(
|
||||
_issue_url if _issue_url else "https://www.bookforum.com/print"
|
||||
_issue_url if _issue_url else 'https://www.bookforum.com/print'
|
||||
)
|
||||
meta_ele = soup.find("meta", property="og:title")
|
||||
meta_ele = soup.find('meta', property='og:title')
|
||||
if meta_ele:
|
||||
self.timefmt = f' [{meta_ele["content"]}]'
|
||||
|
||||
cover_ele = soup.find("img", class_="toc-issue__cover")
|
||||
cover_ele = soup.find('img', class_='toc-issue__cover')
|
||||
if cover_ele:
|
||||
self.cover_url = urljoin(
|
||||
"https://www.bookforum.com",
|
||||
soup.find("img", class_="toc-issue__cover")["src"],
|
||||
'https://www.bookforum.com',
|
||||
soup.find('img', class_='toc-issue__cover')['src'],
|
||||
)
|
||||
|
||||
articles = {}
|
||||
for sect_ele in soup.find_all("div", class_="toc-articles__section"):
|
||||
for sect_ele in soup.find_all('div', class_='toc-articles__section'):
|
||||
section_name = self.tag_to_string(
|
||||
sect_ele.find("a", class_="toc__anchor-links__link")
|
||||
sect_ele.find('a', class_='toc__anchor-links__link')
|
||||
)
|
||||
for article_ele in sect_ele.find_all("article"):
|
||||
title_ele = article_ele.find("h1")
|
||||
sub_title_ele = article_ele.find(class_="toc-article__subtitle")
|
||||
for article_ele in sect_ele.find_all('article'):
|
||||
title_ele = article_ele.find('h1')
|
||||
sub_title_ele = article_ele.find(class_='toc-article__subtitle')
|
||||
articles.setdefault(section_name, []).append(
|
||||
{
|
||||
"title": self.tag_to_string(title_ele),
|
||||
"url": article_ele.find("a", class_="toc-article__link")[
|
||||
"href"
|
||||
'title': self.tag_to_string(title_ele),
|
||||
'url': article_ele.find('a', class_='toc-article__link')[
|
||||
'href'
|
||||
],
|
||||
"description": self.tag_to_string(sub_title_ele)
|
||||
'description': self.tag_to_string(sub_title_ele)
|
||||
if sub_title_ele
|
||||
else "",
|
||||
else '',
|
||||
}
|
||||
)
|
||||
return articles.items()
|
||||
|
@ -22,9 +22,9 @@ class Borsen_dk(BasicNewsRecipe):
|
||||
language = 'da'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name="h1", attrs={'itemprop': 'headline'}),
|
||||
dict(name="div", attrs={'itemprob': 'datePublished'}),
|
||||
dict(name="div", attrs={'itemprop': 'articleBody'}),
|
||||
dict(name='h1', attrs={'itemprop': 'headline'}),
|
||||
dict(name='div', attrs={'itemprob': 'datePublished'}),
|
||||
dict(name='div', attrs={'itemprop': 'articleBody'}),
|
||||
]
|
||||
|
||||
# Feed are found here:
|
||||
|
@ -42,24 +42,24 @@ def class_startswith(*prefixes):
|
||||
|
||||
# From: https://www3.bostonglobe.com/lifestyle/comics?arc404=true
|
||||
comics_to_fetch = {
|
||||
"ADAM@HOME": 'ad',
|
||||
"ARLO & JANIS": 'aj',
|
||||
'ADAM@HOME': 'ad',
|
||||
'ARLO & JANIS': 'aj',
|
||||
# "CUL DE SAC": 'cds',
|
||||
# "CURTIS": 'kfcrt',
|
||||
"DILBERT": 'dt',
|
||||
"DOONESBURY": 'db',
|
||||
"DUSTIN": 'kfdus',
|
||||
"F MINUS": 'fm',
|
||||
"FOR BETTER OR WORSE": 'fb',
|
||||
'DILBERT': 'dt',
|
||||
'DOONESBURY': 'db',
|
||||
'DUSTIN': 'kfdus',
|
||||
'F MINUS': 'fm',
|
||||
'FOR BETTER OR WORSE': 'fb',
|
||||
# "GET FUZZY": 'gz',
|
||||
# "MOTHER GOOSE & GRIMM": 'tmmgg',
|
||||
# "JUMPSTART": 'jt',
|
||||
"MONTY": 'mt',
|
||||
'MONTY': 'mt',
|
||||
# "POOCH CAFE",
|
||||
"RHYMES WITH ORANGE": 'kfrwo',
|
||||
'RHYMES WITH ORANGE': 'kfrwo',
|
||||
# "ROSE IS ROSE": 'rr',
|
||||
# "ZIPPY THE PINHEAD": 'kfzpy',
|
||||
"ZITS": 'kfzt'
|
||||
'ZITS': 'kfzt'
|
||||
}
|
||||
|
||||
|
||||
@ -77,10 +77,10 @@ def extract_json(raw_html):
|
||||
|
||||
|
||||
def absolutize_url(url):
|
||||
if url.startswith("//"):
|
||||
return "https:" + url
|
||||
if url.startswith('//'):
|
||||
return 'https:' + url
|
||||
if url.startswith('/'):
|
||||
url = "https://www.bostonglobe.com" + url
|
||||
url = 'https://www.bostonglobe.com' + url
|
||||
return url
|
||||
|
||||
|
||||
@ -120,7 +120,7 @@ def main():
|
||||
|
||||
class BostonGlobeSubscription(BasicNewsRecipe):
|
||||
|
||||
title = "Boston Globe"
|
||||
title = 'Boston Globe'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'The Boston Globe'
|
||||
language = 'en_US'
|
||||
|
@ -25,17 +25,17 @@ def class_startswith(*prefixes):
|
||||
return dict(attrs={'class': q})
|
||||
|
||||
def absolutize_url(url):
|
||||
if url.startswith("//"):
|
||||
return "https:" + url
|
||||
if url.startswith('//'):
|
||||
return 'https:' + url
|
||||
if url.startswith('/'):
|
||||
url = "https://www.bostonglobe.com" + url
|
||||
url = 'https://www.bostonglobe.com' + url
|
||||
return url
|
||||
|
||||
|
||||
class BostonGlobePrint(BasicNewsRecipe):
|
||||
title = "Boston Globe | Print Edition"
|
||||
title = 'Boston Globe | Print Edition'
|
||||
__author__ = 'Kovid Goyal, unkn0wn'
|
||||
description = 'The Boston Globe - Today\'s Paper'
|
||||
description = "The Boston Globe - Today's Paper"
|
||||
language = 'en_US'
|
||||
|
||||
keep_only_tags = [
|
||||
@ -70,7 +70,7 @@ class BostonGlobePrint(BasicNewsRecipe):
|
||||
for image in soup.findAll('img', src=True):
|
||||
if image['src'].endswith('750.jpg'):
|
||||
return 'https:' + image['src']
|
||||
self.log("\nCover unavailable")
|
||||
self.log('\nCover unavailable')
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
@ -94,7 +94,7 @@ class BostonGlobePrint(BasicNewsRecipe):
|
||||
desc = self.tag_to_string(d)
|
||||
|
||||
self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
|
@ -23,40 +23,40 @@ class brewiarz(BasicNewsRecipe):
|
||||
next_days = 1
|
||||
|
||||
def parse_index(self):
|
||||
dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
|
||||
"05": "v", "06": "vi", "07": "vii", "08": "viii",
|
||||
"09": "ix", "10": "x", "11": "xi", "12": "xii"}
|
||||
dec2rom_dict = {'01': 'i', '02': 'ii', '03': 'iii', '04': 'iv',
|
||||
'05': 'v', '06': 'vi', '07': 'vii', '08': 'viii',
|
||||
'09': 'ix', '10': 'x', '11': 'xi', '12': 'xii'}
|
||||
|
||||
weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
|
||||
"Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}
|
||||
weekday_dict = {'Sunday': 'Niedziela', 'Monday': 'Poniedziałek', 'Tuesday': 'Wtorek',
|
||||
'Wednesday': 'Środa', 'Thursday': 'Czwartek', 'Friday': 'Piątek', 'Saturday': 'Sobota'}
|
||||
|
||||
now = datetime.datetime.now()
|
||||
|
||||
feeds = []
|
||||
for i in range(0, self.next_days):
|
||||
url_date = now + datetime.timedelta(days=i)
|
||||
url_date_month = url_date.strftime("%m")
|
||||
url_date_month = url_date.strftime('%m')
|
||||
url_date_month_roman = dec2rom_dict[url_date_month]
|
||||
url_date_day = url_date.strftime("%d")
|
||||
url_date_year = url_date.strftime("%Y")[2:]
|
||||
url_date_weekday = url_date.strftime("%A")
|
||||
url_date_day = url_date.strftime('%d')
|
||||
url_date_year = url_date.strftime('%Y')[2:]
|
||||
url_date_weekday = url_date.strftime('%A')
|
||||
url_date_weekday_pl = weekday_dict[url_date_weekday]
|
||||
|
||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + \
|
||||
url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
|
||||
url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + \
|
||||
url_date_year + '/' + url_date_day + url_date_month + '/index.php3'
|
||||
articles = self.parse_pages(url)
|
||||
if articles:
|
||||
title = url_date_weekday_pl + " " + url_date_day + \
|
||||
"." + url_date_month + "." + url_date_year
|
||||
title = url_date_weekday_pl + ' ' + url_date_day + \
|
||||
'.' + url_date_month + '.' + url_date_year
|
||||
feeds.append((title, articles))
|
||||
else:
|
||||
sectors = self.get_sectors(url)
|
||||
for subpage in sectors:
|
||||
title = url_date_weekday_pl + " " + url_date_day + "." + \
|
||||
url_date_month + "." + url_date_year + " - " + subpage.string
|
||||
url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + \
|
||||
"/" + url_date_day + url_date_month + \
|
||||
"/" + subpage['href']
|
||||
title = url_date_weekday_pl + ' ' + url_date_day + '.' + \
|
||||
url_date_month + '.' + url_date_year + ' - ' + subpage.string
|
||||
url = 'http://brewiarz.pl/' + url_date_month_roman + '_' + url_date_year + \
|
||||
'/' + url_date_day + url_date_month + \
|
||||
'/' + subpage['href']
|
||||
print(url)
|
||||
articles = self.parse_pages(url)
|
||||
if articles:
|
||||
@ -91,7 +91,7 @@ class brewiarz(BasicNewsRecipe):
|
||||
sublinks = ol.findAll(name='a')
|
||||
for sublink in sublinks:
|
||||
link_title = self.tag_to_string(
|
||||
link) + " - " + self.tag_to_string(sublink)
|
||||
link) + ' - ' + self.tag_to_string(sublink)
|
||||
link_url_print = re.sub(
|
||||
'php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
|
||||
link_url = url[:-10] + link_url_print
|
||||
@ -145,7 +145,7 @@ class brewiarz(BasicNewsRecipe):
|
||||
if x == tag:
|
||||
break
|
||||
else:
|
||||
print("Can't find", tag, "in", tag.parent)
|
||||
print("Can't find", tag, 'in', tag.parent)
|
||||
continue
|
||||
for r in reversed(tag.contents):
|
||||
tag.parent.insert(i, r)
|
||||
|
@ -22,10 +22,10 @@ class Business_insider(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://static.businessinsider.com/assets/images/logos/tbi_print.jpg'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -64,7 +64,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
|
||||
if dt.weekday() == 6:
|
||||
self.log.warn(
|
||||
'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
|
||||
' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.'
|
||||
" And Columns On This Page Today Appeared In The Newspaper's Saturday Edition."
|
||||
)
|
||||
url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today
|
||||
raw = self.index_to_soup(url, raw=True)
|
||||
|
@ -90,7 +90,7 @@ class BT(BasicNewsRecipe):
|
||||
|
||||
# Insert feeds in specified order, if available
|
||||
|
||||
feedSort = ['Editor\'s Note', 'Editors note']
|
||||
feedSort = ["Editor's Note", 'Editors note']
|
||||
for i in feedSort:
|
||||
if i in sections:
|
||||
feeds.append((i, sections[i]))
|
||||
|
@ -5,8 +5,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CACM(BasicNewsRecipe):
|
||||
title = "ACM CACM Magazine"
|
||||
description = "Published on day 1 of every month."
|
||||
title = 'ACM CACM Magazine'
|
||||
description = 'Published on day 1 of every month.'
|
||||
language = 'en'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
@ -17,16 +17,16 @@ class CACM(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
"""
|
||||
'''
|
||||
Parse out cover URL from cover page.
|
||||
Example:
|
||||
From: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.large.jpg?1647524668&1647524668
|
||||
Get: https://cacm.acm.org/system/assets/0004/2570/April2022.Cover.1000x1338.jpg
|
||||
"""
|
||||
'''
|
||||
|
||||
soup = self.index_to_soup("https://cacm.acm.org/")
|
||||
a_img = soup.find("a", class_="menuCover")
|
||||
img_url = a_img.img["src"]
|
||||
img_url = img_url.split("?")[0]
|
||||
img_url = img_url.replace(".large", "")
|
||||
soup = self.index_to_soup('https://cacm.acm.org/')
|
||||
a_img = soup.find('a', class_='menuCover')
|
||||
img_url = a_img.img['src']
|
||||
img_url = img_url.split('?')[0]
|
||||
img_url = img_url.replace('.large', '')
|
||||
return img_url
|
||||
|
@ -29,28 +29,28 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"),
|
||||
(u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"),
|
||||
(u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"),
|
||||
(u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"),
|
||||
(u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"),
|
||||
(u" אחריות וסביבה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"),
|
||||
(u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"),
|
||||
(u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"),
|
||||
(u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"),
|
||||
(u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"),
|
||||
(u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"),
|
||||
(u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"),
|
||||
(u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"),
|
||||
(u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"),
|
||||
(u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"),
|
||||
(u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"),
|
||||
(u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"),
|
||||
(u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"),
|
||||
(u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"),
|
||||
(u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"),
|
||||
(u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"),
|
||||
(u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"),
|
||||
(u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"),
|
||||
(u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml")
|
||||
(u' דף הבית', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml'),
|
||||
(u' 24/7', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml'),
|
||||
(u' באזז', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml'),
|
||||
(u' משפט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml'),
|
||||
(u' רכב', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml'),
|
||||
(u' אחריות וסביבה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml'),
|
||||
(u' דעות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml'),
|
||||
(u' תיירות ותעופה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml'),
|
||||
(u' קריירה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml'),
|
||||
(u' אחד העם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml'),
|
||||
(u' המלצות ואזהרות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml'),
|
||||
(u' הייטק והון סיכון', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml'),
|
||||
(u' חדשות טכנולוגיה', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml'),
|
||||
(u' תקשורת', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml'),
|
||||
(u' אינטרנט', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml'),
|
||||
(u" מכשירים וגאדג'טים", u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml'),
|
||||
(u' המדריך', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml'),
|
||||
(u' אפליקציות', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml'),
|
||||
(u' Play', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml'),
|
||||
(u' הכסף', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml'),
|
||||
(u' עולם', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml'),
|
||||
(u' פרסום ושיווק', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml'),
|
||||
(u' פנאי', u'http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml'),
|
||||
(u' עסקי ספורט', u'http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml')
|
||||
]
|
||||
|
@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
continue
|
||||
break
|
||||
if daysback == 7:
|
||||
self.log("\nCover unavailable")
|
||||
self.log('\nCover unavailable')
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
def fixChars(self, string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91", "‘", string)
|
||||
fixed = re.sub('\x91', '‘', string)
|
||||
# Replace rsquo (\x92)
|
||||
fixed = re.sub("\x92", "’", fixed)
|
||||
fixed = re.sub('\x92', '’', fixed)
|
||||
# Replace ldquo (\x93)
|
||||
fixed = re.sub("\x93", "“", fixed)
|
||||
fixed = re.sub('\x93', '“', fixed)
|
||||
# Replace rdquo (\x94)
|
||||
fixed = re.sub("\x94", "”", fixed)
|
||||
fixed = re.sub('\x94', '”', fixed)
|
||||
# Replace ndash (\x96)
|
||||
fixed = re.sub("\x96", "–", fixed)
|
||||
fixed = re.sub('\x96', '–', fixed)
|
||||
# Replace mdash (\x97)
|
||||
fixed = re.sub("\x97", "—", fixed)
|
||||
fixed = re.sub("’", "’", fixed)
|
||||
fixed = re.sub('\x97', '—', fixed)
|
||||
fixed = re.sub('’', '’', fixed)
|
||||
return fixed
|
||||
|
||||
def massageNCXText(self, description):
|
||||
@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
if url.startswith('/'):
|
||||
url = self.url_prefix + url
|
||||
if not url.startswith(self.url_prefix):
|
||||
print("Rejected " + url)
|
||||
print('Rejected ' + url)
|
||||
return
|
||||
if url in self.url_list:
|
||||
print("Rejected dup " + url)
|
||||
print('Rejected dup ' + url)
|
||||
return
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(atag, False)
|
||||
@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
return
|
||||
dtag = adiv.find('div', 'content')
|
||||
description = ''
|
||||
print("URL " + url)
|
||||
print("TITLE " + title)
|
||||
print('URL ' + url)
|
||||
print('TITLE ' + title)
|
||||
if dtag is not None:
|
||||
stag = dtag.span
|
||||
if stag is not None:
|
||||
@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
description = self.tag_to_string(stag, False)
|
||||
else:
|
||||
description = self.tag_to_string(dtag, False)
|
||||
print("DESCRIPTION: " + description)
|
||||
print('DESCRIPTION: ' + description)
|
||||
if key not in articles:
|
||||
articles[key] = []
|
||||
articles[key].append(dict(
|
||||
title=title, url=url, date='', description=description, author='', content=''))
|
||||
|
||||
def parse_web_index(key, keyurl):
|
||||
print("Section: " + key + ': ' + self.url_prefix + keyurl)
|
||||
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
|
||||
try:
|
||||
soup = self.index_to_soup(self.url_prefix + keyurl)
|
||||
except:
|
||||
print("Section: " + key + ' NOT FOUND')
|
||||
print('Section: ' + key + ' NOT FOUND')
|
||||
return
|
||||
ans.append(key)
|
||||
mainsoup = soup.find('div', 'bodywrapper')
|
||||
|
@ -17,7 +17,7 @@ class Capital(BasicNewsRecipe):
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(name='p'),
|
||||
dict(name='span', attrs={'id': ["textbody"]})
|
||||
dict(name='span', attrs={'id': ['textbody']})
|
||||
]
|
||||
|
||||
# 3 posts seemed to have utf8 encoding
|
||||
|
@ -96,7 +96,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self, *args, **kw)
|
||||
if not self.username or not self.password:
|
||||
return br
|
||||
data = json.dumps({"0":{"json":{"email":self.username,"password":self.password}}})
|
||||
data = json.dumps({'0':{'json':{'email':self.username,'password':self.password}}})
|
||||
if not isinstance(data, bytes):
|
||||
data = data.encode('utf-8')
|
||||
rq = Request(
|
||||
@ -138,7 +138,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
x = d.split('-')
|
||||
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
|
||||
inp = json.dumps({'0':{'json':{'month':int(x[0]),'year':int(x[1])}}})
|
||||
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
|
||||
|
||||
raw = json.loads(self.index_to_soup(api, raw=True))
|
||||
@ -174,7 +174,7 @@ class CaravanMagazine(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
slug = urlparse(url).path
|
||||
inp = json.dumps({"0":{"json":{"slug":slug}}})
|
||||
inp = json.dumps({'0':{'json':{'slug':slug}}})
|
||||
return 'https://api.caravanmagazine.in/api/trpc/articles.getFromCache?batch=1&input=' + quote(inp, safe='')
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
|
@ -5,9 +5,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CATOInstitute(BasicNewsRecipe):
|
||||
title = u'The CATO Institute'
|
||||
description = "The Cato Institute is a public policy research organization — a think tank — \
|
||||
description = 'The Cato Institute is a public policy research organization — a think tank — \
|
||||
dedicated to the principles of individual liberty, limited government, free markets and peace.\
|
||||
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues."
|
||||
Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues.'
|
||||
__author__ = '_reader'
|
||||
__date__ = '05 July 2012'
|
||||
__version__ = '1.0'
|
||||
|
@ -24,7 +24,7 @@ class CSMonitor(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
.head {font-family: Georgia,"Times New Roman",Times,serif}
|
||||
@ -32,7 +32,7 @@ class CSMonitor(BasicNewsRecipe):
|
||||
.hide{display: none}
|
||||
.sLoc{font-weight: bold}
|
||||
ul{list-style-type: none}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -39,7 +39,7 @@ class Chronicle(BasicNewsRecipe):
|
||||
# Go to the issue
|
||||
soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
|
||||
issue = soup0.find('ul', attrs={'class': 'feature-promo-list'}).li
|
||||
issueurl = "http://chronicle.com" + issue.a['href']
|
||||
issueurl = 'http://chronicle.com' + issue.a['href']
|
||||
|
||||
# Find date
|
||||
dates = self.tag_to_string(issue.a).split(': ')[-1]
|
||||
@ -47,12 +47,12 @@ class Chronicle(BasicNewsRecipe):
|
||||
|
||||
# Find cover
|
||||
cover = soup0.find('div', attrs={
|
||||
'class': 'side-content'}).find(attrs={'src': re.compile("photos/biz/Current")})
|
||||
'class': 'side-content'}).find(attrs={'src': re.compile('photos/biz/Current')})
|
||||
if cover is not None:
|
||||
if "chronicle.com" in cover['src']:
|
||||
if 'chronicle.com' in cover['src']:
|
||||
self.cover_url = cover['src']
|
||||
else:
|
||||
self.cover_url = "http://chronicle.com" + cover['src']
|
||||
self.cover_url = 'http://chronicle.com' + cover['src']
|
||||
# Go to the main body
|
||||
soup = self.index_to_soup(issueurl)
|
||||
div = soup.find('div', attrs={'id': 'article-body'})
|
||||
@ -64,7 +64,7 @@ class Chronicle(BasicNewsRecipe):
|
||||
a = post.find('a', href=True)
|
||||
if a is not None:
|
||||
title = self.tag_to_string(a)
|
||||
url = "http://chronicle.com" + a['href'].strip()
|
||||
url = 'http://chronicle.com' + a['href'].strip()
|
||||
sectiontitle = post.findPrevious('h3')
|
||||
if sectiontitle is None:
|
||||
sectiontitle = post.findPrevious('h4')
|
||||
|
@ -18,24 +18,24 @@ class BasicUserRecipe1316245412(BasicNewsRecipe):
|
||||
# remove_javascript = True
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id': ["header", "navigation", "skip-link",
|
||||
"header-print", "header-print-url", "meta-toolbar", "footer"]}),
|
||||
dict(name='div', attrs={'class': ["region region-sidebar-first column sidebar", "breadcrumb",
|
||||
"breadcrumb-title", "meta", "comment-wrapper",
|
||||
"field field-name-field-show-teaser-right field-type-list-boolean field-label-above",
|
||||
"page-header",
|
||||
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
|
||||
"pagination",
|
||||
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1",
|
||||
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2", # 2011-09-23
|
||||
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2", # 2011-09-23
|
||||
dict(name='div', attrs={'id': ['header', 'navigation', 'skip-link',
|
||||
'header-print', 'header-print-url', 'meta-toolbar', 'footer']}),
|
||||
dict(name='div', attrs={'class': ['region region-sidebar-first column sidebar', 'breadcrumb',
|
||||
'breadcrumb-title', 'meta', 'comment-wrapper',
|
||||
'field field-name-field-show-teaser-right field-type-list-boolean field-label-above',
|
||||
'page-header',
|
||||
'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1',
|
||||
'pagination',
|
||||
'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1',
|
||||
'view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2', # 2011-09-23
|
||||
'view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2', # 2011-09-23
|
||||
]}),
|
||||
dict(name='div', attrs={'title': ["Dossier Auswahl"]}),
|
||||
dict(name='h2', attrs={'class': ["title comment-form"]}),
|
||||
dict(name='div', attrs={'title': ['Dossier Auswahl']}),
|
||||
dict(name='h2', attrs={'class': ['title comment-form']}),
|
||||
dict(name='form', attrs={
|
||||
'class': ["comment-form user-info-from-cookie"]}),
|
||||
'class': ['comment-form user-info-from-cookie']}),
|
||||
dict(name='table', attrs={
|
||||
'class': ["mcx-social-horizontal", "page-header"]}),
|
||||
'class': ['mcx-social-horizontal', 'page-header']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -34,7 +34,7 @@ class AdvancedUserRecipe1234144423(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class': ['padding', 'sidebar-photo', 'blog caitlin']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ["pluckcomments", "StoryChat"]}), dict(
|
||||
dict(name=['object', 'link', 'table', 'embed']), dict(name='div', attrs={'id': ['pluckcomments', 'StoryChat']}), dict(
|
||||
name='div', attrs={'class': ['articleflex-container', ]}), dict(name='p', attrs={'class': ['posted', 'tags']})
|
||||
]
|
||||
|
||||
|
@ -23,14 +23,14 @@ class CiperChile(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'blog'
|
||||
masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
body{font-family: Arial,sans-serif}
|
||||
.excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em}
|
||||
.author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small}
|
||||
.date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey}
|
||||
.epigrafe{font-size: small; color: grey}
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -44,7 +44,7 @@ class Clarin(BasicNewsRecipe):
|
||||
# To get all the data (images)
|
||||
auto_cleanup = False
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
h1#title {
|
||||
line-height: 1em;
|
||||
margin: 0 0 .5em 0;
|
||||
@ -64,7 +64,7 @@ class Clarin(BasicNewsRecipe):
|
||||
font-size: .9em;
|
||||
margin-bottom: .5em;
|
||||
}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
|
@ -25,16 +25,16 @@ class CNetJapan(BasicNewsRecipe):
|
||||
lambda match: '<!-- removed -->'),
|
||||
]
|
||||
|
||||
remove_tags_before = dict(id="contents_l")
|
||||
remove_tags_before = dict(id='contents_l')
|
||||
remove_tags = [
|
||||
{'class': "social_bkm_share"},
|
||||
{'class': "social_bkm_print"},
|
||||
{'class': "block20 clearfix"},
|
||||
dict(name="div", attrs={'id': 'bookreview'}),
|
||||
{'class': "tag_left_ttl"},
|
||||
{'class': "tag_right"}
|
||||
{'class': 'social_bkm_share'},
|
||||
{'class': 'social_bkm_print'},
|
||||
{'class': 'block20 clearfix'},
|
||||
dict(name='div', attrs={'id': 'bookreview'}),
|
||||
{'class': 'tag_left_ttl'},
|
||||
{'class': 'tag_right'}
|
||||
]
|
||||
remove_tags_after = {'class': "block20"}
|
||||
remove_tags_after = {'class': 'block20'}
|
||||
|
||||
def parse_feeds(self):
|
||||
|
||||
|
@ -25,16 +25,16 @@ class CNetJapanDigital(BasicNewsRecipe):
|
||||
lambda match: '<!-- removed -->'),
|
||||
]
|
||||
|
||||
remove_tags_before = dict(id="contents_l")
|
||||
remove_tags_before = dict(id='contents_l')
|
||||
remove_tags = [
|
||||
{'class': "social_bkm_share"},
|
||||
{'class': "social_bkm_print"},
|
||||
{'class': "block20 clearfix"},
|
||||
dict(name="div", attrs={'id': 'bookreview'}),
|
||||
{'class': "tag_left_ttl"},
|
||||
{'class': "tag_right"}
|
||||
{'class': 'social_bkm_share'},
|
||||
{'class': 'social_bkm_print'},
|
||||
{'class': 'block20 clearfix'},
|
||||
dict(name='div', attrs={'id': 'bookreview'}),
|
||||
{'class': 'tag_left_ttl'},
|
||||
{'class': 'tag_right'}
|
||||
]
|
||||
remove_tags_after = {'class': "block20"}
|
||||
remove_tags_after = {'class': 'block20'}
|
||||
|
||||
def parse_feeds(self):
|
||||
|
||||
|
@ -25,15 +25,15 @@ class CNetJapanRelease(BasicNewsRecipe):
|
||||
lambda match: '<!-- removed -->'),
|
||||
]
|
||||
|
||||
remove_tags_before = dict(id="contents_l")
|
||||
remove_tags_before = dict(id='contents_l')
|
||||
remove_tags = [
|
||||
{'class': "social_bkm_share"},
|
||||
{'class': "social_bkm_print"},
|
||||
{'class': "block20 clearfix"},
|
||||
dict(name="div", attrs={'id': 'bookreview'}),
|
||||
{'class': "tag_left_ttl"}
|
||||
{'class': 'social_bkm_share'},
|
||||
{'class': 'social_bkm_print'},
|
||||
{'class': 'block20 clearfix'},
|
||||
dict(name='div', attrs={'id': 'bookreview'}),
|
||||
{'class': 'tag_left_ttl'}
|
||||
]
|
||||
remove_tags_after = {'class': "block20"}
|
||||
remove_tags_after = {'class': 'block20'}
|
||||
|
||||
def parse_feeds(self):
|
||||
|
||||
|
@ -56,7 +56,7 @@ class CnetNews(BasicNewsRecipe):
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(section='author'),
|
||||
dict(id=["article-body", 'cnetReview']),
|
||||
dict(id=['article-body', 'cnetReview']),
|
||||
dict(attrs={'class': 'deal-content'}),
|
||||
]
|
||||
|
||||
|
@ -72,7 +72,7 @@ class CNN(BasicNewsRecipe):
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
self.log('\nCover unavailable')
|
||||
masthead = None
|
||||
return masthead
|
||||
|
||||
|
@ -36,9 +36,9 @@ class ContretempsRecipe(BasicNewsRecipe):
|
||||
return None
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
"""
|
||||
'''
|
||||
Crée une couverture personnalisée pour Contretemps
|
||||
"""
|
||||
'''
|
||||
from qt.core import QColor, QFont, QImage, QPainter, QPen, QRect, Qt
|
||||
|
||||
from calibre.gui2 import ensure_app, load_builtin_fonts, pixmap_to_data
|
||||
@ -56,7 +56,7 @@ class ContretempsRecipe(BasicNewsRecipe):
|
||||
|
||||
weekday = french_weekday[wkd]
|
||||
month = french_month[today.month]
|
||||
date_str = f"{weekday} {today.day} {month} {today.year}"
|
||||
date_str = f'{weekday} {today.day} {month} {today.year}'
|
||||
edition = today.strftime('Édition de %Hh%M')
|
||||
|
||||
# Création de l'image de base (ratio ~1.6 pour format livre)
|
||||
|
@ -5,10 +5,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CosmosMagazine(BasicNewsRecipe):
|
||||
title = "Cosmos Magazine"
|
||||
title = 'Cosmos Magazine'
|
||||
description = (
|
||||
"Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec)."
|
||||
"It is produced by The Royal Institution of Australia Inc (RiAus)."
|
||||
'Cosmos is a quarterly science magazine with 4 editions a year (Mar, Jun, Sep, Dec).'
|
||||
'It is produced by The Royal Institution of Australia Inc (RiAus).'
|
||||
)
|
||||
language = 'en_AU'
|
||||
__author__ = 'yodha8'
|
||||
|
@ -70,12 +70,12 @@ class CourrierInternational(BasicNewsRecipe):
|
||||
}
|
||||
'''
|
||||
|
||||
needs_subscription = "optional"
|
||||
needs_subscription = 'optional'
|
||||
login_url = 'http://www.courrierinternational.com/login'
|
||||
|
||||
def get_browser(self):
|
||||
def is_form_login(form):
|
||||
return "id" in form.attrs and form.attrs['id'] == "user-login-form"
|
||||
return 'id' in form.attrs and form.attrs['id'] == 'user-login-form'
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username:
|
||||
br.open(self.login_url)
|
||||
@ -86,8 +86,8 @@ class CourrierInternational(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for link in soup.findAll("a", href=re.compile('^/')):
|
||||
link["href"] = 'http://www.courrierinternational.com' + link["href"]
|
||||
for link in soup.findAll('a', href=re.compile('^/')):
|
||||
link['href'] = 'http://www.courrierinternational.com' + link['href']
|
||||
return soup
|
||||
|
||||
feeds = [
|
||||
|
@ -21,10 +21,10 @@ class CubaDebate(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
#BlogTitle{font-size: xx-large; font-weight: bold}
|
||||
body{font-family: Verdana, Arial, Tahoma, sans-serif}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
|
@ -23,7 +23,7 @@ class DainikBhaskar(BasicNewsRecipe):
|
||||
soup = self.index_to_soup('https://epaper.bhaskar.com/')
|
||||
tag = soup.find(attrs={'class': 'scaleDiv'})
|
||||
if tag:
|
||||
self.cover_url = tag.find('img')['src'].replace("_ss.jpg", "_l.jpg")
|
||||
self.cover_url = tag.find('img')['src'].replace('_ss.jpg', '_l.jpg')
|
||||
return super().get_cover_url()
|
||||
|
||||
keep_only_tags = [
|
||||
|
@ -31,11 +31,11 @@ class Danas(BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@class="post-intro-above"] //h1[@class="post-title"] | //div[@class="post-intro-title"] | //div[@class="post-meta-wrapper"]'
|
||||
resolve_internal_links = True
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
.author{font-size: small}
|
||||
.published {font-size: small}
|
||||
img{margin-bottom: 0.8em}
|
||||
"""
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comment': description,
|
||||
@ -66,7 +66,7 @@ class Danas(BasicNewsRecipe):
|
||||
'avgust', 'septembar', 'oktobar', 'novembar', 'decembar']
|
||||
td = date.today()
|
||||
monthname = months[td.month - 1]
|
||||
lurl = td.strftime("https://www.danas.rs/naslovna/naslovna-strana-za-%d-" + monthname + "-%Y/")
|
||||
lurl = td.strftime('https://www.danas.rs/naslovna/naslovna-strana-za-%d-' + monthname + '-%Y/')
|
||||
soup = self.index_to_soup(lurl)
|
||||
al = soup.find('div', attrs={'class':'corax-image'})
|
||||
if al and al.img:
|
||||
|
@ -77,9 +77,9 @@ class DeGentenaarOnline(BasicNewsRecipe):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
('http-equiv', 'Content-Language'), ('content', self.lang)])
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
return soup
|
||||
|
@ -16,8 +16,8 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe):
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
feeds = []
|
||||
soup = self.index_to_soup("http://www.democracyjournal.org")
|
||||
for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")):
|
||||
soup = self.index_to_soup('http://www.democracyjournal.org')
|
||||
for x in soup.findAll(href=re.compile(r'http://www\.democracyjournal\.org/\d*/.*php$')):
|
||||
url = x.get('href')
|
||||
title = self.tag_to_string(x)
|
||||
articles.append({'title': title, 'url': url,
|
||||
|
@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
'''
|
||||
demorgen.be
|
||||
"""
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -13,7 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
|
||||
description = 'News from Belgium in Dutch'
|
||||
oldest_article = 1
|
||||
language = 'nl_BE'
|
||||
encoding = "utf-8"
|
||||
encoding = 'utf-8'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_attributes = ['style', 'height', 'width']
|
||||
@ -23,10 +23,10 @@ class DeMorganBe(BasicNewsRecipe):
|
||||
masthead_url = 'https://www.demorgen.be/_next/static/media/demorgen_logo.dce579e2.svg'
|
||||
cover_url = 'https://usercontent.one/wp/www.insidejazz.be/wp-content/uploads/2018/11/pic0143.png'
|
||||
|
||||
extra_css = """
|
||||
extra_css = '''
|
||||
time, [data-test-id:"article-label"], [data-test-id:"article-sublabel"], [[data-test-id:"article-author"]] { font-size:small; }
|
||||
[data-test-id:"header-intro"] { font-style: italic; }
|
||||
"""
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='article', attrs={'id': 'article-content'}),
|
||||
|
@ -23,8 +23,8 @@ class ceskyDenikRecipe(BasicNewsRecipe):
|
||||
cover_url = 'http://g.denik.cz/images/loga/denik.png'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
extra_css = """
|
||||
"""
|
||||
extra_css = '''
|
||||
'''
|
||||
|
||||
remove_tags = []
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'content'})]
|
||||
|
@ -11,11 +11,11 @@ CZ_MONTHS = ['led', 'úno', 'bře', 'dub', 'kvě', 'čen', 'čec', 'srp', 'zář
|
||||
|
||||
|
||||
def cz_title_time():
|
||||
"""
|
||||
'''
|
||||
Helper function to return date with czech locale.
|
||||
Uses hardcoded lookup table of day and month names as strftime requires
|
||||
locale change that is not thread safe.
|
||||
"""
|
||||
'''
|
||||
today = datetime.today()
|
||||
weekday = CZ_DAYS[today.weekday()]
|
||||
month = CZ_MONTHS[today.month-1]
|
||||
@ -26,9 +26,9 @@ def cz_title_time():
|
||||
|
||||
|
||||
class DenikNRecipe(BasicNewsRecipe):
|
||||
"""
|
||||
'''
|
||||
Recipe for the RSS feed of https://denikn.cz/
|
||||
"""
|
||||
'''
|
||||
|
||||
title = u'Deník N'
|
||||
__author__ = 'Robert Mihaly'
|
||||
|
@ -31,13 +31,13 @@ class deredactie(BasicNewsRecipe):
|
||||
catnames = {}
|
||||
soup = self.index_to_soup(
|
||||
'http://www.deredactie.be/cm/vrtnieuws.deutsch')
|
||||
for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}):
|
||||
for elem in soup.findAll('li', attrs={'id': re.compile('^navItem[2-9]')}):
|
||||
a = elem.find('a', href=True)
|
||||
m = re.search('(?<=/)[^/]*$', a['href'])
|
||||
cat = str(m.group(0))
|
||||
categories.append(cat)
|
||||
catnames[cat] = a['title']
|
||||
self.log("found cat %s\n" % catnames[cat])
|
||||
self.log('found cat %s\n' % catnames[cat])
|
||||
|
||||
feeds = []
|
||||
|
||||
@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe):
|
||||
articles = []
|
||||
soup = self.index_to_soup(
|
||||
'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
|
||||
for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
|
||||
for a in soup.findAll('a', attrs={'href': re.compile('deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
|
||||
skip_this_article = False
|
||||
url = a['href'].strip()
|
||||
if url.startswith('/'):
|
||||
@ -55,12 +55,12 @@ class deredactie(BasicNewsRecipe):
|
||||
for article in articles:
|
||||
if article['url'] == url:
|
||||
skip_this_article = True
|
||||
self.log("SKIPPING DUP %s" % url)
|
||||
self.log('SKIPPING DUP %s' % url)
|
||||
break
|
||||
if skip_this_article:
|
||||
continue
|
||||
articles.append(myarticle)
|
||||
self.log("Adding URL %s\n" % url)
|
||||
self.log('Adding URL %s\n' % url)
|
||||
if articles:
|
||||
feeds.append((catnames[cat], articles))
|
||||
return feeds
|
||||
|
@ -34,7 +34,7 @@ class Volkskrant(BasicNewsRecipe):
|
||||
dict(id=['like', 'dlik']),
|
||||
dict(name=['script', 'noscript', 'style']),
|
||||
]
|
||||
remove_attributes = ["class", "id", "name", "style"]
|
||||
remove_attributes = ['class', 'id', 'name', 'style']
|
||||
encoding = 'utf-8'
|
||||
no_stylesheets = True
|
||||
ignore_duplicate_articles = {'url'}
|
||||
@ -88,7 +88,7 @@ class Volkskrant(BasicNewsRecipe):
|
||||
)
|
||||
)
|
||||
|
||||
sections = [("Numărul curent", articles)]
|
||||
sections = [('Numărul curent', articles)]
|
||||
return sections
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = "GPL v3"
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
"""DistroWatch Weekly"""
|
||||
'''DistroWatch Weekly'''
|
||||
|
||||
import datetime
|
||||
|
||||
@ -10,28 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class DistroWatchWeekly(BasicNewsRecipe):
|
||||
title = "DistroWatch Weekly"
|
||||
description = "Weekly news about Linux distributions"
|
||||
category = "Linux, Technology, News"
|
||||
title = 'DistroWatch Weekly'
|
||||
description = 'Weekly news about Linux distributions'
|
||||
category = 'Linux, Technology, News'
|
||||
oldest_article = 14
|
||||
language = "en"
|
||||
language = 'en'
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
timefmt = " [%A, %d %B, %Y]"
|
||||
timefmt = ' [%A, %d %B, %Y]'
|
||||
auto_cleanup = False
|
||||
|
||||
keep_only_tags = [
|
||||
dict(
|
||||
attrs={
|
||||
"class":
|
||||
lambda x: x and ("News1" in x)
|
||||
'class':
|
||||
lambda x: x and ('News1' in x)
|
||||
}
|
||||
)
|
||||
]
|
||||
|
||||
def _get_mag_date(self):
|
||||
"""Return date of latest weekly issue."""
|
||||
'''Return date of latest weekly issue.'''
|
||||
|
||||
d = datetime.date(2022, 6, 20)
|
||||
t = datetime.date.today()
|
||||
@ -45,17 +45,17 @@ class DistroWatchWeekly(BasicNewsRecipe):
|
||||
|
||||
# Get URL of latest mag page
|
||||
ld = self._get_mag_date()
|
||||
url = ld.strftime("https://distrowatch.com/weekly.php?issue=%Y%m%d")
|
||||
url = ld.strftime('https://distrowatch.com/weekly.php?issue=%Y%m%d')
|
||||
url = url.lower()
|
||||
title = ld.strftime("DistroWatch Weekly for %Y-%m-%d")
|
||||
title = ld.strftime('DistroWatch Weekly for %Y-%m-%d')
|
||||
|
||||
# Get articles
|
||||
stories = [{
|
||||
"url": url,
|
||||
"title": title,
|
||||
'url': url,
|
||||
'title': title,
|
||||
},]
|
||||
index = [
|
||||
("Articles", stories),
|
||||
('Articles', stories),
|
||||
]
|
||||
|
||||
return index
|
||||
|
@ -23,7 +23,7 @@ def new_tag(soup, name, attrs=()):
|
||||
class DnevnikCro(BasicNewsRecipe):
|
||||
title = 'Dnevnik - Hr'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "Vijesti iz Hrvatske"
|
||||
description = 'Vijesti iz Hrvatske'
|
||||
publisher = 'Dnevnik.hr'
|
||||
category = 'news, politics, Croatia'
|
||||
oldest_article = 2
|
||||
@ -67,9 +67,9 @@ class DnevnikCro(BasicNewsRecipe):
|
||||
del item[attrib]
|
||||
|
||||
mlang = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
('http-equiv', 'Content-Language'), ('content', self.lang)])
|
||||
mcharset = new_tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
return self.adeify_images(soup)
|
||||
|
@ -4,15 +4,15 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
# Comment out sections you are not interested in
|
||||
sections = [
|
||||
("정치", "politics"),
|
||||
("사회", "national"),
|
||||
("경제", "economy"),
|
||||
("국제", "international"),
|
||||
("사설칼럼", "editorials"),
|
||||
("의학과학", "science"),
|
||||
("문화연예", "culture"),
|
||||
("스포츠", "sports"),
|
||||
("사람속으로", "inmul")
|
||||
('정치', 'politics'),
|
||||
('사회', 'national'),
|
||||
('경제', 'economy'),
|
||||
('국제', 'international'),
|
||||
('사설칼럼', 'editorials'),
|
||||
('의학과학', 'science'),
|
||||
('문화연예', 'culture'),
|
||||
('스포츠', 'sports'),
|
||||
('사람속으로', 'inmul')
|
||||
# Following sections are marked as marked optional
|
||||
# as default. Uncomment to enable.
|
||||
# , (u'건강', 'health')
|
||||
@ -26,24 +26,24 @@ sections = [
|
||||
|
||||
|
||||
class Donga(BasicNewsRecipe):
|
||||
language = "ko"
|
||||
title = "동아일보"
|
||||
description = "동아일보 기사"
|
||||
__author__ = "Minsik Cho"
|
||||
ignore_duplicate_articles = {"title", "url"}
|
||||
language = 'ko'
|
||||
title = '동아일보'
|
||||
description = '동아일보 기사'
|
||||
__author__ = 'Minsik Cho'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
compress_news_images = True
|
||||
no_stylesheets = True
|
||||
oldest_article = 2
|
||||
encoding = "utf-8"
|
||||
encoding = 'utf-8'
|
||||
|
||||
# RSS Feed in syntax:
|
||||
# https://rss.donga.com/[sections].xml
|
||||
feeds = [(title, "https://rss.donga.com/" + section + ".xml") for (title, section) in sections]
|
||||
feeds = [(title, 'https://rss.donga.com/' + section + '.xml') for (title, section) in sections]
|
||||
|
||||
# Remove logo and print buttons
|
||||
remove_tags = [
|
||||
dict(name="div", attrs={"class": "popHeaderWrap"}),
|
||||
dict(name="div", attrs={"class": "etc"}),
|
||||
dict(name='div', attrs={'class': 'popHeaderWrap'}),
|
||||
dict(name='div', attrs={'class': 'etc'}),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
@ -51,8 +51,8 @@ class Donga(BasicNewsRecipe):
|
||||
# https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1
|
||||
# Return print version url with syntax:
|
||||
# https://www.donga.com/news/View?gid=[gid]&date=[date]
|
||||
reobject = re.search("(?<=/all/)([0-9]*)/([0-9]*)", url)
|
||||
reobject = re.search('(?<=/all/)([0-9]*)/([0-9]*)', url)
|
||||
date = reobject.group(1)
|
||||
gid = reobject.group(2)
|
||||
|
||||
return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date
|
||||
return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date
|
||||
|
@ -107,11 +107,11 @@ class DRNyheder(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [
|
||||
|
||||
dict(name="h1", attrs={'class': 'dre-article-title__heading'}), # Title
|
||||
dict(name="div", attrs={'class': 'dre-article-byline'}), # Author
|
||||
dict(name="figure", attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images
|
||||
dict(name="p", attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article
|
||||
dict(name="article", attrs={'itemtype': 'http://schema.org/NewsArticle'}),
|
||||
dict(name='h1', attrs={'class': 'dre-article-title__heading'}), # Title
|
||||
dict(name='div', attrs={'class': 'dre-article-byline'}), # Author
|
||||
dict(name='figure', attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images
|
||||
dict(name='p', attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article
|
||||
dict(name='article', attrs={'itemtype': 'http://schema.org/NewsArticle'}),
|
||||
#dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}),
|
||||
#dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}),
|
||||
#dict(name="div", attrs={'class': 'dre-speech'}),
|
||||
@ -123,7 +123,7 @@ class DRNyheder(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class': [
|
||||
'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container',
|
||||
'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}),
|
||||
dict(name="source"),
|
||||
dict(name='source'),
|
||||
#dict(name='menu', attrs={'class': 'share'}),
|
||||
#dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}),
|
||||
]
|
||||
|
@ -63,20 +63,20 @@ class Dzieje(BasicNewsRecipe):
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
feeds.append((u"Wiadomości", self.find_articles(
|
||||
feeds.append((u'Wiadomości', self.find_articles(
|
||||
'http://dzieje.pl/wiadomosci')))
|
||||
feeds.append((u"Kultura i sztuka", self.find_articles(
|
||||
feeds.append((u'Kultura i sztuka', self.find_articles(
|
||||
'http://dzieje.pl/kulturaisztuka')))
|
||||
feeds.append((u"Film", self.find_articles('http://dzieje.pl/kino')))
|
||||
feeds.append((u"Rozmaitości historyczne",
|
||||
feeds.append((u'Film', self.find_articles('http://dzieje.pl/kino')))
|
||||
feeds.append((u'Rozmaitości historyczne',
|
||||
self.find_articles('http://dzieje.pl/rozmaitości')))
|
||||
feeds.append(
|
||||
(u"Książka", self.find_articles('http://dzieje.pl/ksiazka')))
|
||||
(u'Książka', self.find_articles('http://dzieje.pl/ksiazka')))
|
||||
feeds.append(
|
||||
(u"Wystawa", self.find_articles('http://dzieje.pl/wystawa')))
|
||||
feeds.append((u"Edukacja", self.find_articles(
|
||||
(u'Wystawa', self.find_articles('http://dzieje.pl/wystawa')))
|
||||
feeds.append((u'Edukacja', self.find_articles(
|
||||
'http://dzieje.pl/edukacja')))
|
||||
feeds.append((u"Dzieje się", self.find_articles(
|
||||
feeds.append((u'Dzieje się', self.find_articles(
|
||||
'http://dzieje.pl/wydarzenia')))
|
||||
return feeds
|
||||
|
||||
|
@ -21,7 +21,7 @@ class Dziennik_pl(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
|
||||
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile(
|
||||
preprocess_regexps = [(re.compile('Komentarze:'), lambda m: ''), (re.compile(
|
||||
'<p><strong><a href=".*?">>>> CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
|
||||
keep_only_tags = [dict(id='article')]
|
||||
remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501
|
||||
|
@ -120,7 +120,7 @@ class DziennikPolski24(BasicNewsRecipe):
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html')
|
||||
br.select_form(nr=1)
|
||||
br["user_login[login]"] = self.username
|
||||
br['user_login[login]'] = self.username
|
||||
br['user_login[pass]'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -97,8 +97,8 @@ def process_web_node(node):
|
||||
return f'<p>{node.get("textHtml")}</p>'
|
||||
return f'<p>{node.get("text", "")}</p>'
|
||||
elif ntype == 'IMAGE':
|
||||
alt = "" if node.get("altText") is None else node.get("altText")
|
||||
cap = ""
|
||||
alt = '' if node.get('altText') is None else node.get('altText')
|
||||
cap = ''
|
||||
if node.get('caption'):
|
||||
if node['caption'].get('textHtml') is not None:
|
||||
cap = node['caption']['textHtml']
|
||||
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
|
||||
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
||||
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
||||
body += f'<h1>{data["headline"]}</h1>'
|
||||
if data.get("rubric") and data.get("rubric") is not None:
|
||||
if data.get('rubric') and data.get('rubric') is not None:
|
||||
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
__author__ = "Kovid Goyal"
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = (
|
||||
'Global news and current affairs from a European'
|
||||
' perspective. Best downloaded on Friday mornings (GMT)'
|
||||
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
|
||||
dict(attrs={'aria-label': "Article Teaser"}),
|
||||
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||
dict(attrs={'id': 'player'}),
|
||||
dict(attrs={
|
||||
'class': [
|
||||
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
|
||||
if edition_date and isinstance(edition_date, str):
|
||||
return parse_only_date(edition_date, as_utc=False)
|
||||
try:
|
||||
url = self.browser.open("https://www.economist.com/printedition").geturl()
|
||||
url = self.browser.open('https://www.economist.com/printedition').geturl()
|
||||
except Exception as e:
|
||||
self.log('Failed to fetch publication date with error: ' + str(e))
|
||||
return super().publication_date()
|
||||
return parse_only_date(url.split("/")[-1], as_utc=False)
|
||||
return parse_only_date(url.split('/')[-1], as_utc=False)
|
||||
|
||||
def economist_test_article(self):
|
||||
return [('Articles', [{'title':'test',
|
||||
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
|
||||
self.log('Got cover:', self.cover_url, '\n', self.description)
|
||||
|
||||
feeds_dict = defaultdict(list)
|
||||
for part in safe_dict(data, "hasPart", "parts"):
|
||||
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||
try:
|
||||
section = part['articleSection']['internal'][0]['title']
|
||||
except Exception:
|
||||
section = safe_dict(part, 'print', 'section', 'title') or 'section'
|
||||
if section not in feeds_dict:
|
||||
self.log(section)
|
||||
title = safe_dict(part, "title")
|
||||
desc = safe_dict(part, "rubric") or ''
|
||||
sub = safe_dict(part, "flyTitle") or ''
|
||||
title = safe_dict(part, 'title')
|
||||
desc = safe_dict(part, 'rubric') or ''
|
||||
sub = safe_dict(part, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
pt.write(json.dumps(part).encode('utf-8'))
|
||||
pt.close()
|
||||
url = 'file:///' + pt.name
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
self.log('\t', title, '\n\t\t', desc)
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
|
||||
return self.economist_return_index(ans)
|
||||
|
||||
def economist_parse_web_index(self, soup):
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is not None:
|
||||
data = json.loads(script_tag.string)
|
||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
||||
self.description = safe_dict(data, "props", "pageProps", "content", "headline")
|
||||
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']'
|
||||
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace(
|
||||
self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
|
||||
self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
|
||||
self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
|
||||
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
|
||||
self.log('Got cover:', self.cover_url)
|
||||
|
||||
feeds = []
|
||||
|
||||
for part in safe_dict(
|
||||
data, "props", "pageProps", "content", "headerSections"
|
||||
) + safe_dict(data, "props", "pageProps", "content", "sections"):
|
||||
section = safe_dict(part, "name") or ''
|
||||
data, 'props', 'pageProps', 'content', 'headerSections'
|
||||
) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
|
||||
section = safe_dict(part, 'name') or ''
|
||||
if not section:
|
||||
continue
|
||||
self.log(section)
|
||||
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
|
||||
articles = []
|
||||
|
||||
for ar in part['articles']:
|
||||
title = safe_dict(ar, "headline") or ''
|
||||
url = process_url(safe_dict(ar, "url") or '')
|
||||
title = safe_dict(ar, 'headline') or ''
|
||||
url = process_url(safe_dict(ar, 'url') or '')
|
||||
if not title or not url:
|
||||
continue
|
||||
desc = safe_dict(ar, "rubric") or ''
|
||||
sub = safe_dict(ar, "flyTitle") or ''
|
||||
desc = safe_dict(ar, 'rubric') or ''
|
||||
sub = safe_dict(ar, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
|
@ -58,7 +58,7 @@ def load_article_from_json(raw, root):
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')
|
||||
main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical')
|
||||
@ -130,7 +130,7 @@ class Espresso(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
|
||||
dict(attrs={'aria-label': "Article Teaser"}),
|
||||
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||
dict(attrs={
|
||||
'class': [
|
||||
'dblClkTrk', 'ec-article-info', 'share_inline_header',
|
||||
@ -189,13 +189,13 @@ class Espresso(BasicNewsRecipe):
|
||||
self.description = data['rubric']
|
||||
|
||||
ans = []
|
||||
for part in safe_dict(data, "hasPart", "parts"):
|
||||
title = safe_dict(part, "title")
|
||||
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||
title = safe_dict(part, 'title')
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
pt.write(json.dumps(part).encode('utf-8'))
|
||||
pt.close()
|
||||
url = 'file:///' + pt.name
|
||||
ans.append({"title": title, "url": url})
|
||||
ans.append({'title': title, 'url': url})
|
||||
return [('Espresso', ans)]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -63,7 +63,7 @@ def load_article_from_json(raw, root):
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -97,8 +97,8 @@ def process_web_node(node):
|
||||
return f'<p>{node.get("textHtml")}</p>'
|
||||
return f'<p>{node.get("text", "")}</p>'
|
||||
elif ntype == 'IMAGE':
|
||||
alt = "" if node.get("altText") is None else node.get("altText")
|
||||
cap = ""
|
||||
alt = '' if node.get('altText') is None else node.get('altText')
|
||||
cap = ''
|
||||
if node.get('caption'):
|
||||
if node['caption'].get('textHtml') is not None:
|
||||
cap = node['caption']['textHtml']
|
||||
@ -123,7 +123,7 @@ def load_article_from_web_json(raw):
|
||||
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
||||
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
||||
body += f'<h1>{data["headline"]}</h1>'
|
||||
if data.get("rubric") and data.get("rubric") is not None:
|
||||
if data.get('rubric') and data.get('rubric') is not None:
|
||||
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
|
||||
try:
|
||||
date = data['dateModified']
|
||||
@ -186,7 +186,7 @@ class Economist(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
__author__ = "Kovid Goyal"
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = (
|
||||
'Global news and current affairs from a European'
|
||||
' perspective. Best downloaded on Friday mornings (GMT)'
|
||||
@ -199,7 +199,7 @@ class Economist(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
|
||||
dict(attrs={'aria-label': "Article Teaser"}),
|
||||
dict(attrs={'aria-label': 'Article Teaser'}),
|
||||
dict(attrs={'id': 'player'}),
|
||||
dict(attrs={
|
||||
'class': [
|
||||
@ -266,11 +266,11 @@ class Economist(BasicNewsRecipe):
|
||||
if edition_date and isinstance(edition_date, str):
|
||||
return parse_only_date(edition_date, as_utc=False)
|
||||
try:
|
||||
url = self.browser.open("https://www.economist.com/printedition").geturl()
|
||||
url = self.browser.open('https://www.economist.com/printedition').geturl()
|
||||
except Exception as e:
|
||||
self.log('Failed to fetch publication date with error: ' + str(e))
|
||||
return super().publication_date()
|
||||
return parse_only_date(url.split("/")[-1], as_utc=False)
|
||||
return parse_only_date(url.split('/')[-1], as_utc=False)
|
||||
|
||||
def economist_test_article(self):
|
||||
return [('Articles', [{'title':'test',
|
||||
@ -364,23 +364,23 @@ class Economist(BasicNewsRecipe):
|
||||
self.log('Got cover:', self.cover_url, '\n', self.description)
|
||||
|
||||
feeds_dict = defaultdict(list)
|
||||
for part in safe_dict(data, "hasPart", "parts"):
|
||||
for part in safe_dict(data, 'hasPart', 'parts'):
|
||||
try:
|
||||
section = part['articleSection']['internal'][0]['title']
|
||||
except Exception:
|
||||
section = safe_dict(part, 'print', 'section', 'title') or 'section'
|
||||
if section not in feeds_dict:
|
||||
self.log(section)
|
||||
title = safe_dict(part, "title")
|
||||
desc = safe_dict(part, "rubric") or ''
|
||||
sub = safe_dict(part, "flyTitle") or ''
|
||||
title = safe_dict(part, 'title')
|
||||
desc = safe_dict(part, 'rubric') or ''
|
||||
sub = safe_dict(part, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
pt.write(json.dumps(part).encode('utf-8'))
|
||||
pt.close()
|
||||
url = 'file:///' + pt.name
|
||||
feeds_dict[section].append({"title": title, "url": url, "description": desc})
|
||||
feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
|
||||
self.log('\t', title, '\n\t\t', desc)
|
||||
return [(section, articles) for section, articles in feeds_dict.items()]
|
||||
|
||||
@ -513,22 +513,22 @@ class Economist(BasicNewsRecipe):
|
||||
return self.economist_return_index(ans)
|
||||
|
||||
def economist_parse_web_index(self, soup):
|
||||
script_tag = soup.find("script", id="__NEXT_DATA__")
|
||||
script_tag = soup.find('script', id='__NEXT_DATA__')
|
||||
if script_tag is not None:
|
||||
data = json.loads(script_tag.string)
|
||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
|
||||
self.description = safe_dict(data, "props", "pageProps", "content", "headline")
|
||||
self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "formattedIssueDate") + ']'
|
||||
self.cover_url = safe_dict(data, "props", "pageProps", "content", "cover", "url").replace(
|
||||
self.description = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
|
||||
self.timefmt = ' [' + safe_dict(data, 'props', 'pageProps', 'content', 'formattedIssueDate') + ']'
|
||||
self.cover_url = safe_dict(data, 'props', 'pageProps', 'content', 'cover', 'url').replace(
|
||||
'economist.com/', 'economist.com/cdn-cgi/image/width=960,quality=80,format=auto/').replace('SQ_', '')
|
||||
self.log('Got cover:', self.cover_url)
|
||||
|
||||
feeds = []
|
||||
|
||||
for part in safe_dict(
|
||||
data, "props", "pageProps", "content", "headerSections"
|
||||
) + safe_dict(data, "props", "pageProps", "content", "sections"):
|
||||
section = safe_dict(part, "name") or ''
|
||||
data, 'props', 'pageProps', 'content', 'headerSections'
|
||||
) + safe_dict(data, 'props', 'pageProps', 'content', 'sections'):
|
||||
section = safe_dict(part, 'name') or ''
|
||||
if not section:
|
||||
continue
|
||||
self.log(section)
|
||||
@ -536,12 +536,12 @@ class Economist(BasicNewsRecipe):
|
||||
articles = []
|
||||
|
||||
for ar in part['articles']:
|
||||
title = safe_dict(ar, "headline") or ''
|
||||
url = process_url(safe_dict(ar, "url") or '')
|
||||
title = safe_dict(ar, 'headline') or ''
|
||||
url = process_url(safe_dict(ar, 'url') or '')
|
||||
if not title or not url:
|
||||
continue
|
||||
desc = safe_dict(ar, "rubric") or ''
|
||||
sub = safe_dict(ar, "flyTitle") or ''
|
||||
desc = safe_dict(ar, 'rubric') or ''
|
||||
sub = safe_dict(ar, 'flyTitle') or ''
|
||||
if sub and section != sub:
|
||||
desc = sub + ' :: ' + desc
|
||||
self.log('\t', title, '\n\t', desc, '\n\t\t', url)
|
||||
|
@ -59,7 +59,7 @@ def load_article_from_json(raw, root):
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
date = data['dateModified']
@ -125,7 +125,7 @@ class EconomistNews(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'

__author__ = "Kovid Goyal"
__author__ = 'Kovid Goyal'
description = (
'Global news and current affairs from a European'
' perspective. Get the latest articles here.'
@ -140,7 +140,7 @@ class EconomistNews(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -234,9 +234,9 @@ class EconomistNews(BasicNewsRecipe):
articles = []

for art in part['hasPart']['parts']:
title = safe_dict(art, "title")
desc = safe_dict(art, "rubric") or ''
sub = safe_dict(art, "flyTitle") or ''
title = safe_dict(art, 'title')
desc = safe_dict(art, 'rubric') or ''
sub = safe_dict(art, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
if not art.get('text'):
@ -249,7 +249,7 @@ class EconomistNews(BasicNewsRecipe):
pt.write(json.dumps(art).encode('utf-8'))
pt.close()
url = 'file:///' + pt.name
articles.append({"title": title, "url": url, "description": desc})
articles.append({'title': title, 'url': url, 'description': desc})
self.log('\t', title, '\n\t\t', desc)
if articles:
feeds.append((section, articles))

@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -112,7 +112,7 @@ class econ_search(BasicNewsRecipe):
title = 'The Economist - Search'
language = 'en'
encoding = 'utf-8'
__author__ = "unkn0wn"
__author__ = 'unkn0wn'
description = (
'Use the Advanced section of the recipe to search.'
)
@ -128,7 +128,7 @@ class econ_search(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id':'player'}),
dict(attrs={
'class': [

@ -23,8 +23,8 @@ def process_node(node):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""
alt = '' if node.get('altText') is None else node.get('altText')
cap = ''
if node.get('caption'):
if node['caption'].get('textHtml') is not None:
cap = node['caption']['textHtml']
@ -122,7 +122,7 @@ class EconomistWorld(BasicNewsRecipe):
encoding = 'utf-8'
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'

__author__ = "unkn0wn"
__author__ = 'unkn0wn'
description = (
'The World Ahead is The Economist’s future-gazing publication. It prepares audiences for what is to '
'come with mind-stretching insights and expert analysis—all in The Economist’s clear, elegant style.'
@ -136,7 +136,7 @@ class EconomistWorld(BasicNewsRecipe):
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer', 'svg']),
dict(attrs={'aria-label': "Article Teaser"}),
dict(attrs={'aria-label': 'Article Teaser'}),
dict(attrs={'id': 'player'}),
dict(attrs={
'class': [
@ -205,24 +205,24 @@ class EconomistWorld(BasicNewsRecipe):
return self.economist_return_index(ans)

def economist_parse_index(self, soup):
script_tag = soup.find("script", id="__NEXT_DATA__")
script_tag = soup.find('script', id='__NEXT_DATA__')
if script_tag is not None:
data = json.loads(script_tag.string)
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
self.title = safe_dict(data, "props", "pageProps", "content", "headline")
self.title = safe_dict(data, 'props', 'pageProps', 'content', 'headline')
self.cover_url = 'https://mma.prnewswire.com/media/2561745/The_Economist_World_Ahead_2025_cover.jpg?w=600'

feeds = []

for coll in safe_dict(data, "props", "pageProps", "content", "components"):
section = safe_dict(coll, "headline") or ''
for coll in safe_dict(data, 'props', 'pageProps', 'content', 'components'):
section = safe_dict(coll, 'headline') or ''
self.log(section)
articles = []
for part in safe_dict(coll, "items"):
title = safe_dict(part, "headline") or ''
url = process_url(safe_dict(part, "url") or '')
desc = safe_dict(part, "rubric") or ''
sub = safe_dict(part, "flyTitle") or ''
for part in safe_dict(coll, 'items'):
title = safe_dict(part, 'headline') or ''
url = process_url(safe_dict(part, 'url') or '')
desc = safe_dict(part, 'rubric') or ''
sub = safe_dict(part, 'flyTitle') or ''
if sub and section != sub:
desc = sub + ' :: ' + desc
self.log('\t', title, '\n\t', desc, '\n\t\t', url)

@ -164,24 +164,24 @@ class CanWestPaper(BasicNewsRecipe):
continue
break
if daysback == 7:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover

def fixChars(self, string):
# Replace lsquo (\x91)
fixed = re.sub("\x91", "‘", string)
fixed = re.sub('\x91', '‘', string)
# Replace rsquo (\x92)
fixed = re.sub("\x92", "’", fixed)
fixed = re.sub('\x92', '’', fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93", "“", fixed)
fixed = re.sub('\x93', '“', fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94", "”", fixed)
fixed = re.sub('\x94', '”', fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96", "–", fixed)
fixed = re.sub('\x96', '–', fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97", "—", fixed)
fixed = re.sub("’", "’", fixed)
fixed = re.sub('\x97', '—', fixed)
fixed = re.sub('’', '’', fixed)
return fixed

def massageNCXText(self, description):
@ -262,10 +262,10 @@ class CanWestPaper(BasicNewsRecipe):
if url.startswith('/'):
url = self.url_prefix + url
if not url.startswith(self.url_prefix):
print("Rejected " + url)
print('Rejected ' + url)
return
if url in self.url_list:
print("Rejected dup " + url)
print('Rejected dup ' + url)
return
self.url_list.append(url)
title = self.tag_to_string(atag, False)
@ -277,8 +277,8 @@ class CanWestPaper(BasicNewsRecipe):
return
dtag = adiv.find('div', 'content')
description = ''
print("URL " + url)
print("TITLE " + title)
print('URL ' + url)
print('TITLE ' + title)
if dtag is not None:
stag = dtag.span
if stag is not None:
@ -286,18 +286,18 @@ class CanWestPaper(BasicNewsRecipe):
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
print('DESCRIPTION: ' + description)
if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))

def parse_web_index(key, keyurl):
print("Section: " + key + ': ' + self.url_prefix + keyurl)
print('Section: ' + key + ': ' + self.url_prefix + keyurl)
try:
soup = self.index_to_soup(self.url_prefix + keyurl)
except:
print("Section: " + key + ' NOT FOUND')
print('Section: ' + key + ' NOT FOUND')
return
ans.append(key)
mainsoup = soup.find('div', 'bodywrapper')

@ -20,12 +20,12 @@ class AdvancedUserRecipe1311790237(BasicNewsRecipe):
masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
publication_type = 'newspaper'

extra_css = """
extra_css = '''
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""
'''

feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
(u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),

@ -55,9 +55,9 @@ class RevistaElCultural(BasicNewsRecipe):
if url.startswith('/version_papel/' + titleSection + '/'):
url = 'http://www.elcultural.es' + url

self.log('\t\tFound article:', title[0:title.find("|") - 1])
self.log('\t\tFound article:', title[0:title.find('|') - 1])
self.log('\t\t\t', url)
current_articles.append({'title': title[0:title.find("|") - 1], 'url': url,
current_articles.append({'title': title[0:title.find('|') - 1], 'url': url,
'description': '', 'date': ''})

return current_articles

@ -1,51 +1,51 @@
# -*- mode: python; coding: utf-8; -*-
# vim: set syntax=python fileencoding=utf-8

__license__ = "GPL v3"
__copyright__ = "2023, Tomás Di Domenico <tdido at tdido.eu>"
__license__ = 'GPL v3'
__copyright__ = '2023, Tomás Di Domenico <tdido at tdido.eu>'

"""
'''
www.eldiplo.org
"""
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ElDiplo2023(BasicNewsRecipe):
title = "Le Monde Diplomatique - cono sur"
__author__ = "Tomás Di Domenico"
description = "Publicación de Le Monde Diplomatique para el cono sur."
publisher = "Capital Intelectual"
category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World"
title = 'Le Monde Diplomatique - cono sur'
__author__ = 'Tomás Di Domenico'
description = 'Publicación de Le Monde Diplomatique para el cono sur.'
publisher = 'Capital Intelectual'
category = 'News, Politics, Argentina, Uruguay, Paraguay, South America, World'
oldest_article = 31
no_stylesheets = True
encoding = "utf8"
encoding = 'utf8'
use_embedded_content = False
language = "es_AR"
language = 'es_AR'
remove_empty_feeds = True
publication_type = "magazine"
publication_type = 'magazine'
delay = 1
simultaneous_downloads = 1
timeout = 8
needs_subscription = True
ignore_duplicate_articles = {"url"}
ignore_duplicate_articles = {'url'}
temp_files = []
fetch_retries = 10
handle_gzip = True
compress_news_images = True
scale_news_images_to_device = True
masthead_url = (
"https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png"
'https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png'
)
INDEX = "https://www.eldiplo.org/"
INDEX = 'https://www.eldiplo.org/'

conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category}
conversion_options = {'series': 'El Dipló', 'publisher': publisher, 'base_font_size': 8, 'tags': category}

keep_only_tags = [dict(name=["article"])]
keep_only_tags = [dict(name=['article'])]

remove_tags = [dict(name=["button"])]
remove_tags = [dict(name=['button'])]

extra_css = """
extra_css = '''
.entry-title {
text-align: center;
}
@ -67,59 +67,59 @@ class ElDiplo2023(BasicNewsRecipe):
padding-left: 10%;
padding-right: 10%;
}
"""
'''

def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.select_form(id="loginform")
br["log"] = self.username
br["pwd"] = self.password
br.select_form(id='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br

def get_cover_url(self):
soup_index = self.index_to_soup(self.INDEX)
tag_sumario = soup_index.find("span", text="Sumario")
url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]
tag_sumario = soup_index.find('span', text='Sumario')
url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']

soup = self.index_to_soup(url_sumario)

container = soup.find("div", class_="px-16")
url = container.find("img")["src"]
container = soup.find('div', class_='px-16')
url = container.find('img')['src']

return getattr(self, "cover_url", url)
return getattr(self, 'cover_url', url)

def _process_article(self, article):
url = article.find("a", href=True, attrs={"class": "title"})["href"]
title = self.tag_to_string(article).replace("Editorial", "Editorial: ")
url = article.find('a', href=True, attrs={'class': 'title'})['href']
title = self.tag_to_string(article).replace('Editorial', 'Editorial: ')
try:
title, authors = title.split(", por")
authors = f"por {authors}"
title, authors = title.split(', por')
authors = f'por {authors}'
except ValueError:
authors = ""
self.log("title: ", title, " url: ", url)
return {"title": title, "url": url, "description": authors, "date": ""}
authors = ''
self.log('title: ', title, ' url: ', url)
return {'title': title, 'url': url, 'description': authors, 'date': ''}

def preprocess_html(self, soup):
font_size = "90%"
font_size = '90%'

# make the footnotes smaller
for p in soup.find("div", id="nota_pie").findChildren("p", recursive=False):
p["style"] = f"font-size: {font_size};"
for p in soup.find('div', id='nota_pie').findChildren('p', recursive=False):
p['style'] = f'font-size: {font_size};'

return soup

def parse_index(self):
soup_index = self.index_to_soup(self.INDEX)

tag_sumario = soup_index.find("span", text="Sumario")
tag_sumario = soup_index.find('span', text='Sumario')

if tag_sumario is None:
return None

url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]
url_sumario = 'https://www.eldiplo.org' + tag_sumario.parent['href']
self.log(url_sumario)

soup_sumario = self.index_to_soup(url_sumario)
@ -128,20 +128,20 @@ class ElDiplo2023(BasicNewsRecipe):
articles = []
dossiers = []

sumario = soup_sumario.find("div", class_="sumario")
sumario = soup_sumario.find('div', class_='sumario')

for section in sumario.find_all("div", recursive=False):
classes = section.attrs["class"]
for section in sumario.find_all('div', recursive=False):
classes = section.attrs['class']

if "dossier" in classes:
dtitle = self.tag_to_string(section.find("h3"))
if 'dossier' in classes:
dtitle = self.tag_to_string(section.find('h3'))
darticles = []
for article in section.find_all("div", recursive=False):
for article in section.find_all('div', recursive=False):
darticles.append(self._process_article(article))
dossiers.append((dtitle, darticles))
else:
articles.append(self._process_article(section))
feeds.append(("Artículos", articles))
feeds.append(('Artículos', articles))
feeds += dossiers

return feeds

@ -119,11 +119,11 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;}
try:
br.open(cover)
except:
self.log("\nCover unavailable")
self.log('\nCover unavailable')
cover = None
return cover

def image_url_processor(cls, baseurl, url):
splitUrl = url.split("cloudfront-")
splitUrl = url.split('cloudfront-')
parsedUrl = 'https://cloudfront-' + splitUrl[1]
return parsedUrl

@ -36,7 +36,7 @@ class ElPaisBabelia(BasicNewsRecipe):
title = self.tag_to_string(post)
if str(post).find('class=') > 0:
klass = post['class']
if klass != "":
if klass != '':
self.log()
self.log('--> post: ', post)
self.log('--> url: ', url)

@ -28,12 +28,12 @@ class elcohetealaluna(BasicNewsRecipe):
compress_news_images = True
masthead_url = 'https://www.elcohetealaluna.com/wp-content/uploads/2018/06/logo-menu.png'

extra_css = """
extra_css = '''
body{font-family: Georgia, Times, "Times New Roman", serif}
h1,h2,.post-author-name{font-family: Oswald, sans-serif}
h2{color: gray}
img{margin-top:1em; margin-bottom: 1em; display:block}
"""
'''

conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

@ -28,10 +28,10 @@ class ElCronistaArg(BasicNewsRecipe):
auto_cleanup_keep = '//div[@class="header-bottom"] | //h1 | //h2'
ignore_duplicate_articles = {'url'}
masthead_url = 'https://www.cronista.com/export/sites/diarioelcronista/arte/v2/lg_cronista_footer.png_665574830.png'
extra_css = """
extra_css = '''
body{font-family: 'Source Sans Pro', sans-serif}
h1,h2,h3,h4{font-family: 'Libre Baskerville', serif}
"""
'''

conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language

@ -29,5 +29,5 @@ class Elektroda(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
article.title = article.title[article.title.find("::") + 3:]
article.title = article.title[article.title.find('::') + 3:]
return feeds

@ -35,14 +35,14 @@ class ElMundo(BasicNewsRecipe):
articles_are_obfuscated = True
auto_cleanup = True
temp_files = []
extra_css = """
extra_css = '''
body{font-family: "PT serif",Georgia,serif,times}
.metadata_noticia{font-size: small}
.pestana_GDP{font-size: small; font-weight:bold}
h1 {color: #333333; font-family: "Clear Sans Bold",Arial,sans-serif,helvetica}
.hora{color: red}
.update{color: gray}
"""
'''

conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
@ -83,14 +83,14 @@ class ElMundo(BasicNewsRecipe):
cover = self.masthead_url
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
month = '%.2d' % st.tm_mon
day = '%.2d' % st.tm_mday
cover = 'http://img.kiosko.net/' + year + '/' + \
month + '/' + day + '/es/elmundo.750.jpg'
try:
self.browser.open(cover)
except:
self.log("\nPortada no disponible")
self.log('\nPortada no disponible')
return cover

def get_obfuscated_article(self, url):
@ -103,7 +103,7 @@ class ElMundo(BasicNewsRecipe):
html = response.read()
count = tries
except:
print("Retrying download...")
print('Retrying download...')
count += 1
if html is not None:
tfile = PersistentTemporaryFile('_fa.html')

@ -66,7 +66,7 @@ class ElPeriodico_cat(BasicNewsRecipe):

def preprocess_html(self, soup):
mcharset = new_tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
('http-equiv', 'Content-Type'), ('content', 'text/html; charset=utf-8')])
soup.head.insert(0, mcharset)
for item in soup.findAll(style=True):
del item['style']

@ -18,18 +18,18 @@ class En_Globes_Recipe(BasicNewsRecipe):
max_articles_per_feed = 100

feeds = [
(u"Main Headlines", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942"),
(u"Israeli stocks on Wall Street", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392"),
(u"All news", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725"),
(u"Macro economics", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389"),
(u"Aerospace and defense", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380"),
(u"Real estate", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385"),
(u"Energy and water", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382"),
(u"Start-ups and venture capital", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397"),
(u"Financial services", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383"),
(u"Tel Aviv markets", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404"),
(u"Healthcare", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377"),
(u"Telecommunications", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386"),
(u"Information technology", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376"),
(u"Transport and infrastructure", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388"),
(u'Main Headlines', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942'),
(u'Israeli stocks on Wall Street', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392'),
(u'All news', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725'),
(u'Macro economics', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389'),
(u'Aerospace and defense', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380'),
(u'Real estate', u'https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385'),
(u'Energy and water', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382'),
(u'Start-ups and venture capital', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397'),
(u'Financial services', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383'),
(u'Tel Aviv markets', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404'),
(u'Healthcare', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377'),
(u'Telecommunications', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386'),
(u'Information technology', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376'),
(u'Transport and infrastructure', u'https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388'),
]

@ -87,8 +87,8 @@ class Engadget(BasicNewsRecipe):
except KeyError:
continue
# Reorder the "title" and "content" elements
title_div = soup.find("div", {"class": "caas-title-wrapper"})
content_div = soup.find("div", {"class": "caas-content-wrapper"})
title_div = soup.find('div', {'class': 'caas-title-wrapper'})
content_div = soup.find('div', {'class': 'caas-content-wrapper'})
if title_div and content_div:
soup.body.clear()
soup.body.append(title_div)
Some files were not shown because too many files have changed in this diff.