various whitespace (extra-edit)

!partial 'E203,E222,E241,E271,E272'
This commit is contained in:
un-pogaz 2025-01-24 11:14:24 +01:00
parent 41cee6f02d
commit ed2930712d
386 changed files with 2666 additions and 2679 deletions

View File

@ -61,7 +61,7 @@ if use_archive:
data = json.loads(raw)
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
try:
@ -96,7 +96,7 @@ else:
for child in tuple(body):
body.remove(child)
article = E(body, 'article')
E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', replace_entities(data['headline']))
E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
if data['dateline'] is None:

View File

@ -32,7 +32,7 @@ class aktualneRecipe(BasicNewsRecipe):
remove_attributes = []
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
filter_regexps = [r'img.aktualne.centrum.cz']
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
dict(name='div', attrs={'class': 'itemcomment id0'}),
dict(name='div', attrs={'class': 'hlavicka'}),

View File

@ -55,7 +55,7 @@ class AlJazeera(BasicNewsRecipe):
u'http://www.aljazeera.com/xml/rss/all.xml')]
def get_article_url(self, article):
artlurl = article.get('link', None)
artlurl = article.get('link', None)
return artlurl
def preprocess_html(self, soup):

View File

@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
articles = []
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
href = aTag.get('href',False)
if not href.encode('utf-8').startswith(url.encode('utf-8')) :
if not href.encode('utf-8').startswith(url.encode('utf-8')):
continue # not in same section
title = href.split('/')[-1].split('-')[0]

View File

@ -119,7 +119,7 @@ class barrons(BasicNewsRecipe):
byl = articles.find(**prefixed_classes('BarronsTheme--byline--'))
if byl:
desc += self.tag_to_string(byl)
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
if ttr:
desc += self.tag_to_string(ttr)
summ = articles.find(**prefixed_classes('BarronsTheme--summary--'))

View File

@ -26,7 +26,7 @@ class BeforeWeGo(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
# remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),
dict(name='span', attrs={'class': 'meta-comment-count'}),
dict(name='p', attrs={'id': 'breadcrumbs'})

View File

@ -33,9 +33,9 @@ class bleskRecipe(BasicNewsRecipe):
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
dict(name='div', attrs={'id': ['partHeader']}),
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
dict(name='div', attrs={'id': ['partHeader']}),
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*',
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]

View File

@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -22,7 +22,7 @@ class Cherta(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class':'single-page__footer-info'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'single-content-link'}),
dict(name='div', attrs={'class': 'single-page__footer-info_links clearfix'}),
dict(name='div', attrs={'class': 'single-article-tags-wrapper'})

View File

@ -85,11 +85,11 @@ class Clarin(BasicNewsRecipe):
self.oldest_article = float(d)
keep_only_tags = [
dict(name='p' , attrs={'class' : 'volanta'}),
dict(name='h1' , attrs={'id': 'title'}),
dict(name='div', attrs={'class' : 'bajada'}),
dict(name='div', attrs={'id' : 'galeria-trigger'}),
dict(name='div', attrs={'class' : 'body-nota'})
dict(name='p', attrs={'class': 'volanta'}),
dict(name='h1', attrs={'id': 'title'}),
dict(name='div', attrs={'class': 'bajada'}),
dict(name='div', attrs={'id': 'galeria-trigger'}),
dict(name='div', attrs={'class': 'body-nota'})
]

View File

@ -22,7 +22,7 @@ class Coda(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'article'})
remove_tags = [
remove_tags = [
dict(name='li', attrs={'class': 'material-meta__type'}),
dict(name='div', attrs={'class': 'more'})
]

View File

@ -25,7 +25,7 @@ class ComputerWeekly(BasicNewsRecipe):
('Financial services IT news', 'https://www.computerweekly.com/rss/Financial-services-IT-news.xml'),
('Public sector IT news', 'https://www.computerweekly.com/rss/Public-sector-IT-news.xml'),
('Enterprise software', 'https://www.computerweekly.com/rss/Enterprise-software.xml'),
('SME IT news' , 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
('SME IT news', 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
('Datacenter and cloud computing', 'https://www.computerweekly.com/rss/Datacentre-and-cloud-computing.xml'),
('Storage', 'https://www.computerweekly.com/rss/Storage.xml'),
('Information Management', 'https://www.computerweekly.com/rss/Information-management.xml'),

View File

@ -27,25 +27,18 @@ class AdvancedUserRecipe(BasicNewsRecipe):
remove_tags_after = dict(name='p', attrs={'class': ['firma-redazione']})
feeds = [
(u'Politica',
u'http://contropiano.org/news/politica-news/feed'),
(u'Internazionale',
u'http://contropiano.org/news/internazionale-news/feed'),
(u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'),
(u'Economia',
u'http://contropiano.org/news/news-economia/feed'),
(u'Ambiente',
u'http://contropiano.org/news/ambiente-news/feed'),
(u'Scienza',
u'http://contropiano.org/news/scienza-news/feed'),
(u'Cultura',
u'http://contropiano.org/news/cultura-news/feed'),
(u'Politica', u'http://contropiano.org/news/politica-news/feed'),
(u'Internazionale', u'http://contropiano.org/news/internazionale-news/feed'),
(u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'),
(u'Economia', u'http://contropiano.org/news/news-economia/feed'),
(u'Ambiente', u'http://contropiano.org/news/ambiente-news/feed'),
(u'Scienza', u'http://contropiano.org/news/scienza-news/feed'),
(u'Cultura', u'http://contropiano.org/news/cultura-news/feed'),
(u'Locali', u'http://contropiano.org/regionali/feed'),
(u'Lavoro', u'http://contropiano.org/news/lavoro-conflitto-news/feed'),
(u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'),
(u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'),
(u'Interventi', u'http://contropiano.org/interventi/feed'),
(u'Documenti', u'http://contropiano.org/documenti/feed'),
(u'Vignette', u'http://contropiano.org/vignette/feed'),
(u'Altro',
u'http://contropiano.org/altro/feed')
(u'Altro', u'http://contropiano.org/altro/feed')
]

View File

@ -31,8 +31,8 @@ class Cumhuriyet(BasicNewsRecipe):
]
feeds = [
('Gundem', 'https://www.cumhuriyet.com.tr/rss/9999'),
('Dünya', 'https://www.cumhuriyet.com.tr/rss/4'),
('Gundem', 'https://www.cumhuriyet.com.tr/rss/9999'),
('Dünya', 'https://www.cumhuriyet.com.tr/rss/4'),
('Türkiye', 'https://www.cumhuriyet.com.tr/rss/3'),
('Ekonomi', 'https://www.cumhuriyet.com.tr/rss/5'),
('Kultur Sanat', 'https://www.cumhuriyet.com.tr/rss/6'),

View File

@ -63,7 +63,7 @@ class DeGentenaarOnline(BasicNewsRecipe):
return url.replace('/Detail.aspx?articleid', '/PrintArticle.aspx?ArticleID')
def get_article_url(self, article):
return article.get('guid', None)
return article.get('guid', None)
def preprocess_html(self, soup):
del soup.body['onload']

View File

@ -25,4 +25,4 @@ class denikReferendumRecipe(BasicNewsRecipe):
remove_tags = [dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}),
dict(name='h3', attrs={'class': 'head alt'})]
keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
keep_only_tags = [dict(name='div', attrs={'id': ['content']})]

View File

@ -29,7 +29,7 @@ class AdvancedUserRecipe1432200863(BasicNewsRecipe):
max_articles_per_feed = 100
auto_cleanup = False
extra_css = '''
extra_css = '''
h1, h2 {font-size: 1.6em; text-align: left}
.article-header-description {font-size: 1em; font-style: italic; font-weight: normal;margin-bottom: 1em}
.b-image-figure, .caption-figure.is-left, .b-image-credits {font-size: .75em; font-weight: normal;margin-bottom: .75em}

View File

@ -24,7 +24,7 @@ class WiComix(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'article__body'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'article__reference article__reference_header'}),
dict(name='div', attrs={'class': 'my-lg-5'}),
dict(name='div', attrs={'class': 'video '}),

View File

@ -22,7 +22,7 @@ class EchoMsk(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}),
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}),
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'})

View File

@ -57,7 +57,7 @@ def load_article_from_json(raw, root):
data = json.loads(raw)
body = root.xpath('//body')[0]
article = E(body, 'article')
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')

View File

@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -114,7 +114,7 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;}
from datetime import date
cover = ('https://srv00.epimg.net/pdf/elpais/snapshot/' +
str(date.today().year) + '/' + date.today().strftime('%m') + '/elpais/' +
str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg')
str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg')
br = BasicNewsRecipe.get_browser(self)
try:
br.open(cover)

View File

@ -13,7 +13,7 @@ class EpochTimes(BasicNewsRecipe):
max_articles_per_feed = 20
ignore_duplicate_articles = {'url'}
remove_attributes = ['height', 'width', 'style']
remove_empty_feeds = True
remove_empty_feeds = True
no_stylesheets = True
resolve_internal_links = True
masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png'

View File

@ -45,7 +45,7 @@ class FastCompany(BasicNewsRecipe):
feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')]
def get_article_url(self, article):
return article.get('guid', None)
return article.get('guid', None)
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang

View File

@ -93,24 +93,24 @@ class FazNet(BasicNewsRecipe):
# original by Armin Geller
# overhaul to deal with changes in the faz.net websites
title = 'FAZ.NET'
__author__ = 'Unknown'
description = 'Frankfurter Allgemeine Zeitung'
publisher = 'Frankfurter Allgemeine Zeitung GmbH'
category = 'news, politics, Germany'
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg'
encoding = 'utf-8'
language = 'de'
ignore_duplicate_articles = {'title', 'url'}
max_articles_per_feed = 30
no_stylesheets = True
remove_javascript = True
scale_news_images = (10,100)
delay = 1
title = 'FAZ.NET'
__author__ = 'Unknown'
description = 'Frankfurter Allgemeine Zeitung'
publisher = 'Frankfurter Allgemeine Zeitung GmbH'
category = 'news, politics, Germany'
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg'
encoding = 'utf-8'
language = 'de'
ignore_duplicate_articles = {'title', 'url'}
max_articles_per_feed = 30
no_stylesheets = True
remove_javascript = True
scale_news_images = (10,100)
delay = 1
test_feed = 'https://www.faz.net/rss/aktuell/feuilleton/kunst-und-architektur/berlinische-galerie-zeigt-edvard-munch-die-ganze-gefuehlsskala-des-lebens-19180631.html?printPagedArticle=true#pageIndex_2'
extra_css = '''
extra_css = '''
.header-title,.scrolly-title {font-size: 1.5em; font-weight:bold; text-align:left;}
.quote {font-size: 1.5em; font-weight:bold; text-align:center;}
.author {font-size: 0.7em; font-weight:bold; text-align:center; display:block;

View File

@ -26,7 +26,7 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe):
max_articles_per_feed = 50
auto_cleanup = False
feeds = [
feeds = [
(u'Inhalt:', u'https://www.fluter.de/rss.xml')
]
@ -38,6 +38,6 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe):
dict(name='h2', attrs={'class':'element-invisible'})
]
extra_css = '''
extra_css = '''
.field-group-format, .group_additional_info, .additional-info {display: inline-block; min-width: 8rem; text-align: center}
'''

View File

@ -125,7 +125,7 @@ img { background: none !important; float: none; margin: 0px; }
break
elif strpost.startswith('<a href'):
url = post['href']
if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')) :
if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')):
title = self.tag_to_string(post)
self.log()
self.log('--> post: ', post)

View File

@ -25,7 +25,7 @@ class FootballUA(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'bottom-info'}),
dict(name='div', attrs={'class': 'social-buttons'})
]

View File

@ -188,7 +188,7 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
for by in soup.findAll(**classes('topper__byline topper__date font-style-italic')):
by.name = 'div'
for img in soup.find_all('img', attrs={'srcset': True}):
img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x',img['srcset'].split()[0])
img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x', img['srcset'].split()[0])
return soup
def get_browser(self):

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -33,7 +33,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
# remove_tags = [
# remove_tags = [
# dict(name='div', attrs={'class': 'footer-content'}),
# ]

View File

@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
# Ukrainian version only
remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'footer-content'}),
]

View File

@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe):
remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'mt5'}),
dict(name='div', attrs={'class': 'interview-block'}),
dict(name='p', attrs={'id': 'mce_0'}),

View File

@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe):
remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'mt5'}),
dict(name='div', attrs={'class': 'interview-block'}),
dict(name='p', attrs={'id': 'mce_0'}),

View File

@ -21,7 +21,7 @@ class GeekCity(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'term-badges floated'}),
dict(name='div', attrs={'class': 'post-meta single-post-meta'}),
dict(name='div', attrs={'class': 'post-share single-post-share top-share clearfix style-1'}),

View File

@ -22,7 +22,7 @@ class Gorky(BasicNewsRecipe):
remove_tags_after = dict(name='footer')
remove_tags = [
remove_tags = [
dict(name='footer'),
dict(name='nav', attrs={'class': 'navbar'}),
dict(name='div', attrs={'class': 'hide'}),

View File

@ -28,7 +28,7 @@ class LiveHindustan(BasicNewsRecipe):
remove_tags_after = [classes('stry-bdy')]
feeds = [
('प्रमुख खबरें' ,'https://feed.livehindustan.com/rss/3127'),
('प्रमुख खबरें', 'https://feed.livehindustan.com/rss/3127'),
('देश', 'https://feed.livehindustan.com/rss/4911'),
('विदेश', 'https://feed.livehindustan.com/rss/4913'),
('ओपिनियन', 'https://feed.livehindustan.com/rss/5165'),

View File

@ -26,9 +26,8 @@ class iHeuteRecipe(BasicNewsRecipe):
no_stylesheets = True
remove_attributes = ['width', 'height']
remove_tags = [dict(name='div', attrs={'id': ['zooming']}),
dict(name='div', attrs={
'class': ['related', 'mapa-wrapper']}),
remove_tags = [dict(name='div', attrs={'id': ['zooming']}),
dict(name='div', attrs={'class': ['related', 'mapa-wrapper']}),
dict(name='table', attrs={'id': ['opener-img', 'portal']}),
dict(name='table', attrs={'class': ['video-16ku9']})]
remove_tags_after = [

View File

@ -10,29 +10,21 @@ class AdvancedUserRecipe1286477122(BasicNewsRecipe):
__author__ = 'egilh'
feeds = [
(u'Politica & Palazzo',
u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
(u'Giustizia & impunit\xe0',
u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
(u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
(u'Economia & Lobby',
u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
(u'Lavoro & precari',
u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
(u'Ambiente & Veleni',
u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
(u'Sport & miliardi',
u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
(u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
(u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
(u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
(u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
(u'Lavoro & precari', u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
(u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
(u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
(u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'),
(u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'),
(u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
(u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
(u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'),
(u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'),
(u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'),
(u'Piacere quotidiano',
u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
(u'Cervelli in fuga',
u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
(u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
(u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
(u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'),
(u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/')
]

View File

@ -36,7 +36,7 @@ class IlMessaggero(BasicNewsRecipe):
dict(name='h2', attrs={
'class': ['sottotitLettura', 'grigio16']}),
dict(name='span', attrs={'class': 'testoArticoloG'}),
dict(name='div', attrs={'id': 'testodim'})
dict(name='div', attrs={'id': 'testodim'})
]
def get_cover_url(self):

View File

@ -34,7 +34,7 @@ class IlManifesto(BasicNewsRecipe):
startSoup = self.index_to_soup(startUrl)
lastEdition = startSoup.findAll('div', id='accordion_inedicola')[
1].find('a')['href']
del (startSoup)
del startSoup
self.manifesto_index = MANIFESTO_BASEURL + lastEdition
urlsplit = lastEdition.split('/')
self.manifesto_datestr = urlsplit[-1]
@ -106,5 +106,5 @@ class IlManifesto(BasicNewsRecipe):
summary = sommNode
template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>" # noqa: E501
del (bs)
del bs
return template % dict(title=title, subtitle=subtitle, author=author, summary=summary, content=content)

View File

@ -115,6 +115,6 @@ class IndiaToday(BasicNewsRecipe):
imagecap = '<div id="imgcap">' + data['image_caption'] + '</div>'
html = '<html><body>' + slug + '<h1>' + title + '</h1>\n' + desc + '<div id="author">'\
+ author + '<span> ' + city + ' UPDATED: ' + date + '</span></div>\n' + image + imagecap + body\
+ author + '<span> ' + city + ' UPDATED: ' + date + '</span></div>\n' + image + imagecap + body\
+ '</body></html>'
return html

View File

@ -121,7 +121,7 @@ class IrishTimes(BasicNewsRecipe):
'sec-fetch-site': 'same-origin',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
}, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''}))
}, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''}))
r = br.open(rq)
raw = r.read()

View File

@ -47,7 +47,7 @@ class jotdown(BasicNewsRecipe):
dict(name='div', attrs={'id':'respond'})
]
remove_tags_after = dict(name='div' , attrs={'id':'respond'})
remove_tags_after = dict(name='div', attrs={'id':'respond'})
preprocess_regexps = [
# To change the small size of the text

View File

@ -26,7 +26,7 @@ class Computerra(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='ul', attrs={'class': 'breadcrumbs'}),
dict(name='div', attrs={'class': 'post-info__likes post-info-likes'}),
dict(name='div', attrs={'class': 'cta-row'}),

View File

@ -30,7 +30,7 @@ class kudyznudyRecipe(BasicNewsRecipe):
name='div', attrs={'class': ['C_WholeContentPadding']})
remove_tags_after = dict(
name='div', attrs={'class': ['SurroundingsContainer']})
remove_tags = [dict(name='div', attrs={
remove_tags = [dict(name='div', attrs={
'class': ['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']})]
keep_only_tags = []

View File

@ -111,7 +111,7 @@ class LaJornada_mx(BasicNewsRecipe):
def get_article_url(self, article):
# Get link to original article URL
rurl = article.get('guid', None)
rurl = article.get('guid', None)
if not rurl:
# Use the "link" attribute as failover
return article.get('link', None)

View File

@ -50,7 +50,7 @@ class LibertadDigital(BasicNewsRecipe):
]
def get_article_url(self, article):
return article.get('guid', None)
return article.get('guid', None)
def print_version(self, url):
art, sep, rest = url.rpartition('/')

View File

@ -12,7 +12,7 @@ def classes(classes):
def absolutize(href):
if href.startswith('/'):
href = 'https://www.lrb.co.uk' + href
href = 'https://www.lrb.co.uk' + href
return href

View File

@ -46,4 +46,4 @@ class Marca(BasicNewsRecipe):
return soup
def get_article_url(self, article):
return article.get('guid', None)
return article.get('guid', None)

View File

@ -95,7 +95,7 @@ class Mediapart(BasicNewsRecipe):
for feed in feeds:
feed_name = feed.title.lower()
for article in feed.articles:
if feed_name != 'autres' and feed_name not in article.url:
if feed_name != 'autres' and feed_name not in article.url:
feed.articles.remove(article)
if feed_name == 'autres' and any(section in article.url for section in self.sections):
feed.articles.remove(article)

View File

@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -22,7 +22,7 @@ class MoscowTimes(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'article__tags'})
remove_tags = [
remove_tags = [
dict(name='aside'),
dict(name='footer'),
dict(name='section', attrs={'class': 'cluster'}),

View File

@ -60,7 +60,7 @@ class naszdziennik(BasicNewsRecipe):
article_title_datetime.find('h4'))
# zebrane elementy dodajemy do listy zadeklarowanej w linijce 44
articles[section].append(
{'title': article_title, 'url': article_url, 'date': article_date})
{'title': article_title, 'url': article_url, 'date': article_date})
# po dodaniu wszystkich artykułów dodajemy sekcje do listy feedów,
# korzystając z list sekcji znajdujących się w słowniku
for section in sections:

View File

@ -27,12 +27,12 @@ class NavyTimes(BasicNewsRecipe):
('Home page', 'https://www.navytimes.com/arc/outboundfeeds/rss/?outputType=xml'),
('News', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/?outputType=xml'),
('Your Navy', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/your-navy/?outputType=xml'),
('Your Army', 'https://www.armytimes.com/arc/outboundfeeds/rss/category/news/your-army/?outputType=xml'),
('Your Army', 'https://www.armytimes.com/arc/outboundfeeds/rss/category/news/your-army/?outputType=xml'),
('Your Air Force', 'https://www.airforcetimes.com/arc/outboundfeeds/rss/category/news/your-air-force?outputType=xml'),
('Your Marine Core', 'https://www.marinecorpstimes.com/arc/outboundfeeds/rss/category/news/your-marine-corps/?outputType=xml'),
('Pentagon and Congress', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/pentagon-congress/?outputType=xml'),
('Pay and Benefits', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/pay-benefits/?outputType=xml'),
('Veterans', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/veterans/?outputType=xml'),
('Education and Transition', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/education-transition/?outputType=xml'),
('Flashpoints', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/flashpoints/?outputType=xml'),
('Your Marine Core', 'https://www.marinecorpstimes.com/arc/outboundfeeds/rss/category/news/your-marine-corps/?outputType=xml'),
('Pentagon and Congress', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/pentagon-congress/?outputType=xml'),
('Pay and Benefits', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/pay-benefits/?outputType=xml'),
('Veterans', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/veterans/?outputType=xml'),
('Education and Transition', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/education-transition/?outputType=xml'),
('Flashpoints', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/flashpoints/?outputType=xml'),
]

View File

@ -29,7 +29,7 @@ class nepszabadsag(BasicNewsRecipe):
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'class': ['d-source']})
remove_tags_after = dict(name='div', attrs={'class': ['tags']})
remove_tags = [dict(name='div', attrs={'class': ['h']}),
remove_tags = [dict(name='div', attrs={'class': ['h']}),
dict(name='tfoot')]
keep_only_tags = [dict(name='table', attrs={'class': 'article-box'})]

View File

@ -80,7 +80,7 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')})
for sect in sections:
sect_title = sect.find(attrs={'class' : re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
sect_title = sect.find(attrs={'class': re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
if sect_title is None:
continue
sect_title = sect_title.contents[0]

View File

@ -24,12 +24,12 @@ class novinkyRecipe(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}),
dict(name='div', attrs={'id': ['discussionEntry']}),
dict(name='span', attrs={
remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}),
dict(name='div', attrs={'id': ['discussionEntry']}),
dict(name='span', attrs={
'id': ['mynews-hits', 'mynews-author']}),
dict(name='div', attrs={'class': ['related']}),
dict(name='div', attrs={'id': ['multimediaInfo']})]
dict(name='div', attrs={'class': ['related']}),
dict(name='div', attrs={'id': ['multimediaInfo']})]
remove_tags_before = dict(name='div', attrs={'class': ['articleHeader']})
remove_tags_after = dict(name='div', attrs={'class': 'related'})

View File

@ -49,4 +49,4 @@ class Nu(BasicNewsRecipe):
(u'Podcast Algemeen nieuws', u'http://www.nu.nl/podcast.php')]
def get_article_url(self, article):
return article.get('guid', None)
return article.get('guid', None)

View File

@ -24,7 +24,7 @@ class OGRU(BasicNewsRecipe):
remove_attributes = ['style']
remove_tags = [
remove_tags = [
dict(name='p', attrs={'id': 'pageDescription'}),
dict(name='div', attrs={'class': 'pageNavLinkGroup'}),
dict(name='div', attrs={'class': 'tagBlock TagContainer'}),

View File

@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -76,22 +76,22 @@ class Pagina12(BasicNewsRecipe):
return br
feeds = [
(u'Diario de hoy' , u'https://www.pagina12.com.ar/rss/edicion-impresa'),
(u'Espectaculos' , u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
(u'Radar' , u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
(u'Radar libros' , u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
(u'Cash' , u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
(u'NO' , u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
(u'Las 12' , u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
(u'Soy' , u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
(u'M2' , u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
(u'Rosario 12' , u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
(u'Diario de hoy', u'https://www.pagina12.com.ar/rss/edicion-impresa'),
(u'Espectaculos', u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
(u'Radar', u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
(u'Radar libros', u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
(u'Cash', u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
(u'NO', u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
(u'Las 12', u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
(u'Soy', u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
(u'M2', u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
(u'Rosario 12', u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
]
def get_cover_url(self):
lurl = strftime('https://www.pagina12.com.ar/edicion-impresa/%d-%m-%Y')
soup = self.index_to_soup(lurl)
mydiv = soup.find('div', {'class' : lambda x: x and 'printed-edition-cover' in x.split()})
mydiv = soup.find('div', {'class': lambda x: x and 'printed-edition-cover' in x.split()})
if mydiv:
for image in mydiv.findAll('img'):
if image['src'].startswith('https://images.pagina12.com.ar/styles/width700/public/'):

View File

@ -24,7 +24,7 @@ class PaperPaper(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'bottom-block '}),
dict(name='div', attrs={'class': 'bottom-block news'})
]

View File

@ -32,4 +32,4 @@ class plRecipe(BasicNewsRecipe):
preprocess_regexps = [(re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*',
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})]
keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})]

View File

@ -26,7 +26,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
classes('Article-header Article-excerpt Article-author Article-thumbnail Article-bodyText article-title article-dek article-paragraph articlebody'),
]
remove_tags = [
dict(name='section', attrs={'class': ['recurrent-share']})
dict(name='section', attrs={'class': ['recurrent-share']})
]
def parse_section_index(self, slug):

View File

@ -137,7 +137,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
# We remove vast swathes of HTML which is not part of the articles.
# Remove sibling content
remove_tags_before = [
remove_tags_before = [
{'name': 'div', 'class': 'article'},
{'name': 'div', 'id': 'page'},
{'name': 'div', 'id': 'page-wide'},

View File

@ -31,7 +31,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
# cover_url = file:///c:/Users/YOUR_USERNAME/AppData/Roaming/calibre/resources/images/news_covers/Pro_Physik.png
extra_css = '''
extra_css = '''
h1 {font-size: 1.6em; text-align: left}
h2, h3 {font-size: 1.3em;text-align: left}
h2.subtitle {font-size: 1.2em;text-align: left;font-style: italic}

View File

@ -26,7 +26,7 @@ class ProSleduet(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'container'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'})
]

View File

@ -104,7 +104,7 @@ class RadioCanada(BasicNewsRecipe):
('Grands titres', 'https://ici.radio-canada.ca/rss/771'),
('Football', 'https://ici.radio-canada.ca/rss/1000057'),
('Hockey', 'https://ici.radio-canada.ca/rss/1000056'),
('Olympiques', 'https://ici.radio-canada.ca/rss/64852'),
('Olympiques', 'https://ici.radio-canada.ca/rss/64852'),
('Podium', 'https://ici.radio-canada.ca/rss/555082'),
('Soccer', 'https://ici.radio-canada.ca/rss/1000058'),
('Tennis', 'https://ici.radio-canada.ca/rss/1000059'),

View File

@ -35,9 +35,9 @@ class RealClear(BasicNewsRecipe):
# Numeric parameter is type, controls whether we look for
feedsets = [
['Politics', 'http://www.realclearpolitics.com/index.xml', 0],
['Policy', 'http://www.realclearpolicy.com/index.xml', 0],
['Science', 'http://www.realclearscience.com/index.xml', 0],
['Politics', 'http://www.realclearpolitics.com/index.xml', 0],
['Policy', 'http://www.realclearpolicy.com/index.xml', 0],
['Science', 'http://www.realclearscience.com/index.xml', 0],
['Tech', 'http://www.realcleartechnology.com/index.xml', 0],
# The feedburner is essentially the same as the top feed, politics.
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
@ -45,7 +45,7 @@ class RealClear(BasicNewsRecipe):
['Markets Home', 'http://www.realclearmarkets.com/index.xml', 0],
['Markets', 'http://www.realclearmarkets.com/articles/index.xml', 0],
['World', 'http://www.realclearworld.com/index.xml', 0],
['World Blog', 'http://www.realclearworld.com/blog/index.xml', 2]
['World Blog', 'http://www.realclearworld.com/blog/index.xml', 2]
]
# Hints to extractPrintURL.
# First column is the URL snippet. Then the string to search for as text,
@ -53,12 +53,11 @@ class RealClear(BasicNewsRecipe):
# drill down.
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
printhints = [['realclear', '', '', 'printpage'],
['billoreilly.com', 'Print this entry', 'a', ''],
['billoreilly.com', 'Print This Article', 'a', ''],
['politico.com', 'Print',
'a', 'share-print'],
['nationalreview.com', '>Print<', 'a', ''],
printhints = [['realclear', '', '', 'printpage'],
['billoreilly.com', 'Print this entry', 'a', ''],
['billoreilly.com', 'Print This Article', 'a', ''],
['politico.com', 'Print', 'a', 'share-print'],
['nationalreview.com', '>Print<', 'a', ''],
['reason.com', '', 'a', 'printer']
# The following are not supported due to JavaScripting, and would require obfuscated_article to handle
# forbes,

View File

@ -125,7 +125,7 @@ class respektRecipe(BasicNewsRecipe):
else:
if next.getchildren():
next_child = next.getchildren()[0]
next_child.text = next_child.text + u'' + text
next_child.text = next_child.text + u'' + text
par.getparent().remove(par)
# Insert text length
text = root.xpath("//div[@id='postcontent']")[0]
@ -171,4 +171,4 @@ class respektRecipe(BasicNewsRecipe):
o.getparent().replace(o,e)
except:
pass
return (BeautifulSoup(lxml.etree.tostring(root,encoding='unicode')))
return BeautifulSoup(lxml.etree.tostring(root,encoding='unicode'))

View File

@ -60,7 +60,7 @@ class RND(BasicNewsRecipe):
feeds = [
('Politik', 'https://www.rnd.de/arc/outboundfeeds/rss/category/politik/'),
('Wirtschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wirtschaft/'),
('Wirtschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wirtschaft/'),
('Sport', 'https://www.rnd.de/arc/outboundfeeds/rss/category/sport/'),
('Panorama', 'https://www.rnd.de/arc/outboundfeeds/rss/category/panorama/'),
# ('Promis', 'https://www.rnd.de/arc/outboundfeeds/rss/category/promis/'),

View File

@ -61,7 +61,7 @@ class Saechsische(BasicNewsRecipe):
feeds = [
# ('Alle Artikel der SZ', 'https://www.saechsische.de/arc/outboundfeeds/rss/'),
('Stadt Dresden', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden'),
('Stadt Dresden', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden'),
# ('Altstadt', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/altstadt'),
# ('Blasewitz', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/blasewitz'),
# ('Cotta', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/cotta'),
@ -152,7 +152,7 @@ class Saechsische(BasicNewsRecipe):
# ('Vogtlandkreis', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/vogtland'),
# ('Plauen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/vogtland/plauen'),
# ('Tschechien', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/tschechien'),
('Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sachsen'),
('Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sachsen'),
# ('Der Osten', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/der-osten'),
# ('Politik in Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik/regional'),
# ('Wirtschaft in Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft/regional'),
@ -172,10 +172,10 @@ class Saechsische(BasicNewsRecipe):
# ('Genuss und Kochen', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/genuss-und-kochen'),
# ('Sächsische Schweiz', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/saechsische-schweiz'),
# ('Sachsenkompass', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/sachsenkompass'),
('Politik', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik'),
('Wirtschaft', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft'),
('Politik', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik'),
('Wirtschaft', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft'),
# ('Sport', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sport'),
('Panorama', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/panorama'),
('Panorama', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/panorama'),
# ('Promis', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/promis'),
# ('Reise', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/reise'),
# ('Medien & TV', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/medien'),

View File

@ -54,7 +54,7 @@ def load_article_from_json(raw, root):
for child in tuple(body):
body.remove(child)
article = E(body, 'article')
E(article, 'div', replace_entities(data['firstTopic']['name']) , style='color: gray; font-size:small; font-weight:bold;')
E(article, 'div', replace_entities(data['firstTopic']['name']), style='color: gray; font-size:small; font-weight:bold;')
E(article, 'h1', replace_entities(data['headline']))
# E(article, 'p', replace_entities(data['subHeadline']['text']), style='font-style: italic; color:#202020;')
for subh in data['subHeadline']['json']:

View File

@ -22,7 +22,7 @@ class Sobaka(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'b-post-view__foot'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'b-post-view__telegram-promo'}),
dict(name='div', attrs={'class': 'b-post-view__tgb'}),
dict(name='div', attrs={'id': 'comments'}),

View File

@ -21,7 +21,7 @@ class Sotavision(BasicNewsRecipe):
remove_tags_after = dict(name='span', attrs={'style': 'border-color:#EBEBEB;border-width:1px;width:100%;'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'td_block_wrap tdb_mobile_menu tdi_7 td-pb-border-top td_block_template_1 tdb-header-align'}),
dict(name='div', attrs={'class': 'td_block_wrap tdb_single_author tdi_52 td-pb-border-top td_block_template_1 tdb-post-meta'}),
dict(name='div', attrs={'class': 'td_block_wrap tdb_single_date tdi_53 td-pb-border-top td_block_template_1 tdb-post-meta'}),

View File

@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
def absurl(url):
if url.startswith('/'):
url = 'https://www.spectator.co.uk' + url
url = 'https://www.spectator.co.uk' + url
return url

View File

@ -39,16 +39,16 @@ class StraitsTimes(BasicNewsRecipe):
]
feeds = [
(u'World' , u'https://www.straitstimes.com/news/world/rss.xml')
(u'Business' , u'https://www.straitstimes.com/news/business/rss.xml'),
(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
(u'Tech' , u'https://www.straitstimes.com/news/tech/rss.xml'),
(u'Opinion' , u'https://www.straitstimes.com/news/opinion/rss.xml'),
(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
(u'Singapore' , u'https://www.straitstimes.com/news/singapore/rss.xml'),
(u'Asia' , u'https://www.straitstimes.com/news/asia/rss.xml'),
(u'Multimedia' , u'https://www.straitstimes.com/news/multimedia/rss.xml'),
(u'Sport' , u'https://www.straitstimes.com/news/sport/rss.xml'),
(u'World', u'https://www.straitstimes.com/news/world/rss.xml'),
(u'Business', u'https://www.straitstimes.com/news/business/rss.xml'),
(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
(u'Tech', u'https://www.straitstimes.com/news/tech/rss.xml'),
(u'Opinion', u'https://www.straitstimes.com/news/opinion/rss.xml'),
(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
(u'Singapore', u'https://www.straitstimes.com/news/singapore/rss.xml'),
(u'Asia', u'https://www.straitstimes.com/news/asia/rss.xml'),
(u'Multimedia', u'https://www.straitstimes.com/news/multimedia/rss.xml'),
(u'Sport', u'https://www.straitstimes.com/news/sport/rss.xml'),
]
def preprocess_html(self, soup):

View File

@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
dict(name='span', attrs={'class': 'newses-tags'}),

View File

@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
dict(name='span', attrs={'class': 'newses-tags'}),

View File

@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
dict(name='span', attrs={'class': 'newses-tags'}),

View File

@ -51,73 +51,80 @@ class PhilippineDailyInquirer(BasicNewsRecipe):
feeds = [
('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
('Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
('Nation' , 'http://newsinfo.inquirer.net/category/nation/feed'),
('Nation - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
('Metro' , 'http://newsinfo.inquirer.net/category/metro/feed'),
('Metro - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
('Regions' , 'http://newsinfo.inquirer.net/category/regions/feed'),
('Regions - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
('News' , 'http://www.inquirer.net/fullfeed'),
('More News' , 'http://newsinfo.inquirer.net/feed')
,
('Global Nation' , 'http://globalnation.inquirer.net/feed'),
('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
('Global Nation - Europe' , 'http://globalnation.inquirer.net/category/news/europe/feed'),
('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
('Global Nation - Events' , 'http://globalnation.inquirer.net/category/events/feed'),
('Business' , 'http://business.inquirer.net/feed'),
('Business - Latest Stories' , 'http://business.inquirer.net/category/latest-stories/feed'),
('Business - Money' , 'http://business.inquirer.net/category/money/feed'),
('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
('Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
('Nation', 'http://newsinfo.inquirer.net/category/nation/feed'),
('Nation - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
('Metro', 'http://newsinfo.inquirer.net/category/metro/feed'),
('Metro - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
('Regions', 'http://newsinfo.inquirer.net/category/regions/feed'),
('Regions - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
('News', 'http://www.inquirer.net/fullfeed'),
('More News', 'http://newsinfo.inquirer.net/feed'),
('Global Nation', 'http://globalnation.inquirer.net/feed'),
('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
('Global Nation - Europe', 'http://globalnation.inquirer.net/category/news/europe/feed'),
('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
('Global Nation - Events', 'http://globalnation.inquirer.net/category/events/feed'),
('Business', 'http://business.inquirer.net/feed'),
('Business - Latest Stories', 'http://business.inquirer.net/category/latest-stories/feed'),
('Business - Money', 'http://business.inquirer.net/category/money/feed'),
('Business - Science & Health', 'http://business.inquirer.net/category/science-and-health/feed'),
('Business - Motoring' , 'http://business.inquirer.net/category/motoring/feed'),
('Business - Property Guide' , 'http://business.inquirer.net/category/property-guide/feed'),
('Business - Columnists' , 'http://business.inquirer.net/category/columnists/feed'),
('Sports' , 'http://sports.inquirer.net/feed'),
('Sports - Latest Stories' , 'http://sports.inquirer.net/category/latest-stories/feed'),
('Sports - Basketball' , 'http://sports.inquirer.net/category/section/basketball/feed'),
('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
('Sports - Golf' , 'http://sports.inquirer.net/category/section/golf/feed'),
('Sports - Football' , 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
('Sports - Other Sports' , 'http://sports.inquirer.net/category/section/other-sports/feed'),
('Technology' , 'http://technology.inquirer.net/feed'),
('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),
('Entertainment' , 'http://entertainment.inquirer.net/feed'),
('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
('Entertainment - Movies' , 'http://movies.inquirer.net/feed'),
('Lifestyle' , 'http://lifestyle.inquirer.net/feed'),
('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
('Lifestyle - Arts & Books' , 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
('Lifestyle - Wellness' , 'http://lifestyle.inquirer.net/category/wellness/feed'),
('Business - Motoring', 'http://business.inquirer.net/category/motoring/feed'),
('Business - Property Guide', 'http://business.inquirer.net/category/property-guide/feed'),
('Business - Columnists', 'http://business.inquirer.net/category/columnists/feed'),
('Sports', 'http://sports.inquirer.net/feed'),
('Sports - Latest Stories', 'http://sports.inquirer.net/category/latest-stories/feed'),
('Sports - Basketball', 'http://sports.inquirer.net/category/section/basketball/feed'),
('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
('Sports - Golf', 'http://sports.inquirer.net/category/section/golf/feed'),
('Sports - Football', 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
('Sports - Other Sports', 'http://sports.inquirer.net/category/section/other-sports/feed'),
('Technology', 'http://technology.inquirer.net/feed'),
('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),
('Entertainment', 'http://entertainment.inquirer.net/feed'),
('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
('Entertainment - Movies', 'http://movies.inquirer.net/feed'),
('Lifestyle', 'http://lifestyle.inquirer.net/feed'),
('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
('Lifestyle - Arts & Books', 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
('Lifestyle - Wellness', 'http://lifestyle.inquirer.net/category/wellness/feed'),
('Lifestyle - Home & Entertaining', 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed'),
('Lifestyle - Parenting' , 'http://lifestyle.inquirer.net/category/parenting/feed'),
('Lifestyle - Food' , 'http://lifestyle.inquirer.net/category/food/feed'),
('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
('Lifestyle - Super' , 'http://lifestyle.inquirer.net/category/super/feed'),
('Lifestyle - 2BU' , 'http://lifestyle.inquirer.net/category/2bu/feed'),
('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
('Lifestyle - Wedding' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
('Lifestyle - Travel' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
('Lifestyle - Relationship' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
('Opinion' , 'http://opinion.inquirer.net/feed'),
('Opinion - Viewpoints' , 'http://opinion.inquirer.net/category/viewpoints/feed'),
('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
('Editorial' , 'http://opinion.inquirer.net/category/editorial/feed'),
('Letters to the Editor' , 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
('Columns' , 'http://opinion.inquirer.net/category/columns/feed'),
('Citizens Journalism' , 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
('Cebu - Daily News' , 'http://newsinfo.inquirer.net/category/cdn/feed'),
('Cebu - More News' , 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
('Cebu - Community' , 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
('Cebu - Metro' , 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
('Cebu - Business' , 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
('Cebu - Sports' , 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
('Cebu - Visayas' , 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
('Cebu - Opinion' , 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed')
('Lifestyle - Parenting', 'http://lifestyle.inquirer.net/category/parenting/feed'),
('Lifestyle - Food', 'http://lifestyle.inquirer.net/category/food/feed'),
('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
('Lifestyle - Super', 'http://lifestyle.inquirer.net/category/super/feed'),
('Lifestyle - 2BU', 'http://lifestyle.inquirer.net/category/2bu/feed'),
('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
('Lifestyle - Wedding', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
('Lifestyle - Travel', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
('Lifestyle - Relationship', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
('Opinion', 'http://opinion.inquirer.net/feed'),
('Opinion - Viewpoints', 'http://opinion.inquirer.net/category/viewpoints/feed'),
('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
('Editorial', 'http://opinion.inquirer.net/category/editorial/feed'),
('Letters to the Editor', 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
('Columns', 'http://opinion.inquirer.net/category/columns/feed'),
('Citizens Journalism', 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
('Cebu - Daily News', 'http://newsinfo.inquirer.net/category/cdn/feed'),
('Cebu - More News', 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
('Cebu - Community', 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
('Cebu - Metro', 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
('Cebu - Business', 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
('Cebu - Sports', 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
('Cebu - Visayas', 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
('Cebu - Opinion', 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed'),
]

View File

@ -217,7 +217,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
]
# We remove vast swathes of HTML which is not part of the articles.
remove_tags_before = [
remove_tags_before = [
{'name': 'div', 'class': 'container'},
{'name': 'div', 'class': 'content-wrapper'},
{'name': 'div', 'class': 'only-in-the-magazine'},

View File

@ -30,21 +30,21 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
('TOP 20', 'http://www.tmz.com/rss.xml'),
('Exclusives', 'http://www.tmz.com/category/exclusives/rss.xml'),
('Celeb Justice', 'http://www.tmz.com/category/celebrity-justice/rss.xml'),
('Celeb Feuds', 'http://www.tmz.com/category/celebrity-feuds/rss.xml'),
('Politix', 'http://www.tmz.com/category/politix/rss.xml'),
('Music', 'http://www.tmz.com/category/music/rss.xml'),
('Movies', 'http://www.tmz.com/category/movies/rss.xml'),
('TV', 'http://www.tmz.com/category/tv/rss.xml'),
('Sports', 'http://www.tmz.com/category/TMZsports/rss.xml'),
('Hook-Ups', 'http://www.tmz.com/category/hook-ups/rss.xml'),
('Beauty', 'http://www.tmz.com/category/beauty/rss.xml'),
('Fashion', 'http://www.tmz.com/category/fashion/rss.xml'),
('Gossip & Rumor', 'http://www.tmz.com/category/gossip-rumors/rss.xml'),
('Hot Mama', 'http://www.tmz.com/category/hot-mamas/rss.xml'),
('Party All The Time', 'http://www.tmz.com/category/party-all-the-time/rss.xml'),
('Ride Me!', 'http://www.tmz.com/category/ride-me/rss.xml'),
('Stars in Heat', 'http://www.tmz.com/category/stars-in-heat/rss.xml'),
('Vegas', 'http://www.tmz.com/category/hot-vegas/rss.xml')
('Celeb Feuds', 'http://www.tmz.com/category/celebrity-feuds/rss.xml'),
('Politix', 'http://www.tmz.com/category/politix/rss.xml'),
('Music', 'http://www.tmz.com/category/music/rss.xml'),
('Movies', 'http://www.tmz.com/category/movies/rss.xml'),
('TV', 'http://www.tmz.com/category/tv/rss.xml'),
('Sports', 'http://www.tmz.com/category/TMZsports/rss.xml'),
('Hook-Ups', 'http://www.tmz.com/category/hook-ups/rss.xml'),
('Beauty', 'http://www.tmz.com/category/beauty/rss.xml'),
('Fashion', 'http://www.tmz.com/category/fashion/rss.xml'),
('Gossip & Rumor', 'http://www.tmz.com/category/gossip-rumors/rss.xml'),
('Hot Mama', 'http://www.tmz.com/category/hot-mamas/rss.xml'),
('Party All The Time', 'http://www.tmz.com/category/party-all-the-time/rss.xml'),
('Ride Me!', 'http://www.tmz.com/category/ride-me/rss.xml'),
('Stars in Heat', 'http://www.tmz.com/category/stars-in-heat/rss.xml'),
('Vegas', 'http://www.tmz.com/category/hot-vegas/rss.xml')
]
def print_version(self, url):

View File

@ -73,7 +73,7 @@ class TheEconomicTimes(BasicNewsRecipe):
return citem['content']
def get_article_url(self, article):
rurl = article.get('guid', None)
rurl = article.get('guid', None)
if '/articleshow/' in rurl:
return rurl

View File

@ -39,7 +39,7 @@ class UAFootball(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'show-post'})
remove_tags = [
remove_tags = [
dict(name='form'),
dict(name='iframe'),
dict(name='div', attrs={'class': 'language'}),

View File

@ -22,7 +22,7 @@ class UkrInform(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
dict(name='aside'),
dict(name='img', attrs={'class': 'pixel'}),
dict(name='section', attrs={'class': 'read'}),

View File

@ -227,7 +227,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -215,7 +215,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
pgall = soup.find('div', attrs={'id': 'storyphoto'})
if pgall is not None: # photo gallery perhaps

View File

@ -38,7 +38,7 @@ class version2(BasicNewsRecipe):
dict(name='span', attrs={'class': 'article-link-id'}),
dict(name='section', attrs={'class': 'social-tools-pane'}),
dict(name='section', attrs={'class': 'article-timeline'}),
dict(name='div', attrs={'id' : 'mini-panel-comments_and_form'}),
dict(name='div', attrs={'id': 'mini-panel-comments_and_form'}),
dict(name='div', attrs={'class': 'related-articles top-three'}),
dict(name='div', attrs={'id': 'mini-panel-jobfinder_1'}),
dict(name='section', attrs={'id': 'mini-panel-frontpage_debat_zone'}),
@ -53,7 +53,7 @@ class version2(BasicNewsRecipe):
dict(name='section', attrs={'class': 'jobs-list'}),
dict(name='footer', attrs={'id': 'footer'}),
dict(name='section', attrs={'class': 'banner'}),
dict(name='div', attrs={'class' : 'fast-track-frontpage'}),
dict(name='div', attrs={'class': 'fast-track-frontpage'}),
dict(name='a', attrs={'class': 'byline-comments'})
]

View File

@ -23,7 +23,7 @@ class ViknaSTB(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class': 'content-wrapper'})
remove_tags = [
remove_tags = [
dict(name='div', attrs={'class': 'share-content-wrapper flex-wrapper'}),
dict(name='div', attrs={'class': 'sticky-wrapper'}),
dict(name='div', attrs={'class': 'promo-wrapper'}),

View File

@ -101,7 +101,7 @@ class weblogssl(BasicNewsRecipe):
dict(name='div', attrs={'id':'comments'})
]
remove_tags_after = dict(name='div' , attrs={'id':'comments'})
remove_tags_after = dict(name='div', attrs={'id':'comments'})
def print_version(self, url):
if url.startswith('http://www'):

View File

@ -24,7 +24,7 @@ class WiComix(BasicNewsRecipe):
remove_tags_after = dict(name='article')
remove_tags = [
remove_tags = [
# dict(name='div', attrs={'class': 'author-meta'}),
dict(name='div', attrs={'id': 'jp-post-flair'}),
dict(name='footer', attrs={'class': 'entry-meta'})

View File

@ -57,7 +57,7 @@ class CanWestPaper(BasicNewsRecipe):
divtags = soup.findAll('div', attrs={'id': ''})
if divtags:
for div in divtags:
del (div['id'])
del div['id']
return soup
def parse_index(self):

Some files were not shown because too many files have changed in this diff Show More