mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
various whitespace (extra-edit)
!partial 'E203,E222,E241,E271,E272'
This commit is contained in:
parent
41cee6f02d
commit
ed2930712d
@ -61,7 +61,7 @@ if use_archive:
|
||||
data = json.loads(raw)
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
try:
|
||||
@ -96,7 +96,7 @@ else:
|
||||
for child in tuple(body):
|
||||
body.remove(child)
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', replace_entities(data['headline']))
|
||||
E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
|
||||
if data['dateline'] is None:
|
||||
|
@ -32,7 +32,7 @@ class aktualneRecipe(BasicNewsRecipe):
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
|
||||
filter_regexps = [r'img.aktualne.centrum.cz']
|
||||
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
||||
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
||||
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
|
||||
dict(name='div', attrs={'class': 'itemcomment id0'}),
|
||||
dict(name='div', attrs={'class': 'hlavicka'}),
|
||||
|
@ -55,7 +55,7 @@ class AlJazeera(BasicNewsRecipe):
|
||||
u'http://www.aljazeera.com/xml/rss/all.xml')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
artlurl = article.get('link', None)
|
||||
artlurl = article.get('link', None)
|
||||
return artlurl
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
|
||||
articles = []
|
||||
for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
|
||||
href = aTag.get('href',False)
|
||||
if not href.encode('utf-8').startswith(url.encode('utf-8')) :
|
||||
if not href.encode('utf-8').startswith(url.encode('utf-8')):
|
||||
continue # not in same section
|
||||
|
||||
title = href.split('/')[-1].split('-')[0]
|
||||
|
@ -119,7 +119,7 @@ class barrons(BasicNewsRecipe):
|
||||
byl = articles.find(**prefixed_classes('BarronsTheme--byline--'))
|
||||
if byl:
|
||||
desc += self.tag_to_string(byl)
|
||||
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
|
||||
ttr = articles.find(**prefixed_classes('BarronsTheme--time-to-read--'))
|
||||
if ttr:
|
||||
desc += self.tag_to_string(ttr)
|
||||
summ = articles.find(**prefixed_classes('BarronsTheme--summary--'))
|
||||
|
@ -26,7 +26,7 @@ class BeforeWeGo(BasicNewsRecipe):
|
||||
remove_tags_after = dict(name='div', attrs={'id': 'author-bio'})
|
||||
# remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'nectar-scrolling-text font_size_10vh custom_color has-custom-divider'}),
|
||||
dict(name='span', attrs={'class': 'meta-comment-count'}),
|
||||
dict(name='p', attrs={'id': 'breadcrumbs'})
|
||||
|
@ -33,9 +33,9 @@ class bleskRecipe(BasicNewsRecipe):
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
|
||||
remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
|
||||
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
|
||||
dict(name='div', attrs={'id': ['partHeader']}),
|
||||
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
|
||||
remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
|
||||
dict(name='div', attrs={'id': ['partHeader']}),
|
||||
dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
|
||||
preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*',
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||
|
||||
|
@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -22,7 +22,7 @@ class Cherta(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class':'single-page__footer-info'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'single-content-link'}),
|
||||
dict(name='div', attrs={'class': 'single-page__footer-info_links clearfix'}),
|
||||
dict(name='div', attrs={'class': 'single-article-tags-wrapper'})
|
||||
|
@ -85,11 +85,11 @@ class Clarin(BasicNewsRecipe):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='p' , attrs={'class' : 'volanta'}),
|
||||
dict(name='h1' , attrs={'id': 'title'}),
|
||||
dict(name='div', attrs={'class' : 'bajada'}),
|
||||
dict(name='div', attrs={'id' : 'galeria-trigger'}),
|
||||
dict(name='div', attrs={'class' : 'body-nota'})
|
||||
dict(name='p', attrs={'class': 'volanta'}),
|
||||
dict(name='h1', attrs={'id': 'title'}),
|
||||
dict(name='div', attrs={'class': 'bajada'}),
|
||||
dict(name='div', attrs={'id': 'galeria-trigger'}),
|
||||
dict(name='div', attrs={'class': 'body-nota'})
|
||||
|
||||
]
|
||||
|
||||
|
@ -22,7 +22,7 @@ class Coda(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'article'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='li', attrs={'class': 'material-meta__type'}),
|
||||
dict(name='div', attrs={'class': 'more'})
|
||||
]
|
||||
|
@ -25,7 +25,7 @@ class ComputerWeekly(BasicNewsRecipe):
|
||||
('Financial services IT news', 'https://www.computerweekly.com/rss/Financial-services-IT-news.xml'),
|
||||
('Public sector IT news', 'https://www.computerweekly.com/rss/Public-sector-IT-news.xml'),
|
||||
('Enterprise software', 'https://www.computerweekly.com/rss/Enterprise-software.xml'),
|
||||
('SME IT news' , 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
|
||||
('SME IT news', 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
|
||||
('Datacenter and cloud computing', 'https://www.computerweekly.com/rss/Datacentre-and-cloud-computing.xml'),
|
||||
('Storage', 'https://www.computerweekly.com/rss/Storage.xml'),
|
||||
('Information Management', 'https://www.computerweekly.com/rss/Information-management.xml'),
|
||||
|
@ -27,25 +27,18 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
remove_tags_after = dict(name='p', attrs={'class': ['firma-redazione']})
|
||||
|
||||
feeds = [
|
||||
(u'Politica',
|
||||
u'http://contropiano.org/news/politica-news/feed'),
|
||||
(u'Internazionale',
|
||||
u'http://contropiano.org/news/internazionale-news/feed'),
|
||||
(u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'),
|
||||
(u'Economia',
|
||||
u'http://contropiano.org/news/news-economia/feed'),
|
||||
(u'Ambiente',
|
||||
u'http://contropiano.org/news/ambiente-news/feed'),
|
||||
(u'Scienza',
|
||||
u'http://contropiano.org/news/scienza-news/feed'),
|
||||
(u'Cultura',
|
||||
u'http://contropiano.org/news/cultura-news/feed'),
|
||||
(u'Politica', u'http://contropiano.org/news/politica-news/feed'),
|
||||
(u'Internazionale', u'http://contropiano.org/news/internazionale-news/feed'),
|
||||
(u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'),
|
||||
(u'Economia', u'http://contropiano.org/news/news-economia/feed'),
|
||||
(u'Ambiente', u'http://contropiano.org/news/ambiente-news/feed'),
|
||||
(u'Scienza', u'http://contropiano.org/news/scienza-news/feed'),
|
||||
(u'Cultura', u'http://contropiano.org/news/cultura-news/feed'),
|
||||
(u'Locali', u'http://contropiano.org/regionali/feed'),
|
||||
(u'Lavoro', u'http://contropiano.org/news/lavoro-conflitto-news/feed'),
|
||||
(u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'),
|
||||
(u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'),
|
||||
(u'Interventi', u'http://contropiano.org/interventi/feed'),
|
||||
(u'Documenti', u'http://contropiano.org/documenti/feed'),
|
||||
(u'Vignette', u'http://contropiano.org/vignette/feed'),
|
||||
(u'Altro',
|
||||
u'http://contropiano.org/altro/feed')
|
||||
(u'Altro', u'http://contropiano.org/altro/feed')
|
||||
]
|
||||
|
@ -31,8 +31,8 @@ class Cumhuriyet(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Gundem', 'https://www.cumhuriyet.com.tr/rss/9999'),
|
||||
('Dünya', 'https://www.cumhuriyet.com.tr/rss/4'),
|
||||
('Gundem', 'https://www.cumhuriyet.com.tr/rss/9999'),
|
||||
('Dünya', 'https://www.cumhuriyet.com.tr/rss/4'),
|
||||
('Türkiye', 'https://www.cumhuriyet.com.tr/rss/3'),
|
||||
('Ekonomi', 'https://www.cumhuriyet.com.tr/rss/5'),
|
||||
('Kultur Sanat', 'https://www.cumhuriyet.com.tr/rss/6'),
|
||||
|
@ -63,7 +63,7 @@ class DeGentenaarOnline(BasicNewsRecipe):
|
||||
return url.replace('/Detail.aspx?articleid', '/PrintArticle.aspx?ArticleID')
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
return article.get('guid', None)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
del soup.body['onload']
|
||||
|
@ -25,4 +25,4 @@ class denikReferendumRecipe(BasicNewsRecipe):
|
||||
remove_tags = [dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}),
|
||||
dict(name='h3', attrs={'class': 'head alt'})]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
|
||||
|
@ -29,7 +29,7 @@ class AdvancedUserRecipe1432200863(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = False
|
||||
|
||||
extra_css = '''
|
||||
extra_css = '''
|
||||
h1, h2 {font-size: 1.6em; text-align: left}
|
||||
.article-header-description {font-size: 1em; font-style: italic; font-weight: normal;margin-bottom: 1em}
|
||||
.b-image-figure, .caption-figure.is-left, .b-image-credits {font-size: .75em; font-weight: normal;margin-bottom: .75em}
|
||||
|
@ -24,7 +24,7 @@ class WiComix(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'article__body'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'article__reference article__reference_header'}),
|
||||
dict(name='div', attrs={'class': 'my-lg-5'}),
|
||||
dict(name='div', attrs={'class': 'video '}),
|
||||
|
@ -22,7 +22,7 @@ class EchoMsk(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}),
|
||||
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}),
|
||||
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'})
|
||||
|
@ -57,7 +57,7 @@ def load_article_from_json(raw, root):
|
||||
data = json.loads(raw)
|
||||
body = root.xpath('//body')[0]
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
|
||||
E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
|
||||
E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')
|
||||
|
@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -114,7 +114,7 @@ div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;}
|
||||
from datetime import date
|
||||
cover = ('https://srv00.epimg.net/pdf/elpais/snapshot/' +
|
||||
str(date.today().year) + '/' + date.today().strftime('%m') + '/elpais/' +
|
||||
str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg')
|
||||
str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg')
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
|
@ -13,7 +13,7 @@ class EpochTimes(BasicNewsRecipe):
|
||||
max_articles_per_feed = 20
|
||||
ignore_duplicate_articles = {'url'}
|
||||
remove_attributes = ['height', 'width', 'style']
|
||||
remove_empty_feeds = True
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
resolve_internal_links = True
|
||||
masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png'
|
||||
|
@ -45,7 +45,7 @@ class FastCompany(BasicNewsRecipe):
|
||||
feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
return article.get('guid', None)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
|
@ -93,24 +93,24 @@ class FazNet(BasicNewsRecipe):
|
||||
# original by Armin Geller
|
||||
# overhaul to deal with changes in the faz.net websites
|
||||
|
||||
title = 'FAZ.NET'
|
||||
__author__ = 'Unknown'
|
||||
description = 'Frankfurter Allgemeine Zeitung'
|
||||
publisher = 'Frankfurter Allgemeine Zeitung GmbH'
|
||||
category = 'news, politics, Germany'
|
||||
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg'
|
||||
encoding = 'utf-8'
|
||||
language = 'de'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
scale_news_images = (10,100)
|
||||
delay = 1
|
||||
title = 'FAZ.NET'
|
||||
__author__ = 'Unknown'
|
||||
description = 'Frankfurter Allgemeine Zeitung'
|
||||
publisher = 'Frankfurter Allgemeine Zeitung GmbH'
|
||||
category = 'news, politics, Germany'
|
||||
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/7/72/Frankfurter_Allgemeine_logo.svg'
|
||||
encoding = 'utf-8'
|
||||
language = 'de'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
scale_news_images = (10,100)
|
||||
delay = 1
|
||||
|
||||
test_feed = 'https://www.faz.net/rss/aktuell/feuilleton/kunst-und-architektur/berlinische-galerie-zeigt-edvard-munch-die-ganze-gefuehlsskala-des-lebens-19180631.html?printPagedArticle=true#pageIndex_2'
|
||||
|
||||
extra_css = '''
|
||||
extra_css = '''
|
||||
.header-title,.scrolly-title {font-size: 1.5em; font-weight:bold; text-align:left;}
|
||||
.quote {font-size: 1.5em; font-weight:bold; text-align:center;}
|
||||
.author {font-size: 0.7em; font-weight:bold; text-align:center; display:block;
|
||||
|
@ -26,7 +26,7 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe):
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = False
|
||||
|
||||
feeds = [
|
||||
feeds = [
|
||||
(u'Inhalt:', u'https://www.fluter.de/rss.xml')
|
||||
]
|
||||
|
||||
@ -38,6 +38,6 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe):
|
||||
dict(name='h2', attrs={'class':'element-invisible'})
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
extra_css = '''
|
||||
.field-group-format, .group_additional_info, .additional-info {display: inline-block; min-width: 8rem; text-align: center}
|
||||
'''
|
||||
|
@ -125,7 +125,7 @@ img { background: none !important; float: none; margin: 0px; }
|
||||
break
|
||||
elif strpost.startswith('<a href'):
|
||||
url = post['href']
|
||||
if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')) :
|
||||
if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')):
|
||||
title = self.tag_to_string(post)
|
||||
self.log()
|
||||
self.log('--> post: ', post)
|
||||
|
@ -25,7 +25,7 @@ class FootballUA(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'bottom-info'}),
|
||||
dict(name='div', attrs={'class': 'social-buttons'})
|
||||
]
|
||||
|
@ -188,7 +188,7 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
for by in soup.findAll(**classes('topper__byline topper__date font-style-italic')):
|
||||
by.name = 'div'
|
||||
for img in soup.find_all('img', attrs={'srcset': True}):
|
||||
img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x',img['srcset'].split()[0])
|
||||
img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x', img['srcset'].split()[0])
|
||||
return soup
|
||||
|
||||
def get_browser(self):
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -33,7 +33,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
# remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
# remove_tags = [
|
||||
# remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'footer-content'}),
|
||||
# ]
|
||||
|
||||
|
@ -32,7 +32,7 @@ class Gagadget(BasicNewsRecipe):
|
||||
# Ukrainian version only
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'top20 bottom20 post-links'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'footer-content'}),
|
||||
]
|
||||
|
||||
|
@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'mt5'}),
|
||||
dict(name='div', attrs={'class': 'interview-block'}),
|
||||
dict(name='p', attrs={'id': 'mce_0'}),
|
||||
|
@ -23,7 +23,7 @@ class GazetaUA(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='section', attrs={'class': 'article-content clearfix'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'mt5'}),
|
||||
dict(name='div', attrs={'class': 'interview-block'}),
|
||||
dict(name='p', attrs={'id': 'mce_0'}),
|
||||
|
@ -21,7 +21,7 @@ class GeekCity(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'term-badges floated'}),
|
||||
dict(name='div', attrs={'class': 'post-meta single-post-meta'}),
|
||||
dict(name='div', attrs={'class': 'post-share single-post-share top-share clearfix style-1'}),
|
||||
|
@ -22,7 +22,7 @@ class Gorky(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='footer')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='footer'),
|
||||
dict(name='nav', attrs={'class': 'navbar'}),
|
||||
dict(name='div', attrs={'class': 'hide'}),
|
||||
|
@ -28,7 +28,7 @@ class LiveHindustan(BasicNewsRecipe):
|
||||
remove_tags_after = [classes('stry-bdy')]
|
||||
|
||||
feeds = [
|
||||
('प्रमुख खबरें' ,'https://feed.livehindustan.com/rss/3127'),
|
||||
('प्रमुख खबरें', 'https://feed.livehindustan.com/rss/3127'),
|
||||
('देश', 'https://feed.livehindustan.com/rss/4911'),
|
||||
('विदेश', 'https://feed.livehindustan.com/rss/4913'),
|
||||
('ओपिनियन', 'https://feed.livehindustan.com/rss/5165'),
|
||||
|
@ -26,9 +26,8 @@ class iHeuteRecipe(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
|
||||
remove_attributes = ['width', 'height']
|
||||
remove_tags = [dict(name='div', attrs={'id': ['zooming']}),
|
||||
dict(name='div', attrs={
|
||||
'class': ['related', 'mapa-wrapper']}),
|
||||
remove_tags = [dict(name='div', attrs={'id': ['zooming']}),
|
||||
dict(name='div', attrs={'class': ['related', 'mapa-wrapper']}),
|
||||
dict(name='table', attrs={'id': ['opener-img', 'portal']}),
|
||||
dict(name='table', attrs={'class': ['video-16ku9']})]
|
||||
remove_tags_after = [
|
||||
|
@ -10,29 +10,21 @@ class AdvancedUserRecipe1286477122(BasicNewsRecipe):
|
||||
__author__ = 'egilh'
|
||||
|
||||
feeds = [
|
||||
(u'Politica & Palazzo',
|
||||
u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
|
||||
(u'Giustizia & impunit\xe0',
|
||||
u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
|
||||
(u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
|
||||
(u'Economia & Lobby',
|
||||
u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
|
||||
(u'Lavoro & precari',
|
||||
u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
|
||||
(u'Ambiente & Veleni',
|
||||
u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
|
||||
(u'Sport & miliardi',
|
||||
u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
|
||||
(u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
|
||||
(u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
|
||||
(u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
|
||||
(u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
|
||||
(u'Lavoro & precari', u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
|
||||
(u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
|
||||
(u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
|
||||
(u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'),
|
||||
(u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'),
|
||||
(u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
|
||||
(u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
|
||||
(u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'),
|
||||
(u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'),
|
||||
(u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'),
|
||||
(u'Piacere quotidiano',
|
||||
u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
|
||||
(u'Cervelli in fuga',
|
||||
u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
|
||||
(u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
|
||||
(u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
|
||||
(u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'),
|
||||
(u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/')
|
||||
]
|
||||
|
@ -36,7 +36,7 @@ class IlMessaggero(BasicNewsRecipe):
|
||||
dict(name='h2', attrs={
|
||||
'class': ['sottotitLettura', 'grigio16']}),
|
||||
dict(name='span', attrs={'class': 'testoArticoloG'}),
|
||||
dict(name='div', attrs={'id': 'testodim'})
|
||||
dict(name='div', attrs={'id': 'testodim'})
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
|
@ -34,7 +34,7 @@ class IlManifesto(BasicNewsRecipe):
|
||||
startSoup = self.index_to_soup(startUrl)
|
||||
lastEdition = startSoup.findAll('div', id='accordion_inedicola')[
|
||||
1].find('a')['href']
|
||||
del (startSoup)
|
||||
del startSoup
|
||||
self.manifesto_index = MANIFESTO_BASEURL + lastEdition
|
||||
urlsplit = lastEdition.split('/')
|
||||
self.manifesto_datestr = urlsplit[-1]
|
||||
@ -106,5 +106,5 @@ class IlManifesto(BasicNewsRecipe):
|
||||
summary = sommNode
|
||||
|
||||
template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>" # noqa: E501
|
||||
del (bs)
|
||||
del bs
|
||||
return template % dict(title=title, subtitle=subtitle, author=author, summary=summary, content=content)
|
||||
|
@ -115,6 +115,6 @@ class IndiaToday(BasicNewsRecipe):
|
||||
imagecap = '<div id="imgcap">' + data['image_caption'] + '</div>'
|
||||
|
||||
html = '<html><body>' + slug + '<h1>' + title + '</h1>\n' + desc + '<div id="author">'\
|
||||
+ author + '<span> ' + city + ' UPDATED: ' + date + '</span></div>\n' + image + imagecap + body\
|
||||
+ author + '<span> ' + city + ' UPDATED: ' + date + '</span></div>\n' + image + imagecap + body\
|
||||
+ '</body></html>'
|
||||
return html
|
||||
|
@ -121,7 +121,7 @@ class IrishTimes(BasicNewsRecipe):
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
}, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''}))
|
||||
}, data=urlencode({'username': self.username, 'password': self.password, 'deviceid':deviceid, 'persistent':'on', 'rid': ''}))
|
||||
|
||||
r = br.open(rq)
|
||||
raw = r.read()
|
||||
|
@ -47,7 +47,7 @@ class jotdown(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'id':'respond'})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='div' , attrs={'id':'respond'})
|
||||
remove_tags_after = dict(name='div', attrs={'id':'respond'})
|
||||
|
||||
preprocess_regexps = [
|
||||
# To change the small size of the text
|
||||
|
@ -26,7 +26,7 @@ class Computerra(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'class': 'breadcrumbs'}),
|
||||
dict(name='div', attrs={'class': 'post-info__likes post-info-likes'}),
|
||||
dict(name='div', attrs={'class': 'cta-row'}),
|
||||
|
@ -30,7 +30,7 @@ class kudyznudyRecipe(BasicNewsRecipe):
|
||||
name='div', attrs={'class': ['C_WholeContentPadding']})
|
||||
remove_tags_after = dict(
|
||||
name='div', attrs={'class': ['SurroundingsContainer']})
|
||||
remove_tags = [dict(name='div', attrs={
|
||||
remove_tags = [dict(name='div', attrs={
|
||||
'class': ['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']})]
|
||||
|
||||
keep_only_tags = []
|
||||
|
@ -111,7 +111,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
|
||||
def get_article_url(self, article):
|
||||
# Get link to original article URL
|
||||
rurl = article.get('guid', None)
|
||||
rurl = article.get('guid', None)
|
||||
if not rurl:
|
||||
# Use the "link" attribute as failover
|
||||
return article.get('link', None)
|
||||
|
@ -50,7 +50,7 @@ class LibertadDigital(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
return article.get('guid', None)
|
||||
|
||||
def print_version(self, url):
|
||||
art, sep, rest = url.rpartition('/')
|
||||
|
@ -12,7 +12,7 @@ def classes(classes):
|
||||
|
||||
def absolutize(href):
|
||||
if href.startswith('/'):
|
||||
href = 'https://www.lrb.co.uk' + href
|
||||
href = 'https://www.lrb.co.uk' + href
|
||||
return href
|
||||
|
||||
|
||||
|
@ -46,4 +46,4 @@ class Marca(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
return article.get('guid', None)
|
||||
|
@ -95,7 +95,7 @@ class Mediapart(BasicNewsRecipe):
|
||||
for feed in feeds:
|
||||
feed_name = feed.title.lower()
|
||||
for article in feed.articles:
|
||||
if feed_name != 'autres' and feed_name not in article.url:
|
||||
if feed_name != 'autres' and feed_name not in article.url:
|
||||
feed.articles.remove(article)
|
||||
if feed_name == 'autres' and any(section in article.url for section in self.sections):
|
||||
feed.articles.remove(article)
|
||||
|
@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -22,7 +22,7 @@ class MoscowTimes(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'article__tags'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='aside'),
|
||||
dict(name='footer'),
|
||||
dict(name='section', attrs={'class': 'cluster'}),
|
||||
|
@ -60,7 +60,7 @@ class naszdziennik(BasicNewsRecipe):
|
||||
article_title_datetime.find('h4'))
|
||||
# zebrane elementy dodajemy do listy zadeklarowanej w linijce 44
|
||||
articles[section].append(
|
||||
{'title': article_title, 'url': article_url, 'date': article_date})
|
||||
{'title': article_title, 'url': article_url, 'date': article_date})
|
||||
# po dodaniu wszystkich artykułów dodajemy sekcje do listy feedów,
|
||||
# korzystając z list sekcji znajdujących się w słowniku
|
||||
for section in sections:
|
||||
|
@ -27,12 +27,12 @@ class NavyTimes(BasicNewsRecipe):
|
||||
('Home page', 'https://www.navytimes.com/arc/outboundfeeds/rss/?outputType=xml'),
|
||||
('News', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/?outputType=xml'),
|
||||
('Your Navy', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/your-navy/?outputType=xml'),
|
||||
('Your Army', 'https://www.armytimes.com/arc/outboundfeeds/rss/category/news/your-army/?outputType=xml'),
|
||||
('Your Army', 'https://www.armytimes.com/arc/outboundfeeds/rss/category/news/your-army/?outputType=xml'),
|
||||
('Your Air Force', 'https://www.airforcetimes.com/arc/outboundfeeds/rss/category/news/your-air-force?outputType=xml'),
|
||||
('Your Marine Core', 'https://www.marinecorpstimes.com/arc/outboundfeeds/rss/category/news/your-marine-corps/?outputType=xml'),
|
||||
('Pentagon and Congress', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/pentagon-congress/?outputType=xml'),
|
||||
('Pay and Benefits', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/pay-benefits/?outputType=xml'),
|
||||
('Veterans', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/veterans/?outputType=xml'),
|
||||
('Education and Transition', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/education-transition/?outputType=xml'),
|
||||
('Flashpoints', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/flashpoints/?outputType=xml'),
|
||||
('Your Marine Core', 'https://www.marinecorpstimes.com/arc/outboundfeeds/rss/category/news/your-marine-corps/?outputType=xml'),
|
||||
('Pentagon and Congress', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/news/pentagon-congress/?outputType=xml'),
|
||||
('Pay and Benefits', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/pay-benefits/?outputType=xml'),
|
||||
('Veterans', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/veterans/?outputType=xml'),
|
||||
('Education and Transition', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/education-transition/?outputType=xml'),
|
||||
('Flashpoints', 'https://www.navytimes.com/arc/outboundfeeds/rss/category/flashpoints/?outputType=xml'),
|
||||
]
|
||||
|
@ -29,7 +29,7 @@ class nepszabadsag(BasicNewsRecipe):
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='div', attrs={'class': ['d-source']})
|
||||
remove_tags_after = dict(name='div', attrs={'class': ['tags']})
|
||||
remove_tags = [dict(name='div', attrs={'class': ['h']}),
|
||||
remove_tags = [dict(name='div', attrs={'class': ['h']}),
|
||||
dict(name='tfoot')]
|
||||
|
||||
keep_only_tags = [dict(name='table', attrs={'class': 'article-box'})]
|
||||
|
@ -80,7 +80,7 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
|
||||
sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')})
|
||||
|
||||
for sect in sections:
|
||||
sect_title = sect.find(attrs={'class' : re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
|
||||
sect_title = sect.find(attrs={'class': re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
|
||||
if sect_title is None:
|
||||
continue
|
||||
sect_title = sect_title.contents[0]
|
||||
|
@ -24,12 +24,12 @@ class novinkyRecipe(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}),
|
||||
dict(name='div', attrs={'id': ['discussionEntry']}),
|
||||
dict(name='span', attrs={
|
||||
remove_tags = [dict(name='div', attrs={'id': ['pictureInnerBox']}),
|
||||
dict(name='div', attrs={'id': ['discussionEntry']}),
|
||||
dict(name='span', attrs={
|
||||
'id': ['mynews-hits', 'mynews-author']}),
|
||||
dict(name='div', attrs={'class': ['related']}),
|
||||
dict(name='div', attrs={'id': ['multimediaInfo']})]
|
||||
dict(name='div', attrs={'class': ['related']}),
|
||||
dict(name='div', attrs={'id': ['multimediaInfo']})]
|
||||
remove_tags_before = dict(name='div', attrs={'class': ['articleHeader']})
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'related'})
|
||||
|
||||
|
@ -49,4 +49,4 @@ class Nu(BasicNewsRecipe):
|
||||
(u'Podcast Algemeen nieuws', u'http://www.nu.nl/podcast.php')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
return article.get('guid', None)
|
||||
|
@ -24,7 +24,7 @@ class OGRU(BasicNewsRecipe):
|
||||
|
||||
remove_attributes = ['style']
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'id': 'pageDescription'}),
|
||||
dict(name='div', attrs={'class': 'pageNavLinkGroup'}),
|
||||
dict(name='div', attrs={'class': 'tagBlock TagContainer'}),
|
||||
|
@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -76,22 +76,22 @@ class Pagina12(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
feeds = [
|
||||
(u'Diario de hoy' , u'https://www.pagina12.com.ar/rss/edicion-impresa'),
|
||||
(u'Espectaculos' , u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
|
||||
(u'Radar' , u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
|
||||
(u'Radar libros' , u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
|
||||
(u'Cash' , u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
|
||||
(u'NO' , u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
|
||||
(u'Las 12' , u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
|
||||
(u'Soy' , u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
|
||||
(u'M2' , u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
|
||||
(u'Rosario 12' , u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
|
||||
(u'Diario de hoy', u'https://www.pagina12.com.ar/rss/edicion-impresa'),
|
||||
(u'Espectaculos', u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
|
||||
(u'Radar', u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
|
||||
(u'Radar libros', u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
|
||||
(u'Cash', u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
|
||||
(u'NO', u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
|
||||
(u'Las 12', u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
|
||||
(u'Soy', u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
|
||||
(u'M2', u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
|
||||
(u'Rosario 12', u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
lurl = strftime('https://www.pagina12.com.ar/edicion-impresa/%d-%m-%Y')
|
||||
soup = self.index_to_soup(lurl)
|
||||
mydiv = soup.find('div', {'class' : lambda x: x and 'printed-edition-cover' in x.split()})
|
||||
mydiv = soup.find('div', {'class': lambda x: x and 'printed-edition-cover' in x.split()})
|
||||
if mydiv:
|
||||
for image in mydiv.findAll('img'):
|
||||
if image['src'].startswith('https://images.pagina12.com.ar/styles/width700/public/'):
|
||||
|
@ -24,7 +24,7 @@ class PaperPaper(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'bottom-block '}),
|
||||
dict(name='div', attrs={'class': 'bottom-block news'})
|
||||
]
|
||||
|
@ -32,4 +32,4 @@ class plRecipe(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*',
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class': ['article-detail']})]
|
||||
|
@ -26,7 +26,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
classes('Article-header Article-excerpt Article-author Article-thumbnail Article-bodyText article-title article-dek article-paragraph articlebody'),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='section', attrs={'class': ['recurrent-share']})
|
||||
dict(name='section', attrs={'class': ['recurrent-share']})
|
||||
]
|
||||
|
||||
def parse_section_index(self, slug):
|
||||
|
@ -137,7 +137,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
||||
|
||||
# We remove vast swathes of HTML which is not part of the articles.
|
||||
# Remove sibling content
|
||||
remove_tags_before = [
|
||||
remove_tags_before = [
|
||||
{'name': 'div', 'class': 'article'},
|
||||
{'name': 'div', 'id': 'page'},
|
||||
{'name': 'div', 'id': 'page-wide'},
|
||||
|
@ -31,7 +31,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
# cover_url = file:///c:/Users/YOUR_USERNAME/AppData/Roaming/calibre/resources/images/news_covers/Pro_Physik.png
|
||||
|
||||
extra_css = '''
|
||||
extra_css = '''
|
||||
h1 {font-size: 1.6em; text-align: left}
|
||||
h2, h3 {font-size: 1.3em;text-align: left}
|
||||
h2.subtitle {font-size: 1.2em;text-align: left;font-style: italic}
|
||||
|
@ -26,7 +26,7 @@ class ProSleduet(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'container'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'})
|
||||
]
|
||||
|
||||
|
@ -104,7 +104,7 @@ class RadioCanada(BasicNewsRecipe):
|
||||
('Grands titres', 'https://ici.radio-canada.ca/rss/771'),
|
||||
('Football', 'https://ici.radio-canada.ca/rss/1000057'),
|
||||
('Hockey', 'https://ici.radio-canada.ca/rss/1000056'),
|
||||
('Olympiques', 'https://ici.radio-canada.ca/rss/64852'),
|
||||
('Olympiques', 'https://ici.radio-canada.ca/rss/64852'),
|
||||
('Podium', 'https://ici.radio-canada.ca/rss/555082'),
|
||||
('Soccer', 'https://ici.radio-canada.ca/rss/1000058'),
|
||||
('Tennis', 'https://ici.radio-canada.ca/rss/1000059'),
|
||||
|
@ -35,9 +35,9 @@ class RealClear(BasicNewsRecipe):
|
||||
|
||||
# Numeric parameter is type, controls whether we look for
|
||||
feedsets = [
|
||||
['Politics', 'http://www.realclearpolitics.com/index.xml', 0],
|
||||
['Policy', 'http://www.realclearpolicy.com/index.xml', 0],
|
||||
['Science', 'http://www.realclearscience.com/index.xml', 0],
|
||||
['Politics', 'http://www.realclearpolitics.com/index.xml', 0],
|
||||
['Policy', 'http://www.realclearpolicy.com/index.xml', 0],
|
||||
['Science', 'http://www.realclearscience.com/index.xml', 0],
|
||||
['Tech', 'http://www.realcleartechnology.com/index.xml', 0],
|
||||
# The feedburner is essentially the same as the top feed, politics.
|
||||
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
|
||||
@ -45,7 +45,7 @@ class RealClear(BasicNewsRecipe):
|
||||
['Markets Home', 'http://www.realclearmarkets.com/index.xml', 0],
|
||||
['Markets', 'http://www.realclearmarkets.com/articles/index.xml', 0],
|
||||
['World', 'http://www.realclearworld.com/index.xml', 0],
|
||||
['World Blog', 'http://www.realclearworld.com/blog/index.xml', 2]
|
||||
['World Blog', 'http://www.realclearworld.com/blog/index.xml', 2]
|
||||
]
|
||||
# Hints to extractPrintURL.
|
||||
# First column is the URL snippet. Then the string to search for as text,
|
||||
@ -53,12 +53,11 @@ class RealClear(BasicNewsRecipe):
|
||||
# drill down.
|
||||
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
|
||||
|
||||
printhints = [['realclear', '', '', 'printpage'],
|
||||
['billoreilly.com', 'Print this entry', 'a', ''],
|
||||
['billoreilly.com', 'Print This Article', 'a', ''],
|
||||
['politico.com', 'Print',
|
||||
'a', 'share-print'],
|
||||
['nationalreview.com', '>Print<', 'a', ''],
|
||||
printhints = [['realclear', '', '', 'printpage'],
|
||||
['billoreilly.com', 'Print this entry', 'a', ''],
|
||||
['billoreilly.com', 'Print This Article', 'a', ''],
|
||||
['politico.com', 'Print', 'a', 'share-print'],
|
||||
['nationalreview.com', '>Print<', 'a', ''],
|
||||
['reason.com', '', 'a', 'printer']
|
||||
# The following are not supported due to JavaScripting, and would require obfuscated_article to handle
|
||||
# forbes,
|
||||
|
@ -125,7 +125,7 @@ class respektRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
if next.getchildren():
|
||||
next_child = next.getchildren()[0]
|
||||
next_child.text = next_child.text + u' • ' + text
|
||||
next_child.text = next_child.text + u' • ' + text
|
||||
par.getparent().remove(par)
|
||||
# Insert text length
|
||||
text = root.xpath("//div[@id='postcontent']")[0]
|
||||
@ -171,4 +171,4 @@ class respektRecipe(BasicNewsRecipe):
|
||||
o.getparent().replace(o,e)
|
||||
except:
|
||||
pass
|
||||
return (BeautifulSoup(lxml.etree.tostring(root,encoding='unicode')))
|
||||
return BeautifulSoup(lxml.etree.tostring(root,encoding='unicode'))
|
||||
|
@ -60,7 +60,7 @@ class RND(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
('Politik', 'https://www.rnd.de/arc/outboundfeeds/rss/category/politik/'),
|
||||
('Wirtschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wirtschaft/'),
|
||||
('Wirtschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wirtschaft/'),
|
||||
('Sport', 'https://www.rnd.de/arc/outboundfeeds/rss/category/sport/'),
|
||||
('Panorama', 'https://www.rnd.de/arc/outboundfeeds/rss/category/panorama/'),
|
||||
# ('Promis', 'https://www.rnd.de/arc/outboundfeeds/rss/category/promis/'),
|
||||
|
@ -61,7 +61,7 @@ class Saechsische(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
# ('Alle Artikel der SZ', 'https://www.saechsische.de/arc/outboundfeeds/rss/'),
|
||||
('Stadt Dresden', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden'),
|
||||
('Stadt Dresden', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden'),
|
||||
# ('Altstadt', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/altstadt'),
|
||||
# ('Blasewitz', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/blasewitz'),
|
||||
# ('Cotta', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/dresden/cotta'),
|
||||
@ -152,7 +152,7 @@ class Saechsische(BasicNewsRecipe):
|
||||
# ('Vogtlandkreis', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/vogtland'),
|
||||
# ('Plauen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/lokales/vogtland/plauen'),
|
||||
# ('Tschechien', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/tschechien'),
|
||||
('Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sachsen'),
|
||||
('Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sachsen'),
|
||||
# ('Der Osten', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/der-osten'),
|
||||
# ('Politik in Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik/regional'),
|
||||
# ('Wirtschaft in Sachsen', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft/regional'),
|
||||
@ -172,10 +172,10 @@ class Saechsische(BasicNewsRecipe):
|
||||
# ('Genuss und Kochen', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/genuss-und-kochen'),
|
||||
# ('Sächsische Schweiz', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/saechsische-schweiz'),
|
||||
# ('Sachsenkompass', 'https://www.saechsische.de/arc/outboundfeeds/rss/tags_slug/sachsenkompass'),
|
||||
('Politik', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik'),
|
||||
('Wirtschaft', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft'),
|
||||
('Politik', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/politik'),
|
||||
('Wirtschaft', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/wirtschaft'),
|
||||
# ('Sport', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/sport'),
|
||||
('Panorama', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/panorama'),
|
||||
('Panorama', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/panorama'),
|
||||
# ('Promis', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/promis'),
|
||||
# ('Reise', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/reise'),
|
||||
# ('Medien & TV', 'https://www.saechsische.de/arc/outboundfeeds/rss/category/medien'),
|
||||
|
@ -54,7 +54,7 @@ def load_article_from_json(raw, root):
|
||||
for child in tuple(body):
|
||||
body.remove(child)
|
||||
article = E(body, 'article')
|
||||
E(article, 'div', replace_entities(data['firstTopic']['name']) , style='color: gray; font-size:small; font-weight:bold;')
|
||||
E(article, 'div', replace_entities(data['firstTopic']['name']), style='color: gray; font-size:small; font-weight:bold;')
|
||||
E(article, 'h1', replace_entities(data['headline']))
|
||||
# E(article, 'p', replace_entities(data['subHeadline']['text']), style='font-style: italic; color:#202020;')
|
||||
for subh in data['subHeadline']['json']:
|
||||
|
@ -22,7 +22,7 @@ class Sobaka(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'b-post-view__foot'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'b-post-view__telegram-promo'}),
|
||||
dict(name='div', attrs={'class': 'b-post-view__tgb'}),
|
||||
dict(name='div', attrs={'id': 'comments'}),
|
||||
|
@ -21,7 +21,7 @@ class Sotavision(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='span', attrs={'style': 'border-color:#EBEBEB;border-width:1px;width:100%;'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'td_block_wrap tdb_mobile_menu tdi_7 td-pb-border-top td_block_template_1 tdb-header-align'}),
|
||||
dict(name='div', attrs={'class': 'td_block_wrap tdb_single_author tdi_52 td-pb-border-top td_block_template_1 tdb-post-meta'}),
|
||||
dict(name='div', attrs={'class': 'td_block_wrap tdb_single_date tdi_53 td-pb-border-top td_block_template_1 tdb-post-meta'}),
|
||||
|
@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||
|
||||
def absurl(url):
|
||||
if url.startswith('/'):
|
||||
url = 'https://www.spectator.co.uk' + url
|
||||
url = 'https://www.spectator.co.uk' + url
|
||||
return url
|
||||
|
||||
|
||||
|
@ -39,16 +39,16 @@ class StraitsTimes(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'World' , u'https://www.straitstimes.com/news/world/rss.xml')
|
||||
(u'Business' , u'https://www.straitstimes.com/news/business/rss.xml'),
|
||||
(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
|
||||
(u'Tech' , u'https://www.straitstimes.com/news/tech/rss.xml'),
|
||||
(u'Opinion' , u'https://www.straitstimes.com/news/opinion/rss.xml'),
|
||||
(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
|
||||
(u'Singapore' , u'https://www.straitstimes.com/news/singapore/rss.xml'),
|
||||
(u'Asia' , u'https://www.straitstimes.com/news/asia/rss.xml'),
|
||||
(u'Multimedia' , u'https://www.straitstimes.com/news/multimedia/rss.xml'),
|
||||
(u'Sport' , u'https://www.straitstimes.com/news/sport/rss.xml'),
|
||||
(u'World', u'https://www.straitstimes.com/news/world/rss.xml'),
|
||||
(u'Business', u'https://www.straitstimes.com/news/business/rss.xml'),
|
||||
(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
|
||||
(u'Tech', u'https://www.straitstimes.com/news/tech/rss.xml'),
|
||||
(u'Opinion', u'https://www.straitstimes.com/news/opinion/rss.xml'),
|
||||
(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
|
||||
(u'Singapore', u'https://www.straitstimes.com/news/singapore/rss.xml'),
|
||||
(u'Asia', u'https://www.straitstimes.com/news/asia/rss.xml'),
|
||||
(u'Multimedia', u'https://www.straitstimes.com/news/multimedia/rss.xml'),
|
||||
(u'Sport', u'https://www.straitstimes.com/news/sport/rss.xml'),
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
|
||||
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
|
||||
dict(name='span', attrs={'class': 'newses-tags'}),
|
||||
|
@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
|
||||
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
|
||||
dict(name='span', attrs={'class': 'newses-tags'}),
|
||||
|
@ -29,7 +29,7 @@ class TInvariant(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'media mg-info-author-block'}),
|
||||
dict(name='div', attrs={'class': 'mg-blog-category mb-1'}),
|
||||
dict(name='span', attrs={'class': 'newses-tags'}),
|
||||
|
@ -51,73 +51,80 @@ class PhilippineDailyInquirer(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
|
||||
('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
|
||||
('Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
|
||||
('Nation' , 'http://newsinfo.inquirer.net/category/nation/feed'),
|
||||
('Nation - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
|
||||
('Metro' , 'http://newsinfo.inquirer.net/category/metro/feed'),
|
||||
('Metro - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
|
||||
('Regions' , 'http://newsinfo.inquirer.net/category/regions/feed'),
|
||||
('Regions - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
|
||||
('News' , 'http://www.inquirer.net/fullfeed'),
|
||||
('More News' , 'http://newsinfo.inquirer.net/feed')
|
||||
,
|
||||
('Global Nation' , 'http://globalnation.inquirer.net/feed'),
|
||||
('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
|
||||
('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
|
||||
('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
|
||||
('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
|
||||
('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
|
||||
('Global Nation - Europe' , 'http://globalnation.inquirer.net/category/news/europe/feed'),
|
||||
('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
|
||||
('Global Nation - Events' , 'http://globalnation.inquirer.net/category/events/feed'),
|
||||
('Business' , 'http://business.inquirer.net/feed'),
|
||||
('Business - Latest Stories' , 'http://business.inquirer.net/category/latest-stories/feed'),
|
||||
('Business - Money' , 'http://business.inquirer.net/category/money/feed'),
|
||||
('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
|
||||
('Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
|
||||
('Nation', 'http://newsinfo.inquirer.net/category/nation/feed'),
|
||||
('Nation - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
|
||||
('Metro', 'http://newsinfo.inquirer.net/category/metro/feed'),
|
||||
('Metro - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
|
||||
('Regions', 'http://newsinfo.inquirer.net/category/regions/feed'),
|
||||
('Regions - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
|
||||
('News', 'http://www.inquirer.net/fullfeed'),
|
||||
('More News', 'http://newsinfo.inquirer.net/feed'),
|
||||
|
||||
('Global Nation', 'http://globalnation.inquirer.net/feed'),
|
||||
('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
|
||||
('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
|
||||
('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
|
||||
('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
|
||||
('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
|
||||
('Global Nation - Europe', 'http://globalnation.inquirer.net/category/news/europe/feed'),
|
||||
('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
|
||||
('Global Nation - Events', 'http://globalnation.inquirer.net/category/events/feed'),
|
||||
|
||||
('Business', 'http://business.inquirer.net/feed'),
|
||||
('Business - Latest Stories', 'http://business.inquirer.net/category/latest-stories/feed'),
|
||||
('Business - Money', 'http://business.inquirer.net/category/money/feed'),
|
||||
('Business - Science & Health', 'http://business.inquirer.net/category/science-and-health/feed'),
|
||||
('Business - Motoring' , 'http://business.inquirer.net/category/motoring/feed'),
|
||||
('Business - Property Guide' , 'http://business.inquirer.net/category/property-guide/feed'),
|
||||
('Business - Columnists' , 'http://business.inquirer.net/category/columnists/feed'),
|
||||
('Sports' , 'http://sports.inquirer.net/feed'),
|
||||
('Sports - Latest Stories' , 'http://sports.inquirer.net/category/latest-stories/feed'),
|
||||
('Sports - Basketball' , 'http://sports.inquirer.net/category/section/basketball/feed'),
|
||||
('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
|
||||
('Sports - Golf' , 'http://sports.inquirer.net/category/section/golf/feed'),
|
||||
('Sports - Football' , 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
|
||||
('Sports - Other Sports' , 'http://sports.inquirer.net/category/section/other-sports/feed'),
|
||||
('Technology' , 'http://technology.inquirer.net/feed'),
|
||||
('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),
|
||||
('Entertainment' , 'http://entertainment.inquirer.net/feed'),
|
||||
('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
|
||||
('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
|
||||
('Entertainment - Movies' , 'http://movies.inquirer.net/feed'),
|
||||
('Lifestyle' , 'http://lifestyle.inquirer.net/feed'),
|
||||
('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
|
||||
('Lifestyle - Arts & Books' , 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
|
||||
('Lifestyle - Wellness' , 'http://lifestyle.inquirer.net/category/wellness/feed'),
|
||||
('Business - Motoring', 'http://business.inquirer.net/category/motoring/feed'),
|
||||
('Business - Property Guide', 'http://business.inquirer.net/category/property-guide/feed'),
|
||||
('Business - Columnists', 'http://business.inquirer.net/category/columnists/feed'),
|
||||
|
||||
('Sports', 'http://sports.inquirer.net/feed'),
|
||||
('Sports - Latest Stories', 'http://sports.inquirer.net/category/latest-stories/feed'),
|
||||
('Sports - Basketball', 'http://sports.inquirer.net/category/section/basketball/feed'),
|
||||
('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
|
||||
('Sports - Golf', 'http://sports.inquirer.net/category/section/golf/feed'),
|
||||
('Sports - Football', 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
|
||||
('Sports - Other Sports', 'http://sports.inquirer.net/category/section/other-sports/feed'),
|
||||
|
||||
('Technology', 'http://technology.inquirer.net/feed'),
|
||||
('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),
|
||||
|
||||
('Entertainment', 'http://entertainment.inquirer.net/feed'),
|
||||
('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
|
||||
('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
|
||||
('Entertainment - Movies', 'http://movies.inquirer.net/feed'),
|
||||
|
||||
('Lifestyle', 'http://lifestyle.inquirer.net/feed'),
|
||||
('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
|
||||
('Lifestyle - Arts & Books', 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
|
||||
('Lifestyle - Wellness', 'http://lifestyle.inquirer.net/category/wellness/feed'),
|
||||
('Lifestyle - Home & Entertaining', 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed'),
|
||||
('Lifestyle - Parenting' , 'http://lifestyle.inquirer.net/category/parenting/feed'),
|
||||
('Lifestyle - Food' , 'http://lifestyle.inquirer.net/category/food/feed'),
|
||||
('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
|
||||
('Lifestyle - Super' , 'http://lifestyle.inquirer.net/category/super/feed'),
|
||||
('Lifestyle - 2BU' , 'http://lifestyle.inquirer.net/category/2bu/feed'),
|
||||
('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
|
||||
('Lifestyle - Wedding' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
|
||||
('Lifestyle - Travel' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
|
||||
('Lifestyle - Relationship' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
|
||||
('Opinion' , 'http://opinion.inquirer.net/feed'),
|
||||
('Opinion - Viewpoints' , 'http://opinion.inquirer.net/category/viewpoints/feed'),
|
||||
('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
|
||||
('Editorial' , 'http://opinion.inquirer.net/category/editorial/feed'),
|
||||
('Letters to the Editor' , 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
|
||||
('Columns' , 'http://opinion.inquirer.net/category/columns/feed'),
|
||||
('Citizens Journalism' , 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
|
||||
('Cebu - Daily News' , 'http://newsinfo.inquirer.net/category/cdn/feed'),
|
||||
('Cebu - More News' , 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
|
||||
('Cebu - Community' , 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
|
||||
('Cebu - Metro' , 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
|
||||
('Cebu - Business' , 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
|
||||
('Cebu - Sports' , 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
|
||||
('Cebu - Visayas' , 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
|
||||
('Cebu - Opinion' , 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed')
|
||||
('Lifestyle - Parenting', 'http://lifestyle.inquirer.net/category/parenting/feed'),
|
||||
('Lifestyle - Food', 'http://lifestyle.inquirer.net/category/food/feed'),
|
||||
('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
|
||||
('Lifestyle - Super', 'http://lifestyle.inquirer.net/category/super/feed'),
|
||||
('Lifestyle - 2BU', 'http://lifestyle.inquirer.net/category/2bu/feed'),
|
||||
('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
|
||||
('Lifestyle - Wedding', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
|
||||
('Lifestyle - Travel', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
|
||||
('Lifestyle - Relationship', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
|
||||
|
||||
('Opinion', 'http://opinion.inquirer.net/feed'),
|
||||
('Opinion - Viewpoints', 'http://opinion.inquirer.net/category/viewpoints/feed'),
|
||||
('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
|
||||
('Editorial', 'http://opinion.inquirer.net/category/editorial/feed'),
|
||||
('Letters to the Editor', 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
|
||||
('Columns', 'http://opinion.inquirer.net/category/columns/feed'),
|
||||
|
||||
('Citizens Journalism', 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
|
||||
('Cebu - Daily News', 'http://newsinfo.inquirer.net/category/cdn/feed'),
|
||||
('Cebu - More News', 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
|
||||
('Cebu - Community', 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
|
||||
('Cebu - Metro', 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
|
||||
('Cebu - Business', 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
|
||||
('Cebu - Sports', 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
|
||||
('Cebu - Visayas', 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
|
||||
('Cebu - Opinion', 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed'),
|
||||
]
|
||||
|
@ -217,7 +217,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
# We remove vast swathes of HTML which is not part of the articles.
|
||||
remove_tags_before = [
|
||||
remove_tags_before = [
|
||||
{'name': 'div', 'class': 'container'},
|
||||
{'name': 'div', 'class': 'content-wrapper'},
|
||||
{'name': 'div', 'class': 'only-in-the-magazine'},
|
||||
|
@ -30,21 +30,21 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
('TOP 20', 'http://www.tmz.com/rss.xml'),
|
||||
('Exclusives', 'http://www.tmz.com/category/exclusives/rss.xml'),
|
||||
('Celeb Justice', 'http://www.tmz.com/category/celebrity-justice/rss.xml'),
|
||||
('Celeb Feuds', 'http://www.tmz.com/category/celebrity-feuds/rss.xml'),
|
||||
('Politix', 'http://www.tmz.com/category/politix/rss.xml'),
|
||||
('Music', 'http://www.tmz.com/category/music/rss.xml'),
|
||||
('Movies', 'http://www.tmz.com/category/movies/rss.xml'),
|
||||
('TV', 'http://www.tmz.com/category/tv/rss.xml'),
|
||||
('Sports', 'http://www.tmz.com/category/TMZsports/rss.xml'),
|
||||
('Hook-Ups', 'http://www.tmz.com/category/hook-ups/rss.xml'),
|
||||
('Beauty', 'http://www.tmz.com/category/beauty/rss.xml'),
|
||||
('Fashion', 'http://www.tmz.com/category/fashion/rss.xml'),
|
||||
('Gossip & Rumor', 'http://www.tmz.com/category/gossip-rumors/rss.xml'),
|
||||
('Hot Mama', 'http://www.tmz.com/category/hot-mamas/rss.xml'),
|
||||
('Party All The Time', 'http://www.tmz.com/category/party-all-the-time/rss.xml'),
|
||||
('Ride Me!', 'http://www.tmz.com/category/ride-me/rss.xml'),
|
||||
('Stars in Heat', 'http://www.tmz.com/category/stars-in-heat/rss.xml'),
|
||||
('Vegas', 'http://www.tmz.com/category/hot-vegas/rss.xml')
|
||||
('Celeb Feuds', 'http://www.tmz.com/category/celebrity-feuds/rss.xml'),
|
||||
('Politix', 'http://www.tmz.com/category/politix/rss.xml'),
|
||||
('Music', 'http://www.tmz.com/category/music/rss.xml'),
|
||||
('Movies', 'http://www.tmz.com/category/movies/rss.xml'),
|
||||
('TV', 'http://www.tmz.com/category/tv/rss.xml'),
|
||||
('Sports', 'http://www.tmz.com/category/TMZsports/rss.xml'),
|
||||
('Hook-Ups', 'http://www.tmz.com/category/hook-ups/rss.xml'),
|
||||
('Beauty', 'http://www.tmz.com/category/beauty/rss.xml'),
|
||||
('Fashion', 'http://www.tmz.com/category/fashion/rss.xml'),
|
||||
('Gossip & Rumor', 'http://www.tmz.com/category/gossip-rumors/rss.xml'),
|
||||
('Hot Mama', 'http://www.tmz.com/category/hot-mamas/rss.xml'),
|
||||
('Party All The Time', 'http://www.tmz.com/category/party-all-the-time/rss.xml'),
|
||||
('Ride Me!', 'http://www.tmz.com/category/ride-me/rss.xml'),
|
||||
('Stars in Heat', 'http://www.tmz.com/category/stars-in-heat/rss.xml'),
|
||||
('Vegas', 'http://www.tmz.com/category/hot-vegas/rss.xml')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -73,7 +73,7 @@ class TheEconomicTimes(BasicNewsRecipe):
|
||||
return citem['content']
|
||||
|
||||
def get_article_url(self, article):
|
||||
rurl = article.get('guid', None)
|
||||
rurl = article.get('guid', None)
|
||||
if '/articleshow/' in rurl:
|
||||
return rurl
|
||||
|
||||
|
@ -39,7 +39,7 @@ class UAFootball(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'show-post'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='form'),
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class': 'language'}),
|
||||
|
@ -22,7 +22,7 @@ class UkrInform(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='aside'),
|
||||
dict(name='img', attrs={'class': 'pixel'}),
|
||||
dict(name='section', attrs={'class': 'read'}),
|
||||
|
@ -227,7 +227,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -215,7 +215,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
|
||||
pgall = soup.find('div', attrs={'id': 'storyphoto'})
|
||||
if pgall is not None: # photo gallery perhaps
|
||||
|
@ -38,7 +38,7 @@ class version2(BasicNewsRecipe):
|
||||
dict(name='span', attrs={'class': 'article-link-id'}),
|
||||
dict(name='section', attrs={'class': 'social-tools-pane'}),
|
||||
dict(name='section', attrs={'class': 'article-timeline'}),
|
||||
dict(name='div', attrs={'id' : 'mini-panel-comments_and_form'}),
|
||||
dict(name='div', attrs={'id': 'mini-panel-comments_and_form'}),
|
||||
dict(name='div', attrs={'class': 'related-articles top-three'}),
|
||||
dict(name='div', attrs={'id': 'mini-panel-jobfinder_1'}),
|
||||
dict(name='section', attrs={'id': 'mini-panel-frontpage_debat_zone'}),
|
||||
@ -53,7 +53,7 @@ class version2(BasicNewsRecipe):
|
||||
dict(name='section', attrs={'class': 'jobs-list'}),
|
||||
dict(name='footer', attrs={'id': 'footer'}),
|
||||
dict(name='section', attrs={'class': 'banner'}),
|
||||
dict(name='div', attrs={'class' : 'fast-track-frontpage'}),
|
||||
dict(name='div', attrs={'class': 'fast-track-frontpage'}),
|
||||
dict(name='a', attrs={'class': 'byline-comments'})
|
||||
]
|
||||
|
||||
|
@ -23,7 +23,7 @@ class ViknaSTB(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'content-wrapper'})
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': 'share-content-wrapper flex-wrapper'}),
|
||||
dict(name='div', attrs={'class': 'sticky-wrapper'}),
|
||||
dict(name='div', attrs={'class': 'promo-wrapper'}),
|
||||
|
@ -101,7 +101,7 @@ class weblogssl(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'id':'comments'})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='div' , attrs={'id':'comments'})
|
||||
remove_tags_after = dict(name='div', attrs={'id':'comments'})
|
||||
|
||||
def print_version(self, url):
|
||||
if url.startswith('http://www'):
|
||||
|
@ -24,7 +24,7 @@ class WiComix(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = dict(name='article')
|
||||
|
||||
remove_tags = [
|
||||
remove_tags = [
|
||||
# dict(name='div', attrs={'class': 'author-meta'}),
|
||||
dict(name='div', attrs={'id': 'jp-post-flair'}),
|
||||
dict(name='footer', attrs={'class': 'entry-meta'})
|
||||
|
@ -57,7 +57,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del (div['id'])
|
||||
del div['id']
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user