remove dead recipes

These recipes are based on RSS feeds that no longer work.
This commit is contained in:
unkn0w7n 2024-09-24 09:57:29 +05:30
parent 893ebb0457
commit d9b9f1baee
190 changed files with 0 additions and 5256 deletions

View File

@ -1,130 +0,0 @@
from datetime import datetime, timedelta

from calibre.web.feeds.news import BasicNewsRecipe


class CyNewsLiveRecipe(BasicNewsRecipe):
    '''News from The Cyprus Weekly, scraped from its section pages.

    The site exposes no RSS feeds, so ``feeds`` holds (title, section URL)
    pairs and :meth:`parse_index` scrapes each section listing directly.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_CY'
    version = 1

    title = u'Cyprus Weekly'
    publisher = u'The Cyprus Weekly'
    category = u'News, Newspaper'
    description = u'News from Cyprus'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # Scratch state shared between parse_index() and find_articles().
    pubTime = None       # publication time of the article being parsed
    minTime = None       # cut-off: articles older than this are dropped
    articleCount = 0     # articles collected so far for the current feed

    INDEX = 'http://www.cyprusweekly.com.cy/main/default.aspx'

    # Month-name lookup for article dates; hoisted out of the per-article
    # loop in find_articles() where it used to be rebuilt every iteration.
    MONTH_NAMES = {'January': 1, 'February': 2, 'March': 3, 'April': 4,
                   'May': 5, 'June': 6, 'July': 7, 'August': 8,
                   'September': 9, 'October': 10, 'November': 11,
                   'December': 12}

    # (feed title, section page URL) pairs.
    feeds = [
        ('News: Cyprus',
         'http://www.cyprusweekly.com.cy/main/92,0,0,0-CYPRUS.aspx'),
        ('News: World',
         'http://www.cyprusweekly.com.cy/main/78,0,0,0-UKWORLD.aspx'),
        ('Sport: Football',
         'http://www.cyprusweekly.com.cy/main/82,0,0,0-FOOTBALL.aspx'),
        ('Sport: Rugby',
         'http://www.cyprusweekly.com.cy/main/83,0,0,0-RUGBY.aspx'),
        ('Sport: Cricket',
         'http://www.cyprusweekly.com.cy/main/85,0,0,0-CRICKET.aspx'),
        ('Sport: Tennis',
         'http://www.cyprusweekly.com.cy/main/84,0,0,0-TENNIS.aspx'),
        ('Sport: Other',
         'http://www.cyprusweekly.com.cy/main/86,0,0,0-OTHER.aspx'),
        ('Business: Local',
         'http://www.cyprusweekly.com.cy/main/100,0,0,0-LOCAL.aspx'),
        ('Business: Foreign',
         'http://www.cyprusweekly.com.cy/main/101,0,0,0-FOREIGN.aspx'),
        ('Whats On: Places of Interest',
         'http://www.cyprusweekly.com.cy/main/123,0,0,0-PLACES-OF-INTEREST.aspx'),
        ('Whats On: Going Out',
         'http://www.cyprusweekly.com.cy/main/153,0,0,0-GOING-OUT.aspx'),
        ('Whats On: Arts & Entertainment',
         'http://www.cyprusweekly.com.cy/main/135,0,0,0-ARTS--and-ENTERTAINMENT.aspx'),
        ('Whats On: Things To Do',
         'http://www.cyprusweekly.com.cy/main/136,0,0,0-THINGS-TO-DO.aspx'),
        ('Whats On: Shopping Guide',
         'http://www.cyprusweekly.com.cy/main/142,0,0,0-SHOPPING-GUIDE.aspx'),
        ('Culture',
         'http://www.cyprusweekly.com.cy/main/208,0,0,0-CULTURE.aspx'),
        ('Environment',
         'http://www.cyprusweekly.com.cy/main/93,0,0,0-ENVIRONMENT.aspx'),
        ('Info',
         'http://www.cyprusweekly.com.cy/main/91,0,0,0-INFO.aspx'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'ArticleCategories'}),
    ]

    extra_css = '''
    body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
    '''

    def parse_index(self):
        '''Build the article index by scraping every section page.

        Returns the calibre index structure: a list of
        ``(feed_title, [article_dict, ...])`` tuples.
        '''
        answer = []
        for feed in self.feeds:
            self.articleCount = 0
            articles = []
            soup = self.index_to_soup(feed[1])
            table = soup.find('table', attrs={'id': 'ctl00_cp_ctl01_listp'})
            if table:
                # Reset the freshness window for this section.
                self.pubTime = datetime.now()
                self.minTime = self.pubTime - \
                    timedelta(days=self.oldest_article)
                self.find_articles(table, articles)
            answer.append((feed[0], articles))
        return answer

    def postprocess_html(self, soup, first):
        '''Strip inline styles and neutralize <font> tags.'''
        for el in soup.findAll(attrs={'style': True}):
            del el['style']
        for el in soup.findAll('font'):
            el.name = 'div'
            # BUG FIX: the original iterated ``for attr, value in el:``,
            # but iterating a tag yields its children, not attribute
            # pairs.  Iterate a copy of the attribute names so they can
            # be deleted while iterating.
            for attr in list(el.attrs):
                del el[attr]
        return soup

    def find_articles(self, table, articles):
        '''Collect articles from one section listing into ``articles``.

        Stops as soon as an article older than ``minTime`` is seen or
        ``max_articles_per_feed`` is exceeded (listings are assumed to be
        newest-first).
        '''
        for div in table.findAll('div', attrs={'class': 'ListArticle'}):
            el = div.find('div', attrs={'class': 'ListArticle_T'})
            title = self.tag_to_string(el.a)
            url = self.INDEX + el.a['href']
            description = self.tag_to_string(
                div.find('div', attrs={'class': 'ListArticle_BODY300'}))
            el = div.find('div', attrs={'class': 'ListArticle_D'})
            if el:
                # Date text looks like "<day> <Month> <year> <hh:mm[:ss]>".
                dateParts = self.tag_to_string(el).split(' ')
                timeParts = dateParts[3].split(':')
                self.pubTime = datetime(year=int(dateParts[2]),
                                        month=int(self.MONTH_NAMES[dateParts[1]]),
                                        day=int(dateParts[0]),
                                        hour=int(timeParts[0]),
                                        minute=int(timeParts[1]))
            if self.pubTime >= self.minTime and self.articleCount <= self.max_articles_per_feed:
                articles.append(
                    {'title': title, 'date': self.pubTime, 'url': url, 'description': description})
                self.articleCount += 1
            else:
                return

View File

@ -1,25 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe


class CzasGentlemanow(BasicNewsRecipe):
    '''Czas Gentlemanów -- a Polish blog on men's history and lifestyle,
    fetched from its per-category WordPress RSS feeds.'''

    # Metadata.
    title = u'Czas Gentlemanów'
    __author__ = 'fenuks'
    description = u'Historia mężczyzn z dala od wielkiej polityki'
    category = 'blog'
    language = 'pl'
    cover_url = 'https://czasgentlemanow.pl/wp-content/uploads/2012/10/logo-Czas-Gentlemanow1.jpg'

    # Fetch behaviour.
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False

    # Layout tweaks for galleries and aligned images.
    extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\
.wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}'

    # Content selection.
    keep_only_tags = [dict(name='div', attrs={'class': 'post-wrapper'})]
    remove_tags = [dict(attrs={'class': ['awac-wrapper', 'post-bottom', 'comment', 'seperate']})]

    feeds = [
        (u'Charakter', u'https://czasgentlemanow.pl/category/charakter/feed/'),
        (u'Wizerunek', u'https://czasgentlemanow.pl/category/wizerunek/feed/'),
        (u'Relacje międzyludzkie', u'https://czasgentlemanow.pl/category/relacje-miedzyludzkie/feed/'),
    ]

View File

@ -1,104 +0,0 @@
# vim:fileencoding=UTF-8
from __future__ import print_function, unicode_literals
import re
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1390132023(BasicNewsRecipe):
title = u'Daily Express'
__author__ = 'Dave Asbury'
# 1.8.15 official feedburner feeds live again
# 27.6.15 using feed43 as rss feeds dead
# feed 43 string = <div {*}<a href="{%}"{*}<h4>{%}</h4>
oldest_article = 1.5
language = 'en_GB'
max_articles_per_feed = 10
compress_news_images = True
compress_news_images_max_size = 20
ignore_duplicate_articles = {'title', 'url'}
masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png'
auto_cleanup_keep = '//*[@class="author"]|//section[@class="photo changeSpace"]'
auto_cleanup = True
no_stylesheets = False
preprocess_regexps = [
(re.compile(r'\| [\w].+?\| [\w].+?\| Daily Express',
re.IGNORECASE | re.DOTALL), lambda match: ''),
]
feeds = [
# (u'UK News', u'http://feed43.com/3460616116055543.xml'),
# http://www.express.co.uk/posts/rss/1/uk'),
(u'UK News', u'http://feeds.feedburner.com/daily-express-uk-news'),
(u'World News', u'http://feeds.feedburner.com/daily-express-world-news'),
# (u'World News',u'http://feed43.com/5650105317448722.xml'),
# http://www.express.co.uk/posts/rss/78/world'),
(u'Showbiz News', u'http://feeds.feedburner.com/daily-express-showbiz-news'),
# (u'Showbiz News',u'http://feed43.com/2564008080442425.xml'),
(u'Finance', u'http://feeds.feedburner.com/daily-express-finance-news'),
# (u'Finance',u'http://feed43.com/8636615325246501.xml'),
# http://www.express.co.uk/posts/rss/21/finance'),
# (u'Sport - Boxing',u'http://feed43.com/7570233481503246.xml'),
(u'Sport - Boxing', u'http://feeds.feedburner.com/daily-express-boxing-news'),
(u'Sport - Rugby Union',
u'http://feeds.feedburner.com/daily-express-rugby-union-news'),
# (u'Sport - Rugby Union',u'http://feed43.com/4235483647118470.xml'),
# (u'Sport - Others',u'http://feed43.com/6106345668326737.xml'),
(u'Sport - Others', u'http://feeds.feedburner.com/daily-express-other-sport-news'),
# http://www.express.co.uk/posts/rss/65/sport'),
(u'Entertainment', u'http://feeds.feedburner.com/daily-express-entertainment-news'),
# (u'Entertainment',u'http://feed43.com/8864645080210731.xml'),
# http://www.express.co.uk/posts/rss/18/entertainment'),
(u'Lifestyle', u'http://feeds.feedburner.com/daily-express-life-and-style-news'),
# (u'Lifestyle',u'http://feed43.com/8705161426770855.xml'),
# http://www.express.co.uk/posts/rss/8/life&style'),
(u'Travel', u'http://feeds.feedburner.com/daily-express-travel'),
# (u'Travel',u'http://feed43.com/6547373884767554.xml'),
]
# starsons code
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
print('article.title is: ', article.title)
if 'WATCH:' in article.title.upper():
feed.articles.remove(article)
return feeds
def get_cover_url(self):
soup = self.index_to_soup('http://www.express.co.uk/ourpaper/')
cov = soup.find(attrs={'src': re.compile(
'http://cdn.images.express.co.uk/img/covers/')})
cov = str(cov)
cov2 = re.findall(
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
cov = str(cov2)
cov = cov[2:len(cov) - 2]
# cover_url=cov
br = browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov)
cover_url = cov
except:
cover_url = 'http://cdn.images.express.co.uk/img/static/ourpaper/header-back-issue-papers.jpg'
return cover_url
extra_css = '''
#h1{font-weight:bold;font-size:175%;}
h2{display: block;margin-left: auto;margin-right: auto;width:100%;font-weight:bold;font-size:175%;}
#p{font-size:14px;}
#body{font-size:14px;}
.newsCaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
.publish-info {font-size:50%;}
.photo img {display: block;margin-left: auto;margin-right: auto;width:100%;}
'''

View File

@ -1,85 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
http://www.news.com.au/dailytelegraph/
'''
from calibre.web.feeds.news import BasicNewsRecipe


class DailyTelegraph(BasicNewsRecipe):
    '''The Daily Telegraph (Australia), fetched from news.com.au RSS feeds.'''

    # Metadata.
    title = u'Daily Telegraph'
    __author__ = u'Adrian G.'
    language = 'en_AU'
    description = u'Daily Telegraph News'

    # Fetch behaviour.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    no_javascript = True
    timefmt = ' [%A, %d %B, %Y]'
    encoding = 'utf-8'

    # Keep only the story container; strip sharing/related chrome.
    keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
    remove_tags = [
        dict(name='div', attrs={'id': ['comments', 'story-related-coverage']}),
        dict(name='div', attrs={'class': [
            'story-header-tools', 'story-footer', 'story-extras', 'story-related']}),
        dict(name='div', attrs={
            'class': ['promo-image', 'story-extras story-extras-2']}),
        dict(name='div', attrs={'class': ['assistive sidebar-jump']}),
    ]

    extra_css = '''
    h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
    .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
    .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
    .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
    .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
    #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
    .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
    '''

    feeds = [
        (u'Top Stories', u'http://feeds.news.com.au/public/rss/2.0/dtele_top_stories_253.xml'),
        (u'National News', u'http://feeds.news.com.au/public/rss/2.0/dtele_national_news_202.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/dtele_world_news_204.xml'),
        (u'NSW and ACT', u'http://feeds.news.com.au/public/rss/2.0/dtele_nswact_225.xml'),
        (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/dtele_art_444.xml'),
        (u'Business News', u'http://feeds.news.com.au/public/rss/2.0/dtele_business_226.xml'),
        (u'Entertainment News', u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_news_201.xml'),
        (u'Lifestyle News', u'http://feeds.news.com.au/public/rss/2.0/dtele_lifestyle_227.xml'),
        (u'Music', u'http://feeds.news.com.au/public/rss/2.0/dtele_music_441.xml'),
        (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/dtele_sport_203.xml'),
        (u'Soccer', u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_soccer_344.xml'),
        (u'Rugby Union', u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_rugby_union_342.xml'),
        (u'Property Confidential', u'http://feeds.news.com.au/public/rss/2.0/dtele_property_confidential_463.xml'),
        (u'Property - Your Space', u'http://feeds.news.com.au/public/rss/2.0/dtele_property_yourspace_462.xml'),
        (u'Confidential News', u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_confidential_252.xml'),
        (u'Confidential Biographies', u'http://feeds.news.com.au/public/rss/2.0/dtele_confidential_biographies_491.xml'),
        (u'Confidential Galleries', u'http://feeds.news.com.au/public/rss/2.0/dtele_confidential_galleries_483.xml'),
    ]

View File

@ -1,62 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
daily.tportal.hr
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Pagina12(BasicNewsRecipe):
    '''English-language news from Croatia (daily.tportal.hr).'''
    title = 'Daily tportal.h'
    __author__ = 'Darko Miletic'
    description = 'News from Croatia'
    publisher = 'tportal.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_HR'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    extra_css = """
    body{font-family: Verdana,sans-serif }
    img{margin-bottom: 0.4em; display:block}
    h1,h2{color: #2D648A; font-family: Georgia,serif}
    .artAbstract{font-size: 1.2em; font-family: Georgia,serif}
    """
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    remove_tags = [
        dict(name=['meta', 'link', 'embed', 'object', 'iframe', 'base']), dict(
            name='div', attrs={'class': 'artInfo'})
    ]
    remove_attributes = ['lang']
    keep_only_tags = dict(attrs={'class': 'articleDetails'})
    feeds = [(u'News', u'http://daily.tportal.hr/rss/dailynaslovnicarss.xml')]

    def preprocess_html(self, soup):
        '''Strip inline styles, flatten anchors, and guarantee alt text.'''
        for item in soup.findAll(style=True):
            del item['style']
        # Flatten anchors: text links become plain text; links wrapping an
        # image become a bare <div> so the image survives.
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # BUG FIX (idiom): local was named ``str``, shadowing the
                # builtin; renamed to ``txt``.
                txt = item.string
                item.replaceWith(txt)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    txt = self.tag_to_string(item)
                    item.replaceWith(txt)
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup

View File

@ -1,35 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class DallasNews(BasicNewsRecipe):
    '''The Dallas Morning News, fetched from its section RSS feeds with
    automatic article cleanup.'''

    title = u'The Dallas Morning News'
    language = 'en'
    oldest_article = 2  # days
    max_articles_per_feed = 25
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.dallasnews.com/news.rss'),
        ('Local News', 'http://www.dallasnews.com/news/local-politics.rss'),
        ('State Politics', 'http://www.dallasnews.com/news/texas-politics.rss'),
        ('Religion', 'http://www.dallasnews.com/life/faith.rss'),
        ('Crime', 'http://www.dallasnews.com/news/crime.rss'),
        ('Celebrity News', 'http://www.dallasnews.com/entertainment/celebrity-news/?rss&listname=TopStories'),
        ('Business', 'http://www.dallasnews.com/business.rss'),
        ('Arts', 'http://www.dallasnews.com/arts.rss'),
        ('Life', 'http://www.dallasnews.com/life.rss'),
        ('Opinion', 'http://www.dallasnews.com/opinion.rss'),
    ]

View File

@ -1,36 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2014, Brandon S Allbery <allbery.b at gmail.com>'
'''
Dark Reading: protect the business. enable access.
'''
from calibre.web.feeds.news import BasicNewsRecipe


class DarkReading(BasicNewsRecipe):
    '''Dark Reading -- information-security news, single site-wide RSS feed.'''
    title = u'Dark Reading'
    __author__ = 'Brandon Allberry'
    language = 'en'
    description = u'Dark Reading is the premier online resource helping information security professionals manage the balance between protection and access. It offers breaking news and analysis on attacks, breaches and vulnerabilities, as well as strategies for protecting enterprise data. It also offers guidance on setting risk management and compliance policies.'  # noqa
    oldest_article = 7
    max_articles_per_feed = 100
    # BUG FIX: ``attrs`` must be an attribute mapping, not a bare list;
    # the original ``dict(attrs=['article-content', 'heading'])`` is not a
    # valid tag filter.  Match the two content classes explicitly.
    keep_only_tags = [dict(attrs={'class': ['article-content', 'heading']})]
    masthead_url = u'http://img.deusm.com/darkreading/DR-logo.png'
    cover_url = u'http://img.deusm.com/darkreading/DR-logo.png'
    remove_tags = [
        dict(name='div', attrs={'id': 'first-level-nav-container'}),
        dict(name='div', attrs={'id': 'search-box'}),
        dict(name='div', attrs={'id': 'mobile-menu-nav'}),
        dict(name='div', attrs={'id': 'mobile-menu-profile'}),
        dict(name='div', attrs={'id': 'mobile-menu-search'}),
        dict(name='div', attrs={'id': 'mobile-menu-rss'}),
        dict(name='div', attrs={'id': 'second-level'}),
        # 'third-level' was listed twice in the original; once suffices.
        dict(name='div', attrs={'id': 'third-level'}),
        dict(name='div', attrs={'id': 'aside-main'}),
        dict(name='div', attrs={'id': 'more-insights'}),
        dict(name='div', attrs={'class': 'more-insights-item'}),
    ]
    feeds = [(u'All', u'http://www.darkreading.com/rss_simple.asp')]

View File

@ -1,45 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch darknet.
'''
from calibre.web.feeds.news import BasicNewsRecipe


class darknet(BasicNewsRecipe):
    '''darknet -- ethical hacking and security news via FeedProxy.'''

    title = 'darknet'
    description = 'Ethical hacking and security news'
    __author__ = 'Oliver Niesner'
    language = 'en'

    use_embedded_content = False
    timefmt = ' [%b %d %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
    oldest_article = 180

    # Page chrome to strip, grouped by how it is identified.
    remove_tags = [dict(id=i) for i in (
        'navi_top', 'navi_bottom', 'nav', 'top-ad', 'login_suche',
        'navi_login', 'breadcrumb', 'subtitle', 'bannerzone')]
    remove_tags += [dict(name='span', attrs={'class': c})
                    for c in ('rsaquo', 'next', 'prev', 'comments')]
    remove_tags += [dict(name='div', attrs={'class': c})
                    for c in ('news_logo', 'nextprev', 'tags', 'Nav')]
    remove_tags += [dict(name='p', attrs={'class': c})
                    for c in ('news_option', 'news_foren')]

    remove_tags_after = [dict(name='div', attrs={'class': 'meta-footer'})]

    feeds = [('darknet', 'http://feedproxy.google.com/darknethackers')]

View File

@ -1,35 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1468055030(BasicNewsRecipe):
    '''DataNews (Knack, Belgium) -- Dutch-language IT news.'''

    # Metadata.
    title = 'DataNews'
    __author__ = 'oCkz7bJ_'
    description = 'Technology / Best Practice / Business'
    publisher = 'Roularta Media Group'
    category = 'news, information technology, Belgium'
    language = 'nl_BE'

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Artwork.
    cover_url = 'http://datablend.be/wp-content/uploads/2014/01/Data_News_logo-short.jpg'
    masthead_url = 'http://datanews.knack.be/images/svg/logos/logo_Site-DataNews-NL.svg'

    # Source: http://datanews.knack.be/rss/
    feeds = [
        ('Technology', 'http://datanews.knack.be/ict/feed.rss'),
        ('Opinie', 'http://datanews.knack.be/ict/opinie/feed.rss'),
        ('Gadgets', 'http://datanews.knack.be/ict/gadgets/feed.rss'),
        ('Foto', 'http://datanews.knack.be/ict/foto/feed.rss'),
        ('Nieuws', 'http://datanews.knack.be/ict/nieuws/feed.rss'),
        ('Reviews', 'http://datanews.knack.be/ict/reviews/feed.rss'),
        ('Startups', 'http://datanews.knack.be/ict/start-ups/feed.rss'),
    ]

View File

@ -1,16 +0,0 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
# NOTE: module-level description kept from the original; the class itself
# relies on calibre defaults for its description.
description = 'Italian soccer news website - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1324114272(BasicNewsRecipe):
    '''Datasport -- Italian soccer news from a single RSS feed, with
    automatic article cleanup.'''

    title = u'Datasport'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]

View File

@ -1,88 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class DaytonBeachNewsJournal(BasicNewsRecipe):
    '''Daytona Beach News-Journal (Florida) via its section RSS feeds.'''

    # Metadata.
    title = 'Daytona Beach News Journal'
    __author__ = 'BRGriff'
    publisher = 'News-JournalOnline.com'
    description = 'Daytona Beach, Florida, Newspaper'
    category = 'News, Daytona Beach, Florida'
    language = 'en'

    # Fetch behaviour.
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    filterDuplicates = True

    # Content selection.
    remove_attributes = ['style']
    keep_only_tags = [
        dict(name='div', attrs={'class': 'page-header'}),
        dict(name='div', attrs={'class': 'asset-body'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['byline-section', 'asset-meta']}),
    ]

    feeds = [
        # ####NEWS#####
        (u"News", u"http://www.news-journalonline.com/rss.xml"),
        (u"Breaking News", u"http://www.news-journalonline.com/breakingnews/rss.xml"),
        (u"Local - East Volusia", u"http://www.news-journalonline.com/news/local/east-volusia/rss.xml"),
        (u"Local - West Volusia", u"http://www.news-journalonline.com/news/local/west-volusia/rss.xml"),
        (u"Local - Southeast", u"http://www.news-journalonline.com/news/local/southeast-volusia/rss.xml"),
        (u"Local - Flagler", u"http://www.news-journalonline.com/news/local/flagler/rss.xml"),
        (u"Florida", u"http://www.news-journalonline.com/news/florida/rss.xml"),
        (u"National/World", u"http://www.news-journalonline.com/news/nationworld/rss.xml"),
        (u"Politics", u"http://www.news-journalonline.com/news/politics/rss.xml"),
        (u"News of Record", u"http://www.news-journalonline.com/news/news-of-record/rss.xml"),
        # ###BUSINESS####
        (u"Business", u"http://www.news-journalonline.com/business/rss.xml"),
        # (u"Jobs", u"http://www.news-journalonline.com/business/jobs/rss.xml"),
        # (u"Markets", u"http://www.news-journalonline.com/business/markets/rss.xml"),
        # (u"Real Estate", u"http://www.news-journalonline.com/business/real-estate/rss.xml"),
        # (u"Technology", u"http://www.news-journalonline.com/business/technology/rss.xml"),
        # ###SPORTS####
        (u"Sports", u"http://www.news-journalonline.com/sports/rss.xml"),
        (u"Racing", u"http://www.news-journalonline.com/racing/rss.xml"),
        (u"Highschool", u"http://www.news-journalonline.com/sports/highschool/rss.xml"),
        (u"College", u"http://www.news-journalonline.com/sports/college/rss.xml"),
        (u"Basketball", u"http://www.news-journalonline.com/sports/basketball/rss.xml"),
        (u"Football", u"http://www.news-journalonline.com/sports/football/rss.xml"),
        (u"Golf", u"http://www.news-journalonline.com/sports/golf/rss.xml"),
        (u"Other Sports", u"http://www.news-journalonline.com/sports/other/rss.xml"),
        # ###LIFESTYLE####
        (u"Lifestyle", u"http://www.news-journalonline.com/lifestyle/rss.xml"),
        # (u"Fashion", u"http://www.news-journalonline.com/lifestyle/fashion/rss.xml"),
        (u"Food", u"http://www.news-journalonline.com/lifestyle/food/rss.xml"),
        # (u"Health", u"http://www.news-journalonline.com/lifestyle/health/rss.xml"),
        (u"Home and Garden", u"http://www.news-journalonline.com/lifestyle/home-and-garden/rss.xml"),
        (u"Living", u"http://www.news-journalonline.com/lifestyle/living/rss.xml"),
        (u"Religion", u"http://www.news-journalonline.com/lifestyle/religion/rss.xml"),
        # (u"Travel", u"http://www.news-journalonline.com/lifestyle/travel/rss.xml"),
        # ###OPINION####
        # (u"Opinion", u"http://www.news-journalonline.com/opinion/rss.xml"),
        # (u"Letters to Editor", u"http://www.news-journalonline.com/opinion/letters-to-the-editor/rss.xml"),
        # (u"Columns", u"http://www.news-journalonline.com/columns/rss.xml"),
        # (u"Podcasts", u"http://www.news-journalonline.com/podcasts/rss.xml"),
        # ###ENTERTAINMENT#### ##Weekly Feature##
        (u"Entertainment", u"http://www.go386.com/rss.xml"),
        (u"Go Out", u"http://www.go386.com/go/rss.xml"),
        (u"Music", u"http://www.go386.com/music/rss.xml"),
        (u"Movies", u"http://www.go386.com/movies/rss.xml"),
        # (u"Culture", u"http://www.go386.com/culture/rss.xml"),
    ]

    extra_css = '''
    .page-header{font-family:Arial,Helvetica,sans-serif; font-style:bold;font-size:22pt;}
    .asset-body{font-family:Helvetica,Arial,sans-serif; font-size:16pt;}
    '''

View File

@ -1,44 +0,0 @@
# -*- coding: utf-8
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
'''
http://www.filmica.com/david_bravo/
'''
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    '''Blog de David Bravo (Filmica) -- Spanish blog on law, P2P and
    copyright, fetched from its RDF feed.'''

    # Metadata.
    title = u'Blog de David Bravo'
    publisher = u'Filmica'
    __author__ = 'Luis Hernández'
    description = 'blog sobre leyes, p2p y copyright'
    language = 'es'
    cover_url = 'http://www.elpais.es/edigitales/image.php?foto=par/portada/1551.jpg'

    # Fetch behaviour.
    oldest_article = 365
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'ISO-8859-1'
    timefmt = '[%a, %d %b, %Y]'

    # Content selection: keep post body plus comments.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['blog', 'date', 'blogbody', 'comments-head',
                                          'comments-body']}),
        dict(name='span', attrs={'class': ['comments-post']}),
    ]
    remove_tags_before = dict(name='div', attrs={'id': ['bitacoras']})
    remove_tags_after = dict(name='div', attrs={'id': ['comments-body']})

    extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h2{ font-family: sans-serif; font-size:75%; font-weight: 800; text-align: justify } h3{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: left } img{margin-bottom: 0.4em} '  # noqa

    feeds = [(u'Blog', u'http://www.filmica.com/david_bravo/index.rdf')]

View File

@ -1,51 +0,0 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe


class DeRedactie(BasicNewsRecipe):
    '''deredactie.be (VRT) -- Belgian news in Dutch, via its Atom feeds.'''

    title = u'De Redactie.be'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'News from Belgium in Dutch'
    language = 'nl_BE'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Keep headline, intro, media and body paragraphs.
    keep_only_tags = [
        dict(name='title'),
        dict(name='div', attrs={'id': 'intro'}),
        dict(name='h3'),
        dict(name='h1'),
        dict(name='span', attrs={'class': 'media_holder'}),
        dict(name='div', attrs={'class': 'divider image'}),
        dict(name='div', attrs={'class': 'paragraph'}),
    ]

    feeds = [
        (u'Hoofdpunten', u'http://deredactie.be/cm/vrtnieuws?mode=atom'),
        (u'Binnenland', u'http://deredactie.be/cm/vrtnieuws/binnenland?mode=atom'),
        (u'Politiek', u'http://deredactie.be/cm/vrtnieuws/politiek?mode=atom'),
        (u'Buitenland', u'http://deredactie.be/cm/vrtnieuws/buitenland?mode=atom'),
        (u'Cultuur en Media', u'http://deredactie.be/cm/vrtnieuws/cultuur+en+media?mode=atom'),
        (u'Economie', u'http://deredactie.be/cm/vrtnieuws/economie?mode=atom'),
        (u'Ook dat nog', u'http://deredactie.be/cm/vrtnieuws/ookdatnog?mode=atom'),
        (u'Regionaal Antwerpen', u'http://deredactie.be/cm/vrtnieuws/regio/antwerpen?mode=atom'),
        (u'Regionaal Brussel', u'http://deredactie.be/cm/vrtnieuws/regio/brussel?mode=atom'),
        (u'Regionaal Limburg', u'http://deredactie.be/cm/vrtnieuws/regio/limburg?mode=atom'),
        (u'Regionaal Oost-Vlaanderen', u'http://deredactie.be/cm/vrtnieuws/regio/oostvlaanderen?mode=atom'),
        (u'Regionaal Vlaams-Brabant', u'http://deredactie.be/cm/vrtnieuws/regio/vlaamsbrabant?mode=atom'),
        (u'Regionaal West-Vlaanderen', u'http://deredactie.be/cm/vrtnieuws/regio/westvlaanderen?mode=atom'),
    ]

View File

@ -1,34 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
deadspin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Deadspin(BasicNewsRecipe):
    '''Deadspin -- sports news, via its full-content VIP RSS feed.'''

    # Metadata.
    title = 'Deadspin'
    __author__ = 'NA'
    description = "Deadspin, Sports News without Access, Favor, or Discretion."
    publisher = 'deadspin.com'
    category = 'news, sports, meltdowns'
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/deadspin.com/img/logo.png'

    # Fetch behaviour: the feed carries full article bodies.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        {'class': 'feedflare'},
    ]

    feeds = [(u'Articles', u'http://deadspin.com/rss/vip')]

    def preprocess_html(self, soup):
        '''Make FeedBurner-tracked images e-book friendly.'''
        return self.adeify_images(soup)

View File

@ -1,51 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.defensenews.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


class DefenseNews(BasicNewsRecipe):
    '''Defense News -- weekly defense industry news, by region and domain.'''

    # Metadata.
    title = 'Defense News'
    __author__ = 'Darko Miletic'
    description = 'Find late-breaking defense news from the leading defense news weekly'
    publisher = 'Gannett Government Media Corporation'
    language = 'en'
    publication_type = 'newspaper'
    masthead_url = 'http://www.defensenews.com/images/logo_defensenews2.jpg'

    # Fetch behaviour.
    oldest_article = 31
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}

    extra_css = """
    body{font-family: Arial,Helvetica,sans-serif }
    img{margin-bottom: 0.4em; display:block}
    .info{font-size: small; color: gray}
    """

    # Content selection.
    remove_attributes = ['style', 'lang']
    keep_only_tags = [
        dict(attrs={'class': ['ody-hgroup', 'ody-article']}),
    ]
    remove_tags = [
        dict(name=['meta', 'link']),
        dict(attrs={'class': ['toolbar', 'toolsShareWrap', 'ody-bo-sm ',
                              'ody-comments', 'ody-related-links', 'left', 'right']}),
        dict(id=['factsMore', 'ody-nextstoryslider']),
    ]

    feeds = [
        (u'Europe', u'http://www.defensenews.com/rss/europe'),
        (u'Americas', u'http://www.defensenews.com/rss/americas'),
        (u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asia-pacific-rim'),
        (u'Middle east & Africa', u'http://www.defensenews.com/rss/middle-east-africa'),
        (u'Air', u'http://www.defensenews.com/rss/air-warfare'),
        (u'Land', u'http://www.defensenews.com/rss/land-warfare'),
        (u'Naval', u'http://www.defensenews.com/rss/naval-warfare'),
    ]

View File

@ -1,24 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


# NOTE(review): the class name looks like a copy-paste leftover from a
# Hindustan Times recipe; it is kept unchanged for compatibility.
class HindustanTimes(BasicNewsRecipe):
    '''Delco Times (Delaware County, PA) via its section RSS feeds.'''

    title = u'Delcoe Times'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.delcotimes.com/?rss=news'),
        ('Sports', 'http://www.delcotimes.com/?rss=sports'),
        ('Business', 'http://business-news.thestreet.com/the-delaware-county-daily-times/rss/109393'),
        ('Entertainment', 'http://www.delcotimes.com/?rss=entertainment'),
    ]

View File

@ -1,37 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class demagogRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Demagog.cz'
publisher = u''
description = 'demagog.cz'
oldest_article = 6
max_articles_per_feed = 20
use_embedded_content = False
remove_empty_feeds = True
feeds = [
(u'Aktuality', u'http://demagog.cz/rss')
]
language = 'cs'
cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
.vyrok_suhrn{margin-top:50px; }
.vyrok{margin-bottom:30px; }
"""
remove_tags = [dict(name='a', attrs={'class': 'vyrok_odovodnenie_tgl'}),
dict(name='img', attrs={'class': 'vyrok_fotografia'})]
remove_tags_before = dict(name='h1')
remove_tags_after = dict(name='div', attrs={'class': 'vyrok_text_after'})
preprocess_regexps = [(re.compile(
r'(<div class="vyrok_suhrn">)', re.DOTALL | re.IGNORECASE), lambda match: '\1<hr>')]

View File

@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
'''
descopera.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Descopera(BasicNewsRecipe):
    """Recipe for Descoperă.org, a Romanian popular-science magazine."""

    title = u'Descoperă.org'
    __author__ = 'Marius Ignătescu'
    description = 'Descoperă. Placerea de a cunoaște'
    publisher = 'descopera.org'
    category = 'science, technology, culture, history, earth'
    language = 'ro'
    encoding = 'utf8'

    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_attributes = ['width', 'height']

    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '  # noqa

    # Keep only the post body; drop navigation, ad box and share widgets.
    keep_only_tags = [dict(name='div', attrs={'class': ['post']})]
    remove_tags = [dict(name='div', attrs={'class': [
        'topnav', 'box_a', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge']})]

    cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'
    feeds = [(u'Articles', u'http://www.descopera.org/feed/')]

    def preprocess_html(self, soup):
        # Normalise lazy-loaded images so they render in the e-book.
        return self.adeify_images(soup)

View File

@ -1,74 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1297291961(BasicNewsRecipe):
    """Recipe for The Detroit News (detnews.com)."""

    title = u'Detroit News'
    language = 'en'
    __author__ = 'DTM'
    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    conversion_options = {
        'linearize_tables': True,
    }
    feeds = [
        (u'Headlines', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss&mime=xml'),
        (u'Nation/World', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss09&mime=xml'),
        (u'Metro/State', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss36&mime=xml'),
        (u'Wayne County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss01&mime=xml'),
        (u'Oakland County',
         u'http://www.detnews.com/apps/pbcs.dll/section?category=rss02&mime=xml'),
        (u'Macomb County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss03&mime=xml'),
        (u'Livingston County',
         u'http://detnews.com/apps/pbcs.dll/section?category=rss04&mime=xml'),
        (u'Politics/Government',
         u'http://www.detnews.com/apps/pbcs.dll/section?category=rss10&mime=xml'),
        (u'Editorials', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss07&mime=xml'),
        (u'Columnists', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss43&mime=xml'),
        (u'Charlie LeDuff',
         u'http://detnews.com/apps/pbcs.dll/section?category=rss54&mime=xml'),
        (u'Religion', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss11&mime=xml'),
        (u'Technology', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss12&mime=xml'),
        (u'Commuting', u'http://detnews.com/apps/pbcs.dll/section?category=rss05&mime=xml'),
        (u'Schools', u'http://detnews.com/apps/pbcs.dll/section?category=rss06&mime=xml'),
        (u'Obituaries', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss08&mime=xml'),
        (u'Autos Insider', u'http://detnews.com/apps/pbcs.dll/section?category=rss25&mime=xml'),
        (u'Drive', u'http://detnews.com/apps/pbcs.dll/section?category=rss26&mime=xml'),
        (u'Business', u'http://detnews.com/apps/pbcs.dll/section?category=rss21&mime=xml'),
        (u'Personal Finance',
         u'http://detnews.com/apps/pbcs.dll/section?category=rss23&mime=xml'),
        (u'Real Estate', u'http://detnews.com/apps/pbcs.dll/section?category=rss24&mime=xml'),
        (u'Movies', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss28&mime=xml'),
        (u'TV', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss40&mime=xml'),
        (u'Music/Nightlife',
         u'http://www.detnews.com/apps/pbcs.dll/section?category=rss30&mime=xml'),
        (u'Celebrities', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss51&mime=xml'),
        (u'The Arts', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss27&mime=xml'),
        (u'Food', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss29&mime=xml'),
        (u'Homestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss31&mime=xml'),
        (u'The Green Life',
         u'http://www.detnews.com/apps/pbcs.dll/section?category=rss53&mime=xml'),
        (u'Lifestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss32&mime=xml'),
        (u'Health', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss34&mime=xml'),
        (u'Travel', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss52&mime=xml'),
        (u'Advice', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss50&mime=xml'),
        (u'Pistons', u'http://detnews.com/apps/pbcs.dll/section?category=rss13&mime=xml'),
        (u'Lions', u'http://detnews.com/apps/pbcs.dll/section?category=rss14&mime=xml'),
        (u'Tigers', u'http://detnews.com/apps/pbcs.dll/section?category=rss15&mime=xml'),
        (u'Red Wings', u'http://detnews.com/apps/pbcs.dll/section?category=rss16&mime=xml'),
        (u'Michigan State',
         u'http://detnews.com/apps/pbcs.dll/section?category=rss18&mime=xml'),
        (u'University of Michigan',
         u'http://detnews.com/apps/pbcs.dll/section?category=rss17&mime=xml'),
        (u'Motor Sports', u'http://detnews.com/apps/pbcs.dll/section?category=rss20&mime=xml'),
        (u'Golf', u'http://detnews.com/apps/pbcs.dll/section?category=rss47&mime=xml'),
        (u'Outdoors', u'http://detnews.com/apps/pbcs.dll/section?category=rss19&mime=xml')
    ]

    def print_version(self, url):
        """Rewrite an article URL into its printer-friendly variant.

        The edition/category suffix is replaced with the print template.
        BUG FIX: previously ``m.group()`` was called unconditionally and
        raised AttributeError whenever the URL did not match the pattern;
        such URLs now fall through unchanged.
        """
        p = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
        m = p.search(url)
        if m is None:
            return url
        return url.replace(m.group(), '&template=printart')

View File

@ -1,44 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = 'Ruben Pollan <meskio@sindominio.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1335657507(BasicNewsRecipe):
    """Recipe for 'diagonal', a Spanish biweekly critical-news paper."""

    title = u'diagonal'
    __author__ = 'Ruben Pollan'
    description = 'Periodico quincenal de actualidad critica'
    language = 'es'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = u'http://diagonalperiodico.net/IMG/siteon0.jpg'

    feeds = [(u'Panorama', u'http://diagonalperiodico.net/-Panorama-.html?page=backend'),
             (u'Global', u'http://diagonalperiodico.net/-Global,104-.html?page=backend'),
             (u'Fotonoticia - Galería',
              u'http://diagonalperiodico.net/-Fotonoticia-Galeria-.html?page=backend'),
             (u'Libertades y Derechos',
              u'http://diagonalperiodico.net/-Libertades-y-Derechos,77-.html?page=backend'),
             (u'Saberes', u'http://diagonalperiodico.net/-Saberes,78-.html?page=backend'),
             (u'En movimiento',
              u'http://diagonalperiodico.net/-En-movimiento-.html?page=backend'),
             (u'Culturas', u'http://diagonalperiodico.net/-Culturas,89-.html?page=backend'),
             (u'Cuerpo', u'http://diagonalperiodico.net/-Cuerpo,99-.html?page=backend'),
             (u'La plaza', u'http://diagonalperiodico.net/-La-plaza-.html?page=backend'),
             (u'Enfoques', u'http://diagonalperiodico.net/-Enfoques,106-.html?page=backend'),
             (u'Humor - Galería',
              u'http://diagonalperiodico.net/-Humor-Galeria-.html?page=backend'),
             (u'Entrevistas digitales',
              u'http://diagonalperiodico.net/-Entrevistas-Digitales-.html?page=backend'),
             (u'Cartas a diagonal',
              u'http://diagonalperiodico.net/-Cartas-a-Diagonal-.html?page=backend'),
             (u'Blogs', u'http://diagonalperiodico.net/-Blogs-.html?page=backend')]

    def get_article_url(self, article):
        """Return an absolute URL for a feed entry.

        BUG FIX: the feed link was unconditionally prefixed with the site
        root, which mangled entries that already carry an absolute URL and
        raised TypeError when the link was missing.  Absolute links are now
        passed through and a missing link yields None (skip the article).
        """
        link = article.get('link')
        if not link:
            return None
        if link.startswith('http'):
            return link
        return 'http://diagonalperiodico.net/' + link

View File

@ -1,83 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
diariocordoba.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Diariosur(BasicNewsRecipe):
    """Recipe for Diario Cordoba (diariocordoba.com), a Spanish daily."""

    title = u'Diario Cordoba'
    __author__ = u'Francisco'
    description = u'News Cordoba'
    language = 'es'
    encoding = 'UTF-8'

    oldest_article = 5
    max_articles_per_feed = 100
    delay = 0
    timeout = 120
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'
    remove_attributes = ['height', 'width']

    keep_only_tags = [
        dict(name='div', attrs={'id': 'contenidos'})
    ]

    # CLEANUP: the original list repeated several selectors ('Recorte',
    # 'herramientasDeNoticia', 'navegaNoticias', 'Comparte' and the div
    # 'IframeCajaComparte'); the duplicates were removed -- the effective
    # tag-stripping behaviour is identical.
    remove_tags = [
        dict(name='div', attrs={'class': 'Recorte'}),
        dict(name='div', attrs={'id': 'comentarios'}),
        dict(name='div', attrs={'id': 'CajaAccesoCuentaUsuario'}),
        dict(name='div', attrs={'id': 'cajacomparte'}),
        dict(name='div', attrs={'class': 'FormularioDeAcceso'}),
        dict(name='div', attrs={'class': 'TextoFormularioDeAcceso'}),
        dict(name='div', attrs={'id': 'IframeCajaComparte'}),
        dict(name='div', attrs={'id': 'CintilloComentario'}),
        dict(name='div', attrs={'id': 'EscribeComentario'}),
        dict(name='div', attrs={'class': 'Nota'}),
        dict(name='div', attrs={'id': 'FormularioComentario'}),
        dict(name='div', attrs={'id': 'Comparte'}),
        dict(name='iframe', attrs={'id': 'IframeCajaComparte'}),
        dict(name='ul', attrs={'class': 'herramientasDeNoticia'}),
        dict(name='div', attrs={'id': 'NoticiaEnPapel'}),
        dict(name='div', attrs={'class': 'navegaNoticias'}),
        dict(name='p', attrs={'class': 'RecorteEnNoticias'}),
        dict(name='div', attrs={'id': 'CajaComparte'})
    ]

    # Legacy LRF-output options, kept for backward compatibility.
    html2lrf_options = [
        '--comment', description, '--base-font-size', '6', '--category', 'news, Spain', '--ignore-tables'
    ]

    feeds = [
        (u'Ultima Hora', 'http://www.diariocordoba.com/rss/ultimahora.xml'),
        (u'Tema del Dia', 'http://www.diariocordoba.com/rss/106.xml'),
        (u'Local', 'http://www.diariocordoba.com/rss/101.xml'),
        (u'Provincia', 'http://www.diariocordoba.com/rss/102.xml'),
        (u'Andalucia', 'http://www.diariocordoba.com/rss/1.xml'),
        (u'Opinion', 'http://www.diariocordoba.com/rss/100.xml'),
        (u'Deportes', 'http://www.diariocordoba.com/rss/4.xml'),
        (u'Espa\xc3\xb1a', 'http://www.diariocordoba.com/rss/7.xml'),
        (u'Internacional', 'http://www.diariocordoba.com/rss/6.xml'),
        (u'Economia', 'http://www.diariocordoba.com/rss/5.xml'),
        (u'Cultura', 'http://www.diariocordoba.com/rss/3.xml'),
        (u'Sociedad', 'http://www.diariocordoba.com/rss/103.xml'),
        (u'Gente', 'http://www.diariocordoba.com/rss/204.xml'),
        (u'Noticias Curiosas', 'http://www.diariocordoba.com/rss/205.xml'),
        (u'Tecnologia', 'http://www.diariocordoba.com/rss/206.xml')
    ]

View File

@ -1,25 +0,0 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    """Recipe for Diário de Notícias, a Portuguese daily newspaper."""

    title = 'Diário de Notícias'
    language = 'pt'
    __author__ = 'Jose Pinto'
    oldest_article = 7
    max_articles_per_feed = 100

    # Keep the left/middle content column; drop the article tools table.
    keep_only_tags = [dict(name='div', attrs={'id': 'cln-esqmid'})]
    remove_tags = [dict(name='table', attrs={'class': 'TabFerramentasInf'})]

    feeds = [
        (u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
        (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
        (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
        (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
        (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
        (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
        (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
        (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas'),
    ]

View File

@ -1,47 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class goonews(BasicNewsRecipe):
    """Recipe for Diario Extra, a Costa Rican national daily."""

    __author__ = 'Douglas Delgado'
    title = u'Diario Extra'
    publisher = 'Sociedad Periodistica Extra Limitada'
    description = 'Diario de circulacion nacional de Costa Rica.'
    category = 'Spanish, Entertainment'
    language = 'es_CR'
    encoding = 'utf-8'
    masthead_url = 'http://www.diarioextra.com/img/apariencia/logo.png'

    oldest_article = 7
    delay = 1
    max_articles_per_feed = 100
    auto_cleanup = True
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
    '''

    feeds = [
        (u'Nacionales',
         u'http://www.diarioextra.com/includes/rss_text.php?id=1'),
        (u'Internacionales',
         u'http://www.diarioextra.com/includes/rss_text.php?id=2'),
        (u'Sucesos',
         u'http://www.diarioextra.com/includes/rss_text.php?id=3'),
        (u'Deportes',
         u'http://www.diarioextra.com/includes/rss_text.php?id=6'),
        (u'Espectaculos',
         u'http://www.diarioextra.com/includes/rss_text.php?id=7'),
        (u'Opinion',
         u'http://www.diarioextra.com/includes/rss_text.php?id=4'),
    ]

    def get_cover_url(self):
        # Grab today's front-page scan from kiosko.net; None if absent.
        index_page = self.index_to_soup('http://kiosko.net/cr/np/cr_extra.html')
        for img in index_page.findAll('img', src=True):
            if img['src'].endswith('cr_extra.750.jpg'):
                return img['src']
        return None

View File

@ -1,13 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """Recipe for Diario La Republica, a Colombian business daily."""

    title = u'Diario La Republica'
    __author__ = 'CAVALENCIA'
    language = 'es_CO'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Diario La Republica',
         u'http://www.larepublica.com.co/rss/larepublica.xml'),
    ]

View File

@ -1,62 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Digit(BasicNewsRecipe):
    """Recipe for Digit Magazine, an Indian technology publication."""

    title = u'Digit Magazine'
    description = 'Digit caters to the largest community of tech buyers, users and enthusiasts in India.'
    language = 'en_IN'
    __author__ = 'unkn0wn'
    masthead_url = 'https://www.digit.in/images/digit_logo.png'
    encoding = 'utf-8'

    oldest_article = 30  # days
    max_articles_per_feed = 50
    use_embedded_content = False
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'title', 'url'}

    def get_cover_url(self):
        # Pull the newest magazine cover from the Digit store; fall back
        # to the parent implementation if the page layout changes.
        soup = self.index_to_soup(
            'https://store.digit.in/cart.php?category_id=139&year='
        )
        tag = soup.find(attrs={'class': 'previous-magazines'})
        if tag:
            self.cover_url = tag.find('img')['src']
        return super().get_cover_url()

    # BUG FIX: the implicitly-concatenated string literals below lacked
    # separating spaces at the line joins, producing the bogus class names
    # 'article_videoarticle-inside-container' and
    # 'col-md-7review-inside-container' that could never match; trailing
    # spaces added so every class name is preserved.
    keep_only_tags = [
        classes(
            'big_img_container highlights_cont Top-sponsered Text-sponsered heading-wraper article_video '
            'article-inside-container skoar_desc New-desk pros-Cons Review-reting For-table col-md-7 '
            'review-inside-container price_wrap key_specifications'
        ),
    ]
    remove_tags = [
        classes(
            'adsAdvert Video-wraper article_share auth_social breadcrumbwrap textads_list rel_articles_container'
        ),
    ]

    feeds = [
        ('Features', 'http://feeds.feedburner.com/digit/latest-features'),
        ('Reviews', 'http://feeds.feedburner.com/digit/latest-review'),
        ('Laptops', 'https://feeds.feedburner.com/digit/latest-laptops'),
        ('PC Components', 'https://feeds.feedburner.com/digit/latest-pc-components'),
        ('Tablets', 'https://feeds.feedburner.com/digit/latest-tablets'),
        ('TVs', 'https://feeds.feedburner.com/digit/latest-tvs'),
        (
            'Wearable devices',
            'https://feeds.feedburner.com/digit/latest-wearable-devices'
        ),
        ('How-to', 'https://feeds.feedburner.com/digit/how-to'),
        ('Entertainment', 'https://feeds.feedburner.com/digit/latest-entertainment'),
        ('Gaming', 'http://feeds.feedburner.com/digit/latest-gaming'),
        ('Software', 'https://feeds.feedburner.com/digit/latest-software'),
        ('Audio-Video', 'https://feeds.feedburner.com/digit/latest-audio-video'),
        # ('Apps', 'https://feeds.feedburner.com/digit/latest-apps'),
        # ('Mobile Phones', 'https://feeds.feedburner.com/digit/latest-mobile-phones'),
        # For more : https://www.digit.in/rss-feed/
    ]

View File

@ -1,59 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '14, January 2010'
'''
http://media.digitalartsonline.co.uk/
'''
from calibre.web.feeds.news import BasicNewsRecipe
# NOTE(review): these two were probably intended as class attributes of
# digiArts; at module level they have no effect on the recipe.  Left in
# place to avoid changing behaviour -- confirm before moving them.
temp_files = []
articles_are_obfuscated = True


class digiArts(BasicNewsRecipe):
    """Recipe for Digital Arts Magazine (digitalartsonline.co.uk)."""

    __author__ = 'Lorenzo Vigentini'
    title = 'Digital Arts Magazine '
    publisher = 'IDG Communication'
    description = ('Digital Arts - comprehensive coverage of the art of '
                   'graphic design, 3D, animation, video, effects, web and '
                   'interactive design, in print and online.')  # noqa
    category = ('Multimedia, photo, video, computing, product reviews, '
                'editing, cameras, production')
    cover_url = 'http://media.digitalartsonline.co.uk/graphics/logo_digital_arts.gif'
    language = 'en'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False

    # Keep headline, byline and body; everything else is page chrome.
    keep_only_tags = [
        dict(name='h1', attrs={'itemprop': 'headline'}),
        dict(name='span', attrs={'itemprop': 'author'}),
        dict(name='section', attrs={'class': 'articleBody'}),
    ]

    # Feed index: http://www.digitalartsonline.co.uk/rss/
    feeds = [
        ('Latest News Articles',
         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-news.xml'),
        ('Latest Tutorials',
         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-tutorials.xml'),
        ('Latest Reviews',
         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-reviews.xml'),
        ('Latest Features',
         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-features.xml'),
    ]

View File

@ -1,51 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
digitaljournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DigitalJournal(BasicNewsRecipe):
    """Recipe for digitaljournal.com, a citizen-journalism news network."""

    title = 'Digital Journal'
    __author__ = 'Darko Miletic'
    description = 'A Global Citizen Journalism News Network'
    category = 'news, politics, USA, world'
    publisher = 'Digital Journal'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    # Keep the article body; drop the social-sharing strip above it.
    keep_only_tags = [
        dict(id='article_text'),
    ]
    remove_tags = [
        dict(attrs={'class': lambda x: x and 'article-top-social' in x}),
    ]

    feeds = [
        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
        (u'Business', u'http://digitaljournal.com/rss/?feed=top_news&depname=Business'),
        (u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment'),
        (u'Environment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment'),
        (u'Food', u'http://digitaljournal.com/rss/?feed=top_news&depname=Food'),
        (u'Health', u'http://digitaljournal.com/rss/?feed=top_news&depname=Health'),
        (u'Internet', u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet'),
        (u'Politics', u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics'),
        (u'Religion', u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion'),
        (u'Science', u'http://digitaljournal.com/rss/?feed=top_news&depname=Science'),
        (u'Sports', u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports'),
        (u'Technology', u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology'),
        (u'World', u'http://digitaljournal.com/rss/?feed=top_news&depname=World'),
        (u'Arts', u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts'),
    ]

View File

@ -1,41 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.digitalspy.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DigitalSpyUK(BasicNewsRecipe):
    """Recipe for Digital Spy (UK edition), entertainment and media news."""

    title = 'Digital Spy - UK Edition'
    __author__ = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher = 'Digital Spy Limited.'
    category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    language = 'en_GB'
    encoding = 'cp1252'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Keep the main content container; strip <link> tags and fixed sizes.
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
    remove_tags = [dict(name=['link'])]
    remove_attributes = ['height', 'width']

    feeds = [
        (u'News', u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother', u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General', u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media', u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]

View File

@ -1,81 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import os
import tempfile
from calibre.web.feeds.recipes import BasicNewsRecipe
class Dilbert(BasicNewsRecipe):
    """Recipe for dilbert.com: recent comic strips plus blog entries.

    Each comic <img> is wrapped in a small local HTML file so the feed
    framework can treat the strip as a regular article.
    """

    title = u'Dilbert'
    __author__ = 'TechnoCat'
    description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.'
    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
    language = 'en'
    encoding = 'utf8'
    auto_cleanup = True
    needs_subscription = False
    no_stylesheets = True
    remove_javascript = True
    oldest_article = 7
    recursions = 0
    max_articles_per_feed = 20
    debugMessages = True

    BASE_URL = 'http://dilbert.com'  # Note no www.
    COMIC_DIV_TAG = 'img-comic-container'
    BLOG_DIV_TAG = 'media'
    tempfiles = []

    def writeImage(self, title, imageURL):
        # Wrap the comic tag in a minimal HTML page saved to a temp file;
        # returns the file path for use as a file:// article URL.
        # BUG FIX: NamedTemporaryFile defaults to binary mode ('w+b'), so
        # writing str raised TypeError on Python 3 -- open in text mode
        # with an explicit encoding instead.
        tempFile = tempfile.NamedTemporaryFile(
            mode='w', encoding='utf-8', suffix='.html', delete=False)
        self.tempfiles.append(tempFile)
        tempFile.write('<html><head><title>' + title + '</title></head><body>')
        tempFile.write(imageURL.prettify())
        tempFile.write('</body></html>')
        tempFile.flush()
        tempFile.close()
        return tempFile.name

    def cleanUpTempFiles(self):
        # Delete the wrapper files created by writeImage.  close() on an
        # already-closed file object is a harmless no-op.
        for tempFile in self.tempfiles:
            tempFile.close()
            os.unlink(tempFile.name)

    def cleanup(self):
        self.cleanUpTempFiles()

    # Extract comic links from the soup.
    # Returns a list of comics (articles) as:
    # {
    #  'title'       : article title,
    #  'url'         : URL of print version,
    #  'date'        : The publication date of the article as a string,
    #  'description' : A summary of the article
    #  'content'     : The full article (can be an empty string). This is used by FullContentProfile
    # }
    def comicFeed(self, soup):
        feedset = []
        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}):
            comic = comicContainer.find('img')
            if comic is not None:
                filelink = self.writeImage(comic['alt'], comic)
                feedset.append(
                    dict(title=comic['alt'], url='file://' + filelink, description=comic['alt'], content=''))
        return feedset

    def blogFeed(self, soup):
        # Collect blog entry links from the homepage.
        feedset = []
        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}):
            blog = blogContainer.find('a', {'class': 'link-blended'})
            if blog is not None:
                feedset.append(
                    dict(title=blog['title'], url=blog['href'], description=blog['title'], content=''))
        return feedset

    def parse_index(self):
        root = self.index_to_soup(self.BASE_URL)
        comics = self.comicFeed(root)
        blogs = self.blogFeed(root)
        return [('Comics', comics), ('Blog Entries', blogs)]

View File

@ -1,149 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
__license__ = 'GPL v3'
__copyright__ = u'2014-01-09, Silviu Cotoar\u0103, Marius Popescu'
'''
dilemaveche.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DilemaVeche(BasicNewsRecipe):
    """Recipe for Dilema Veche, a Romanian weekly cultural magazine.

    The magazine appears on Fridays; full content requires a
    subscription (see get_browser).
    """

    title = u'Dilema Veche'
    __author__ = 'song2'  # inspired by the Le Monde recipe
    description = '"Sint vechi, domnule!" (I.L. Caragiale)'
    publisher = 'Adevarul Holding'
    oldest_article = 7
    language = 'ro'
    max_articles_per_feed = 150
    encoding = 'utf-8'
    simultaneous_downloads = 5
    masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
    needs_subscription = True
    use_embedded_content = False
    publication_type = 'magazine'
    remove_javascript = True
    no_stylesheets = True
    remove_empty_feeds = True

    feeds = [
        ('Editoriale si opinii - Situatiunea',
         'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),
        ('Editoriale si opinii - Pe ce lume traim',
         'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
        ('Editoriale si opinii - Bordeie si obiceie',
         'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
        ('Editoriale si opinii - Talc Show',
         'http://www.dilemaveche.ro/taxonomy/term/39/0/feed'),
        ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
        ('La zi in cultura - Dilema va recomanda',
         'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
        ('La zi in cultura - Carte',
         'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
        ('La zi in cultura - Film',
         'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
        ('La zi in cultura - Muzica',
         'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
        ('La zi in cultura - Arte performative',
         'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
        ('La zi in cultura - Arte vizuale',
         'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
        ('Societate - Ieri cu vedere spre azi',
         'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
        ('Societate - Din polul opus',
         'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
        ('Societate - Mass comedia',
         'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
        ('Societate - La singular si la plural',
         'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
        ('Oameni si idei - Educatie',
         'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
        ('Oameni si idei - Polemici si dezbateri',
         'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
        ('Oameni si idei - Stiinta si tehnologie',
         'http://www.dilemaveche.ro/taxonomy/term/47/0/feed'),
        # online only articles
        ('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/5/0/feed'),
        # once per month, 6-7 day of the month
        ('Dilemateca', 'http://dilemaveche.ro/taxonomy/term/21/0/feed'),
        # children, once-twice per year
        ('Dilematix', 'http://dilemaveche.ro/taxonomy/term/20/0/feed'),
        ('Dilema Studiilor Postuniversitare',
         'http://dilemaveche.ro/taxonomy/term/1635/0/feed')  # once per year, July
    ]

    remove_tags_before = dict(name='div', attrs={'class': 'spacer_10'})
    remove_tags = [
        dict(name='div', attrs={'id': ['adshop_widget_428x60']}),
        dict(name='div', attrs={'id': ['gallery']}),
        dict(name='div', attrs={'class': ['art_related_left']}),
        dict(name='a', attrs={'class': ['prevPage']}),
        dict(name='a', attrs={'class': ['nextPage']}),
        dict(name='div', attrs={'class': ['article_details']}),
        dict(name='div', attrs={'id': ['comments']}),
        dict(name='ul', attrs={'class': ['social-buttons-list']}),
        dict(name='a', attrs={'class': ['editie']}),
        dict(name='div', attrs={'class': 'simple_overlay'}),
        dict(name='div', attrs={'class': 'c_right_column'}),
        dict(name='div', attrs={'id': 'content_right'}),
        dict(name='div', attrs={'class': 'box_shadow_top'}),
        dict(name='div', attrs={'class': 'box_shadow_bottom'}),
        dict(name='div', attrs={'id': ['footer']}),
        dict(name='div', attrs={'class': ['clear spacer_20']}),
        dict(name='div', attrs={'id': ['adh-footer']}),
        dict(name='div', attrs={'id': ['skyright']}),
        dict(name='div', attrs={'id': ['closure']})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': ['adshop_widget_428x60']})
    ]

    extra_css = """
    body{font-family: Georgia,Times,serif }
    img{margin-bottom: 0.4em; display:block}
    """

    def get_browser(self):
        # Log in before downloading; articles are behind a paywall.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(
                'http://pay.dilemaveche.ro/autentificare/?redirect=http%3A%2F%2Fdilemaveche.ro%2F%2F&return=true')
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def get_cover_url(self):
        # Cover candidates, in order of preference:
        # small, from the current number article: http://dilemaveche.ro/sites/default/files/imagecache/articol_teaser/DV517web-1_copy.JPG
        # medium, from the homepage PDF link: http://dilemaveche.ro/sites/default/files/imagecache/editie_small/DV517web-1_copy_0.JPG
        # big, from the current number article, click on the small image:
        # http://dilemaveche.ro/sites/default/files/imagecache/image_gallery_large/DV517web-1_copy.JPG
        cover_url = None
        soup = self.index_to_soup('http://dilemaveche.ro')
        link_item = soup.find('div', attrs={'class': 'box_dr_pdf_picture'})
        if link_item and link_item.a:
            cover_url = link_item.a['href']
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover_url)
        except Exception:  # BUG FIX: was a bare except (also caught KeyboardInterrupt/SystemExit)
            # PDF unavailable; fall back to the small cover image.
            self.log("\nPDF indisponibil")
            link_item = soup.find('div', attrs={'class': 'box_dr_pdf_picture'})
            if link_item and link_item.img:
                cover_url = link_item.img['src']
            br = BasicNewsRecipe.get_browser(self)
            try:
                br.open(cover_url)
            except Exception:  # BUG FIX: was a bare except
                # Neither PDF nor image found; use the site logo.
                # CONSISTENCY: use the recipe logger instead of print().
                self.log('nu este nici pdf nici imagine')
                cover_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
        return cover_url

    cover_margins = (10, 15, '#ffffff')

View File

@ -1,25 +0,0 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class Dingoo(BasicNewsRecipe):
    """Recipe for Emulate.SU's Russian Dingoo A320 community feed."""

    language = 'ru'
    __author__ = 'bug_me_not'
    title = u'Dingoo A320. \u0420\u0443\u0441\u0441\u043a\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441'
    # Portable game console Dingoo A320 and other unusual gadgets
    description = 'Портативная игровая консоль Dingoo A320 и другие необычные гаджеты'
    publisher = 'Emulate.SU'
    category = 'console'
    cover_url = u'http://upload.wikimedia.org/wikipedia/commons/thumb/0/02/Dingoo_A320_White.jpg/300px-Dingoo_A320_White.jpg'

    oldest_article = 20
    max_articles_per_feed = 200
    no_stylesheets = False
    remove_javascript = True

    feeds = [(u'A320', u'http://feeds.feedburner.com/ru_dingoo')]

    # Trim everything outside the post body; embedded iframes are dropped.
    remove_tags_before = dict(name='div', attrs={'class': 'posttitle'})
    remove_tags_after = dict(name='div', attrs={'class': 'article'})
    remove_tags = [dict(name='iframe')]

View File

@ -1,49 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DivaHair(BasicNewsRecipe):
    """Recipe for divahair.ro, a Romanian hairstyle and beauty site."""

    title = u'Diva Hair'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    encoding = 'utf-8'
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    # Keep the article container; drop category strip, metadata and the
    # bottom box that also marks the end of the article.
    keep_only_tags = [
        dict(name='td', attrs={'class': 'spatiuart'}),
        dict(name='div', attrs={'class': 'spatiuart'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'categorie'}),
        dict(name='div', attrs={'class': 'gri gri2 detaliiart'}),
        dict(name='div', attrs={'class': 'articol_box_bottom'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'articol_box_bottom'})
    ]

    feeds = [(u'\u0218tiri', u'http://www.divahair.ro/feed')]

    def preprocess_html(self, soup):
        # Normalise lazy-loaded images so they render in the output.
        return self.adeify_images(soup)

View File

@ -1,26 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
DjurslandsPosten
'''
class DjurslandsPosten_dk(BasicNewsRecipe):
    """Recipe for DjurslandsPosten, a Danish local newspaper."""

    __author__ = 'CoderAllan.github.com'
    title = 'DjurslandsPosten'
    description = 'Lokale og regionale nyheder'
    category = 'newspaper, news, localnews, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Nyheder', 'http://www.dinby.dk/djurslandsposten/rss'),
    ]

View File

@ -1,65 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
dnevnik.si
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Dnevnik(BasicNewsRecipe):
    """Dnevnik.si — Slovenian daily, assembled from its per-section RSS feeds."""

    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
    Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
    bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
    pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
    dru\u017ebe.'''
    language = 'sl'
    oldest_article = 3
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id': '_iprom_inStream'}),
        dict(name='div', attrs={'class': 'entry-content'}),
    ]

    # Strip sharing widgets, comments, ads and page chrome.
    remove_tags = [
        dict(name='div', attrs={'class': 'fb_article_top'}),
        dict(name='div', attrs={'class': 'related'}),
        dict(name='div', attrs={'class': 'fb_article_foot'}),
        dict(name='div', attrs={'class': 'spreading'}),
        dict(name='dl', attrs={'class': 'ad'}),
        dict(name='p', attrs={'class': 'report'}),
        dict(name='div', attrs={'class': 'hfeed comments'}),
        dict(name='dl', attrs={'id': 'entryPanel'}),
        dict(name='dl', attrs={'class': 'infopush ip_wide'}),
        dict(name='div', attrs={'class': 'sidebar'}),
        dict(name='dl', attrs={'class': 'bottom'}),
        dict(name='div', attrs={'id': 'footer'}),
    ]

    feeds = [
        (u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13'),
        (u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14'),
        (u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116'),
        (u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5'),
        (u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15'),
        (u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17'),
        (u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18'),
        (u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19'),
        (u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20'),
        (u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21'),
        (u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
    ]

View File

@ -1,106 +0,0 @@
#!/usr/bin/env python
from __future__ import print_function
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
dnevnik.com.mk
'''
import datetime
import re
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class Dnevnik(BasicNewsRecipe):
    """Dnevnik (dnevnik.com.mk) — daily Macedonian newspaper.

    Articles are scraped from the site's date-based archive page rather than
    an RSS feed; sections are re-ordered to match the printed edition.
    """

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Remove anything before the start of the article.
            (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),
            # Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]
    extra_css = """
    body{font-family: Arial,Helvetica,sans-serif}
    .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """
    conversion_options = {
        'comment': description,
        'tags': category,
        'language': language,
        'linearize_tables': True
    }

    def parse_index(self):
        """Build the (section, articles) list from today's archive page."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        soup = self.index_to_soup(
            self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum)
        feeds = []
        for section in soup.findAll('td', attrs={'class': 'WB_DNEVNIK_ArhivaFormTitle'}):
            sectionTitle = section.contents[0].string
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # The article links live in the table that immediately follows the
            # table holding the section title in the page layout.
            containerTable = section.findPrevious(
                name='table').findNextSibling(name='table')
            if containerTable is None:
                print('No container table found - page layout may have been changed.')
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({'title': title, 'url': 'http://www.dnevnik.com.mk/' +
                                 article['href'], 'description': '', 'date': ''})
            if articles:
                feeds.append((sectionTitle, articles))
        # Order sections like the printed edition (see get_weight).
        return sorted(feeds, key=lambda section: self.get_weight(section))

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.
        """
        natural_order = {u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
                         u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
                         u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
                         u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13}
        if section[0].string.lower() in natural_order:
            return natural_order[section[0].string.lower()]
        else:
            return 999  # section names not on the list go to the bottom

    def get_cover_url(self):
        """Scrape today's front-page scan from the archive page, or '' if absent."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        soup = self.index_to_soup(
            self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum)
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is not None:
            # Follow the "more" link to the page holding the cover image.
            raw = browser().open_novisit(
                self.INDEX + '/' + anchor['href']).read()
            cover_soup = BeautifulSoup(raw)
            url = cover_soup.find(
                'div', attrs={'class': 'WB_DNEVNIK_Datum2'}).findNext('img')['src']
            return self.INDEX + '/' + url
        return ''

View File

@ -1,43 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
'''
dobanevinosti.blogspot.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DobaNevinosti(BasicNewsRecipe):
    """Doba Nevinosti — a Serbian film blog (dobanevinosti.blogspot.com)."""

    title = 'Doba Nevinosti'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    language = 'sr'
    encoding = 'utf-8'
    publication_type = 'blog'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    auto_cleanup = True
    extra_css = """
    @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
    body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
    img{margin-bottom: 0.8em; display:block;}
    """

    conversion_options = {
        'comment': description, 'tags': 'film, blog, srbija, tv', 'publisher': 'Dimitrije Vojinov', 'language': language
    }

    remove_attributes = ['lang', 'border']
    # Replace U+0110 with U+00D0 (visually similar glyph).
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Tekstovi', u'http://dobanevinosti.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        # Drop inline styles and make sure every image carries alt text.
        for tag in soup.findAll(style=True):
            del tag['style']
        for image in soup.findAll('img', alt=False):
            image['alt'] = 'image'
        return soup

View File

@ -1,60 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2012, NiLuJe <niluje at ak-team.com>'
'''
Fetch DoghouseDiaries.
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DoghouseDiaries(BasicNewsRecipe):
    """Fetch the latest Doghouse Diaries webcomics.

    The site's feed is unusable and comics carry no accessible dates, so
    parse_index walks the 'Quick Archive' dropdown and treats the first
    oldest_article + 1 entries as "recent".
    """

    title = 'Doghouse Diaries'
    description = 'A webcomic.'
    __author__ = 'NiLuJe'
    language = 'en'
    use_embedded_content = False

    # 14 comics per fetch (not really days... but we can't easily get the date
    # of individual comics, short of parsing each one...)
    oldest_article = 14

    cover_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
    masthead_url = 'http://www.thedoghousediaries.com/logos/logo3.png'

    keep_only_tags = [
        dict(name='img', attrs={'class': re.compile("comic-item*")}),
        dict(name='h1'),
        dict(name='div', attrs={'class': 'entry'}),
        dict(name='p', id='alttext'),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'pin-it-btn-wrapper'}),
        dict(name='span'),
        dict(name='div', id='wp_fb_like_button'),
    ]
    remove_attributes = ['width', 'height']
    no_stylesheets = True

    # Turn image bubblehelp into a paragraph (NOTE: We run before the
    # remove_tags cleanup, so we need to make sure we only parse the
    # comic-item img, not the pinterest one pulled by the entry div)
    preprocess_regexps = [
        (re.compile(r'(<img.*src="http://thedoghousediaries.com/comics/.*title=")([^"]+)(".*>)'),
         lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2)))
    ]

    def parse_index(self):
        """Return a single 'Doghouse Diaries' section of recent comics."""
        INDEX = 'http://www.thedoghousediaries.com/'
        soup = self.index_to_soup(INDEX)
        articles = []
        # Since the feed sucks, and there's no real archive, we use the 'Quick
        # Archive' thingie, but we can't get the date from here, so stop after
        # oldest_article + 1 comics.
        for item in soup.findAll('option', {}, True, None, self.oldest_article + 1):
            # Skip the quick archive placeholder entry itself.
            if item['value'] != '0':
                articles.append({
                    # BUG FIX: the title used to be .encode('UTF-8'), which
                    # yields bytes on Python 3 and breaks downstream string
                    # handling; tag_to_string already returns unicode text.
                    'title': self.tag_to_string(item),
                    'url': item['value'],
                    'description': '',
                    'content': '',
                })
        return [('Doghouse Diaries', articles)]

View File

@ -1,44 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a BeautifulSoup attrs matcher selecting tags carrying any of the
    space-separated CSS classes in *classes*."""
    wanted = frozenset(classes.split(' '))

    def has_any(value):
        # Falsy (missing) class attribute never matches; otherwise report the
        # (possibly empty, hence falsy) overlap with the wanted classes.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': has_any})
class stuffconz(BasicNewsRecipe):
    """stuff.co.nz — New Zealand news built from the site's section feeds."""

    title = u'stuff.co.nz'
    language = 'en_NZ'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    remove_stylesheets = True

    keep_only_tags = [
        classes('sics-component__headline sics-component__byline sics-component__story')
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'style']),
        classes('sics-component__sharebar'),
    ]

    feeds = [
        ('Dominion Post', 'http://www.stuff.co.nz/rss/dominion-post'),
        ('National', 'http://www.stuff.co.nz/rss/national'),
        ('World', 'http://www.stuff.co.nz/rss/world'),
        ('Business', 'http://www.stuff.co.nz/rss/business'),
        ('Technology', 'http://www.stuff.co.nz/rss/technology'),
        ('Sport', 'http://www.stuff.co.nz/rss/sport'),
        ('Entertainment', 'http://www.stuff.co.nz/rss/entertainment'),
        ('Life and Style', 'http://www.stuff.co.nz/rss/life-style'),
    ]

View File

@ -1,29 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dosisdiarias.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class DosisDiarias(BasicNewsRecipe):
    """Alberto Montt's daily cartoon blog, via its FeedBurner feed."""

    title = 'Alberto Montt en dosis diarias'
    __author__ = 'Darko Miletic'
    description = 'Mire sin compromiso y si le gusta vuelva'
    publisher = 'Alberto Montt'
    category = 'comic, blog, spanish'
    language = 'es'
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    remove_tags = [dict(name='div', attrs={'class': 'feedflare'})]
    feeds = [(u'Dosis diaria', u'http://feeds.feedburner.com/montt')]

View File

@ -1,25 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011-2011, Federico Escalada <fedeescalada at gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
class Dotpod(BasicNewsRecipe):
    """Dotpod — Tecnologia y Comunicacion Audiovisual (dotpod.com.ar)."""

    title = 'Dotpod'
    description = 'Tecnologia y Comunicacion Audiovisual'
    __author__ = 'Federico Escalada'
    authors = 'Federico Picone'
    language = 'es'
    encoding = 'utf-8'
    publication_type = 'blog'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    conversion_options = {
        'authors': authors,
        'comments': description,
        'language': language,
    }

    remove_tags = [dict(name='div', attrs={'class': 'feedflare'})]
    feeds = [('Dotpod', 'http://www.dotpod.com.ar/feed/')]

View File

@ -1,26 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes
class My_Feeds(BasicNewsRecipe):
    """Down To Earth (downtoearth.org.in) — environment and science news."""

    title = 'Down To Earth'
    language = 'en_IN'
    __author__ = 'Amit'
    oldest_article = 20
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    center_navbar = True
    use_embedded_content = False
    remove_empty_feeds = True

    keep_only_tags = [
        classes('detail-heading content-main news-basic-info news-banner news-detail-content')
    ]
    remove_tags = [
        classes('add-comment btn hindi_detail_link single-news-letter'),
        dict(id=['comments', 'breadcrumb', 'node_related_stories']),
        dict(attrs={'class': ['commentCount', 'box']}),
    ]

    feeds = [
        ('All', 'https://www.downtoearth.org.in/rss/all'),
    ]

View File

@ -1,36 +0,0 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class drivelrycom(BasicNewsRecipe):
    """drivelry.com — a blog by Mike Abrahams."""

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25
    remove_stylesheets = True

    remove_tags_after = dict(name='div', attrs={'id': 'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['sidebar']}),
        dict(name='div', attrs={'id': ['bookmark']}),
    ]

    feeds = [
        ('drivelry.com',
         'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        # Keep only the main story and append a donation link beneath it.
        story = soup.find(name='div', attrs={'id': 'main'})
        scaffold = BeautifulSoup('''
        <html><head><title>t</title></head><body>
        <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
        </body></html>
        ''')
        scaffold.find(name='body').insert(0, story)
        return scaffold

View File

@ -1,43 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Dünya Bizim (dunyabizim.com) — Turkish culture news, one feed per category."""

    title = u'D\xfcnya Bizim'
    __author__ = 'asalet_r'
    language = 'tr'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Ayr\u0131nt\u0131 Defteri', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=58'),
        (u'Baba Kitaplar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=4'),
        (u'\xc7-al\u0131nt\u0131', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=33'),
        (u'Dar\xfclmedya', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=49'),
        (u'Denemedi Deme', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=72'),
        (u'DevriAlem', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=67'),
        (u'Duyduk duymad\u0131k demeyin', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=48'),
        (u'G\xfczel Mekanlar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=43'),
        (u'\u0130stanbul Bizim', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=62'),
        (u'\u0130yi Haberler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=18'),
        (u'\u0130yi M\xfczikler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=2'),
        (u'Kalite Dergiler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=3'),
        (u'K\u0131sa K\u0131sa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=55'),
        (u'Konu\u015fa Konu\u015fa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=24'),
        (u'Medyada D\xfcnyaBizim', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=64'),
        (u'Mizah', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=29'),
        (u'M\xfcstesna G\xfczeller', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=65'),
        (u'Nerede Ne Var?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=66'),
        (u'Not Defteri', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=71'),
        (u'O \u015eimdi Nerede?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=52'),
        (u'Olsa Ke\u015fke', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=34'),
        (u'Orada Ne Oldu?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=38'),
        (u'\xd6nemli Adamlar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=1'),
        (u'Sinema', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=23'),
        (u'Tart\u0131\u015fa tart\u0131\u015fa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=39'),
        (u'Yay\u0131n Y\xf6netmeninden', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=69'),
        (u'Yeni \u015eeyler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=57'),
        (u'Zekeriya Sofras\u0131', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=60'),
    ]

View File

@ -1,34 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Dünya Bülteni (dunyabulteni.net) — Turkish news, one feed per section."""

    title = u'D\xfcnya B\xfclteni'
    __author__ = 'asalet_r'
    language = 'tr'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Dosya', u'http://www.dunyabulteni.net/servisler/rss/haberler/181'),
        (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss/haberler/174'),
        (u'T\xfcrkiye', u'http://www.dunyabulteni.net/servisler/rss/haberler/44'),
        (u'\u015eehrin Nabz\u0131', u'http://www.dunyabulteni.net/servisler/rss/haberler/195'),
        (u'D\xfcnya', u'http://www.dunyabulteni.net/servisler/rss/haberler/31'),
        (u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss/haberler/157'),
        (u'Dubam', u'http://www.dunyabulteni.net/servisler/rss/haberler/163'),
        (u'K\xfclt\xfcr Sanat', u'http://www.dunyabulteni.net/servisler/rss/haberler/66'),
        (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss/haberler/123'),
        (u'Ekonomi', u'http://www.dunyabulteni.net/servisler/rss/haberler/40'),
        (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss/haberler/153'),
        (u'Bilim Teknoloji', u'http://www.dunyabulteni.net/servisler/rss/haberler/128'),
        (u'Aile-Sa\u011fl\u0131k', u'http://www.dunyabulteni.net/servisler/rss/haberler/75'),
        (u'E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss/haberler/80'),
        (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss/haberler/90'),
        (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss/haberler/200'),
        (u'Yazarlar\u0131m\u0131z', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/5'),
        (u'Konuk Yazarlar', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/6'),
        (u'Al\u0131nt\u0131 Yaz\u0131lar', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/7'),
    ]

View File

@ -1,205 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com"""
import locale
import os
import re
from shutil import copyfile
from calibre import strftime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.resources import get_path
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont
__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag, preferring the soup's own factory (bs4-style
    ``new_tag``) and falling back to the legacy ``Tag`` constructor."""
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # Old BeautifulSoup: construct the Tag directly.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DunyaHalleri(BasicNewsRecipe):
    """Dünya Halleri (www.dunyahalleri.com) with a locally generated cover."""

    title = 'Dünya Halleri'
    description = 'Gözden Kaçanlar Rehberi'
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5
    needs_subscription = False
    scale_news_images = True
    remove_tags_before = dict(name='span', attrs={'itemprop': 'reviewBody'})
    remove_tags_after = dict(
        name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'})
    remove_tags = [dict(name=['script', 'noscript', 'style', 'footer']),
                   dict(attrs={'class': ['jsharedaddy sd-sharing-enabled',
                                         'cb-sticky-sidebar', 'sharedaddy sd-sharing-enabled']}),
                   dict(id=['jp-relatedposts', 'tldr-post-summary', 'tldr-post-summary-buttons'])]
    encoding = 'utf_8'
    no_stylesheets = True
    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'

    feeds = [(u"Genel Gündem",
              'https://www.dunyahalleri.com/genel-gundem/feed/'),
             (u"Teknoloji / Bilim",
              'https://www.dunyahalleri.com/teknoloji-bilim/feed/'),
             (u"İnternet / Girişimler",
              'https://www.dunyahalleri.com/internet-girisimler/feed/'),
             (u"Tasarım / İnovasyon",
              'https://www.dunyahalleri.com/tasarim-inovasyon/feed/'),
             (u"Kültür / Sanat", 'https://www.dunyahalleri.com/kultur-sanat/feed/')]
    oldest_article = 7
    max_articles_per_feed = 50

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    masthead_url = 'https://www.dunyahalleri.com/wp-content/uploads/2016/07/dh-logo-transparan.png'
    cover_url = ''
    cover_img_url = 'https://i0.wp.com/www.dunyahalleri.com/wp-content/uploads/2016/04/dh-favico-v2.png'
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        # NOTE(review): raises locale.Error when tr_TR is not installed on
        # the host -- confirm this is acceptable.
        locale.setlocale(locale.LC_TIME, self.locale)
        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    def preprocess_html(self, soup):
        """Inject the article title/featured image and normalize galleries
        and embedded videos into plain img/anchor markup."""
        span = soup.findAll('span', {'itemprop': 'reviewBody'}, limit=1)[0]
        # title insert
        article_title = soup.title.contents[0]
        # BUG FIX: str.replace returns a new string; the result used to be
        # discarded, leaving the ' - Dünya Halleri' suffix on every title.
        article_title = article_title.replace(u' - Dünya Halleri', '')
        h2 = new_tag(soup, 'h2')
        h2.append(article_title)
        span.insert(0, h2)
        # featured image insert
        # BUG FIX: findAll(...)[0] raised IndexError when no og:image meta
        # was present, making the 'if meta' guard dead code; find() returns
        # None in that case.
        meta = soup.find('meta', {'property': 'og:image'})
        if meta:
            img = new_tag(soup, 'img')
            img.attrs = [('src', meta['content'])]
            span.insert(1, img)
        # gallery normalization: flatten image galleries into a paragraph of
        # bare <img src=...> tags
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for img in div.findAll('img'):
                img.attrs = [(key, value)
                             for key, value in img.attrs if key in ['src']]
                p.append(img)
            div.replaceWith(p)
        # youtube embedded normalization
        # this block finds the cover image for each embedded youtube video
        # then changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            a = new_tag(soup, 'a')
            caption = new_tag(soup, 'pre')
            img = new_tag(soup, 'img')
            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                iframe['src'])
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = iframe['src']
            img.attrs = [('src', img_src)]
            caption.append('Video: ' + a_href)
            caption.attrs = [('class', 'caption')]
            a.attrs = [('href', a_href), ('target', '_blank')]
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version
    # https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        """URL of the logo pasted onto the generated cover."""
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        """Fetch cover_img_url by temporarily swapping it into cover_url so
        the stock _download_cover machinery does the download."""
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except Exception:
            pass
        new_cu = self.get_cover_img_url()
        self.cover_url = new_cu
        self._download_cover()
        outfile = os.path.join(self.output_dir, 'cover_img.jpg')
        copyfile(self.cover_path, outfile)
        self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        """Best-effort wrapper: on failure cover_img_path is set to None."""
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except Exception:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        """Draw horizontally centered text and return its pixel height."""
        font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        if hasattr(draw, 'textsize'):
            width, height = draw.textsize(text, font=font)
        else:
            # COMPAT FIX: Pillow >= 10 removed ImageDraw.textsize; derive the
            # dimensions from textbbox instead.
            x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
            width, height = x1 - x0, y1 - y0
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        """Generate a simple title/date/logo cover image into cover_file."""
        title = self.title
        date = strftime('%d %B %Y')
        author = u'www.dunyahalleri.com'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        bottom += self.draw_text(draw, date, 32, bottom)
        bottom += self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo, centered on the cover
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True

View File

@ -1,273 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""www.dunyahalleri.com/haftanin-ozeti"""
import locale
import os
import re
from contextlib import closing
from shutil import copyfile
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
from calibre.utils.resources import get_path
from calibre.web.feeds.recipes import BasicNewsRecipe
from PIL import Image, ImageDraw, ImageFont
__license__ = 'GPL v3'
__copyright__ = '2017, sukru alatas / alatas.org'
def new_tag(soup, name, attrs=()):
    """Create a new tag via the soup's own factory when available (bs4),
    otherwise through the legacy ``Tag`` constructor."""
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # Old BeautifulSoup: construct the Tag directly.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
    """Dünya Halleri weekly digest (www.dunyahalleri.com/haftanin-ozeti):
    the latest digest post is split into one section per chapter link."""

    title = 'Dünya Halleri - Haftanın Özeti'
    description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
                   ' site, yazılım, donanım, cihaz, video ve trendler...')
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5
    needs_subscription = False
    scale_news_images = True
    remove_tags_before = dict(name='section', attrs={'itemprop': 'articleBody'})
    remove_tags_after = dict(name='div', attrs={'class': 'cb-alert cb-blue'})
    remove_tags = [dict(name=['ol', 'h4', 'script', 'noscript', 'style', 'footer']),
                   dict(name='h1', attrs={
                        'class': 'entry-title cb-entry-title entry-title cb-title'}),
                   dict(attrs={'class': ['cb-alert cb-blue', 'woo-sc-box info ',
                                         'sharedaddy sd-sharing-enabled', 'jp-relatedposts']}),
                   dict(id=['post-pagination', 'plp_inital_pagination'])]
    encoding = 'utf_8'
    no_stylesheets = True
    INDEX = 'https://www.dunyahalleri.com/haftanin-ozeti/feed/'
    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    issue_title = ''
    issue_date = ''
    masthead_url = ''
    cover_url = ''
    cover_img_url = ''
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        # NOTE(review): raises locale.Error when tr_TR is not installed on
        # the host -- confirm this is acceptable.
        locale.setlocale(locale.LC_TIME, self.locale)
        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    # BeautifulSoup xml parser extension
    # If you use index_to_soup with xml or rss, it outputs lots of garbage
    # nodes and changes the tree for its own.
    # This function is a very close copy of index_to_soup but it uses
    # BeautifulStoneSoup instead of BeautifulSoup
    def xml_to_soup(self, url_or_raw, raw=False):
        """Fetch (or accept) raw XML and parse it with BeautifulStoneSoup."""
        if re.match(r'\w+://', url_or_raw):
            br = self.clone_browser(self.browser)
            open_func = getattr(br, 'open_novisit', br.open)
            with closing(open_func(url_or_raw)) as f:
                _raw = f.read()
            if not _raw:
                raise RuntimeError(
                    'Could not fetch index from %s' % url_or_raw)
        else:
            _raw = url_or_raw
        if raw:
            return _raw
        if not isinstance(_raw, type(u'')) and self.encoding:
            if callable(self.encoding):
                _raw = self.encoding(_raw)
            else:
                _raw = _raw.decode(self.encoding, 'replace')
        from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
        from calibre.utils.cleantext import clean_xml_chars
        if isinstance(_raw, type(u'')):
            _raw = strip_encoding_declarations(_raw)
        else:
            _raw = xml_to_unicode(
                _raw, strip_encoding_pats=True, resolve_entities=True)[0]
        _raw = clean_xml_chars(_raw)
        return BeautifulStoneSoup(_raw)  # <== the difference

    def parse_index(self):
        """Split the newest digest post into one section per chapter link."""
        from dateutil.parser import parse
        # RSS parsing: only the newest <item> is used
        index = self.xml_to_soup(self.INDEX)
        channel = index.rss.channel
        self.description = channel.description.contents[0]
        self.masthead_url = channel.url.contents[0]
        item = channel.item
        self.issue_title = item.title.contents[0]
        self.issue_date = parse(item.pubdate.contents[0])
        base_url = item.link.contents[0]
        cover_img_desc = BeautifulSoup(item.description.contents[0])
        # this is necessary for the cover generator
        self.cover_img_url = cover_img_desc.img['src']
        soup = self.index_to_soup(base_url)
        articles = {}
        ans = []
        # The digest's table of contents is the first <ol>; entries without a
        # link point at the main page itself.
        for li in soup.findNext('ol').findAll('li'):
            a = li.find('a', href=True)
            if not a:
                url = base_url
                feed = self.tag_to_string(li, use_alt=True).strip()
                pubdate = self.issue_date.strftime('%a, %d %b')
            else:
                url = base_url + re.sub(r'\.\/', '', a['href'])
                feed = self.tag_to_string(a, use_alt=True).strip()
                pubdate = self.issue_date.strftime('%a, %d %b')
            title = self.issue_title + \
                ' (' + self.issue_date.strftime('%d %B %Y') + ')'
            if feed not in articles:
                articles[feed] = []
                ans.append(feed)
            articles[feed].append(
                dict(title=title, url=url, date=pubdate, description='', content=''))
        # BUG FIX: the section title used to be the stale variable `key`
        # (always None), so every section was named None instead of its
        # chapter heading.
        ans = [(k, articles[k]) for k in ans if k in articles]
        return ans

    def preprocess_html(self, soup):
        """Normalize galleries and embedded videos into plain img/anchor markup."""
        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for img in div.findAll('img'):
                img.attrs = [(key, value)
                             for key, value in img.attrs if key in ['src']]
                p.append(img)
            div.replaceWith(p)
        # youtube embedded normalization
        # this block finds the cover image for each embedded youtube video
        # then changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            a = new_tag(soup, 'a')
            caption = new_tag(soup, 'pre')
            img = new_tag(soup, 'img')
            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                iframe['src'])
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = iframe['src']
            img.attrs = [('src', img_src)]
            caption.append('Video: ' + a_href)
            caption.attrs = [('class', 'caption')]
            a.attrs = [('href', a_href), ('target', '_blank')]
            a.append(img)
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        """URL of the logo pasted onto the generated cover."""
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        """Fetch cover_img_url by temporarily swapping it into cover_url so
        the stock _download_cover machinery does the download."""
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except Exception:
            pass
        new_cu = self.get_cover_img_url()
        self.cover_url = new_cu
        self._download_cover()
        outfile = os.path.join(self.output_dir, 'cover_img.jpg')
        copyfile(self.cover_path, outfile)
        self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        """Best-effort wrapper: on failure cover_img_path is set to None."""
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except Exception:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        """Draw horizontally centered text and return its pixel height."""
        font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        if hasattr(draw, 'textsize'):
            width, height = draw.textsize(text, font=font)
        else:
            # COMPAT FIX: Pillow >= 10 removed ImageDraw.textsize; derive the
            # dimensions from textbbox instead.
            x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
            width, height = x1 - x0, y1 - y0
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        """Generate a title/date/logo cover image into cover_file."""
        title = self.issue_title
        # BUG FIX: strftime returns str on Python 3; calling
        # .decode('utf8', 'replace') on it raised AttributeError.
        date = self.issue_date.strftime('%d %B %Y')
        author = u'www.dunyahalleri.com/haftanin-ozeti'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        bottom += self.draw_text(draw, date, 32, bottom)
        bottom += self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo, scaled to the cover width and centered
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            # BUG FIX: PIL requires integer dimensions; '/' yields a float on
            # Python 3, so the scaled height must be truncated explicitly.
            logo = logo.resize(
                (self.COVER_WIDTH, int(self.COVER_WIDTH * height / width)),
                Image.Resampling.LANCZOS)
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True

View File

@ -1,47 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
    """Dagblad van het Noorden — regional news for the north of the
    Netherlands, built from the dvhn.nl RSS feeds."""

    title = u'DvhN'
    __author__ = 'Reijndert'
    publisher = u'Dagblad van het Noorden'
    category = u'Nieuws'
    description = u'Nieuws uit Noord Nederland'
    language = 'nl'
    country = 'NL'
    version = 1
    timefmt = ' %Y-%m-%d (%a)'

    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'

    # Keep only the lead picture and the article body.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'fullPicture'}),
        dict(name='div', attrs={'id': 'articleText'}),
    ]
    remove_tags = [
        dict(name='span', attrs={'class': 'location'}),
    ]

    # Strip anchors and social-media boilerplate from the article text.
    preprocess_regexps = [
        (re.compile(r'<a.*?>'), lambda h1: ''),
        (re.compile(r'</a>'), lambda h2: ''),
        (re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: ''),
        (re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: ''),  # noqa
    ]

    feeds = [
        (u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'),
        (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'),
        (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'),
        (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'),
        (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'),
        (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'),
        (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'),
        (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours'),
    ]

    extra_css = '''
    body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
    '''

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Adresseavisen Ebeltoft
'''
class EbeltoftLokalavisen_dk(BasicNewsRecipe):
    """Local and regional news, sport and culture from Ebeltoft, Denmark
    (ebeltoft.lokalavisen.dk)."""

    __author__ = 'CoderAllan.github.com'
    title = 'Adresseavisen Ebeltoft'
    description = 'Lokale og regionale nyheder, sport, kultur fra Ebeltoft og omegn på ebeltoft.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Adresseavisen Ebeltoft', 'http://ebeltoft.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,87 +0,0 @@
import re
from calibre.ebooks.BeautifulSoup import Comment
from calibre.web.feeds.news import BasicNewsRecipe
class EchoDnia(BasicNewsRecipe):
    """Echo Dnia — Polish regional portal for the świętokrzyskie, radomskie
    and podkarpackie regions (echodnia.eu).  Multi-page photo stories are
    stitched together by ``append_page``."""

    title = u'Echo Dnia'
    __author__ = 'fenuks'
    description = u'Echo Dnia - portal regionalny świętokrzyskiego radomskiego i podkarpackiego. Najnowsze wiadomości z Twojego regionu, galerie, video, mp3.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.echodnia.eu'
    masthead_url = INDEX + '/images/top_logo.png'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # Drop "read also" style cross-links embedded in the article body.
    preprocess_regexps = [
        (re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
        (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
        (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
    ]

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [
        dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
                 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
                 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline', 'articleZoomText']),
        dict(attrs={'class': 'articleFunctions'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://www.echodnia.eu/rss.xml'),
        (u'Świętokrzyskie', u'http://www.echodnia.eu/swietokrzyskie.xml'),
        (u'Radomskie', u'http://www.echodnia.eu/radomskie.xml'),
        (u'Podkarpackie', u'http://www.echodnia.eu/podkarpackie.xml'),
        (u'Sport \u015bwi\u0119tokrzyski', u'http://www.echodnia.eu/sport_swi.xml'),
        (u'Sport radomski', u'http://www.echodnia.eu/sport_rad.xml'),
        (u'Sport podkarpacki', u'http://www.echodnia.eu/sport_pod.xml'),
        (u'Pi\u0142ka no\u017cna', u'http://www.echodnia.eu/pilka.xml'),
        (u'Praca', u'http://www.echodnia.eu/praca.xml'),
        (u'Dom', u'http://www.echodnia.eu/dom.xml'),
        (u'Auto', u'http://www.echodnia.eu/auto.xml'),
        (u'Zdrowie', u'http://www.echodnia.eu/zdrowie.xml'),
    ]

    def get_cover_url(self):
        """Follow the front-pages section to the current cover image."""
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        return getattr(self, 'cover_url', self.cover_url)

    def append_page(self, soup, appendtag):
        """Append pages 2..N of a multi-page photo story to *appendtag*."""
        nav = soup.find('span', attrs={'class': 'photoNavigationPages'})
        if nav:
            # The navigation label ends in "current/total".
            page_count = int(nav.string.rpartition('/')[-1].replace('&nbsp;', ''))
            baseurl = self.INDEX + \
                soup.find(attrs={'class': 'photoNavigationNext'})['href'][:-1]
            for r in appendtag.findAll(attrs={'class': 'photoNavigation'}):
                r.extract()
            for page_no in range(2, page_count + 1):
                soup2 = self.index_to_soup(baseurl + str(page_no))
                for fragment in (soup2.find(id='photoContainer'),
                                 soup2.find(attrs={'class': 'photoMeta'}),
                                 soup2.find(attrs={'class': 'photoStoryText'})):
                    if fragment:
                        appendtag.insert(len(appendtag.contents), fragment)
        # Strip HTML comments left over from the stitched pages.
        for comment in appendtag.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

View File

@ -1,43 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class EclipseOnline(BasicNewsRecipe):
    """Eclipse Online — short fiction magazine edited by Jonathan Strahan,
    published by Night Shade Books."""

    #
    # oldest_article caps the age, in days, of posts to retrieve.  32 works
    # well with a "days of month = 1" schedule to produce monthly issues.
    # The RSS feed only carries the 10 most recent posts, so that is the
    # practical maximum regardless of this value.
    #
    oldest_article = 32

    title = u'Eclipse Online'
    description = u'"Where strange and wonderful things happen, where reality is eclipsed for a little while with something magical and new." Eclipse Online is edited by Jonathan Strahan and published online by Night Shade Books. http://www.nightshadebooks.com/category/eclipse/'  # noqa
    publication_type = 'magazine'
    language = 'en'
    __author__ = u'Jim DeVona'
    __version__ = '1.0'

    # Use the Eclipse Online logo as the cover.  (Remove cover_url to let
    # calibre generate a default cover that includes the date.)
    cover_url = 'http://www.nightshadebooks.com/wp-content/uploads/2012/10/Eclipse-Logo.jpg'

    # Keep the "post" div holding the story; drop redundant metadata.
    keep_only_tags = [
        dict(name='div', attrs={'class': lambda x: x and 'post' in x})]
    remove_tags = [
        dict(name='span', attrs={'class': ['post-author', 'post-category', 'small']})]

    # Plain markup renders best on e-readers: ignore site styling except for
    # centring illustrations and their captions.
    auto_cleanup = False
    no_stylesheets = True
    remove_attributes = ['style', 'align']
    extra_css = '.wp-caption {text-align: center;} .wp-caption-text {font-size: small; font-style: italic;}'

    feeds = ['http://www.nightshadebooks.com/category/eclipse/feed/']

View File

@ -1,35 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
EcoGeek.org
'''
import os
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.web.feeds.news import BasicNewsRecipe
class EcoGeek(BasicNewsRecipe):
    """EcoGeek blog.  The feed entries embed the full article HTML, so
    ``parse_index`` writes each entry to a local file and points the
    article URLs at those files instead of the network."""

    title = 'EcoGeek'
    __author__ = 'Darko Miletic'
    description = 'EcoGeek - Technology for the Environment Blog Feed'
    publisher = 'EcoGeek'
    language = 'en'
    no_stylesheets = True

    def parse_index(self):
        # Persistent temp dir: the files must outlive this call, since
        # calibre fetches the article URLs later in the download phase.
        tdir = PersistentTemporaryDirectory('_ecogeek')
        articles = []
        soup = self.index_to_soup('http://feeds2.feedburner.com/EcoGeek')
        for i, article in enumerate(soup.findAll('div', attrs={'class': 'article'})):
            fname = os.path.join(tdir, '%d.html' % i)
            with open(fname, 'wb') as f:
                # type(u'')(article) was a Python 2/3 compatibility shim for
                # unicode(); str() is the same thing on Python 3.
                f.write(str(article).encode('utf-8'))
            articles.append({
                'title': self.tag_to_string(article.find('h2')),
                'url': 'file://' + fname.replace(os.sep, '/'),
            })
        return [('EcoGeek', articles)]

View File

@ -1,41 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
globaleconomicanalysis.blogspot.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class GlobalEconomicAnalysis(BasicNewsRecipe):
    """Mish Shedlock's global economic trend analysis blog
    (globaleconomicanalysis.blogspot.com), fetched via its FeedBurner feed."""

    title = "Mish's Global Economic Trend Analysis"
    __author__ = 'Darko Miletic'
    description = 'Thoughts on the global economy, housing, gold, silver, interest rates, oil, energy, China, commodities, the dollar, Euro, Renminbi, Yen, inflation, deflation, stagflation, precious metals, emerging markets, and policy decisions that affect the global markets.'  # noqa
    publisher = 'Mike Shedlock'
    category = 'news, politics, economy, banking'
    language = 'en'
    publication_type = 'blog'

    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = True
    remove_empty_feeds = True
    masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(name=['meta', 'link', 'iframe', 'object', 'embed']),
        dict(attrs={'class': 'blogger-post-footer'}),
    ]
    remove_attributes = ['border']

    feeds = [
        (u'Articles', u'http://feeds2.feedburner.com/MishsGlobalEconomicTrendAnalysis')]

View File

@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ecuisine.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EcuisineRo(BasicNewsRecipe):
    """Romanian cooking site eCuisine (ecuisine.ro)."""

    title = u'eCuisine'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
    publisher = 'eCuisine'
    category = 'Ziare,Retete,Bucatarie'
    language = 'ro'
    encoding = 'utf-8'

    oldest_article = 50
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'http://www.ecuisine.ro/sites/all/themes/ecuisine/images/logo.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='h1', attrs={'id': 'page-title'}),
        dict(name='div', attrs={'class': 'field-item even'}),
    ]
    remove_tags = [
        dict(name='ul', attrs={'id': ['recipe-tabs']}), dict(name='div', attrs={'class': ['recipe-body-rating clearfix']}), dict(name='div', attrs={'class': ['recipe-body-flags']}), dict(name='div', attrs={'id': ['tweetmeme_button']}), dict(name='div', attrs={ 'class': ['fbshare']}), dict(name='a', attrs={'class': ['button-rounded']}), dict(name='div', attrs={'class': ['recipe-body-related']}), dict(name='div', attrs={'class': ['fbshare']}), dict(name='div', attrs={'class': ['link-wrapper']})  # noqa
    ]

    feeds = [
        (u'Feeds', u'http://www.ecuisine.ro/rss')
    ]

    def preprocess_html(self, soup):
        # Make <img> tags Adobe-Digital-Editions friendly.
        return self.adeify_images(soup)

View File

@ -1,14 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Turkish literature site Edebistan (edebistan.com)."""

    title = u'Edebistan'
    __author__ = 'asalet_r'
    language = 'tr'

    oldest_article = 7
    max_articles_per_feed = 30
    auto_cleanup = True

    feeds = [(u'Edebistan', u'http://www.edebistan.com/index.php/feed/')]

View File

@ -1,18 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import AutomaticNewsRecipe
class BasicUserRecipe1420467110(AutomaticNewsRecipe):
    """Turkish literary news site Edebiyat Haber, via its FeedBurner feed."""

    title = 'Edebiyat Haber'
    language = 'tr'
    __author__ = 'asalet_r'

    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        ('Edebiyat Haber', 'http://feeds.feedburner.com/feedburner/edebiyathaber'),
    ]

View File

@ -1,23 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
'''
Fetch Edge.org conversations
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EdgeConversationRSS(BasicNewsRecipe):
    """Conversations from Edge.org, fetched from the site's RSS feed."""

    title = u'Edge.org Conversations'
    __author__ = 'levien'
    language = 'en'
    description = '''Edge.org offers "open-minded, free ranging, intellectually
playful ... an unadorned pleasure in curiosity, a collective expression of
wonder at the living and inanimate world ... an ongoing and thrilling
colloquium.'''

    oldest_article = 60
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Edge RSS', u'http://edge.org/feed')]

View File

@ -1,47 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010 elsuave'
from calibre.web.feeds.news import BasicNewsRecipe
class EandP(BasicNewsRecipe):
    """Editor and Publisher — news about newspapers and journalism."""

    title = u'Editor and Publisher'
    __author__ = u'elsuave (modified from Xanthan Gum)'
    description = 'News about newspapers and journalism.'
    publisher = 'Editor and Publisher'
    category = 'news, journalism, industry'
    language = 'en'

    max_articles_per_feed = 25
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
    remove_javascript = True
    auto_cleanup = True

    # Legacy LRF/EPUB converter options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher
    ]
    html2epub_options = (
        'publisher="' + publisher + '"\ncomments="' + description +
        '"\ntags="' + category + '"')

    # Font formatting code borrowed from kwetal
    extra_css = '''
    body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
    h1{font-size: xx-large;}
    h2{font-size: large;}
    '''

    # Remove commenting/social media links.
    remove_tags_after = [dict(name='div', attrs={'class': 'clear'})]

    feeds = [
        (u'Editor & Publisher', u'http://www.editorandpublisher.com/feed/'),
        (u'Comments', u'http://www.editorandpublisher.com/comments/feed/'),
    ]

View File

@ -1,19 +0,0 @@
__version__ = 'v1.0'
__date__ = '7, April 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1332847053(BasicNewsRecipe):
    """Leading editorials on Italy, aggregated from the best Italian
    editorial feeds."""

    title = u'Editoriali'
    __author__ = 'faber1971'
    description = 'Leading articles on Italy by the best Italian editorials'
    language = 'it'

    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://folkbulletin.folkest.com/wp-content/uploads/editoriale1.jpg'

    feeds = [
        (u'Micromega', u'http://temi.repubblica.it/micromega-online/feed/'),
        (u'Corriere della Sera', u'http://xml.corriereobjects.it/rss/editoriali.xml'),
        (u'La Stampa', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'),
        (u"Italia dall'estero", u'http://italiadallestero.info/feed'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Egedal
'''
class EgedalLokalavisen_dk(BasicNewsRecipe):
    """Local and regional news, sport and culture for Egedal, Denmark
    (egedal.lokalavisen.dk)."""

    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Egedal'
    description = 'Lokale, regionale nyheder, sport og kultur i Egedal, Stenløse, Ølstykke, Ganløse, Gundsø, Slangerup, Roskilde på egedal.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Egedal', 'http://egedal.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,41 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
egirl.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EgirlRo(BasicNewsRecipe):
    """Romanian women's magazine egirl.ro."""

    title = u'egirl'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Necesar pentru tine'
    publisher = u'egirl'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'http://www.egirl.ro/images/egirlNou/logo_egirl.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id': 'content_art'}),
        dict(name='div', attrs={'class': 'content_articol'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.egirl.ro/rss/egirl.xml')
    ]

    def preprocess_html(self, soup):
        # Make <img> tags Adobe-Digital-Editions friendly.
        return self.adeify_images(soup)

View File

@ -1,30 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class eioba(BasicNewsRecipe):
    """Polish community-written article site eioba.pl."""

    title = u'eioba'
    __author__ = 'fenuks'
    description = u'eioba.pl - daj się przeczytać!'
    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
    language = 'pl'

    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]

    feeds = [
        (u'Wszyskie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
        (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
        (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
        (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
        (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
        (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
        (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
        (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml'),
    ]

    def preprocess_html(self, soup):
        # Strip inline styles so extra_css rules take effect.
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,31 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1314326622(BasicNewsRecipe):
    """Ekantipur — English-language news portal from Nepal."""

    title = u'Ekantipur'
    __author__ = 'Manish Bhattarai'
    description = 'News from the No.1 News Portal In Nepal'
    language = 'en_NP'

    oldest_article = 7
    max_articles_per_feed = 25
    masthead_url = 'http://www.ekantipur.com/images/logo.gif'
    remove_empty_feeds = True

    remove_tags_before = dict(id='main-content')
    remove_tags_after = dict(id='view-comments')
    remove_tags = [
        dict(attrs={'class': ['lang fltl', 'bdtop', 'ratings', 'news-tool', 'comment', 'post-ur-comment', 'asideBox', 'commentsbox', 'related-sidebar-row related-news']}),  # noqa
        dict(id=['menu_container', 'top_container', 'news_container',
                 'top_right', 'sidebar', 'news-detail-img', 'footer-wrapper']),
        dict(name=['script']),
    ]

    feeds = [
        (u'Top Stories', u'http://www.ekantipur.com/en/rss/top-stories/'),
        (u'National', u'http://www.ekantipur.com/en/rss/national/1'),
        (u'Capital', u'http://www.ekantipur.com/en/rss/capital/7'),
        (u'Business', u'http://www.ekantipur.com/en/rss/business/3'),
        (u'World', u'http://www.ekantipur.com/en/rss/world/5'),
        (u'Sports', u'http://www.ekantipur.com/en/rss/sports/4'),
        (u'Mixed Bag', u'http://www.ekantipur.com/en/rss/mixed-bag/14'),
        (u'Health & Living', u'http://www.ekantipur.com/en/rss/health-and-living/19'),
        (u'Entertainment', u'http://www.ekantipur.com/en/rss/entertainment/6'),
    ]

View File

@ -1,59 +0,0 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import etree
class Ekathimerini(BasicNewsRecipe):
    """English edition of Kathimerini (Greek daily).  All articles come from
    one XML index, partitioned into sections by their <subcat> element."""

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'

    max_articles_per_feed = 100
    oldest_article = 100
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]
    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        """Yield article dicts from *idx* whose <subcat> matches *category*."""
        for item in idx.findAll('item'):
            cat = u''
            cat_elem = item.find('subcat')
            if cat_elem:
                cat = self.tag_to_string(cat_elem)
            if cat != category:
                continue
            desc_html = self.tag_to_string(item.find('description'))
            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': self.tag_to_string(item.find('link')),
                'description': self.tag_to_string(BeautifulSoup(desc_html)),
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        raw = self.browser.open(self.rss_url).read()
        # Hardened parser: recover from malformed XML, no network access,
        # and no external entity resolution.
        idx = etree.fromstring(raw, parser=etree.XMLParser(
            recover=True, no_network=True, resolve_entities=False))
        categories = sorted({self.tag_to_string(subcat)
                             for subcat in idx.xpath('//*[local-name()="subcat"]')})
        # Articles without a subcat go into a generic "News" section.
        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for cat in categories:
            feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))
        return feeds

    def print_version(self, url):
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')

View File

@ -1,34 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EkologiaPl(BasicNewsRecipe):
    """Polish ecology portal ekologia.pl."""

    title = u'Ekologia.pl'
    __author__ = 'fenuks'
    description = u'Portal ekologiczny - eko, ekologia, ochrona przyrody, ochrona środowiska, przyroda, środowisko online. Ekologia i ochrona środowiska. Ekologia dla dzieci.'  # noqa
    category = 'ecology'
    language = 'pl'
    cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
    ignore_duplicate_articles = {'title', 'url'}
    extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    use_embedded_content = False
    remove_attrs = ['style']

    keep_only_tags = [dict(attrs={'class': 'contentParent'})]
    remove_tags = [dict(
        attrs={'class': ['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})]

    feeds = [
        (u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'),
        (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'),
        (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')]

    def print_version(self, url):
        """Map an article URL to its printer-friendly version by article id."""
        # Renamed from `id`, which shadowed the builtin.
        article_id = re.search(r',(?P<id>\d+)\.html', url).group('id')
        return 'http://drukuj.ekologia.pl/artykul/' + article_id

View File

@ -1,51 +0,0 @@
# coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
    """Colombian daily El Espectador (elespectador.com)."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class': 'paginacion'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'herramientas_nota'}),
        dict(name='div', attrs={'class': 'relpauta'}),
        dict(name='div', attrs={'class': 'recursosrelacionados'}),
        dict(name='div', attrs={'class': 'nav_negocios'}),
    ]
    # Previously also considered for removal:
    # dict(name='div', attrs={'class':'ico-mail2'}),
    # dict(name='div', attrs={'id':'caja-instapaper'}),
    # dict(name='div', attrs={'class':'modulo herramientas'})

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [
        (u'Política ', u' http://www.elespectador.com/noticias/politica/feed'),
        (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
        (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
        (u'Economía', u'http://www.elespectador.com/economia/feed'),
        (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
        (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
        (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
        (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
        (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
        (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
        (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
        (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
        (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
        (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
        (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
        (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed'),
    ]

View File

@ -1,28 +0,0 @@
# coding=utf-8
# https://github.com/iemejia/calibrecolombia
'''
http://www.elmalpensante.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElMalpensante(BasicNewsRecipe):
    """Colombian cultural magazine El Malpensante (elmalpensante.com)."""

    title = u'El Malpensante'
    language = 'es_CO'
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    cover_url = 'http://elmalpensante.com/img/layout/logo.gif'
    description = 'El Malpensante'

    oldest_article = 30
    simultaneous_downloads = 20
    use_embedded_content = True
    remove_empty_feeds = True
    max_articles_per_feed = 100

    feeds = [
        (u'Artículos', u'http://www.elmalpensante.com/articulosRSS.php'),
        (u'Malpensantías', u'http://www.elmalpensante.com/malpensantiasRSS.php'),
        (u'Margaritas', u'http://www.elmalpensante.com/margaritasRSS.php'),
        # Almost identical to "Artículos", so it is left out:
        # (u'Noticias', u'http://www.elmalpensante.com/noticiasRSS.php'),
    ]

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
    """Chilean online newspaper El Mostrador (elmostrador.cl)."""

    news = True
    title = u'El Mostrador'
    __author__ = 'Alex Mitrani'
    description = u'Chilean online newspaper'
    publisher = u'La Plaza S.A.'
    category = 'news, rss'
    language = 'es_CL'

    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'

    remove_tags_before = dict(name='div', attrs={'class': 'news-heading cf'})
    remove_tags_after = dict(name='div', attrs={'class': 'footer-actions cf'})
    remove_tags = [
        dict(name='div', attrs={'class': 'footer-actions cb cf'}),
        dict(name='div', attrs={'class': 'news-aside fl'}),
        dict(name='div', attrs={'class': 'footer-actions cf'}),
        dict(name='div', attrs={'class': 'user-bar', 'id': 'top'}),
        dict(name='div', attrs={'class': 'indicators'}),
        dict(name='div', attrs={'id': 'header'}),
    ]

    feeds = [
        (u'Temas Destacados', u'http://www.elmostrador.cl/destacado/feed/'),
        (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/'),
        (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/'),
        (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/'),
        (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/'),
        (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/'),
        (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/'),
        (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/'),
    ]

View File

@ -1,53 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElMundo02(BasicNewsRecipe):
    """Colombian daily El Mundo (elmundo.com)."""

    title = u'Periódico El Mundo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
    masthead_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    remove_tags_before = dict(id='miga_pan')
    remove_tags_after = [
        dict(name='div', attrs={'class': 'cuadro_opciones_new1'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'ruta'}),
        dict(name='div', attrs={'class': 'buscador'}),
        dict(name='div', attrs={'class': 'iconos'}),
        dict(name='div', attrs={'class': 'otros_iconos'}),
        dict(name='div', attrs={'class': 'cuadro_opciones_new1'}),
        dict(name='div', attrs={'class': 'otras_noticias'}),
        dict(name='div', attrs={'class': 'notas_relacionadas'}),
        dict(name='div', attrs={'id': 'lateral_2'}),
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [
        (u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
        (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
        (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
        (u'Política ', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
        (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
        (u'Nacional ', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
        (u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
        (u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
        (u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
        (u'Mobilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
        (u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
        (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
        (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml'),
    ]

View File

@ -1,65 +0,0 @@
#!/usr/bin/env python
##
# Last Edited: 2018-02-13 Carlos Alves <carlosalves90@gmail.com>
##
__license__ = 'GPL v3'
__author__ = '2010, Yuri Alvarez<me at yurialvarez.com>'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
'''
elobservador.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
    """Download news from El Observador (elobservador.com.uy)."""

    # Publication identity.
    title = 'El Observador'
    __author__ = 'yrvn'
    description = 'Noticias desde Uruguay'
    tags = 'news, sports, entretainment'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'utf8'

    # Fetch behaviour.
    use_embedded_content = False
    recursion = 5
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 2
    max_articles_per_feed = 100

    # Keep only the headline and the article body.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'detail-title newDetailTextChange'}),
        dict(name='div', attrs={'class': 'cuerpo air newDetailTextChange'}),
    ]
    # Strip date lines, social widgets and sidebar material.
    remove_tags = [
        dict(name='div', attrs={'class': ['fecha', 'copyright', 'story_right']}),
        dict(name='div', attrs={'class': ['photo', 'social']}),
        dict(name='div', attrs={'id': 'widget'}),
        dict(name=['object', 'link']),
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']
    extra_css = '''
h1{font-family: Georgia,"Times New Roman",Times,serif}
h3{font-family: Georgia,"Times New Roman",Times,serif}
h2{font-family: Georgia,"Times New Roman",Times,serif}
p{font-family: Verdana,Arial,Helvetica,sans-serif}
body{font-family: Verdana,Arial,Helvetica,sans-serif}
img{margin-bottom: 0.4em; display:block;}
'''
    feeds = [
        (u'Portada', u'http://www.elobservador.com.uy/rss/home.xml'),
    ]

    def get_cover_url(self):
        # No cover image is available for this source.
        return None

    def preprocess_html(self, soup):
        # Drop inline style attributes so extra_css controls the layout.
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,121 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.10'
__date__ = '09, September 2017'
'''
elperiodicodearagon.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class elperiodicodearagon(BasicNewsRecipe):
    """Daily newspaper from Aragon (elperiodicodearagon.com)."""

    # Publication identity.
    title = u'El Periodico de Aragon'
    __author__ = u'desUBIKado'
    description = u'Noticias desde Aragon'
    publisher = u'elperiodicodearagon.com'
    category = u'news, politics, Spain, Aragon'
    language = 'es'
    masthead_url = 'http://pdf.elperiodicodearagon.com/img/logotipo.gif'
    encoding = 'iso-8859-1'

    # Fetch behaviour.
    oldest_article = 1
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True

    feeds = [
        (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
        (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
        (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
        (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
        (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
        (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
        (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
        (u'Tecnyconta Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
        (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
        (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
        (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
        (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
        (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
        (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
        (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
        (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml'),
        (u'Semana Santa', u'http://zetaestaticos.com/aragon/rss/385_es.xml'),
        (u'La crónica de Valdejal\xf3n', u'http://zetaestaticos.com/aragon/rss/206_es.xml'),
        (u'La crónica de Campo de Borja', u'http://zetaestaticos.com/aragon/rss/208_es.xml'),
        (u'La crónica de Ejea y sus pueblos', u'http://zetaestaticos.com/aragon/rss/212_es.xml'),
        (u'La crónica del Bajo Gállego', u'http://zetaestaticos.com/aragon/rss/205_es.xml'),
        (u'La crónica del Campo de Cariñena', u'http://zetaestaticos.com/aragon/rss/207_es.xml'),
        (u'La crónica de la Ribera Alta del Ebro', u'http://zetaestaticos.com/aragon/rss/211_es.xml'),
        (u'La crónica del Campo de Belchite', u'http://zetaestaticos.com/aragon/rss/331_es.xml'),
    ]

    # Restrict processing to the main article container.
    remove_tags_before = dict(name='div', attrs={'class': 'Pagina'})
    remove_tags_after = dict(name='div', attrs={'class': 'ComentariosNew'})
    keep_only_tags = [dict(name='div', attrs={'class': 'Pagina'})]
    # Sharing toolbars, comment widgets, galleries and sidebar boxes.
    remove_tags = [
        dict(
            name='nav',
            attrs={'class': ['Compartir', 'HerramientasConversacion Herramientas']}
        ),
        dict(name='h5', attrs={'class': ['CintilloBox']}),
        dict(
            name='div',
            attrs={
                'class': [
                    'BoxMenu BoxMenuConFoto', 'BxGalerias', 'ConStick',
                    'HerramientasComentarioNew Herramientas', 'NumeroComentarioNew'
                ]
            }
        ),
        dict(
            name='div',
            attrs={
                'class': [
                    'BoxPestanas', 'Box', 'ColumnaDerecha',
                    'NoticiasRelacionadasDeNoticia',
                    'CintilloNoticiasRelacionadasDeNoticia'
                ]
            }
        ),
        dict(name='a', attrs={'class': ['IrA BotonLink']}),
    ]

    def get_cover_url(self):
        # Fetch the print-edition front page (the format=1 image has a
        # higher resolution).
        soup = self.index_to_soup('http://pdf.elperiodicodearagon.com/edicion.php')
        for img in soup.findAll('img', src=True):
            src = img['src']
            if src.startswith('/funciones/img-public.php?key='):
                return 'http://pdf.elperiodicodearagon.com' + src
        return None

    extra_css = '''
h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:28px;}
h2 {font-family:Arial,Helvetica,sans-serif; font-style:italic;font-size:14px;color:#4D4D4D;}
h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
'''

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
'''
publico.es
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
    """Main daily newspaper from Spain (publico.es)."""

    # Publication identity.
    title = u'Publico.es'
    __author__ = 'Gerardo Diez'
    publisher = u'Mediapubli Sociedad de Publicaciones y Ediciones S.L.'
    category = 'news, politics, finances, world, spain, science, catalunya'
    language = 'es'
    cover_url = u'http://imagenes.publico.es/css/img/logo_publico.gif'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'utf8'

    # Fetch behaviour.
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 10
    remove_javascript = True
    no_stylesheets = True

    # The article lives inside the element with id="main"; drop the
    # comment machinery and promo boxes within it.
    keep_only_tags = dict(id='main')
    remove_tags = [
        dict(name='div', attrs={'class': ['Noticias_642x50', 'contInfo ancho']}),
        dict(name='ul', attrs={'class': ['navComentarios', 'comentarios']}),
        dict(name='div', attrs={'id': ['commentsContext', 'toolbar', 'comentarios']}),
        dict(name='h5', attrs={'id': 'comentarios'}),
    ]
    feeds = [
        (u'Internacional', u'http://www.publico.es/estaticos/rss/internacional'),
        (u'Espa\xf1a', u'http://www.publico.es/estaticos/rss/espana'),
        (u'Dinero', u'http://www.publico.es/estaticos/rss/dinero'),
        (u'Ciencias', u'http://www.publico.es/estaticos/rss/ciencias'),
        (u'Culturas', u'http://www.publico.es/estaticos/rss/culturas'),
        (u'Deportes', u'http://www.publico.es/estaticos/rss/deportes'),
        (u'Televisi\xf3n y Gente', u'http://www.publico.es/estaticos/rss/televisionygente'),
        (u'Catalu\xf1a', u'http://www.publico.es/estaticos/rss/catalunya'),
        (u'Viajes', u'http://www.publico.es/estaticos/rss/viajes'),
    ]

View File

@ -1,51 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
    """Colombian daily El Tiempo (eltiempo.com)."""

    # Publication identity.
    title = u'Periódico el Tiempo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    publication_type = 'newspaper'

    # The article body sits in div#contenidoArt; everything around it and
    # the social/sharing chrome inside it is discarded.
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = [dict(name='div', attrs={'class': 'modulo reporte'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    remove_tags = [
        dict(name='div', attrs={'class': 'social-media'}),
        dict(name='div', attrs={'class': 'recomend-art'}),
        dict(name='div', attrs={'class': 'caja-facebook'}),
        dict(name='div', attrs={'class': 'caja-twitter'}),
        dict(name='div', attrs={'class': 'caja-buzz'}),
        dict(name='div', attrs={'class': 'ico-mail2'}),
        dict(name='div', attrs={'id': 'caja-instapaper'}),
        dict(name='div', attrs={'class': 'modulo herramientas'}),
    ]

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]

View File

@ -1,49 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2016, Darko Miletic <darko.miletic at gmail.com>'
'''
eluniversal.com.mx
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElUniversal(BasicNewsRecipe):
    """Mexican daily El Universal (eluniversal.com.mx)."""

    # Publication identity.
    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = ('Sitio líder de noticias minuto x minuto de México y el mundo, con información sobre política,'
                   ' ciudad; videos, interactividad, opinión, blogs')
    publisher = 'El Universal'
    category = 'news, politics, Mexico'
    language = 'es_MX'
    publication_type = 'newspaper'
    encoding = 'utf8'

    # Fetch behaviour.
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}

    # Rely on calibre's automatic cleanup, but explicitly keep the summary,
    # date, time and source blocks that auto-cleanup would otherwise drop.
    auto_cleanup = True
    auto_cleanup_keep = ("//div[contains(concat(' ', normalize-space(@class), ' '),"
                         " ' field-name-field-resumen ')] | //div[@class='fechap'] |"
                         " //div[@class='hora'] | //div[contains(concat(' ', normalize-space(@class), ' '), ' field-name-field-fuente ')]")

    extra_css = '''
body{font-family: Roboto, sans-serif}
.h1{font-family: "Duplicate Ionic Bold", serif}
.field-name-field-resumen{font-family: "Duplicate Ionic Light", serif; display: block; font-size: large;}
'''
    feeds = [
        (u'Nacion', u'http://www.eluniversal.com.mx/seccion/1/rss.xml'),
        (u'Mundo', u'http://www.eluniversal.com.mx/seccion/5/rss.xml'),
        (u'Metropoli', u'http://www.eluniversal.com.mx/seccion/6/rss.xml'),
        (u'Estados', u'http://www.eluniversal.com.mx/seccion/13/rss.xml'),
        (u'Cartera', u'http://www.eluniversal.com.mx/seccion/14/rss.xml'),
        (u'Deportes', u'http://www.eluniversal.com.mx/seccion/15/rss.xml'),
        (u'Espectaculos', u'http://www.eluniversal.com.mx/seccion/133/rss.xml'),
        (u'Cultura', u'http://www.eluniversal.com.mx/seccion/17/rss.xml'),
        (u'Ciencia y salud', u'http://www.eluniversal.com.mx/seccion/16/rss.xml'),
        (u'Techbit', u'http://www.eluniversal.com.mx/seccion/5782/rss.xml'),
        (u'Periodismo de investigacion', u'http://www.eluniversal.com.mx/seccion/11363/rss.xml'),
    ]

View File

@ -1,58 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.clubdelebook.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElClubDelEbook(BasicNewsRecipe):
    """Argentinian e-book news blog El Club del eBook (clubdelebook.com)."""

    title = 'El club del ebook'
    __author__ = 'Darko Miletic'
    description = 'El Club del eBook, es la primera fuente de informacion sobre ebooks de Argentina. Aca vas a encontrar noticias, tips, tutoriales, recursos y opiniones sobre el mundo de los libros electronicos.'  # noqa
    tags = 'ebook, libro electronico, e-book, ebooks, libros electronicos, e-books'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'es_AR'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    masthead_url = 'http://dl.dropbox.com/u/2845131/elclubdelebook.png'
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
img{ margin-bottom: 0.8em;
border: 1px solid #333333;
padding: 4px; display: block
}
"""
    conversion_options = {
        'comment': description, 'tags': tags, 'publisher': title, 'language': language
    }
    remove_tags = [dict(attrs={'id': 'crp_related'})]
    remove_tags_after = dict(attrs={'id': 'crp_related'})
    feeds = [(u'Articulos', u'http://feeds.feedburner.com/ElClubDelEbook')]

    def preprocess_html(self, soup):
        """Strip inline styles and flatten anchors for e-reader output.

        Anchors are replaced by their text; anchors that only wrap an image
        become plain <div> containers so the image is kept without the link.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        for anchor in soup.findAll('a'):
            wrapped_img = anchor.find('img')
            if anchor.string is not None:
                # Fix: the original bound this to a local named ``str``,
                # shadowing the builtin; use a descriptive name instead.
                anchor.replaceWith(anchor.string)
            elif wrapped_img:
                # Keep the image but neutralise the hyperlink wrapper.
                anchor.name = 'div'
                anchor.attrs = []
            else:
                anchor.replaceWith(self.tag_to_string(anchor))
        # Every image needs an alt attribute for valid EPUB output.
        for img in soup.findAll('img', alt=False):
            img['alt'] = 'image'
        return soup

View File

@ -1,35 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
elcomercio.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElComercio(BasicNewsRecipe):
    """Ecuadorian daily El Comercio (elcomercio.com)."""

    title = 'El Comercio '
    __author__ = 'Darko Miletic'
    # Fix: the original description was copy-pasted from the Gizmodo recipe
    # ("the gadget guide...") and had nothing to do with this newspaper.
    description = 'Noticias de Ecuador y del mundo del diario El Comercio.'
    publisher = 'GRUPO EL COMERCIO C.A.'
    category = 'news, Ecuador, politics'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'es_EC'
    masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    remove_attributes = ['width', 'height']
    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]

    def preprocess_html(self, soup):
        # Make images IE/ADE friendly (adds alt attrs, fixes markup quirks).
        return self.adeify_images(soup)

View File

@ -1,56 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ele.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ele(BasicNewsRecipe):
    """Romanian women's magazine Ele (ele.ro)."""

    # Publication identity.
    title = u'Ele'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    publisher = u'Ele'
    category = 'Ziare,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # Fetch behaviour.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
    # Keep only the headline and the article text container.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'article_title'}),
        dict(name='div', attrs={'class': 'article_text'}),
    ]
    feeds = [
        (u'Feeds', u'http://www.ele.ro/rss_must_read'),
    ]

    def preprocess_html(self, soup):
        # Make images IE/ADE friendly.
        return self.adeify_images(soup)

View File

@ -1,61 +0,0 @@
##########################################################################
# Description: http://es.hu/ RSS channel
# Author: Bigpapa (bigpapabig@hotmail.com)
# Date: 2012.01.20. - V1.2
##########################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class elet_es_irodalom(BasicNewsRecipe):
    """Hungarian literary weekly Élet és Irodalom (es.hu)."""

    title = u'\u00c9let \u00e9s Irodalom'
    __author__ = 'Bigpapa'
    oldest_article = 7
    # Maximum number of articles kept per feed in the generated e-book.
    max_articles_per_feed = 30
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'iso-8859-2'
    category = 'Cikkek'
    language = 'hu'
    publication_type = 'newsportal'
    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
    needs_subscription = 'optional'
    masthead_url = 'http://www.es.hu/images/logo.jpg'
    timefmt = ' [%Y %b %d, %a]'

    # Credentials are not hard-coded here; calibre prompts for them at
    # download time when a subscription is configured.
    def get_browser(self):
        """Log in to es.hu when a username/password pair is supplied."""
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.es.hu/')
            br.select_form(name='userfrmlogin')
            br['cusername'] = self.username
            br['cpassword'] = self.password
            br.submit()
        return br

    keep_only_tags = [
        dict(name='div', attrs={'class': ['doc_author', 'doc_title', 'doc']})
    ]
    remove_tags = [
        dict(name='a', attrs={'target': ['_TOP']}),
        # Fix: the style value was listed twice verbatim; one entry matches
        # the same tags.
        dict(name='div', attrs={'style': [
            'float: right; margin-left: 5px; margin-bottom: 5px;']}),
    ]
    feeds = [
        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
    ]

View File

@ -1,90 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010-2014, Darko Miletic <darko.miletic at gmail.com>'
'''
elpais.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais_RSS(BasicNewsRecipe):
    """Spanish daily El País (elpais.com), built from its RSS feeds."""

    # Publication identity.
    title = u'El País'
    __author__ = 'Darko Miletic'
    description = u'Noticias de última hora sobre la actualidad en España y el mundo: política, economía, deportes, cultura, sociedad, tecnología, gente, opinión, viajes, moda, televisión, los blogs y las firmas de EL PAÍS. Además especiales, vídeos, fotos, audios, gráficos, entrevistas, promociones y todos los servicios de EL PAÍS.' # noqa
    publisher = 'EDICIONES EL PAIS, S.L.'
    category = 'news, politics, finances, world, spain'
    language = 'es'
    publication_type = 'newspaper'
    masthead_url = 'http://ep01.epimg.net/iconos/v1.x/v1.0/logos/cabecera_portada.png'
    # The print-edition front page PDF is published under a dated path.
    cover_url = strftime('http://srv00.epimg.net/pdf/elpais/1aPagina/%Y/%m/ep-%Y%m%d.pdf')

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
#subtitulo_noticia, .firma, .figcaption{font-size: small}
body{font-family: Arial,Helvetica,Garuda,sans-serif}
img{margin-bottom: 0.4em; display:block}
"""
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # Title, subtitle, byline and body columns only.
    keep_only_tags = [
        dict(attrs={'id': ['titulo_noticia', 'subtitulo_noticia']}),
        dict(attrs={'class': ['firma', 'columna_texto', 'entrevista_p_r']}),
    ]
    remove_tags = [
        dict(name=['iframe', 'embed', 'object']),
        dict(attrs={'class': 'disposicion_vertical'}),
    ]
    feeds = [
        (u'Lo ultimo', u'http://ep00.epimg.net/rss/tags/ultimas_noticias.xml'),
        (u'America Latina', u'http://elpais.com/tag/rss/latinoamerica/a/'),
        (u'Mexico', u'http://elpais.com/tag/rss/mexico/a/'),
        (u'Europa', u'http://elpais.com/tag/rss/europa/a/'),
        (u'Estados Unidos', u'http://elpais.com/tag/rss/estados_unidos/a/'),
        (u'Oriente proximo', u'http://elpais.com/tag/rss/oriente_proximo/a/'),
        (u'Andalucia', u'http://ep00.epimg.net/rss/ccaa/andalucia.xml'),
        (u'Catalunia', u'http://ep00.epimg.net/rss/ccaa/catalunya.xml'),
        (u'Comunidad Valenciana', u'http://ep00.epimg.net/rss/ccaa/valencia.xml'),
        (u'Madrid', u'http://ep00.epimg.net/rss/ccaa/madrid.xml'),
        (u'Pais Vasco', u'http://ep00.epimg.net/rss/ccaa/paisvasco.xml'),
        (u'Galicia', u'http://ep00.epimg.net/rss/ccaa/galicia.xml'),
        (u'Sociedad', u'http://ep00.epimg.net/rss/sociedad/portada.xml'),
        (u'Deportes', u'http://ep00.epimg.net/rss/deportes/portada.xml'),
        (u'Cultura', u'http://ep00.epimg.net/rss/cultura/portada.xml'),
        (u'Cine', u'http://elpais.com/tag/rss/cine/a/'),
        (u'Economía', u'http://elpais.com/tag/rss/economia/a/'),
        (u'Literatura', u'http://elpais.com/tag/rss/libros/a/'),
        (u'Musica', u'http://elpais.com/tag/rss/musica/a/'),
        (u'Arte', u'http://elpais.com/tag/rss/arte/a/'),
        (u'Medio Ambiente', u'http://elpais.com/tag/rss/medio_ambiente/a/'),
        (u'Tecnologia', u'http://ep01.epimg.net/rss/tecnologia/portada.xml'),
        (u'Ciencia', u'http://ep00.epimg.net/rss/tags/c_ciencia.xml'),
        (u'Salud', u'http://elpais.com/tag/rss/salud/a/'),
        (u'Ocio', u'http://elpais.com/tag/rss/ocio/a/'),
        (u'Justicia y Leyes', u'http://elpais.com/tag/rss/justicia/a/'),
        (u'Guerras y conflictos', u'http://elpais.com/tag/rss/conflictos/a/'),
        (u'Politica', u'http://ep00.epimg.net/rss/politica/portada.xml'),
        (u'Opinion', u'http://ep01.epimg.net/rss/elpais/opinion.xml'),
    ]

    def get_article_url(self, article):
        # Photo albums and live match pages are not convertible articles.
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url or '/album/' in url or '/futbol/partido/' in url:
            self.log('Skipping non-article', url)
            return None
        return url

    def preprocess_raw_html(self, raw, url):
        # Replace the site's <head> (heavy scripts/styles) with a minimal one.
        head_end = raw.find('</head>')
        return '<html><head><title>Untitled</title>' + raw[head_end:]

View File

@ -1,64 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
elsevier.nl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
    """Dutch weekly Elsevier (elsevier.nl).

    NOTE(review): the class name looks copy-pasted from the Pagina12
    recipe; it is kept unchanged because external code may load it by name.
    """

    title = 'Elsevier.nl'
    __author__ = 'Darko Miletic'
    description = 'News from Holland'
    publisher = 'elsevier.nl'
    category = 'news, politics, Holland'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'nl'
    country = 'NL'
    remove_empty_feeds = True
    masthead_url = 'http://www.elsevier.nl/static/elsevier/stdimg/logo.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    keep_only_tags = dict(attrs={'id': 'artikel_container'})
    remove_tags_before = dict(attrs={'id': 'breadcrumb_container'})
    remove_tags_after = dict(attrs={'class': 'author_link'})
    remove_tags = [
        dict(attrs={'id': 'breadcrumb_container'}),
        dict(name='div', attrs={'class': 'pullout_vak'})
    ]
    remove_attributes = ['width', 'height']
    feeds = [
        (u'Laatste nieuws', u'http://www.elsevier.nl/web/RSS/Homepage-RSS.htm?output=xml'),
        (u'Nederland', u'http://www.elsevier.nl/web/RSS/Nederland-RSS.htm?output=xml'),
        (u'Politiek', u'http://www.elsevier.nl/web/RSS/Politiek-RSS.htm?output=xml'),
        (u'Europese Unie', u'http://www.elsevier.nl/web/RSS/Europese-Unie-RSS.htm?output=xml'),
        (u'Buitenland', u'http://www.elsevier.nl/web/RSS/Buitenland-RSS.htm?output=xml'),
        (u'Economie', u'http://www.elsevier.nl/web/RSS/Economie-RSS.htm?output=xml'),
        (u'Wetenschap', u'http://www.elsevier.nl/web/RSS/Wetenschap-RSS.htm?output=xml'),
        (u'Cultuur & Televisie', u'http://www.elsevier.nl/web/RSS/Cultuur-Televisie-RSS.htm?output=xml'),
        (u'Society', u'http://www.elsevier.nl/web/RSS/Society-RSS.htm?output=xml'),
        (u'Internet&/Gadgets', u'http://www.elsevier.nl/web/RSS/Internet-Gadgets-RSS.htm?output=xml'),
        (u'Comentaren', u'http://www.elsevier.nl/web/RSS/Commentaren-RSS.htm?output=xml')
    ]

    def print_version(self, url):
        """Request the printer-friendly rendering of an article."""
        return url + '?print=true'

    def get_article_url(self, article):
        """Derive the article URL from the feed entry's guid.

        Fixes two defects in the original one-liner
        ``article.get('guid', None).rpartition('?')[0]``:
        it raised AttributeError when the entry had no guid, and it
        returned '' (dropping the article) when the guid contained no
        query string, because ``rpartition`` puts the whole string in the
        third slot when the separator is absent.
        """
        guid = article.get('guid', None)
        if guid is None:
            return None
        head, sep, _tail = guid.rpartition('?')
        return head if sep else guid

    def preprocess_html(self, soup):
        # Drop inline style attributes so extra_css controls the layout.
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@ -1,54 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.tiempo.hn
'''
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Create a new tag, working with both old and new BeautifulSoup.

    Modern BeautifulSoup exposes ``soup.new_tag``; on old bs3 soups the
    ``Tag`` constructor is used directly as a fallback.
    """
    maker = getattr(soup, 'new_tag', None)
    if maker is None:
        # bs3 fallback: an empty attrs tuple becomes None for Tag().
        return Tag(soup, name, attrs=attrs or None)
    return maker(name, attrs=dict(attrs))
class ElTiempoHn(BasicNewsRecipe):
    """Honduran daily El Tiempo (tiempo.hn)."""

    # Publication identity.
    title = 'El Tiempo - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'El Tiempo'
    category = 'news, politics, Honduras'
    language = 'es_HN'
    lang = 'es-HN'
    direction = 'ltr'
    encoding = 'utf-8'

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    remove_tags = [dict(name=['form', 'object', 'embed', 'base'])]
    keep_only_tags = [dict(name='td', attrs={'id': 'mainbodycont'})]
    feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]

    def preprocess_html(self, soup):
        # Stamp language/direction on the document and inject explicit
        # language and charset <meta> tags so conversion gets them right.
        soup.html['lang'] = self.lang
        soup.html['dir'] = self.direction
        lang_meta = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        charset_meta = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, lang_meta)
        soup.head.insert(1, charset_meta)
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)

View File

@ -1,61 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.eluniversal.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ElUniversal(BasicNewsRecipe):
    """Venezuelan daily El Universal (eluniversal.com)."""

    # Publication identity.
    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Venezuela y el mundo. Avances informativos de ultimo minuto. Incluye secciones de politica, deportes, economia y mas.'
    publisher = 'El Universal'
    category = 'news, Caracas, Venezuela, world'
    language = 'es_VE'
    publication_type = 'newspaper'
    encoding = 'cp1252'
    masthead_url = 'http://cdn.eluniversal.com/images/eu4/back/logo-eluniversal.gif'
    cover_url = 'http://images.eluniversal.com//pdf/primeraPlana.pdf'

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
.txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
.txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
.txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
.txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    # The printable page wraps the article between its print header and the
    # text-size widget; strip the surrounding chrome.
    remove_tags_before = dict(attrs={'class': 'header-print MB10'})
    remove_tags_after = dict(attrs={'id': 'SizeText'})
    remove_tags = [
        dict(name=['object', 'link', 'script', 'iframe', 'meta']),
        dict(attrs={'class': 'header-print MB10'}),
    ]
    feeds = [
        (u'Ultimas Noticias', u'http://www.eluniversal.com/rss/avances.xml'),
        (u'Economia', u'http://www.eluniversal.com/rss/eco_avances.xml'),
        (u'Internacionales', u'http://www.eluniversal.com/rss/int_avances.xml'),
        (u'Deportes', u'http://www.eluniversal.com/rss/dep_avances.xml'),
        (u'Cultura', u'http://www.eluniversal.com/rss/cul_avances.xml'),
        (u'Nacional y politica', u'http://www.eluniversal.com/rss/pol_avances.xml'),
        (u'Ciencia y tecnologia', u'http://www.eluniversal.com/rss/cyt_avances.xml'),
        (u'Universo empresarial', u'http://www.eluniversal.com/rss/uni_avances.xml'),
        (u'Caracas', u'http://www.eluniversal.com/rss/ccs_avances.xml'),
    ]

    def print_version(self, url):
        # The print-friendly version is the article URL plus an -imp suffix.
        return url + '-imp'

    def get_article_url(self, article):
        # Use the feed entry's guid as the canonical article URL.
        return article.get('guid', None)

View File

@ -1,61 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
eluniverso.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElUniverso_Ecuador(BasicNewsRecipe):
    """Ecuadorian daily El Universo (eluniverso.com)."""

    # Publication identity.
    title = 'El Universo - Ecuador'
    __author__ = 'Darko Miletic'
    description = 'Noticias del Ecuador y el resto del mundo'
    publisher = 'El Universo'
    category = 'news, politics, Ecuador'
    language = 'es_EC'
    publication_type = 'newspaper'
    encoding = 'utf8'
    masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif; color: #333333 }
h2{font-family: Georgia,"Times New Roman",Times,serif; color: #1B2D60}
"""
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # Keep the article container; drop navigation arrows, media boxes and
    # the resources sidebar.
    remove_tags = [
        dict(attrs={'class': ['flechs', 'multiBox', 'colRecursos']}),
        dict(name=['meta', 'link', 'embed', 'object', 'iframe', 'base']),
    ]
    keep_only_tags = [dict(attrs={'class': 'Nota'})]
    remove_tags_after = dict(attrs={'id': 'TextoPrint'})
    remove_tags_before = dict(attrs={'id': 'FechaPrint'})
    feeds = [
        (u'Portada', u'http://www.eluniverso.com/rss/portada.xml'),
        (u'Politica', u'http://www.eluniverso.com/rss/politica.xml'),
        (u'Economia', u'http://www.eluniverso.com/rss/economia.xml'),
        (u'Sucesos', u'http://www.eluniverso.com/rss/sucesos.xml'),
        (u'Migracion', u'http://www.eluniverso.com/rss/migrantes_tema.xml'),
        (u'El Pais', u'http://www.eluniverso.com/rss/elpais.xml'),
        (u'Internacionales', u'http://www.eluniverso.com/rss/internacionales.xml'),
        (u'Deportes', u'http://www.eluniverso.com/rss/deportes.xml'),
        (u'Gran Guayaquill', u'http://www.eluniverso.com/rss/gran_guayaquil.xml'),
        (u'Entretenimiento', u'http://www.eluniverso.com/rss/arteyespectaculos.xml'),
        (u'Vida', u'http://www.eluniverso.com/rss/tuvida.xml'),
        (u'Opinion', u'http://www.eluniverso.com/rss/opinion.xml'),
    ]

    def preprocess_html(self, soup):
        # Drop inline style attributes so extra_css controls the layout.
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,43 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
emg.rs/en/news
'''
from calibre.web.feeds.news import BasicNewsRecipe
class emportal_en(BasicNewsRecipe):
    """Ekonom:east News — daily business news from Serbia (emg.rs)."""

    # Publication identity.
    title = 'Ekonom:east News'
    __author__ = 'Darko Miletic'
    description = 'Daily business news from Serbia.'
    publisher = 'Ekonom:east Media Group'
    category = 'Business, SEE, Serbia, Belgrade, news, Ekonomist, EMportal'
    language = 'en'
    encoding = 'utf8'
    masthead_url = 'http://www.emg.rs/img/emportal-rss.png'

    # Fetch behaviour.
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # Keep the article text container; strip the share widget that trails it.
    keep_only_tags = [dict(attrs={'class': 'text'})]
    remove_tags = [dict(attrs={'class': ['text-share']})]
    remove_tags_after = dict(attrs={'class': 'text-share'})
    remove_attributes = ['width', 'height']
    feeds = [(u'Serbia', u'http://www.emg.rs/en/news/serbia/rss.xml')]

    def print_version(self, url):
        # The print-friendly page swaps the .html suffix for .print.html.
        return url.replace('.html', '.print.html')

    def preprocess_html(self, soup):
        # Drop inline style attributes so extra_css controls the layout.
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,64 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1341650280(BasicNewsRecipe):
"""Film news, features, interviews and reviews from Empire magazine."""
title = u'Empire Magazine'
description = 'Author D.Asbury. Film articles from Empire Mag. '
language = 'en'
__author__ = 'Dave Asbury'
# last updated 7/7/12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
max_articles_per_feed = 20
# Current magazine cover image doubles as the e-book cover.
cover_url = 'http://www.empireonline.com/images/magazine/cover.jpg'
conversion_options = {
'linearize_tables': True,
}
# Article extraction is done with raw-HTML regexps rather than tag
# selectors: each pattern blanks out one chunk of page chrome, usually
# the region between a pair of HTML marker comments (the marker comments
# themselves are re-emitted so later patterns can still anchor on them).
preprocess_regexps = [
(re.compile(r'<a href="http://twitter.com/share.*?</a>',
re.IGNORECASE | re.DOTALL), lambda match: ''),
(re.compile(r'<head>.*?<!-- CONTENT: START -->', re.IGNORECASE |
re.DOTALL), lambda match: '<head></head><!-- CONTENT: START -->'),
(re.compile(r'<!-- LATEST NEWS HEADLINES: START -->.*?<!-- LATEST NEWS HEADLINES: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- LATEST NEWS HEADLINES: START --><!-- LATEST NEWS HEADLINES: END -->'),
(re.compile(r'<!-- RELATED FUTURE FILMS: START -->.*?<!-- RELATED FUTURE FILMS: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- RELATED FUTURE FILMS: START --><!-- RELATED FUTURE FILMS: END -->'),
(re.compile(r'<!-- CURRENT HIGHLIGHTS: START-->.*?<!-- CURRENT HIGHLIGHTS: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- CURRENT HIGHLIGHTS: START--><!-- CURRENT HIGHLIGHTS: END -->'),
(re.compile(r'<!-- RELATED REVIEWS: START -->.*?<!-- RELATED REVIEWS: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- RELATED REVIEWS: START --><!-- RELATED REVIEWS: END -->'),
(re.compile(r'<!-- RELATED INTERVIEWS -->.*?<!-- RELATED REVIEWS: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- RELATED INTERVIEWS --><!-- RELATED REVIEWS: END -->'),
(re.compile(r'<!-- CONTENT: END -->.*?</body>', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- CONTENT: END --></body>'),
(re.compile(r'<!-- STORY: END -->.*?</body>', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- STORY: END --></body>'),
(re.compile(r'<!-- RATINGS GUIDE: START-->.*?<!-- RATINGS GUIDE: END-->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- RATINGS GUIDE: START--><!-- RATINGS GUIDE: END-->'),
(re.compile(r'<strong>SUBSCRIBE TO EMPIRE</strong>.*?</tbody>',
re.IGNORECASE | re.DOTALL), lambda match: '</tbody>'),
(re.compile(r'<!-- USER REVIEWS: START -->.*?<!-- USER REVIEWS: END -->', re.IGNORECASE |
re.DOTALL), lambda match: '<!-- USER REVIEWS: START --><!-- USER REVIEWS: END -->'),
(re.compile(r'Advertisement', re.IGNORECASE | re.DOTALL), lambda match: ''),
(re.compile(r'<a name="haveyoursay".*?now to have your say.',
re.IGNORECASE | re.DOTALL), lambda match: ''),
]
# Intentionally empty: content selection is handled by the regexps above.
# The commented entries are kept as a record of earlier selector attempts.
keep_only_tags = [
# dict(name='h1'),
# dict(attrs={'class' : 'mediumblack'}),
]
remove_tags = [dict(name='td', attrs={'width': '200', 'valign': 'top'}),
dict(name='b'),
dict(name='a', attrs={'name': 'haveyoursay'}),
dict(attrs={'class': 'newslink'}),
]
# The site has no native RSS; these feeds are scraped via feed43.com.
feeds = [(u'News', u'http://feed43.com/7338478755673147.xml'),
(u'Recent Features', u'http://feed43.com/4346347750304760.xml'),
(u'Interviews', u'http://feed43.com/3418350077724081.xml'),
(u'Film Reviews', u'http://feed43.com/2643703076510627.xml'),
]

View File

@ -1,39 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
'''
japan.engadget.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EndgadgetJapan(BasicNewsRecipe):
    """Latest posts from Engadget Japanese (japanese.engadget.com).

    The site is scraped directly: parse_index() builds one 'Latest Posts'
    section from the article headers on the front page.
    """
    title = u'Endgadget\u65e5\u672c\u7248'
    __author__ = 'Hiroshi Miura'
    # BUGFIX: 'language' was assigned twice in the original; keep one.
    language = 'ja'
    encoding = 'utf-8'
    cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
    masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    index = 'http://japanese.engadget.com/'
    remove_tags_before = dict(name="header", attrs={'class': "header"})
    remove_tags_after = dict(name='div', attrs={'class': 'post-meta'})

    def parse_index(self):
        """Return [(section_title, articles)] scraped from the index page."""
        newsarticles = []
        soup = self.index_to_soup(self.index)
        for topstories in soup.findAll('header', attrs={'class': 'post-header'}):
            itt = topstories.find('h2')
            itema = itt.find('a', href=True)
            itemtime = topstories.find('span', attrs={'class': 'time'})
            newsarticles.append({
                'title': itema.string,
                'date': itemtime.string,
                'url': itema['href'],
                'description': '',
            })
        return [('Latest Posts', newsarticles)]

View File

@ -1,66 +0,0 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe
class EOSWetenschap(BasicNewsRecipe):
    """Dutch-language science news from eoswetenschap.eu, via RSS feeds."""
    title = u'EOS Wetenschap'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'Wetenschapsnieuws'
    oldest_article = 7
    language = 'nl'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Keep only the basic article building blocks.
    keep_only_tags = [
        dict(name='title'),
        dict(name='h1'),
        dict(name='img'),
        dict(name='p'),
    ]
    remove_tags = []

    # BUGFIX: the 'Geneeskunde' and 'Mobiliteit' URLs lacked the '/rss'
    # path segment every other feed uses; normalized to match.
    # NOTE(review): confirm against the site if it is still reachable.
    feeds = [
        (u'Alle nieuwsberichten', u'http://eoswetenschap.eu/rss/artikels/all'),
        (u'Gezondheid', u'http://eoswetenschap.eu/rss/artikels/Gezondheid'),
        (u'Geneeskunde', u'http://eoswetenschap.eu/rss/artikels/Geneeskunde'),
        (u'Voeding', u'http://eoswetenschap.eu/rss/artikels/Voeding'),
        (u'Sport', u'http://eoswetenschap.eu/rss/artikels/Sport'),
        (u'Natuur & Milieu', u'http://eoswetenschap.eu/rss/artikels/Natuur%20en%20Milieu'),
        (u'Energie', u'http://eoswetenschap.eu/rss/artikels/Energie'),
        (u'Klimaatverandering', u'http://eoswetenschap.eu/rss/artikels/Klimaatverandering'),
        (u'Natuur', u'http://eoswetenschap.eu/rss/artikels/Natuur'),
        (u'Gedrag', u'http://eoswetenschap.eu/rss/artikels/Gedrag'),
        (u'Psychologie', u'http://eoswetenschap.eu/rss/artikels/Psychologie'),
        (u'Hersenwetenschap', u'http://eoswetenschap.eu/rss/artikels/Hersenwetenschap'),
        (u'Sociologie', u'http://eoswetenschap.eu/rss/artikels/Sociologie'),
        (u'Fundamenteel onderzoek', u'http://eoswetenschap.eu/rss/artikels/Onderzoek'),
        (u'Natuur- en wiskunde', u'http://eoswetenschap.eu/rss/artikels/Natuur-%20en%20wiskunde'),
        (u'Genetica', u'http://eoswetenschap.eu/rss/artikels/Genetica'),
        (u'Chemie', u'http://eoswetenschap.eu/rss/artikels/Chemie'),
        (u'Technologie', u'http://eoswetenschap.eu/rss/artikels/Technologie'),
        (u'Biotechnologie', u'http://eoswetenschap.eu/rss/artikels/Biotechnologie'),
        (u'Nanotechnologie', u'http://eoswetenschap.eu/rss/artikels/Nanotechnologie'),
        (u'ICT', u'http://eoswetenschap.eu/rss/artikels/Internet'),
        (u'Mobiliteit', u'http://eoswetenschap.eu/rss/artikels/Mobiliteit'),
        (u'Geschiedenis', u'http://eoswetenschap.eu/rss/artikels/Historisch'),
        (u'Archeologie- en paleontologie', u'http://eoswetenschap.eu/rss/artikels/Archeologie_Paleontologie'),
        (u'Moderne geschiedenis', u'http://eoswetenschap.eu/rss/artikels/Moderne_geschiedenis'),
        (u'Ruimte', u'http://eoswetenschap.eu/rss/artikels/Ruimte'),
        (u'Ruimtevaart', u'http://eoswetenschap.eu/rss/artikels/ruimtevaart'),
        (u'Kosmologie', u'http://eoswetenschap.eu/rss/artikels/Kosmologie'),
    ]

View File

@ -1,26 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Erhvervs&#149;Avisen: RSS feed: Seneste nyt - erhvervsavisen.dk
'''
class Erhvervsavisen_dk(BasicNewsRecipe):
    """Danish local/business news for Koege, Greve, Ringsted and Stevns."""
    title = 'Erhvervs Avisen'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale, regionale nyheder, bolig, motor og job i Køge, Greve, Ringsted og Stevns på erhvervsavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'
    oldest_article = 7
    max_articles_per_feed = 50
    # Let calibre's heuristic cleanup isolate the article body.
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Erhvervs Avisen',
         'http://erhvervsavisen.dk/section/senestenytrss'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Esbjerg: RSS feed: Seneste nyt - esbjerg.lokalavisen.dk
'''
class EsbjergLokalavisen_dk(BasicNewsRecipe):
    """Danish local news, sport and culture from Esbjerg (lokalavisen.dk)."""
    title = 'Lokalavisen Esbjerg'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Esbjerg og omegn på esbjerg.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'
    oldest_article = 7
    max_articles_per_feed = 50
    # Let calibre's heuristic cleanup isolate the article body.
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,127 +0,0 @@
from __future__ import print_function
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Create a new tag compatibly across BeautifulSoup versions.

    Newer BeautifulSoup exposes ``soup.new_tag``; older calibre-bundled
    versions require constructing ``Tag`` directly.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class Estadao(BasicNewsRecipe):
    """Brazilian news from Estadão (estadao.com.br).

    If a thumbalizr.com API key is supplied, a screenshot of the site's
    front page is used as a fallback cover when the printed edition's
    cover image cannot be fetched.
    """
    THUMBALIZR_API = ''  # ---->Get your at http://www.thumbalizr.com/ and put here
    LANGUAGE = 'pt_br'
    language = 'pt'
    LANGHTM = 'pt-br'
    ENCODING = 'utf'
    ENCHTM = 'utf-8'
    directionhtm = 'ltr'
    requires_version = (0, 7, 47)
    news = True

    title = u'Estad\xe3o'
    __author__ = 'Euler Alves'
    description = u'Brazilian news from Estad\xe3o'
    publisher = u'Estad\xe3o'
    category = 'news, rss'

    oldest_article = 4
    max_articles_per_feed = 100
    summary_length = 1000

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    timefmt = ' [%d %b %Y (%a)]'

    # NOTE: 'pubdate' is deliberately taken before the early-morning
    # adjustment below; only the cover URL uses the shifted date.
    hoje = datetime.now() - timedelta(days=2)
    pubdate = hoje.strftime('%a, %d %b')
    if hoje.hour < 10:
        # Early in the day the current cover is presumably not online yet.
        hoje = hoje - timedelta(days=1)
    CAPA = 'http://www.estadao.com.br/estadaodehoje/' + \
        hoje.strftime('%Y%m%d') + '/img/capadodia.jpg'
    SCREENSHOT = 'http://estadao.com.br/'
    cover_margins = (0, 0, 'white')
    masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'

    keep_only_tags = [
        dict(name='div', attrs={'class': ['bb-md-noticia', 'corpo']})]
    remove_tags = [
        dict(name='div', attrs={'id': ['bb-md-noticia-tabs']}),
        dict(name='div', attrs={'class': ['tags', 'discussion', 'bb-gg adsense_container']}),
        dict(name='a'),
        dict(name='iframe'),
        dict(name='link'),
        dict(name='script'),
    ]

    feeds = [
        (u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml'),
        (u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml'),
        (u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml'),
        (u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml'),
        (u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/'),
        (u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml'),
        (u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml'),
        (u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml'),
        (u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
    ]

    conversion_options = {
        'title': title,
        'comments': description,
        'publisher': publisher,
        'tags': category,
        'language': LANGUAGE,
        'linearize_tables': True,
    }

    def preprocess_html(self, soup):
        """Drop inline styles and ensure language/charset meta tags exist."""
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv': 'Content-Language'}):
            meta0 = new_tag(soup, 'meta', [
                ("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
            soup.head.insert(0, meta0)
        if not soup.find(attrs={'http-equiv': 'Content-Type'}):
            meta1 = new_tag(soup, 'meta', [
                ("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)])
            soup.head.insert(0, meta1)
        return soup

    def postprocess_html(self, soup, first):
        """Rotate wide landscape images so they fit the page.

        Assumes the processed html references images by a path that
        Image.open() can load — TODO confirm at this stage of conversion.
        """
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)  # raises if the image cannot be loaded
            width, height = img.size
            print('img is: ', iurl, 'width is: ', width, 'height is: ', height)
            # BUGFIX: the original had "if img < 0: raise RuntimeError('Out of
            # memory')" — a C-style status check that is meaningless for an
            # Image object (and a TypeError on Python 3). Image.open() already
            # fails loudly, so the check is removed.
            if width > height and width > 590:
                print('Rotate image')
                pw = PixelWand()
                img.rotate(pw, -90)
                img.save(iurl)
        return soup

    def get_cover_url(self):
        """Return the printed-edition cover, falling back to a screenshot.

        Without a THUMBALIZR_API key no cover URL is returned at all.
        """
        if self.THUMBALIZR_API:
            cover_url = self.CAPA
            try:
                soup = self.index_to_soup(cover_url)
                cover_item = soup.find('body')
                if cover_item:
                    # The cover URL served an HTML page instead of an image:
                    # fall back to a thumbalizr screenshot of the site.
                    cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
                        '&url=' + self.SCREENSHOT + '&width=600&quality=90'
                return cover_url
            except Exception:
                cover_url = 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
                    '&url=' + self.SCREENSHOT + '&width=600&quality=90'
                return cover_url

View File

@ -1,60 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
# Language code used both by the recipe and in the RAPID feed URLs.
LANGUAGE = 'de'


def feedlink(num):
    """Return the EU RAPID syndication URL for press-release feed *num*."""
    return ('http://europa.eu/rapid/syndication/QuickRSSAction.do'
            '?id={0}&lang={1}'.format(num, LANGUAGE))
class EUCommissionPress(BasicNewsRecipe):
    """EU Commission press releases, one RAPID feed per policy area."""
    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    __author__ = 'malfi'
    language = LANGUAGE
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'

    # Only the press-release body is kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'pressReleaseContentMain'}),
    ]
    remove_tags = []

    feeds = [
        (u'Pressemitteilung des Tages', feedlink(64)),
        (u'Presidency', feedlink(137)),
        (u'Foreign affairs and security policy', feedlink(138)),
        (u'Agriculture and rural development', feedlink(139)),
        (u'Budget and financial programming ', feedlink(140)),
        (u'Climate action', feedlink(141)),
        (u'Competition', feedlink(142)),
        (u'Development', feedlink(143)),
        (u'Digital agenda', feedlink(144)),
        (u'Economic and monetary affairs', feedlink(145)),
        (u'Education, culture, multilingualism and youth ', feedlink(146)),
        (u'Employment, social Affairs and inclusion ', feedlink(147)),
        (u'Energy', feedlink(148)),
        (u'Enlargment and European neighbourhood policy ', feedlink(149)),
        (u'Environment', feedlink(150)),
        (u'Health and consumer policy', feedlink(151)),
        (u'Home affairs', feedlink(152)),
        (u'Industry and entrepreneurship', feedlink(153)),
        (u'Inter-Institutional relations and administration', feedlink(154)),
        (u'Internal market and services', feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response', feedlink(156)),
        (u'Justice, fundamental rights and citizenship', feedlink(157)),
        (u'Maritime affairs and fisheries', feedlink(158)),
        (u'Regional policy', feedlink(159)),
        (u'Research and innovation', feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud', feedlink(161)),
        (u'Trade', feedlink(162)),
        (u'Transport', feedlink(163)),
    ]

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -1,66 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
europasur.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Europasur(BasicNewsRecipe):
    """Spanish-language news from Europa Sur (Joly Digital)."""
    title = 'Europa Sur'
    __author__ = 'Darko Miletic'
    description = 'News in Spanish'
    publisher = 'Joly Digital'
    category = 'news, politics, Spanish'
    language = 'es'
    publication_type = 'newspaper'
    encoding = 'cp1252'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    delay = 2
    no_stylesheets = True
    extra_css = """ body{font-family: Verdana,Arial,Helvetica,sans-serif}
h2{font-family: Georgia,Times New Roman,Times,serif}
.subtitle{font-weight:bold}
.caption{font-size: small}
.body{font-size: 1.1em}
.info{color: #848484}
"""

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'class': ['titles', 'current']}),
        dict(attrs={'id': 'newsBody'}),
    ]
    remove_tags = [
        dict(name=['iframe', 'base', 'embed', 'object']),
        dict(name='a', attrs={'class': 'zoom thickbox'}),
        dict(name='div', attrs={'class': 'other'}),
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Portada', u'http://www.europasur.es/rss/articles.php'),
        (u'Deportes', u'http://www.europasur.es/rss/articles.php?sec=1224'),
        (u'Economia', u'http://www.europasur.es/rss/articles.php?sec=427'),
        (u'Espana', u'http://www.europasur.es/rss/articles.php?sec=437'),
        (u'Mundo', u'http://www.europasur.es/rss/articles.php?sec=428'),
        (u'Pasarela', u'http://www.europasur.es/rss/articles.php?sec=1958'),
        (u'Ocio y cultura', u'http://www.europasur.es/rss/articles.php?sec=1210'),
        (u'Opinion', u'http://www.europasur.es/rss/articles.php?sec=1195'),
        (u'Tecnologia', u'http://www.europasur.es/rss/articles.php?sec=1681'),
        (u'Salud', u'http://www.europasur.es/rss/articles.php?sec=2379'),
    ]

    def image_url_processor(self, baseurl, url):
        """Rewrite article image URLs to the grupojoly media cache."""
        stem, _, width = url.rpartition('&an=')
        prefix, _, ext = stem.rpartition('.')
        article_id = prefix.rpartition('/')[2]
        return ('http://media.grupojoly.com/cache/{0}_{1}x{1}_{2}000.{3}'
                .format(article_id, width, ext, ext))

View File

@ -1,26 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Evangelizo(BasicNewsRecipe):
    """Daily gospel readings (German) from evangeliumtagfuertag.org."""
    title = 'Evangelizo.org'
    __author__ = 'Bobus'
    language = 'de'
    oldest_article = 2
    max_articles_per_feed = 30
    use_embedded_content = True

    feeds = [
        ('EvangleliumTagfuerTag',
         'http://www.evangeliumtagfuertag.org/rss/evangelizo_rss-de.xml'),
    ]

    # Un-escape the parenthesised verse markers and break lines after
    # sentence-ending punctuation.
    preprocess_regexps = [
        (re.compile(
            r'&lt;font size="-2"&gt;([(][0-9]*[)])&lt;/font&gt;'), r'\g<1>'),
        (re.compile(r'([\.!]\n)'), r'\g<1><br />'),
    ]

    def populate_article_metadata(self, article, soup, first):
        # Titles arrive with the verse marker wrapped in a <font> tag;
        # keep just the parenthesised number.
        cleaned = re.sub(
            r'<font size="-2">([(][0-9]*[)])</font>', r'\g<1>', article.title)
        article.title = cleaned

View File

@ -1,50 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
evz.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EvenimentulZilei(BasicNewsRecipe):
    """Romanian daily news from evz.ro."""
    title = u'Evenimentul Zilei'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = u'Evenimentul Zilei'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'single'}),
        dict(name='img', attrs={'id': 'placeholder'}),
        dict(name='a', attrs={'id': 'holderlink'}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': ['articleInfo']}),
        dict(name='div', attrs={'id': ['bannerAddoceansArticleJos']}),
        dict(name='div', attrs={'id': ['bannerAddoceansArticle']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': ['bannerAddoceansArticleJos']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.evz.ro/rss.xml'),
    ]

    def preprocess_html(self, soup):
        # Normalise <img> tags so they survive conversion.
        return self.adeify_images(soup)

View File

@ -1,55 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
exiledonline.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Exiled(BasicNewsRecipe):
    """Exiled Online news blog (formerly The eXile)."""
    title = 'Exiled Online'
    __author__ = 'Darko Miletic'
    description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    publisher = 'Exiled Online'
    category = 'news, politics, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_javascript = True
    language = 'en'
    publication_type = 'newsblog'
    masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
#topslug{font-size: xx-large; font-weight: bold; color: red}
"""

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'id': ['comments', 'navig']}),
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        """Strip inline styles and unwrap anchors to their text."""
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    def get_article_url(self, article):
        """Return the single-page ('all/1/') URL for a feed item.

        BUGFIX: the original concatenated unconditionally, raising
        TypeError when a feed item carried no link; such items are now
        skipped by returning None.
        """
        raw = article.get('link', None)
        if not raw:
            return None
        return raw + 'all/1/'

View File

@ -1,57 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Explosm(BasicNewsRecipe):
    """Cyanide & Happiness strips from the Explosm feed, comic pages only."""
    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    # Keep nothing but the comic strip image itself.
    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'}),
    ]
    remove_tags = [
        dict(name='div'), dict(name='span'), dict(name='table'),
        dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b'),
    ]

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Drop every feed item that is not a comic page."""
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Filter in place so the feed object keeps its own list.
            feed.articles[:] = [
                art for art in feed.articles
                if re.search(r'http://www.explosm.net/comics', art.url) is not None
            ]
        return feeds

    def skip_ad_pages(self, soup):
        # Pages that already show the comic need no skipping (return None);
        # anything else is handed back unchanged.
        comic = soup.find(name='img', attrs={
            'alt': 'Cyanide and Happiness, a daily webcomic'})
        if comic is not None:
            return None
        return soup

View File

@ -1,80 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """German tabloid news from Express.de (Cologne, Bonn, Duesseldorf)."""
    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
'''

    # BUGFIX: the original misspelled this attribute as 'remove_tags_befor',
    # so calibre silently ignored the setting.
    # NOTE(review): value kept as a one-element list exactly as written;
    # confirm the installed calibre accepts the list form for this option.
    remove_tags_before = [dict(name='div', attrs={'class': 'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'MoreNews'})]
    remove_tags = [
        dict(id='kalaydo'),
        dict(id='Header'),
        dict(id='Searchline'),
        dict(id='MainNav'),
        dict(id='Logo'),
        dict(id='MainLinkSpacer'),
        dict(id='MainLinks'),
        dict(id='ContainerPfad'),  # neu
        dict(title='Diese Seite Bookmarken'),
        dict(name='span'),
        dict(name='div', attrs={'class': 'spacer_leftneu'}),
        dict(name='div', attrs={'class': 'button kalaydologo'}),
        dict(name='div', attrs={'class': 'button stellenneu'}),
        dict(name='div', attrs={'class': 'button autoneu'}),
        dict(name='div', attrs={'class': 'button immobilienneu'}),
        dict(name='div', attrs={'class': 'button kleinanzeigen'}),
        dict(name='div', attrs={'class': 'button tiereneu'}),
        dict(name='div', attrs={'class': 'button ferienwohnungen'}),
        dict(name='div', attrs={'class': 'button inserierenneu'}),
        dict(name='div', attrs={'class': 'spacer_rightneu'}),
        dict(name='div', attrs={'class': 'spacer_rightcorner'}),
        dict(name='div', attrs={'class': 'HeaderMetaNav'}),
        dict(name='div', attrs={'class': 'HeaderSearchOption'}),
        dict(name='div', attrs={'class': 'HeaderSearch'}),
        dict(name='div', attrs={'class': 'sbutton'}),
        dict(name='div', attrs={'class': 'active'}),
        dict(name='div', attrs={'class': 'MoreNews'}),  # neu
        dict(name='div', attrs={'class': 'ContentBoxSubline'}),  # neu
    ]

    def preprocess_html(self, soup):
        """Unwrap anchor tags, keeping only their link text."""
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [
        (u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
        (u'Regional - Köln',
         u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
        (u'Regional - Bonn',
         u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
        (u'Regional - Düsseldorf',
         u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
        (u'Regional - Region',
         u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
        (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
        (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
        (u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
        (u'Alemannia Aachen News',
         u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
        (u'Borussia M~Gladbach',
         u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
        (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
        (u'Basketball News',
         u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
        (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
    ]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 738 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 461 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 458 B

Some files were not shown because too many files have changed in this diff Show More