mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
remove dead recipes
These recipes are based on RSS feeds that no longer work.
This commit is contained in:
parent
893ebb0457
commit
d9b9f1baee
@ -1,130 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CyNewsLiveRecipe(BasicNewsRecipe):
    """Recipe for the Cyprus Weekly, built by scraping its section pages.

    The entries in ``feeds`` are not RSS feeds: each one is a section
    listing page whose article table is parsed by :meth:`parse_index`.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_CY'
    version = 1

    title = u'Cyprus Weekly'
    publisher = u'The Cyprus Weekly'
    category = u'News, Newspaper'
    description = u'News from Cyprus'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # Scratch state shared between parse_index() and find_articles().
    pubTime = None      # timestamp of the article most recently examined
    minTime = None      # cut-off; older articles end the scan of a section
    articleCount = 0    # articles accepted so far for the current section

    INDEX = 'http://www.cyprusweekly.com.cy/main/default.aspx'

    # Month names as they appear in the listing timestamps.
    _MONTH_NUMBERS = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
    }

    # (section title, section listing page)
    feeds = [
        ('News: Cyprus', 'http://www.cyprusweekly.com.cy/main/92,0,0,0-CYPRUS.aspx'),
        ('News: World', 'http://www.cyprusweekly.com.cy/main/78,0,0,0-UKWORLD.aspx'),
        ('Sport: Football', 'http://www.cyprusweekly.com.cy/main/82,0,0,0-FOOTBALL.aspx'),
        ('Sport: Rugby', 'http://www.cyprusweekly.com.cy/main/83,0,0,0-RUGBY.aspx'),
        ('Sport: Cricket', 'http://www.cyprusweekly.com.cy/main/85,0,0,0-CRICKET.aspx'),
        ('Sport: Tennis', 'http://www.cyprusweekly.com.cy/main/84,0,0,0-TENNIS.aspx'),
        ('Sport: Other', 'http://www.cyprusweekly.com.cy/main/86,0,0,0-OTHER.aspx'),
        ('Business: Local', 'http://www.cyprusweekly.com.cy/main/100,0,0,0-LOCAL.aspx'),
        ('Business: Foreign', 'http://www.cyprusweekly.com.cy/main/101,0,0,0-FOREIGN.aspx'),
        ('Whats On: Places of Interest',
         'http://www.cyprusweekly.com.cy/main/123,0,0,0-PLACES-OF-INTEREST.aspx'),
        ('Whats On: Going Out', 'http://www.cyprusweekly.com.cy/main/153,0,0,0-GOING-OUT.aspx'),
        ('Whats On: Arts & Entertainment',
         'http://www.cyprusweekly.com.cy/main/135,0,0,0-ARTS--and-ENTERTAINMENT.aspx'),
        ('Whats On: Things To Do', 'http://www.cyprusweekly.com.cy/main/136,0,0,0-THINGS-TO-DO.aspx'),
        ('Whats On: Shopping Guide',
         'http://www.cyprusweekly.com.cy/main/142,0,0,0-SHOPPING-GUIDE.aspx'),
        ('Culture', 'http://www.cyprusweekly.com.cy/main/208,0,0,0-CULTURE.aspx'),
        ('Environment', 'http://www.cyprusweekly.com.cy/main/93,0,0,0-ENVIRONMENT.aspx'),
        ('Info', 'http://www.cyprusweekly.com.cy/main/91,0,0,0-INFO.aspx'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': 'ArticleCategories'})]

    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
    '''

    def parse_index(self):
        """Build the index by scraping every section page in ``feeds``.

        Returns a list of ``(section title, articles)`` tuples, where
        ``articles`` is the list filled in by :meth:`find_articles`. A
        section whose page lacks the listing table gets an empty list.
        """
        answer = []
        for feed in self.feeds:
            self.articleCount = 0
            articles = []
            soup = self.index_to_soup(feed[1])

            table = soup.find('table', attrs={'id': 'ctl00_cp_ctl01_listp'})
            if table:
                # Until a listing entry supplies its own timestamp, treat
                # articles as published "now" so they pass the age check.
                self.pubTime = datetime.now()
                self.minTime = self.pubTime - timedelta(days=self.oldest_article)
                self.find_articles(table, articles)

            answer.append((feed[0], articles))

        return answer

    def postprocess_html(self, soup, first):
        """Strip inline styles and turn ``<font>`` tags into bare ``<div>``s."""
        for el in soup.findAll(attrs={'style': True}):
            del el['style']

        for el in soup.findAll('font'):
            el.name = 'div'
            # Iterating the tag itself yields its children, not its
            # attributes. Snapshot the attribute names first; dict() accepts
            # both a mapping and a list of (name, value) pairs.
            for attr in dict(el.attrs):
                del el[attr]

        return soup

    def find_articles(self, table, articles):
        """Append the recent articles listed in *table* to *articles*.

        Scanning stops at the first entry that is older than
        ``self.minTime`` or once ``max_articles_per_feed`` articles have
        been collected for the current section. An entry without its own
        timestamp reuses the timestamp of the entry before it.

        :param table: parsed listing table of a section page
        :param articles: list that receives one dict per accepted article
            (keys ``title``, ``date``, ``url``, ``description``)
        """
        for div in table.findAll('div', attrs={'class': 'ListArticle'}):
            heading = div.find('div', attrs={'class': 'ListArticle_T'})
            link = heading.a if heading is not None else None
            href = link.get('href') if link is not None else None
            if not href:
                # Entry without a usable title link: nothing to download.
                continue
            title = self.tag_to_string(link)
            url = self.INDEX + href

            description = self.tag_to_string(
                div.find('div', attrs={'class': 'ListArticle_BODY300'}))

            stamp = div.find('div', attrs={'class': 'ListArticle_D'})
            if stamp:
                # Space-separated fields: day, month name, year, HH:MM.
                dateParts = self.tag_to_string(stamp).split(' ')
                timeParts = dateParts[3].split(':')
                self.pubTime = datetime(
                    year=int(dateParts[2]),
                    month=self._MONTH_NUMBERS[dateParts[1]],
                    day=int(dateParts[0]),
                    hour=int(timeParts[0]),
                    minute=int(timeParts[1]))

            # '>=' rather than '>': the original '<=' comparison let one
            # article more than max_articles_per_feed through.
            if self.pubTime < self.minTime or self.articleCount >= self.max_articles_per_feed:
                return

            articles.append(
                {'title': title, 'date': self.pubTime, 'url': url, 'description': description})
            self.articleCount += 1
|
|
@ -1,25 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CzasGentlemanow(BasicNewsRecipe):
    """Recipe for the Polish blog at czasgentlemanow.pl (three category feeds)."""

    # Identity
    __author__ = 'fenuks'
    title = u'Czas Gentlemanów'
    description = u'Historia mężczyzn z dala od wielkiej polityki'
    category = 'blog'
    language = 'pl'
    cover_url = 'https://czasgentlemanow.pl/wp-content/uploads/2012/10/logo-Czas-Gentlemanow1.jpg'

    # Download limits
    oldest_article = 7
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    use_embedded_content = False

    # Page clean-up
    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'class': 'post-wrapper'}),
    ]
    remove_tags = [
        dict(attrs={'class': ['awac-wrapper', 'post-bottom', 'comment', 'seperate']}),
    ]
    extra_css = (
        '.gallery-item {float:left; margin-right: 10px; max-width: 20%;}'
        ' .alignright {text-align: right; float:right; margin-left:5px;}'
        '.wp-caption-text {text-align: left;}'
        ' img.aligncenter {display: block; margin-left: auto; margin-right: auto;}'
        ' .alignleft {float: left; margin-right:5px;}'
    )

    # (section title, feed URL)
    feeds = [
        (u'Charakter', u'https://czasgentlemanow.pl/category/charakter/feed/'),
        (u'Wizerunek', u'https://czasgentlemanow.pl/category/wizerunek/feed/'),
        (u'Relacje międzyludzkie',
         u'https://czasgentlemanow.pl/category/relacje-miedzyludzkie/feed/'),
    ]
|
|
@ -1,104 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8
|
|
||||||
from __future__ import print_function, unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre import browser
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1390132023(BasicNewsRecipe):
    """Recipe for the Daily Express (UK), read from its FeedBurner feeds."""

    title = u'Daily Express'
    __author__ = 'Dave Asbury'
    # History:
    #   1.8.15   official feedburner feeds live again
    #   27.6.15  using feed43 as rss feeds dead
    #            (feed43 string = <div {*}<a href="{%}"{*}<h4>{%}</h4>)
    oldest_article = 1.5
    language = 'en_GB'
    max_articles_per_feed = 10
    compress_news_images = True
    compress_news_images_max_size = 20
    ignore_duplicate_articles = {'title', 'url'}
    masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png'
    auto_cleanup_keep = '//*[@class="author"]|//section[@class="photo changeSpace"]'
    auto_cleanup = True
    no_stylesheets = False

    # Used by get_cover_url() when no front-page image can be fetched.
    _FALLBACK_COVER = 'http://cdn.images.express.co.uk/img/static/ourpaper/header-back-issue-papers.jpg'

    # Remove text of the form "| xxx | yyy | Daily Express".
    preprocess_regexps = [
        (re.compile(r'\| [\w].+?\| [\w].+?\| Daily Express',
                    re.IGNORECASE | re.DOTALL), lambda match: ''),
    ]

    # The feed43.com mirrors and the express.co.uk/posts/rss/... URLs that
    # used to be listed here as commented-out alternatives were dropped.
    feeds = [
        (u'UK News', u'http://feeds.feedburner.com/daily-express-uk-news'),
        (u'World News', u'http://feeds.feedburner.com/daily-express-world-news'),
        (u'Showbiz News', u'http://feeds.feedburner.com/daily-express-showbiz-news'),
        (u'Finance', u'http://feeds.feedburner.com/daily-express-finance-news'),
        (u'Sport - Boxing', u'http://feeds.feedburner.com/daily-express-boxing-news'),
        (u'Sport - Rugby Union', u'http://feeds.feedburner.com/daily-express-rugby-union-news'),
        (u'Sport - Others', u'http://feeds.feedburner.com/daily-express-other-sport-news'),
        (u'Entertainment', u'http://feeds.feedburner.com/daily-express-entertainment-news'),
        (u'Lifestyle', u'http://feeds.feedburner.com/daily-express-life-and-style-news'),
        (u'Travel', u'http://feeds.feedburner.com/daily-express-travel'),
    ]

    # starsons code
    def parse_feeds(self):
        """Parse the feeds, then drop every article whose title contains 'WATCH:'.

        The match is case-insensitive. Each feed's ``articles`` list is
        updated in place and the list of feeds is returned.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            kept = []
            for article in feed.articles:
                # Routed through the recipe log instead of a bare print().
                self.log.debug('article.title is: ', article.title)
                if 'WATCH:' not in article.title.upper():
                    kept.append(article)
            feed.articles[:] = kept

        return feeds

    def get_cover_url(self):
        """Return the URL of today's front page, or a static fallback image.

        The "our paper" page is searched for an element whose ``src``
        points into the covers directory. The fallback is returned when no
        such element exists or when the image cannot be opened without
        following a redirect.
        """
        soup = self.index_to_soup('http://www.express.co.uk/ourpaper/')
        tag = soup.find(attrs={'src': re.compile(
            'http://cdn.images.express.co.uk/img/covers/')})
        cover_url = tag.get('src') if tag is not None else None
        if not cover_url:
            return self._FALLBACK_COVER

        br = browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cover_url)
        except Exception:
            # Any fetch failure (including a refused redirect) means the
            # cover is unusable; a bare except would also have swallowed
            # KeyboardInterrupt/SystemExit.
            return self._FALLBACK_COVER

        return cover_url

    extra_css = '''
        #h1{font-weight:bold;font-size:175%;}
        h2{display: block;margin-left: auto;margin-right: auto;width:100%;font-weight:bold;font-size:175%;}
        #p{font-size:14px;}
        #body{font-size:14px;}
        .newsCaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
        .publish-info {font-size:50%;}
        .photo img {display: block;margin-left: auto;margin-right: auto;width:100%;}
    '''
|
|
@ -1,85 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.news.com.au/dailytelegraph/
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DailyTelegraph(BasicNewsRecipe):
    """Recipe for the Daily Telegraph (Australia), read from news.com.au RSS feeds."""

    title = u'Daily Telegraph'
    __author__ = u'Adrian G.'
    language = 'en_AU'

    description = u'Daily Telegraph News'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    # Was spelled `no_javascript`, which is not a BasicNewsRecipe option and
    # was therefore ignored; `remove_javascript` is the recognised name.
    remove_javascript = True

    timefmt = ' [%A, %d %B, %Y]'
    encoding = 'utf-8'

    keep_only_tags = [dict(name='div', attrs={'id': 'story'})]

    extra_css = '''
        h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
        .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
        .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
        .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
        .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
        #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
        .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
    '''

    remove_tags = [
        dict(name='div', attrs={'id': ['comments', 'story-related-coverage']}),
        dict(name='div', attrs={'class': [
            'story-header-tools', 'story-footer', 'story-extras', 'story-related']}),
        dict(name='div', attrs={
            'class': ['promo-image', 'story-extras story-extras-2']}),
        dict(name='div', attrs={'class': ['assistive sidebar-jump']}),
    ]

    # (section title, feed URL)
    feeds = [
        (u'Top Stories', u'http://feeds.news.com.au/public/rss/2.0/dtele_top_stories_253.xml'),
        (u'National News', u'http://feeds.news.com.au/public/rss/2.0/dtele_national_news_202.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/dtele_world_news_204.xml'),
        (u'NSW and ACT', u'http://feeds.news.com.au/public/rss/2.0/dtele_nswact_225.xml'),
        (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/dtele_art_444.xml'),
        (u'Business News', u'http://feeds.news.com.au/public/rss/2.0/dtele_business_226.xml'),
        (u'Entertainment News',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_news_201.xml'),
        (u'Lifestyle News', u'http://feeds.news.com.au/public/rss/2.0/dtele_lifestyle_227.xml'),
        (u'Music', u'http://feeds.news.com.au/public/rss/2.0/dtele_music_441.xml'),
        (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/dtele_sport_203.xml'),
        (u'Soccer', u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_soccer_344.xml'),
        (u'Rugby Union',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_rugby_union_342.xml'),
        (u'Property Confidential',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_property_confidential_463.xml'),
        (u'Property - Your Space',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_property_yourspace_462.xml'),
        (u'Confidential News',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_confidential_252.xml'),
        (u'Confidential Biographies',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_confidential_biographies_491.xml'),
        (u'Confidential Galleries',
         u'http://feeds.news.com.au/public/rss/2.0/dtele_confidential_galleries_483.xml'),
    ]
|
|
@ -1,62 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
daily.tportal.hr
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
    """Recipe for daily.tportal.hr, a Croatian news portal.

    The class name is kept as-is so existing references to it keep working.
    """

    # NOTE(review): the title looks truncated ('tportal.hr'?) — confirm
    # before changing, since it is user-visible.
    title = 'Daily tportal.h'
    __author__ = 'Darko Miletic'
    description = 'News from Croatia'
    publisher = 'tportal.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_HR'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    extra_css = """
        body{font-family: Verdana,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h1,h2{color: #2D648A; font-family: Georgia,serif}
        .artAbstract{font-size: 1.2em; font-family: Georgia,serif}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(name=['meta', 'link', 'embed', 'object', 'iframe', 'base']),
        dict(name='div', attrs={'class': 'artInfo'}),
    ]
    remove_attributes = ['lang']

    # A list of tag specifications, the same shape as remove_tags.
    keep_only_tags = [dict(attrs={'class': 'articleDetails'})]

    feeds = [(u'News', u'http://daily.tportal.hr/rss/dailynaslovnicarss.xml')]

    def preprocess_html(self, soup):
        """Drop inline styles, unwrap links and give every image an ``alt``.

        A link with a single string child is replaced by that string. Any
        other link that contains an image becomes an attribute-less
        ``<div>``; the remaining links are replaced by their text.
        """
        for item in soup.findAll(style=True):
            del item['style']

        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                item.replaceWith(item.string)
            elif limg:
                item.name = 'div'
                # Beautiful Soup 4 keeps attributes in a dict; the former
                # empty list breaks later attribute handling.
                item.attrs = {}
            else:
                item.replaceWith(self.tag_to_string(item))

        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'

        return soup
|
|
@ -1,35 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DallasNews(BasicNewsRecipe):
    """Recipe for The Dallas Morning News, read from dallasnews.com RSS feeds."""

    title = u'The Dallas Morning News'
    language = 'en'

    # Download limits
    max_articles_per_feed = 25
    oldest_article = 2  # days

    # Article clean-up is left to calibre's automatic heuristics.
    auto_cleanup = True
    use_embedded_content = False
    no_stylesheets = True

    # (section title, feed URL)
    feeds = [
        ('News', 'http://www.dallasnews.com/news.rss'),
        ('Local News', 'http://www.dallasnews.com/news/local-politics.rss'),
        ('State Politics', 'http://www.dallasnews.com/news/texas-politics.rss'),
        ('Religion', 'http://www.dallasnews.com/life/faith.rss'),
        ('Crime', 'http://www.dallasnews.com/news/crime.rss'),
        ('Celebrity News',
         'http://www.dallasnews.com/entertainment/celebrity-news/?rss&listname=TopStories'),
        ('Business', 'http://www.dallasnews.com/business.rss'),
        ('Arts', 'http://www.dallasnews.com/arts.rss'),
        ('Life', 'http://www.dallasnews.com/life.rss'),
        ('Opinion', 'http://www.dallasnews.com/opinion.rss'),
    ]
|
|
@ -1,36 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2014, Brandon S Allbery <allbery.b at gmail.com>'
|
|
||||||
'''
|
|
||||||
Dark Reading: protect the business. enable access.
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DarkReading(BasicNewsRecipe):
    """Recipe for Dark Reading, read from the site's single RSS feed."""

    title = u'Dark Reading'
    # Spelling aligned with the name in the file's copyright line.
    __author__ = 'Brandon Allbery'
    language = 'en'
    description = (
        u'Dark Reading is the premier online resource helping information security professionals '
        u'manage the balance between protection and access. '
        u'It offers breaking news and analysis on attacks, breaches and vulnerabilities, '
        u'as well as strategies for protecting enterprise data. '
        u'It also offers guidance on setting risk management and compliance policies.'
    )
    oldest_article = 7
    max_articles_per_feed = 100

    # Originally written as attrs=[...]; the explicit 'class' key states the
    # intended match instead of relying on how Beautiful Soup treats a
    # non-dict attrs value.
    keep_only_tags = [dict(attrs={'class': ['article-content', 'heading']})]

    masthead_url = u'http://img.deusm.com/darkreading/DR-logo.png'
    cover_url = u'http://img.deusm.com/darkreading/DR-logo.png'

    # The duplicated 'third-level' entry has been removed.
    remove_tags = [
        dict(name='div', attrs={'id': 'first-level-nav-container'}),
        dict(name='div', attrs={'id': 'search-box'}),
        dict(name='div', attrs={'id': 'mobile-menu-nav'}),
        dict(name='div', attrs={'id': 'mobile-menu-profile'}),
        dict(name='div', attrs={'id': 'mobile-menu-search'}),
        dict(name='div', attrs={'id': 'mobile-menu-rss'}),
        dict(name='div', attrs={'id': 'second-level'}),
        dict(name='div', attrs={'id': 'third-level'}),
        dict(name='div', attrs={'id': 'aside-main'}),
        dict(name='div', attrs={'id': 'more-insights'}),
        dict(name='div', attrs={'class': 'more-insights-item'}),
    ]

    feeds = [(u'All', u'http://www.darkreading.com/rss_simple.asp')]
|
|
@ -1,45 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Fetch darknet.
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class darknet(BasicNewsRecipe):
    """Recipe for the darknet security blog, read from one feed."""

    title = 'darknet'
    __author__ = 'Oliver Niesner'
    description = 'Ethical hacking and security news'
    language = 'en'

    timefmt = ' [%b %d %Y]'
    oldest_article = 180
    max_articles_per_feed = 40
    use_embedded_content = False
    no_stylesheets = True

    # Elements stripped from each article: first those selected by id, then
    # those selected by tag name plus CSS class.
    remove_tags = [dict(id=ident) for ident in (
        'navi_top',
        'navi_bottom',
        'nav',
        'top-ad',
        'login_suche',
        'navi_login',
        'breadcrumb',
        'subtitle',
        'bannerzone',
    )] + [dict(name=tag, attrs={'class': css}) for tag, css in (
        ('span', 'rsaquo'),
        ('span', 'next'),
        ('span', 'prev'),
        ('span', 'comments'),
        ('div', 'news_logo'),
        ('div', 'nextprev'),
        ('div', 'tags'),
        ('div', 'Nav'),
        ('p', 'news_option'),
        ('p', 'news_foren'),
    )]

    # Everything after the post's meta footer is discarded.
    remove_tags_after = [
        dict(name='div', attrs={'class': 'meta-footer'}),
    ]

    feeds = [
        ('darknet', 'http://feedproxy.google.com/darknethackers'),
    ]
|
|
@ -1,35 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1468055030(BasicNewsRecipe):
    """Recipe for DataNews (Belgian IT news, Dutch edition)."""

    # Identity
    __author__ = 'oCkz7bJ_'
    title = 'DataNews'
    publisher = 'Roularta Media Group'
    description = 'Technology / Best Practice / Business'
    category = 'news, information technology, Belgium'
    language = 'nl_BE'

    # Artwork
    masthead_url = 'http://datanews.knack.be/images/svg/logos/logo_Site-DataNews-NL.svg'
    cover_url = 'http://datablend.be/wp-content/uploads/2014/01/Data_News_logo-short.jpg'

    # Download limits
    max_articles_per_feed = 100
    oldest_article = 2

    # Clean-up
    use_embedded_content = False
    auto_cleanup = True
    remove_javascript = True
    no_stylesheets = True

    # Source: http://datanews.knack.be/rss/
    feeds = [
        ('Technology', 'http://datanews.knack.be/ict/feed.rss'),
        ('Opinie', 'http://datanews.knack.be/ict/opinie/feed.rss'),
        ('Gadgets', 'http://datanews.knack.be/ict/gadgets/feed.rss'),
        ('Foto', 'http://datanews.knack.be/ict/foto/feed.rss'),
        ('Nieuws', 'http://datanews.knack.be/ict/nieuws/feed.rss'),
        ('Reviews', 'http://datanews.knack.be/ict/reviews/feed.rss'),
        ('Startups', 'http://datanews.knack.be/ict/start-ups/feed.rss'),
    ]
|
|
@ -1,16 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'faber1971'
|
|
||||||
description = 'Italian soccer news website - v1.00 (17, December 2011)'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
    """Recipe for Datasport, an Italian soccer news site (single feed)."""

    __author__ = 'faber1971'
    title = u'Datasport'
    language = 'it'

    # Only the last day's articles, cleaned up by calibre's heuristics.
    max_articles_per_feed = 100
    oldest_article = 1
    auto_cleanup = True

    feeds = [
        (u'Datasport', u'http://www.datasport.it/calcio/rss.xml'),
    ]
|
|
@ -1,88 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DaytonBeachNewsJournal(BasicNewsRecipe):
    """Recipe for the Daytona Beach News-Journal, read from its RSS feeds."""

    title = 'Daytona Beach News Journal'
    __author__ = 'BRGriff'
    publisher = 'News-JournalOnline.com'
    description = 'Daytona Beach, Florida, Newspaper'
    category = 'News, Daytona Beach, Florida'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    # Was `filterDuplicates = True`, which is not a BasicNewsRecipe option
    # and so had no effect; this is the option the sibling recipes use.
    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style']

    keep_only_tags = [
        dict(name='div', attrs={'class': 'page-header'}),
        dict(name='div', attrs={'class': 'asset-body'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['byline-section', 'asset-meta']}),
    ]

    # Feeds that were listed but commented out are not included: Jobs,
    # Markets, Real Estate, Technology, Fashion, Health, Travel, Opinion,
    # Letters to Editor, Columns, Podcasts and Culture.
    feeds = [
        # News
        (u"News", u"http://www.news-journalonline.com/rss.xml"),
        (u"Breaking News", u"http://www.news-journalonline.com/breakingnews/rss.xml"),
        (u"Local - East Volusia",
         u"http://www.news-journalonline.com/news/local/east-volusia/rss.xml"),
        (u"Local - West Volusia",
         u"http://www.news-journalonline.com/news/local/west-volusia/rss.xml"),
        (u"Local - Southeast",
         u"http://www.news-journalonline.com/news/local/southeast-volusia/rss.xml"),
        (u"Local - Flagler", u"http://www.news-journalonline.com/news/local/flagler/rss.xml"),
        (u"Florida", u"http://www.news-journalonline.com/news/florida/rss.xml"),
        (u"National/World", u"http://www.news-journalonline.com/news/nationworld/rss.xml"),
        (u"Politics", u"http://www.news-journalonline.com/news/politics/rss.xml"),
        (u"News of Record", u"http://www.news-journalonline.com/news/news-of-record/rss.xml"),
        # Business
        (u"Business", u"http://www.news-journalonline.com/business/rss.xml"),
        # Sports
        (u"Sports", u"http://www.news-journalonline.com/sports/rss.xml"),
        (u"Racing", u"http://www.news-journalonline.com/racing/rss.xml"),
        (u"Highschool", u"http://www.news-journalonline.com/sports/highschool/rss.xml"),
        (u"College", u"http://www.news-journalonline.com/sports/college/rss.xml"),
        (u"Basketball", u"http://www.news-journalonline.com/sports/basketball/rss.xml"),
        (u"Football", u"http://www.news-journalonline.com/sports/football/rss.xml"),
        (u"Golf", u"http://www.news-journalonline.com/sports/golf/rss.xml"),
        (u"Other Sports", u"http://www.news-journalonline.com/sports/other/rss.xml"),
        # Lifestyle
        (u"Lifestyle", u"http://www.news-journalonline.com/lifestyle/rss.xml"),
        (u"Food", u"http://www.news-journalonline.com/lifestyle/food/rss.xml"),
        (u"Home and Garden",
         u"http://www.news-journalonline.com/lifestyle/home-and-garden/rss.xml"),
        (u"Living", u"http://www.news-journalonline.com/lifestyle/living/rss.xml"),
        (u"Religion", u"http://www.news-journalonline.com/lifestyle/religion/rss.xml"),
        # Entertainment (weekly feature)
        (u"Entertainment", u"http://www.go386.com/rss.xml"),
        (u"Go Out", u"http://www.go386.com/go/rss.xml"),
        (u"Music", u"http://www.go386.com/music/rss.xml"),
        (u"Movies", u"http://www.go386.com/movies/rss.xml"),
    ]

    # 'font-style:bold' is not valid CSS; bold is set through font-weight.
    extra_css = '''
        .page-header{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:22pt;}
        .asset-body{font-family:Helvetica,Arial,sans-serif; font-size:16pt;}

    '''
|
|
@ -1,44 +0,0 @@
|
|||||||
# -*- coding: utf-8
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Luis Hernandez'
|
|
||||||
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.filmica.com/david_bravo/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    # Recipe for David Bravo's blog on filmica.com (single RDF feed).

    # -- Publication metadata --
    title = u'Blog de David Bravo'
    __author__ = 'Luis Hernández'
    publisher = u'Filmica'
    description = 'blog sobre leyes, p2p y copyright'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    cover_url = 'http://www.elpais.es/edigitales/image.php?foto=par/portada/1551.jpg'

    # -- Download limits: up to a year back, at most 100 posts --
    oldest_article = 365
    max_articles_per_feed = 100

    # -- Fetching / clean-up switches --
    encoding = 'ISO-8859-1'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Only the post body and its comment blocks are kept from each page.
    keep_only_tags = [
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'blog',
                    'date',
                    'blogbody',
                    'comments-head',
                    'comments-body',
                ]
            },
        },
        {'name': 'span', 'attrs': {'class': ['comments-post']}},
    ]

    # Trim everything outside the span delimited by these two containers.
    remove_tags_before = {'name': 'div', 'attrs': {'id': ['bitacoras']}}
    remove_tags_after = {'name': 'div', 'attrs': {'id': ['comments-body']}}

    # Stylesheet injected into every article (pieces concatenate to one string).
    extra_css = (
        ' p{text-align: justify; font-size: 100%}'
        ' body{ text-align: left; font-family: serif; font-size: 100% }'
        ' h2{ font-family: sans-serif; font-size:75%; font-weight: 800; text-align: justify }'
        ' h3{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: left }'
        ' img{margin-bottom: 0.4em} '
    )

    feeds = [
        (u'Blog', u'http://www.filmica.com/david_bravo/index.rdf'),
    ]
|
|
@ -1,51 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DeRedactie(BasicNewsRecipe):
    # Recipe for deredactie.be (Belgian news in Dutch), fed by Atom feeds.

    title = u'De Redactie.be'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'News from Belgium in Dutch'
    language = 'nl_BE'

    # One week of news, capped at 100 items per section.
    oldest_article = 7
    max_articles_per_feed = 100

    use_embedded_content = False
    no_stylesheets = True

    # Page fragments that make up an article; everything else is dropped.
    keep_only_tags = [
        {'name': 'title'},
        {'name': 'div', 'attrs': {'id': 'intro'}},
        {'name': 'h3'},
        {'name': 'h1'},
        {'name': 'span', 'attrs': {'class': 'media_holder'}},
        {'name': 'div', 'attrs': {'class': 'divider image'}},
        {'name': 'div', 'attrs': {'class': 'paragraph'}},
    ]

    # (section title, Atom feed URL) pairs, in display order.
    feeds = [
        (u'Hoofdpunten', u'http://deredactie.be/cm/vrtnieuws?mode=atom'),
        (u'Binnenland', u'http://deredactie.be/cm/vrtnieuws/binnenland?mode=atom'),
        (u'Politiek', u'http://deredactie.be/cm/vrtnieuws/politiek?mode=atom'),
        (u'Buitenland', u'http://deredactie.be/cm/vrtnieuws/buitenland?mode=atom'),
        (u'Cultuur en Media', u'http://deredactie.be/cm/vrtnieuws/cultuur+en+media?mode=atom'),
        (u'Economie', u'http://deredactie.be/cm/vrtnieuws/economie?mode=atom'),
        (u'Ook dat nog', u'http://deredactie.be/cm/vrtnieuws/ookdatnog?mode=atom'),
        (u'Regionaal Antwerpen', u'http://deredactie.be/cm/vrtnieuws/regio/antwerpen?mode=atom'),
        (u'Regionaal Brussel', u'http://deredactie.be/cm/vrtnieuws/regio/brussel?mode=atom'),
        (u'Regionaal Limburg', u'http://deredactie.be/cm/vrtnieuws/regio/limburg?mode=atom'),
        (u'Regionaal Oost-Vlaanderen', u'http://deredactie.be/cm/vrtnieuws/regio/oostvlaanderen?mode=atom'),
        (u'Regionaal Vlaams-Brabant', u'http://deredactie.be/cm/vrtnieuws/regio/vlaamsbrabant?mode=atom'),
        (u'Regionaal West-Vlaanderen', u'http://deredactie.be/cm/vrtnieuws/regio/westvlaanderen?mode=atom'),
    ]
|
|
@ -1,34 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, NA'
|
|
||||||
'''
|
|
||||||
deadspin.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Deadspin(BasicNewsRecipe):
    # Recipe for deadspin.com, built from a single RSS feed whose entries
    # are used as the article content (use_embedded_content = True).

    title = 'Deadspin'
    __author__ = 'NA'
    description = "Deadspin, Sports News without Access, Favor, or Discretion."
    publisher = 'deadspin.com'
    category = 'news, sports, meltdowns'
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/deadspin.com/img/logo.png'

    # Only the last two days, at most 100 entries.
    oldest_article = 2
    max_articles_per_feed = 100

    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True

    # Metadata forwarded to the conversion pipeline. The key must be
    # 'comments' (plural): the previous 'comment' key does not name a
    # conversion option, so the description was silently dropped.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Strip the feed-tracking footer embedded in each entry.
    remove_tags = [
        {'class': 'feedflare'},
    ]

    feeds = [(u'Articles', u'http://deadspin.com/rss/vip')]

    def preprocess_html(self, soup):
        # Delegates image normalisation to the base-class helper and
        # returns whatever soup it hands back.
        return self.adeify_images(soup)
|
|
@ -1,51 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.defensenews.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DefenseNews(BasicNewsRecipe):
    # Recipe for www.defensenews.com, organised by region and warfare domain.

    title = 'Defense News'
    __author__ = 'Darko Miletic'
    publisher = 'Gannett Government Media Corporation'
    description = 'Find late-breaking defense news from the leading defense news weekly'
    language = 'en'
    publication_type = 'newspaper'
    masthead_url = 'http://www.defensenews.com/images/logo_defensenews2.jpg'

    # Roughly one month of articles, up to 200 per feed.
    oldest_article = 31
    max_articles_per_feed = 200

    encoding = 'cp1252'
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True
    # The same story can appear in several feeds; de-duplicate by URL.
    ignore_duplicate_articles = {'url'}

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .info{font-size: small; color: gray}
    """

    remove_attributes = ['style', 'lang']

    # Headline group plus article body.
    keep_only_tags = [
        {'attrs': {'class': ['ody-hgroup', 'ody-article']}},
    ]

    # Page chrome removed from what is kept.
    # NOTE(review): 'ody-bo-sm ' carries a trailing space in the original;
    # kept verbatim -- confirm whether that was intentional.
    remove_tags = [
        {'name': ['meta', 'link']},
        {
            'attrs': {
                'class': [
                    'toolbar',
                    'toolsShareWrap',
                    'ody-bo-sm ',
                    'ody-comments',
                    'ody-related-links',
                    'left',
                    'right',
                ]
            }
        },
        {'id': ['factsMore', 'ody-nextstoryslider']},
    ]

    feeds = [
        (u'Europe', u'http://www.defensenews.com/rss/europe'),
        (u'Americas', u'http://www.defensenews.com/rss/americas'),
        (u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asia-pacific-rim'),
        (u'Middle east & Africa', u'http://www.defensenews.com/rss/middle-east-africa'),
        (u'Air', u'http://www.defensenews.com/rss/air-warfare'),
        (u'Land', u'http://www.defensenews.com/rss/land-warfare'),
        (u'Naval', u'http://www.defensenews.com/rss/naval-warfare'),
    ]
|
|
@ -1,24 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class HindustanTimes(BasicNewsRecipe):
    # Recipe whose feeds point at delcotimes.com (Delaware County Daily Times).
    # NOTE(review): the class name and the 'Delcoe Times' spelling look like
    # copy/paste slips; both are kept as-is because they are externally visible.

    title = u'Delcoe Times'
    __author__ = 'Krittika Goyal'
    language = 'en'

    max_articles_per_feed = 25
    oldest_article = 1  # days

    # Fetch the linked pages and let the automatic cleaner extract the article.
    use_embedded_content = False
    auto_cleanup = True
    no_stylesheets = True

    feeds = [
        ('News', 'http://www.delcotimes.com/?rss=news'),
        ('Sports', 'http://www.delcotimes.com/?rss=sports'),
        ('Business', 'http://business-news.thestreet.com/the-delaware-county-daily-times/rss/109393'),
        ('Entertainment', 'http://www.delcotimes.com/?rss=entertainment'),
    ]
|
|
@ -1,37 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class demagogRecipe(BasicNewsRecipe):
    # Recipe for demagog.cz, built from the site's single RSS feed.

    __author__ = 'bubak'
    title = u'Demagog.cz'
    publisher = u''
    description = 'demagog.cz'
    language = 'cs'
    cover_url = 'http://demagog.cz/content/images/demagog.cz.png'

    oldest_article = 6
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://demagog.cz/rss')
    ]

    remove_javascript = True
    no_stylesheets = True
    extra_css = """
        .vyrok_suhrn{margin-top:50px; }
        .vyrok{margin-bottom:30px; }
    """

    # Drop the "show reasoning" toggle links and the speaker photos.
    remove_tags = [
        dict(name='a', attrs={'class': 'vyrok_odovodnenie_tgl'}),
        dict(name='img', attrs={'class': 'vyrok_fotografia'}),
    ]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'vyrok_text_after'})

    # Append a horizontal rule right after each summary <div> opening tag.
    # The replacement is a callable, so its return value is used literally:
    # the matched tag has to be re-emitted through match.group(1). The
    # previous '\1<hr>' was a non-raw literal (chr(1) + '<hr>') that replaced
    # the <div> with a control character instead of keeping it.
    preprocess_regexps = [
        (
            re.compile(r'(<div class="vyrok_suhrn">)', re.DOTALL | re.IGNORECASE),
            lambda match: match.group(1) + '<hr>',
        ),
    ]
|
|
@ -1,29 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
'''
|
|
||||||
descopera.org
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Descopera(BasicNewsRecipe):
    # Recipe for descopera.org (Romanian popular-science site), one RSS feed.

    title = u'Descoperă.org'
    __author__ = 'Marius Ignătescu'
    publisher = 'descopera.org'
    description = 'Descoperă. Placerea de a cunoaște'
    category = 'science, technology, culture, history, earth'
    language = 'ro'
    cover_url = 'http://www.descopera.org/wp-content/themes/dorg/styles/default/img/b_top.png?width=400'

    # Two weeks back, at most 100 posts.
    oldest_article = 14
    max_articles_per_feed = 100

    encoding = 'utf8'
    no_stylesheets = True

    # Stylesheet injected into every article (pieces concatenate to one string).
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif }'
        ' .introduction{font-weight: bold}'
        ' .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small}'
        ' .story-feature h2{text-align: center; text-transform: uppercase} '
    )

    # Keep the post container, then strip navigation and share widgets from it.
    keep_only_tags = [{'name': 'div', 'attrs': {'class': ['post']}}]
    remove_tags = [
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'topnav',
                    'box_a',
                    'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-knowledge',
                ]
            },
        }
    ]
    remove_attributes = ['width', 'height']

    feeds = [(u'Articles', u'http://www.descopera.org/feed/')]

    def preprocess_html(self, soup):
        # Hand the parsed page to the base-class image fixer and return its result.
        return self.adeify_images(soup)
|
|
@ -1,74 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1297291961(BasicNewsRecipe):
    # Recipe for The Detroit News (detnews.com); articles are fetched in
    # their print layout via print_version().

    title = u'Detroit News'
    language = 'en'
    __author__ = 'DTM'
    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    conversion_options = {
        'linearize_tables': True,
    }

    # URL fragment ("/<4 digits>" or "/-1", then "/<section><digits>") that
    # print_version() swaps for the print-template query parameter.
    # Compiled once here instead of on every call.
    _PRINT_RE = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')

    feeds = [
        (u'Headlines', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss&mime=xml'),
        (u'Nation/World', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss09&mime=xml'),
        (u'Metro/State', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss36&mime=xml'),
        (u'Wayne County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss01&mime=xml'),
        (u'Oakland County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss02&mime=xml'),
        (u'Macomb County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss03&mime=xml'),
        (u'Livingston County', u'http://detnews.com/apps/pbcs.dll/section?category=rss04&mime=xml'),
        (u'Politics/Government', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss10&mime=xml'),
        (u'Editorials', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss07&mime=xml'),
        (u'Columnists', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss43&mime=xml'),
        (u'Charlie LeDuff', u'http://detnews.com/apps/pbcs.dll/section?category=rss54&mime=xml'),
        (u'Religion', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss11&mime=xml'),
        (u'Technology', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss12&mime=xml'),
        (u'Commuting', u'http://detnews.com/apps/pbcs.dll/section?category=rss05&mime=xml'),
        (u'Schools', u'http://detnews.com/apps/pbcs.dll/section?category=rss06&mime=xml'),
        (u'Obituaries', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss08&mime=xml'),
        (u'Autos Insider', u'http://detnews.com/apps/pbcs.dll/section?category=rss25&mime=xml'),
        (u'Drive', u'http://detnews.com/apps/pbcs.dll/section?category=rss26&mime=xml'),
        (u'Business', u'http://detnews.com/apps/pbcs.dll/section?category=rss21&mime=xml'),
        (u'Personal Finance', u'http://detnews.com/apps/pbcs.dll/section?category=rss23&mime=xml'),
        (u'Real Estate', u'http://detnews.com/apps/pbcs.dll/section?category=rss24&mime=xml'),
        (u'Movies', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss28&mime=xml'),
        (u'TV', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss40&mime=xml'),
        (u'Music/Nightlife', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss30&mime=xml'),
        (u'Celebrities', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss51&mime=xml'),
        (u'The Arts', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss27&mime=xml'),
        (u'Food', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss29&mime=xml'),
        (u'Homestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss31&mime=xml'),
        (u'The Green Life', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss53&mime=xml'),
        (u'Lifestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss32&mime=xml'),
        (u'Health', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss34&mime=xml'),
        (u'Travel', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss52&mime=xml'),
        (u'Advice', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss50&mime=xml'),
        (u'Pistons', u'http://detnews.com/apps/pbcs.dll/section?category=rss13&mime=xml'),
        (u'Lions', u'http://detnews.com/apps/pbcs.dll/section?category=rss14&mime=xml'),
        (u'Tigers', u'http://detnews.com/apps/pbcs.dll/section?category=rss15&mime=xml'),
        (u'Red Wings', u'http://detnews.com/apps/pbcs.dll/section?category=rss16&mime=xml'),
        (u'Michigan State', u'http://detnews.com/apps/pbcs.dll/section?category=rss18&mime=xml'),
        (u'University of Michigan', u'http://detnews.com/apps/pbcs.dll/section?category=rss17&mime=xml'),
        (u'Motor Sports', u'http://detnews.com/apps/pbcs.dll/section?category=rss20&mime=xml'),
        (u'Golf', u'http://detnews.com/apps/pbcs.dll/section?category=rss47&mime=xml'),
        (u'Outdoors', u'http://detnews.com/apps/pbcs.dll/section?category=rss19&mime=xml')
    ]

    def print_version(self, url):
        # Return the print-layout URL for an article URL.
        #
        # The section fragment matched by _PRINT_RE is replaced with
        # '&template=printart'. A URL that does not contain the fragment is
        # returned unchanged; previously m.group() was called on None and
        # raised AttributeError for such URLs.
        m = self._PRINT_RE.search(url)
        if m is None:
            return url
        return url.replace(m.group(), '&template=printart')
|
|
@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = 'Ruben Pollan <meskio@sindominio.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1335657507(BasicNewsRecipe):
    # Recipe for diagonalperiodico.net, one feed per site section.

    title = u'diagonal'
    __author__ = 'Ruben Pollan'
    description = 'Periodico quincenal de actualidad critica'
    language = 'es'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = u'http://diagonalperiodico.net/IMG/siteon0.jpg'

    feeds = [
        (u'Panorama', u'http://diagonalperiodico.net/-Panorama-.html?page=backend'),
        (u'Global', u'http://diagonalperiodico.net/-Global,104-.html?page=backend'),
        (u'Fotonoticia - Galería', u'http://diagonalperiodico.net/-Fotonoticia-Galeria-.html?page=backend'),
        (u'Libertades y Derechos', u'http://diagonalperiodico.net/-Libertades-y-Derechos,77-.html?page=backend'),
        (u'Saberes', u'http://diagonalperiodico.net/-Saberes,78-.html?page=backend'),
        (u'En movimiento', u'http://diagonalperiodico.net/-En-movimiento-.html?page=backend'),
        (u'Culturas', u'http://diagonalperiodico.net/-Culturas,89-.html?page=backend'),
        (u'Cuerpo', u'http://diagonalperiodico.net/-Cuerpo,99-.html?page=backend'),
        (u'La plaza', u'http://diagonalperiodico.net/-La-plaza-.html?page=backend'),
        (u'Enfoques', u'http://diagonalperiodico.net/-Enfoques,106-.html?page=backend'),
        (u'Humor - Galería', u'http://diagonalperiodico.net/-Humor-Galeria-.html?page=backend'),
        (u'Entrevistas digitales', u'http://diagonalperiodico.net/-Entrevistas-Digitales-.html?page=backend'),
        (u'Cartas a diagonal', u'http://diagonalperiodico.net/-Cartas-a-Diagonal-.html?page=backend'),
        (u'Blogs', u'http://diagonalperiodico.net/-Blogs-.html?page=backend'),
    ]

    def get_article_url(self, article):
        # Build the absolute article URL from a feed entry.
        #
        # The original code prefixes every link with the site root, so the
        # feed presumably carries site-relative links. Two cases are now
        # handled instead of failing or producing a broken URL:
        #   * entry without a link -> return None rather than raising
        #     TypeError on str + None;
        #   * link that is already absolute -> returned untouched rather
        #     than being prefixed a second time.
        link = article.get('link')
        if not link:
            return None
        if link.startswith(('http://', 'https://')):
            return link
        return 'http://diagonalperiodico.net/' + link
|
|
@ -1,83 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
diariocordoba.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Diariosur(BasicNewsRecipe):
    # Recipe for diariocordoba.com. The class name does not match the title
    # but is kept unchanged because it is the block's public identifier.

    title = u'Diario Cordoba'
    __author__ = u'Francisco'
    description = u'News Cordoba'
    oldest_article = 5
    max_articles_per_feed = 100
    delay = 0
    timeout = 120
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'UTF-8'
    remove_javascript = True
    language = 'es'
    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'

    remove_attributes = ['height', 'width']

    # The article lives inside the main content container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'contenidos'})
    ]

    # Login boxes, share widgets, comment forms and navigation stripped from
    # the kept container. Entries that were listed twice in the original
    # (Recorte, herramientasDeNoticia, navegaNoticias, Comparte) appear once.
    # 'cajacomparte' and 'CajaComparte' differ in case and are both kept.
    remove_tags = [
        dict(name='div', attrs={'class': 'Recorte'}),
        dict(name='div', attrs={'id': 'comentarios'}),
        dict(name='div', attrs={'id': 'CajaAccesoCuentaUsuario'}),
        dict(name='div', attrs={'id': 'cajacomparte'}),
        dict(name='div', attrs={'class': 'FormularioDeAcceso'}),
        dict(name='div', attrs={'class': 'TextoFormularioDeAcceso'}),
        dict(name='div', attrs={'id': 'IframeCajaComparte'}),
        dict(name='div', attrs={'id': 'CintilloComentario'}),
        dict(name='div', attrs={'id': 'EscribeComentario'}),
        dict(name='div', attrs={'class': 'Nota'}),
        dict(name='div', attrs={'id': 'FormularioComentario'}),
        dict(name='div', attrs={'id': 'Comparte'}),
        dict(name='iframe', attrs={'id': 'IframeCajaComparte'}),
        dict(name='ul', attrs={'class': 'herramientasDeNoticia'}),
        dict(name='div', attrs={'id': 'NoticiaEnPapel'}),
        dict(name='div', attrs={'class': 'navegaNoticias'}),
        dict(name='p', attrs={'class': 'RecorteEnNoticias'}),
        dict(name='div', attrs={'id': 'CajaComparte'}),
    ]

    # NOTE(review): html2lrf_options looks like a legacy LRF-era setting;
    # kept verbatim in case anything still reads it -- confirm before removing.
    html2lrf_options = [
        '--comment', description, '--base-font-size', '6', '--category', 'news, Spain', '--ignore-tables'
    ]

    feeds = [
        (u'Ultima Hora', 'http://www.diariocordoba.com/rss/ultimahora.xml'),
        (u'Tema del Dia', 'http://www.diariocordoba.com/rss/106.xml'),
        (u'Local', 'http://www.diariocordoba.com/rss/101.xml'),
        (u'Provincia', 'http://www.diariocordoba.com/rss/102.xml'),
        (u'Andalucia', 'http://www.diariocordoba.com/rss/1.xml'),
        (u'Opinion', 'http://www.diariocordoba.com/rss/100.xml'),
        (u'Deportes', 'http://www.diariocordoba.com/rss/4.xml'),
        # Was u'Espa\xc3\xb1a': the UTF-8 bytes of n-tilde written as two
        # code points, which renders as mojibake. \xf1 is the single
        # code point for n-tilde.
        (u'Espa\xf1a', 'http://www.diariocordoba.com/rss/7.xml'),
        (u'Internacional', 'http://www.diariocordoba.com/rss/6.xml'),
        (u'Economia', 'http://www.diariocordoba.com/rss/5.xml'),
        (u'Cultura', 'http://www.diariocordoba.com/rss/3.xml'),
        (u'Sociedad', 'http://www.diariocordoba.com/rss/103.xml'),
        (u'Gente', 'http://www.diariocordoba.com/rss/204.xml'),
        (u'Noticias Curiosas', 'http://www.diariocordoba.com/rss/205.xml'),
        (u'Tecnologia', 'http://www.diariocordoba.com/rss/206.xml')
    ]
|
|
@ -1,25 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    # Recipe for the Portuguese daily Diário de Notícias (feeds.dn.pt).

    title = 'Diário de Notícias'
    __author__ = 'Jose Pinto'
    language = 'pt'

    # One week of articles, at most 100 per section.
    oldest_article = 7
    max_articles_per_feed = 100

    # Keep the central column, then drop the tools table inside it.
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'cln-esqmid'}}]
    remove_tags = [{'name': 'table', 'attrs': {'class': 'TabFerramentasInf'}}]

    feeds = [
        (u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
        (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
        (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
        (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
        (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
        (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
        (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
        (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas'),
    ]
|
|
@ -1,47 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class goonews(BasicNewsRecipe):
    # Recipe for Diario Extra (Costa Rica); the cover is scraped from kiosko.net.

    title = u'Diario Extra'
    __author__ = 'Douglas Delgado'
    publisher = 'Sociedad Periodistica Extra Limitada'
    description = 'Diario de circulacion nacional de Costa Rica.'
    category = 'Spanish, Entertainment'
    language = 'es_CR'
    masthead_url = 'http://www.diarioextra.com/img/apariencia/logo.png'

    # One week of articles, 100 per section, one second between requests.
    oldest_article = 7
    max_articles_per_feed = 100
    delay = 1

    encoding = 'utf-8'
    use_embedded_content = False
    auto_cleanup = True
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
    '''

    feeds = [
        (u'Nacionales', u'http://www.diarioextra.com/includes/rss_text.php?id=1'),
        (u'Internacionales', u'http://www.diarioextra.com/includes/rss_text.php?id=2'),
        (u'Sucesos', u'http://www.diarioextra.com/includes/rss_text.php?id=3'),
        (u'Deportes', u'http://www.diarioextra.com/includes/rss_text.php?id=6'),
        (u'Espectaculos', u'http://www.diarioextra.com/includes/rss_text.php?id=7'),
        (u'Opinion', u'http://www.diarioextra.com/includes/rss_text.php?id=4'),
    ]

    def get_cover_url(self):
        # Return the src of the first <img> on the kiosko.net page whose URL
        # ends with the 750px front-page file name, or None when none does.
        page = self.index_to_soup('http://kiosko.net/cr/np/cr_extra.html')
        candidates = (
            img['src']
            for img in page.findAll('img', src=True)
            if img['src'].endswith('cr_extra.750.jpg')
        )
        return next(candidates, None)
|
|
@ -1,13 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    # Recipe for the Colombian daily La Republica, built from one RSS feed.

    title = u'Diario La Republica'
    __author__ = 'CAVALENCIA'
    language = 'es_CO'

    # One week of articles, at most 100, cleaned up automatically.
    max_articles_per_feed = 100
    oldest_article = 7
    auto_cleanup = True

    feeds = [
        (u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml'),
    ]
|
|
@ -1,62 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|
||||||
|
|
||||||
|
|
||||||
class Digit(BasicNewsRecipe):
    # Recipe for Digit magazine (digit.in), built from its FeedBurner feeds.

    title = u'Digit Magazine'
    description = 'Digit caters to the largest community of tech buyers, users and enthusiasts in India.'
    language = 'en_IN'
    __author__ = 'unkn0wn'
    oldest_article = 30  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    masthead_url = 'https://www.digit.in/images/digit_logo.png'
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'title', 'url'}

    def get_cover_url(self):
        # Use the first image of the store's "previous magazines" block as
        # the cover, then defer to the base implementation for the result.
        #
        # If the block or its <img>/src is missing, cover_url is left
        # untouched; previously a block without an <img> raised TypeError
        # when None was subscripted.
        soup = self.index_to_soup(
            'https://store.digit.in/cart.php?category_id=139&year='
        )
        tag = soup.find(attrs={'class': 'previous-magazines'})
        img = tag.find('img') if tag is not None else None
        src = img.get('src') if img is not None else None
        if src:
            self.cover_url = src
        return super().get_cover_url()

    # Space-separated class names passed to classes(). Adjacent string
    # literals are concatenated, so each line except the last must end with a
    # space; without it 'article_video'/'article-inside-container' and
    # 'col-md-7'/'review-inside-container' fused into names that match nothing.
    keep_only_tags = [
        classes(
            'big_img_container highlights_cont Top-sponsered Text-sponsered heading-wraper article_video '
            'article-inside-container skoar_desc New-desk pros-Cons Review-reting For-table col-md-7 '
            'review-inside-container price_wrap key_specifications'
        ),
    ]

    # Ads, share bars, breadcrumbs and related-article boxes.
    remove_tags = [
        classes(
            'adsAdvert Video-wraper article_share auth_social breadcrumbwrap textads_list rel_articles_container'
        ),
    ]

    feeds = [
        ('Features', 'http://feeds.feedburner.com/digit/latest-features'),
        ('Reviews', 'http://feeds.feedburner.com/digit/latest-review'),
        ('Laptops', 'https://feeds.feedburner.com/digit/latest-laptops'),
        ('PC Components', 'https://feeds.feedburner.com/digit/latest-pc-components'),
        ('Tablets', 'https://feeds.feedburner.com/digit/latest-tablets'),
        ('TVs', 'https://feeds.feedburner.com/digit/latest-tvs'),
        (
            'Wearable devices',
            'https://feeds.feedburner.com/digit/latest-wearable-devices'
        ),
        ('How-to', 'https://feeds.feedburner.com/digit/how-to'),
        ('Entertainment', 'https://feeds.feedburner.com/digit/latest-entertainment'),
        ('Gaming', 'http://feeds.feedburner.com/digit/latest-gaming'),
        ('Software', 'https://feeds.feedburner.com/digit/latest-software'),
        ('Audio-Video', 'https://feeds.feedburner.com/digit/latest-audio-video'),
        # ('Apps', 'https://feeds.feedburner.com/digit/latest-apps'),
        # ('Mobile Phones', 'https://feeds.feedburner.com/digit/latest-mobile-phones'),
        # For more : https://www.digit.in/rss-feed/
    ]
|
|
@ -1,59 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Lorenzo Vigentini'
|
|
||||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
|
||||||
__version__ = 'v1.01'
|
|
||||||
__date__ = '14, January 2010'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://media.digitalartsonline.co.uk/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
# Module-level settings; nothing in the visible recipe below reads them.
# NOTE(review): they sit outside the digiArts class, so they are plain module
# globals -- confirm whether class attributes were intended.
articles_are_obfuscated = True
temp_files = []
|
|
||||||
|
|
||||||
|
|
||||||
class digiArts(BasicNewsRecipe):
    # Recipe for Digital Arts magazine (digitalartsonline.co.uk).

    title = 'Digital Arts Magazine '
    __author__ = 'Lorenzo Vigentini'
    publisher = 'IDG Communication'
    description = (
        'Digital Arts - comprehensive coverage of the art of graphic design, '
        '3D, animation, video, effects, web and interactive design, '
        'in print and online.'
    )
    category = (
        'Multimedia, photo, video, computing, '
        'product reviews, editing, cameras, production'
    )
    cover_url = 'http://media.digitalartsonline.co.uk/graphics/logo_digital_arts.gif'

    language = 'en'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    # One month of articles, at most 100 per feed.
    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    # Manual clean-up only: the tag list below selects the article parts.
    auto_cleanup = False
    remove_javascript = True
    no_stylesheets = True

    # Headline, author and body, located through their markup attributes.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'itemprop': 'headline'}},
        {'name': 'span', 'attrs': {'itemprop': 'author'}},
        {'name': 'section', 'attrs': {'class': 'articleBody'}},
    ]

    # Feeds are listed at http://www.digitalartsonline.co.uk/rss/
    feeds = [
        ('Latest News Articles', 'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-news.xml'),
        ('Latest Tutorials', 'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-tutorials.xml'),
        ('Latest Reviews', 'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-reviews.xml'),
        ('Latest Features', 'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-features.xml'),
    ]
|
|
@ -1,51 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
digitaljournal.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DigitalJournal(BasicNewsRecipe):
    """Recipe for digitaljournal.com, built from the site's RSS feeds."""

    # --- identity / metadata -------------------------------------------
    __author__ = 'Darko Miletic'
    title = 'Digital Journal'
    publisher = 'Digital Journal'
    description = 'A Global Citizen Journalism News Network'
    category = 'news, politics, USA, world'
    language = 'en'

    # --- download settings ---------------------------------------------
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Keep only the element with id "article_text"; inside it, drop anything
    # whose class attribute contains "article-top-social".
    keep_only_tags = [dict(id='article_text')]
    remove_tags = [
        dict(attrs={'class': lambda x: x and 'article-top-social' in x}),
    ]

    # One "latest" feed plus one "top news" feed per department; the
    # department name is both the feed title and the ``depname`` query value.
    feeds = [
        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
    ] + [
        (department, u'http://digitaljournal.com/rss/?feed=top_news&depname=' + department)
        for department in (
            u'Business', u'Entertainment', u'Environment', u'Food',
            u'Health', u'Internet', u'Politics', u'Religion', u'Science',
            u'Sports', u'Technology', u'World', u'Arts',
        )
    ]
|
|
@ -1,41 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.digitalspy.co.uk
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DigitalSpyUK(BasicNewsRecipe):
    """Recipe for the UK edition of Digital Spy (www.digitalspy.co.uk)."""

    title = 'Digital Spy - UK Edition'
    __author__ = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher = 'Digital Spy Limited.'
    category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en_GB'
    remove_empty_feeds = True
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '

    # The option key is 'comments' (plural), as used by the other recipes in
    # this collection; the previous 'comment' key meant the description was
    # never applied to the generated book.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Keep only <div id="content">, then strip <link> tags and fixed image
    # dimensions from what remains.
    remove_tags = [dict(name=['link'])]
    remove_attributes = ['height', 'width']
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]

    feeds = [
        (u'News', u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother', u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General', u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media', u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]
|
|
@ -1,81 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
|
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Dilbert(BasicNewsRecipe):
    """Recipe that collects comics and blog links from the dilbert.com front page.

    A comic is just an ``<img>`` tag, so each one is wrapped in a small
    temporary HTML file and offered to the downloader as a ``file://``
    article. The temporary files are removed again in :meth:`cleanup`.
    """

    title = u'Dilbert'
    __author__ = 'TechnoCat'
    description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.'
    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
    auto_cleanup = True
    encoding = 'utf8'
    language = 'en'
    needs_subscription = False
    no_stylesheets = True
    oldest_article = 7
    remove_javascript = True
    recursions = 0
    max_articles_per_feed = 20
    debugMessages = True
    BASE_URL = 'http://dilbert.com'  # Note no www.
    COMIC_DIV_TAG = 'img-comic-container'
    BLOG_DIV_TAG = 'media'
    # File objects created by writeImage(). This is a class-level list, so it
    # is shared by all instances; cleanUpTempFiles() empties it in place.
    tempfiles = []

    def writeImage(self, title, imageURL):
        """Write a minimal HTML page wrapping an image tag to a temp file.

        :param title: text used for the page ``<title>``.
        :param imageURL: the parsed ``<img>`` tag (despite the name, not a
            URL string); its ``prettify()`` output becomes the page body.
        :return: filesystem path of the temporary HTML file.
        """
        markup = imageURL.prettify()
        if isinstance(markup, bytes):
            markup = markup.decode('utf-8')
        page = (u'<html><head><title>' + title + u'</title></head><body>' +
                markup + u'</body></html>')
        # NamedTemporaryFile is opened in binary mode, so the page has to be
        # encoded before writing; writing str raises TypeError on Python 3.
        # 'utf-8' matches the ``encoding`` declared by this recipe.
        tempFile = tempfile.NamedTemporaryFile(delete=False)
        # Register before writing so a failed write still gets cleaned up.
        self.tempfiles.append(tempFile)
        try:
            tempFile.write(page.encode('utf-8'))
            tempFile.flush()
        finally:
            tempFile.close()
        return tempFile.name

    def cleanUpTempFiles(self):
        """Close and delete every temp file created by :meth:`writeImage`."""
        for tempFile in self.tempfiles:
            tempFile.close()
            try:
                os.unlink(tempFile.name)
            except OSError:
                # Already removed: nothing left to clean up for this entry.
                pass
        # Forget the handles so a second cleanup does not retry stale paths.
        del self.tempfiles[:]

    def cleanup(self):
        """Remove the temporary comic pages."""
        self.cleanUpTempFiles()

    # Extract comic links from the soup
    # Returns a list of comics (articles) as:
    # {
    # 'title' : article title,
    # 'url' : URL of print version,
    # 'date' : The publication date of the article as a string,
    # 'description' : A summary of the article
    # 'content' : The full article (can be an empty string). This is used by FullContentProfile
    # }
    def comicFeed(self, soup):
        """Return one article dict per comic image found in ``soup``.

        Only ``title``, ``url``, ``description`` and ``content`` are filled
        in; no ``date`` is set.
        """
        feedset = []
        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}):
            comic = comicContainer.find('img')
            if comic is None:
                continue
            # An image without alt text should not abort the whole download;
            # fall back to the recipe title.
            caption = comic.get('alt') or self.title
            filelink = self.writeImage(caption, comic)
            feedset.append(
                dict(title=caption, url='file://' + filelink, description=caption, content=''))
        return feedset

    def blogFeed(self, soup):
        """Return one article dict per blog link found in ``soup``."""
        feedset = []
        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}):
            blog = blogContainer.find('a', {'class': 'link-blended'})
            if blog is None:
                continue
            href = blog.get('href')
            if not href:
                # A link without a target cannot be downloaded.
                continue
            heading = blog.get('title') or self.tag_to_string(blog)
            feedset.append(
                dict(title=heading, url=href, description=heading, content=''))
        return feedset

    def parse_index(self):
        """Build the two feeds (comics, blog entries) from the front page."""
        root = self.index_to_soup(self.BASE_URL)
        comics = self.comicFeed(root)
        blogs = self.blogFeed(root)
        return [('Comics', comics), ('Blog Entries', blogs)]
|
|
@ -1,149 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2014-01-09, Silviu Cotoar\u0103, Marius Popescu'
|
|
||||||
'''
|
|
||||||
dilemaveche.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DilemaVeche(BasicNewsRecipe):
    """Recipe for the Romanian magazine Dilema Veche (dilemaveche.ro).

    The recipe needs a subscription: when credentials are configured,
    :meth:`get_browser` logs in before downloading.
    """

    # Published on Fridays, later in the afternoon; it probably depends on
    # Luiza (who is credited as the creator of every article in the RSS feed).
    title = u'Dilema Veche'
    # Inspired by the Le Monde script.
    __author__ = 'song2'
    description = '"Sint vechi, domnule!" (I.L. Caragiale)'
    publisher = 'Adevarul Holding'
    oldest_article = 7
    language = 'ro'
    max_articles_per_feed = 150
    encoding = 'utf-8'
    simultaneous_downloads = 5
    masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
    needs_subscription = True
    use_embedded_content = False
    publication_type = 'magazine'
    remove_javascript = True
    no_stylesheets = True
    remove_empty_feeds = True
    feeds = [
        ('Editoriale si opinii - Situatiunea',
         'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),
        ('Editoriale si opinii - Pe ce lume traim',
         'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
        ('Editoriale si opinii - Bordeie si obiceie',
         'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
        ('Editoriale si opinii - Talc Show',
         'http://www.dilemaveche.ro/taxonomy/term/39/0/feed'),
        ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
        ('La zi in cultura - Dilema va recomanda',
         'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
        ('La zi in cultura - Carte',
         'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
        ('La zi in cultura - Film',
         'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
        ('La zi in cultura - Muzica',
         'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
        ('La zi in cultura - Arte performative',
         'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
        ('La zi in cultura - Arte vizuale',
         'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
        ('Societate - Ieri cu vedere spre azi',
         'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
        ('Societate - Din polul opus',
         'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
        ('Societate - Mass comedia',
         'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
        ('Societate - La singular si la plural',
         'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
        ('Oameni si idei - Educatie',
         'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
        ('Oameni si idei - Polemici si dezbateri',
         'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
        ('Oameni si idei - Stiinta si tehnologie',
         'http://www.dilemaveche.ro/taxonomy/term/47/0/feed'),
        # online only articles
        ('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/5/0/feed'),
        # once per month, 6-7 day of the month
        ('Dilemateca', 'http://dilemaveche.ro/taxonomy/term/21/0/feed'),
        # children, once-twice per year
        ('Dilematix', 'http://dilemaveche.ro/taxonomy/term/20/0/feed'),
        ('Dilema Studiilor Postuniversitare',
         'http://dilemaveche.ro/taxonomy/term/1635/0/feed')  # once per year, July
    ]
    remove_tags_before = dict(name='div', attrs={'class': 'spacer_10'})
    remove_tags = [
        dict(name='div', attrs={'id': ['adshop_widget_428x60']}),
        dict(name='div', attrs={'id': ['gallery']}),
        dict(name='div', attrs={'class': ['art_related_left']}),
        dict(name='a', attrs={'class': ['prevPage']}),
        dict(name='a', attrs={'class': ['nextPage']}),
        dict(name='div', attrs={'class': ['article_details']}),
        dict(name='div', attrs={'id': ['comments']}),
        dict(name='ul', attrs={'class': ['social-buttons-list']}),
        dict(name='a', attrs={'class': ['editie']}),
        dict(name='div', attrs={'class': 'simple_overlay'}),
        dict(name='div', attrs={'class': 'c_right_column'}),
        dict(name='div', attrs={'id': 'content_right'}),
        dict(name='div', attrs={'class': 'box_shadow_top'}),
        dict(name='div', attrs={'class': 'box_shadow_bottom'}),
        dict(name='div', attrs={'id': ['footer']}),
        dict(name='div', attrs={'class': ['clear spacer_20']}),
        dict(name='div', attrs={'id': ['adh-footer']}),
        dict(name='div', attrs={'id': ['skyright']}),
        dict(name='div', attrs={'id': ['closure']})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': ['adshop_widget_428x60']})
    ]
    extra_css = """
body{font-family: Georgia,Times,serif }
img{margin-bottom: 0.4em; display:block}
"""

    def get_browser(self):
        """Return a browser, logged in to the pay site when credentials are set."""
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(
                'http://pay.dilemaveche.ro/autentificare/?redirect=http%3A%2F%2Fdilemaveche.ro%2F%2F&return=true')
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup):
        """Run the page through ``adeify_images`` before conversion."""
        return self.adeify_images(soup)

    def _cover_is_reachable(self, br, url):
        """Return True when ``url`` can be opened with browser ``br``."""
        try:
            br.open(url)
        except Exception:
            return False
        return True

    def get_cover_url(self):
        """Pick the best available cover for the current issue.

        Tries, in order: the link target of the homepage's PDF box, the
        image shown in that box, and finally the site logo.

        :return: the first candidate URL that can be opened, or the logo URL.
        """
        # small, from the current number article: http://dilemaveche.ro/sites/default/files/imagecache/articol_teaser/DV517web-1_copy.JPG
        # medium, from the homepage PDF link: http://dilemaveche.ro/sites/default/files/imagecache/editie_small/DV517web-1_copy_0.JPG
        # big, from the current number article, click on the small image:
        # http://dilemaveche.ro/sites/default/files/imagecache/image_gallery_large/DV517web-1_copy.JPG
        fallback = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
        soup = self.index_to_soup('http://dilemaveche.ro')
        link_item = soup.find('div', attrs={'class': 'box_dr_pdf_picture'})
        # A plain (not logged-in) browser is used for the probes.
        br = BasicNewsRecipe.get_browser(self)

        pdf_url = link_item.a.get('href') if link_item and link_item.a else None
        if pdf_url and self._cover_is_reachable(br, pdf_url):
            return pdf_url
        # The PDF link is missing or cannot be opened.
        self.log("\nPDF indisponibil")

        image_url = link_item.img.get('src') if link_item and link_item.img else None
        if image_url and self._cover_is_reachable(br, image_url):
            return image_url
        # Not even the small image is available: fall back to the logo.
        self.log('nu este nici pdf nici imagine')
        return fallback

    cover_margins = (10, 15, '#ffffff')
|
|
@ -1,25 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Dingoo(BasicNewsRecipe):
    """Recipe for the Russian-language Dingoo A320 blog (FeedBurner feed)."""

    # --- identity / metadata -------------------------------------------
    title = u'Dingoo A320. \u0420\u0443\u0441\u0441\u043a\u0438\u0439 \u0440\u0435\u0441\u0443\u0440\u0441'
    __author__ = 'bug_me_not'
    description = 'Портативная игровая консоль Dingoo A320 и другие необычные гаджеты'
    publisher = 'Emulate.SU'
    category = 'console'
    language = 'ru'
    cover_url = u'http://upload.wikimedia.org/wikipedia/commons/thumb/0/02/Dingoo_A320_White.jpg/300px-Dingoo_A320_White.jpg'

    # --- download settings ---------------------------------------------
    oldest_article = 20
    max_articles_per_feed = 200
    remove_javascript = True
    no_stylesheets = False

    feeds = [
        (u'A320', u'http://feeds.feedburner.com/ru_dingoo'),
    ]

    # --- page clean-up --------------------------------------------------
    # Trim everything outside the span from <div class="posttitle"> to
    # <div class="article">, and drop embedded iframes.
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'posttitle'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article'}}
    remove_tags = [{'name': 'iframe'}]
|
|
@ -1,49 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
divahair.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DivaHair(BasicNewsRecipe):
    """Recipe for divahair.ro, a Romanian hairstyle site."""

    # --- identity / metadata -------------------------------------------
    title = u'Diva Hair'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Diva Hair'
    description = u'Coafuri, frizuri, tunsori ..'
    category = u'Ziare,Stiri,Coafuri,Femei'
    language = 'ro'
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    # --- download settings ---------------------------------------------
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Metadata forwarded to the conversion step.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # --- page clean-up --------------------------------------------------
    # The article body is tagged with class "spatiuart", on a <td> or a <div>.
    keep_only_tags = [
        dict(name='td', attrs={'class': 'spatiuart'}),
        dict(name='div', attrs={'class': 'spatiuart'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'categorie'}),
        dict(name='div', attrs={'class': 'gri gri2 detaliiart'}),
        dict(name='div', attrs={'class': 'articol_box_bottom'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'articol_box_bottom'}),
    ]

    feeds = [
        (u'\u0218tiri', u'http://www.divahair.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
|
@ -1,26 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
DjurslandsPosten
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class DjurslandsPosten_dk(BasicNewsRecipe):
    """Recipe for the Danish local newspaper DjurslandsPosten."""

    # --- identity / metadata -------------------------------------------
    title = 'DjurslandsPosten'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder'
    category = 'newspaper, news, localnews, Denmark'
    language = 'da'

    # --- download settings ---------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # Single RSS feed.
    feeds = [('Nyheder', 'http://www.dinby.dk/djurslandsposten/rss')]
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, BlonG'
|
|
||||||
'''
|
|
||||||
dnevnik.si
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Dnevnik(BasicNewsRecipe):
    """Recipe for the Slovenian daily Dnevnik (dnevnik.si)."""

    # --- identity / metadata -------------------------------------------
    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
dru\u017ebe.'''
    language = 'sl'
    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    # --- download settings ---------------------------------------------
    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # --- page clean-up --------------------------------------------------
    keep_only_tags = [
        dict(name='div', attrs={'id': '_iprom_inStream'}),
        dict(name='div', attrs={'class': 'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'fb_article_top'}),
        dict(name='div', attrs={'class': 'related'}),
        dict(name='div', attrs={'class': 'fb_article_foot'}),
        dict(name='div', attrs={'class': 'spreading'}),
        dict(name='dl', attrs={'class': 'ad'}),
        dict(name='p', attrs={'class': 'report'}),
        dict(name='div', attrs={'class': 'hfeed comments'}),
        dict(name='dl', attrs={'id': 'entryPanel'}),
        dict(name='dl', attrs={'class': 'infopush ip_wide'}),
        dict(name='div', attrs={'class': 'sidebar'}),
        dict(name='dl', attrs={'class': 'bottom'}),
        dict(name='div', attrs={'id': 'footer'}),
    ]

    # Every feed uses the same RSS endpoint and differs only in the numeric
    # ``articleSection`` query value.
    feeds = [
        (label, u'http://www.dnevnik.si/rss/?articleType=1&articleSection=%d' % section_id)
        for label, section_id in (
            (u'Slovenija', 13),
            (u'Svet', 14),
            (u'EU', 116),
            (u'Poslovni dnevnik', 5),
            (u'Kronika', 15),
            (u'Kultura', 17),
            (u'Zdravje', 18),
            (u'Znanost in IT', 19),
            (u'(Ne)verjetno', 20),
            (u'E-strada', 21),
            (u'Svet vozil', 22),
        )
    ]
|
|
@ -1,106 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
__author__ = 'Darko Spasovski'
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
|
|
||||||
'''
|
|
||||||
dnevnik.com.mk
|
|
||||||
'''
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre import browser
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Dnevnik(BasicNewsRecipe):
    """Recipe for the Macedonian daily Dnevnik (dnevnik.com.mk).

    Articles are taken from the archive page for today's date, grouped by
    the section headings found there.
    """

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
                          [
        # Remove anything before the start of the article.
        (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),

        # Remove anything after the end of the article.
        (r'<!--Article end.*?</body>', lambda match: '</body>'),
    ]
    ]

    extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
.WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
"""

    # The option key is 'comments' (plural), as used by other recipes in this
    # collection; the previous 'comment' key meant the description was never
    # applied to the generated book.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'linearize_tables': True
    }

    # Position of each known section; used by get_weight(). Keys are
    # lower-cased section titles.
    _SECTION_ORDER = {u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
                      u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
                      u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
                      u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13}

    def _archive_url(self):
        """Return the archive page URL for today's date (dd.mm.YYYY)."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        return self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum

    def parse_index(self):
        """Build the feed list from today's archive page.

        :return: list of ``(section title, articles)`` tuples, sorted by
            :meth:`get_weight`; sections without articles are omitted.
        """
        soup = self.index_to_soup(self._archive_url())
        feeds = []
        for section in soup.findAll('td', attrs={'class': 'WB_DNEVNIK_ArhivaFormTitle'}):
            heading = section.contents[0] if section.contents else None
            sectionTitle = getattr(heading, 'string', None)
            if not sectionTitle:
                # A heading cell without text cannot be named or ordered.
                self.log('Section heading without text - page layout may have been changed.')
                continue
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # The article table is the sibling following the table that
            # precedes the heading cell.
            precedingTable = section.findPrevious(name='table')
            containerTable = None
            if precedingTable is not None:
                containerTable = precedingTable.findNextSibling(name='table')
            if containerTable is None:
                self.log('No container table found - page layout may have been changed.')
                continue
            articles = [
                {
                    'title': self.tag_to_string(article, use_alt=True).strip(),
                    'url': 'http://www.dnevnik.com.mk/' + article['href'],
                    'description': '',
                    'date': '',
                }
                for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'})
            ]
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=self.get_weight)

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.

        :param section: a ``(title, articles)`` tuple as built by parse_index.
        :return: the section's position, or 999 for titles not in the table
            so that they sort to the bottom.
        """
        name = section[0]
        # Soup strings expose ``.string``; plain strings are used as they are.
        name = getattr(name, 'string', None) or name
        return self._SECTION_ORDER.get(name.lower(), 999)

    def get_cover_url(self):
        """Return the cover image URL for today's issue, or '' if not found."""
        soup = self.index_to_soup(self._archive_url())
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is None:
            return ''
        raw = browser().open_novisit(
            self.INDEX + '/' + anchor['href']).read()
        cover_soup = BeautifulSoup(raw)
        # The cover is the first image after the date block; if either is
        # missing, report "no cover" instead of raising.
        marker = cover_soup.find('div', attrs={'class': 'WB_DNEVNIK_Datum2'})
        image = marker.findNext('img') if marker is not None else None
        if image is None or not image.get('src'):
            return ''
        return self.INDEX + '/' + image['src']
|
|
@ -1,43 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
dobanevinosti.blogspot.com
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DobaNevinosti(BasicNewsRecipe):
    """Recipe for the Serbian film blog Doba Nevinosti (dobanevinosti.blogspot.com)."""

    title = 'Doba Nevinosti'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    auto_cleanup = True
    extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
img{margin-bottom: 0.8em; display:block;}
"""

    # The option key is 'comments' (plural), as used by other recipes in this
    # collection; the previous 'comment' key meant the description was never
    # applied to the generated book.
    conversion_options = {
        'comments': description, 'tags': 'film, blog, srbija, tv', 'publisher': 'Dimitrije Vojinov', 'language': language
    }
    remove_attributes = ['lang', 'border']
    # Replace U+0110 (D with stroke) with U+00D0 (Eth) in the downloaded text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Tekstovi', u'http://dobanevinosti.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        """Strip inline styles and give every image without ``alt`` a placeholder.

        :param soup: parsed article markup; modified in place.
        :return: the same ``soup`` object.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup
|
|
@ -1,60 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010-2012, NiLuJe <niluje at ak-team.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Fetch DoghouseDiaries.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DoghouseDiaries(BasicNewsRecipe):
    """Recipe for the Doghouse Diaries webcomic.

    The comic list is read from the ``<option>`` entries on the front page
    rather than from a feed.
    """

    # --- identity / metadata -------------------------------------------
    title = 'Doghouse Diaries'
    __author__ = 'NiLuJe'
    description = 'A webcomic.'
    language = 'en'
    cover_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
    masthead_url = 'http://www.thedoghousediaries.com/logos/logo3.png'

    # --- download settings ---------------------------------------------
    use_embedded_content = False
    no_stylesheets = True
    # 14 comics per fetch (not really days... but we can't easily get the date
    # of individual comics, short of parsing each one...)
    oldest_article = 14

    # --- page clean-up --------------------------------------------------
    keep_only_tags = [
        dict(name='img', attrs={'class': re.compile("comic-item*")}),
        dict(name='h1'),
        dict(name='div', attrs={'class': 'entry'}),
        dict(name='p', id='alttext'),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'pin-it-btn-wrapper'}),
        dict(name='span'),
        dict(name='div', id='wp_fb_like_button'),
    ]
    remove_attributes = ['width', 'height']

    # Turn image bubblehelp into a paragraph (NOTE: We run before the
    # remove_tags cleanup, so we need to make sure we only parse the
    # comic-item img, not the pinterest one pulled by the entry div)
    preprocess_regexps = [
        (
            re.compile(r'(<img.*src="http://thedoghousediaries.com/comics/.*title=")([^"]+)(".*>)'),
            lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2)),
        ),
    ]

    def parse_index(self):
        """Return a single feed built from the front page's ``<option>`` list."""
        soup = self.index_to_soup('http://www.thedoghousediaries.com/')
        # Since the feed sucks, and there's no real archive, we use the 'Quick
        # Archive' thingie, but we can't get the date from here, so stop after
        # 14 comics... (the limit is one higher to allow for the placeholder
        # entry that is filtered out below).
        options = soup.findAll('option', {}, True, None, self.oldest_article + 1)
        articles = [
            {
                'title': self.tag_to_string(option).encode('UTF-8'),
                'url': option['value'],
                'description': '',
                'content': '',
            }
            for option in options
            # Skip the quick archive itself
            if option['value'] != '0'
        ]
        return [('Doghouse Diaries', articles)]
|
|
@ -1,44 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
    """Build a tag-matching spec for elements carrying any of the given classes.

    ``classes`` is a string of class names separated by single spaces. The
    result is ``{'attrs': {'class': matcher}}``, where ``matcher`` receives
    an element's class attribute value. It returns that value unchanged when
    it is falsy; otherwise it returns the (possibly empty) frozenset of
    wanted names present in it.
    """
    wanted = frozenset(classes.split(' '))

    def matcher(value):
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': matcher}}
|
|
||||||
|
|
||||||
|
|
||||||
class stuffconz(BasicNewsRecipe):
    """Recipe for the New Zealand news site stuff.co.nz."""

    title = u'stuff.co.nz'
    language = 'en_NZ'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # Keep the headline, byline and story components of the page.
    keep_only_tags = [
        classes('sics-component__headline sics-component__byline sics-component__story')
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'style']),
        classes('sics-component__sharebar'),
    ]

    # Every other recipe in this collection spells this flag
    # ``no_stylesheets``; the previous ``remove_stylesheets`` name was
    # presumably a typo and had no effect.
    no_stylesheets = True
    feeds = [
        ('Dominion Post',
         'http://www.stuff.co.nz/rss/dominion-post'),
        ('National',
         'http://www.stuff.co.nz/rss/national'),
        ('World',
         'http://www.stuff.co.nz/rss/world'),
        ('Business',
         'http://www.stuff.co.nz/rss/business'),
        ('Technology',
         'http://www.stuff.co.nz/rss/technology'),
        ('Sport',
         'http://www.stuff.co.nz/rss/sport'),
        ('Entertainment',
         'http://www.stuff.co.nz/rss/entertainment'),
        ('Life and Style',
         'http://www.stuff.co.nz/rss/life-style'),
    ]
|
|
@ -1,29 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
http://www.dosisdiarias.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class DosisDiarias(BasicNewsRecipe):
    """Recipe for the "Alberto Montt en dosis diarias" comic blog.

    Article bodies are taken from the content embedded in the feed itself.
    """

    # --- identity and metadata ---
    __author__ = 'Darko Miletic'
    title = 'Alberto Montt en dosis diarias'
    publisher = 'Alberto Montt'
    description = 'Mire sin compromiso y si le gusta vuelva'
    category = 'comic, blog, spanish'
    language = 'es'
    encoding = 'utf-8'

    # --- fetch limits ---
    oldest_article = 5
    max_articles_per_feed = 100

    # --- content handling ---
    use_embedded_content = True
    no_stylesheets = True
    remove_tags = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]

    feeds = [(u'Dosis diaria', u'http://feeds.feedburner.com/montt')]

    # Reuses the metadata attributes declared above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )
|
|
@ -1,25 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011-2011, Federico Escalada <fedeescalada at gmail.com>'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Dotpod(BasicNewsRecipe):
    """Recipe for the Dotpod blog, read from its single site feed."""

    # --- identity and metadata ---
    title = 'Dotpod'
    __author__ = 'Federico Escalada'
    authors = 'Federico Picone'
    description = 'Tecnologia y Comunicacion Audiovisual'
    publication_type = 'blog'
    language = 'es'
    encoding = 'utf-8'

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100

    # --- content handling ---
    no_stylesheets = True
    remove_tags = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]

    feeds = [('Dotpod', 'http://www.dotpod.com.ar/feed/')]

    # Reuses the metadata attributes declared above.
    conversion_options = dict(
        authors=authors,
        comments=description,
        language=language,
    )
|
|
@ -1,26 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|
||||||
|
|
||||||
|
|
||||||
class My_Feeds(BasicNewsRecipe):
    """Recipe for Down To Earth, read from the site's combined RSS feed."""

    # --- identity ---
    title = 'Down To Earth'
    __author__ = 'Amit'
    language = 'en_IN'

    # --- fetch limits ---
    oldest_article = 20
    max_articles_per_feed = 100
    remove_empty_feeds = True
    use_embedded_content = False

    # --- output tweaks ---
    no_stylesheets = True
    remove_javascript = True
    center_navbar = True

    # Parts of the article page that are kept.
    keep_only_tags = [
        classes('detail-heading content-main news-basic-info news-banner news-detail-content'),
    ]
    # Elements dropped from what was kept.
    remove_tags = [
        classes('add-comment btn hindi_detail_link single-news-letter'),
        {'id': ['comments', 'breadcrumb', 'node_related_stories']},
        {'attrs': {'class': ['commentCount', 'box']}},
    ]

    feeds = [
        ('All', 'https://www.downtoearth.org.in/rss/all'),
    ]
|
|
@ -1,36 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class drivelrycom(BasicNewsRecipe):
    """Recipe for the drivelry.com blog, read from its FeedBurner feed."""

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25

    # The option BasicNewsRecipe honours is ``no_stylesheets``; the previous
    # ``remove_stylesheets`` attribute is not a recipe option and had no effect.
    no_stylesheets = True
    remove_tags_after = dict(name='div', attrs={'id': 'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['sidebar']}),
        dict(name='div', attrs={'id': ['bookmark']}),
    ]

    feeds = [
        ('drivelry.com',
         'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        """Rebuild the page as the ``div#main`` story followed by a donation link.

        Returns the freshly built document, or ``soup`` unchanged when the page
        has no ``div#main`` (inserting ``None`` into the new body would fail).
        """
        story = soup.find(name='div', attrs={'id': 'main'})
        if story is None:
            return soup
        page = BeautifulSoup('''
        <html><head><title>t</title></head><body>
        <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
        </body></html>
        ''')
        body = page.find(name='body')
        # The story goes first so the donation paragraph ends up below it.
        body.insert(0, story)
        return page
|
|
@ -1,43 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Recipe for Dünya Bizim, one feed per site category."""

    # --- identity ---
    title = u'D\xfcnya Bizim'
    __author__ = 'asalet_r'
    language = 'tr'

    # --- fetch limits and clean-up ---
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # (section title, feed URL); the URLs differ only in their ``kategoriID``.
    feeds = [
        (u'Ayr\u0131nt\u0131 Defteri', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=58'),
        (u'Baba Kitaplar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=4'),
        (u'\xc7-al\u0131nt\u0131', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=33'),
        (u'Dar\xfclmedya', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=49'),
        (u'Denemedi Deme', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=72'),
        (u'DevriAlem', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=67'),
        (u'Duyduk duymad\u0131k demeyin', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=48'),
        (u'G\xfczel Mekanlar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=43'),
        (u'\u0130stanbul Bizim', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=62'),
        (u'\u0130yi Haberler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=18'),
        (u'\u0130yi M\xfczikler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=2'),
        (u'Kalite Dergiler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=3'),
        (u'K\u0131sa K\u0131sa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=55'),
        (u'Konu\u015fa Konu\u015fa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=24'),
        (u'Medyada D\xfcnyaBizim', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=64'),
        (u'Mizah', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=29'),
        (u'M\xfcstesna G\xfczeller', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=65'),
        (u'Nerede Ne Var?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=66'),
        (u'Not Defteri', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=71'),
        (u'O \u015eimdi Nerede?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=52'),
        (u'Olsa Ke\u015fke', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=34'),
        (u'Orada Ne Oldu?', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=38'),
        (u'\xd6nemli Adamlar', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=1'),
        (u'Sinema', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=23'),
        (u'Tart\u0131\u015fa tart\u0131\u015fa', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=39'),
        (u'Yay\u0131n Y\xf6netmeninden', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=69'),
        (u'Yeni \u015eeyler', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=57'),
        (u'Zekeriya Sofras\u0131', u'http://www.dunyabizim.com/servisler/rss.php?kategoriID=60'),
    ]
|
|
@ -1,34 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Recipe for Dünya Bülteni, one feed per news or columnist section."""

    # --- identity ---
    title = u'D\xfcnya B\xfclteni'
    __author__ = 'asalet_r'
    language = 'tr'

    # --- fetch limits and clean-up ---
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # (section title, feed URL)
    feeds = [
        # news sections
        (u'Dosya', u'http://www.dunyabulteni.net/servisler/rss/haberler/181'),
        (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss/haberler/174'),
        (u'T\xfcrkiye', u'http://www.dunyabulteni.net/servisler/rss/haberler/44'),
        (u'\u015eehrin Nabz\u0131', u'http://www.dunyabulteni.net/servisler/rss/haberler/195'),
        (u'D\xfcnya', u'http://www.dunyabulteni.net/servisler/rss/haberler/31'),
        (u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss/haberler/157'),
        (u'Dubam', u'http://www.dunyabulteni.net/servisler/rss/haberler/163'),
        (u'K\xfclt\xfcr Sanat', u'http://www.dunyabulteni.net/servisler/rss/haberler/66'),
        (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss/haberler/123'),
        (u'Ekonomi', u'http://www.dunyabulteni.net/servisler/rss/haberler/40'),
        (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss/haberler/153'),
        (u'Bilim Teknoloji', u'http://www.dunyabulteni.net/servisler/rss/haberler/128'),
        (u'Aile-Sa\u011fl\u0131k', u'http://www.dunyabulteni.net/servisler/rss/haberler/75'),
        (u'E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss/haberler/80'),
        (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss/haberler/90'),
        (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss/haberler/200'),
        # columnist sections
        (u'Yazarlar\u0131m\u0131z', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/5'),
        (u'Konuk Yazarlar', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/6'),
        (u'Al\u0131nt\u0131 Yaz\u0131lar', u'http://www.dunyabulteni.net/servisler/rss/yazarlar/7'),
    ]
|
|
@ -1,205 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""www.dunyahalleri.com"""
|
|
||||||
import locale
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from shutil import copyfile
|
|
||||||
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.utils.resources import get_path
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2017, sukru alatas / alatas.org'
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Create a tag called ``name`` that belongs to ``soup``.

    ``attrs`` is an iterable of ``(key, value)`` pairs. When ``soup`` exposes a
    ``new_tag`` factory it is called with the pairs converted to a dict;
    otherwise ``Tag`` is instantiated directly with the pairs (or ``None`` when
    there are none).
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on this soup object: construct the Tag by hand.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
|
||||||
|
|
||||||
|
|
||||||
class DunyaHalleri(BasicNewsRecipe):
    """Recipe for the Dünya Halleri blog (www.dunyahalleri.com).

    Reads the per-category RSS feeds, normalises article markup (title,
    featured image, image galleries, embedded videos) and draws a cover made
    of the recipe title, the current date and the site logo.
    """

    title = 'Dünya Halleri'
    description = 'Gözden Kaçanlar Rehberi'
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5

    needs_subscription = False
    scale_news_images = True

    remove_tags_before = dict(name='span', attrs={'itemprop': 'reviewBody'})
    remove_tags_after = dict(
        name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'})
    remove_tags = [dict(name=['script', 'noscript', 'style', 'footer']),
                   dict(attrs={'class': ['jsharedaddy sd-sharing-enabled',
                                         'cb-sticky-sidebar', 'sharedaddy sd-sharing-enabled']}),
                   dict(id=['jp-relatedposts', 'tldr-post-summary', 'tldr-post-summary-buttons'])]
    encoding = 'utf_8'
    no_stylesheets = True

    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'
    feeds = [(u"Genel Gündem",
              'https://www.dunyahalleri.com/genel-gundem/feed/'),
             (u"Teknoloji / Bilim",
              'https://www.dunyahalleri.com/teknoloji-bilim/feed/'),
             (u"İnternet / Girişimler",
              'https://www.dunyahalleri.com/internet-girisimler/feed/'),
             (u"Tasarım / İnovasyon",
              'https://www.dunyahalleri.com/tasarim-inovasyon/feed/'),
             (u"Kültür / Sanat", 'https://www.dunyahalleri.com/kultur-sanat/feed/')]
    oldest_article = 7
    max_articles_per_feed = 50

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    masthead_url = 'https://www.dunyahalleri.com/wp-content/uploads/2016/07/dh-logo-transparan.png'
    cover_url = ''
    cover_img_url = 'https://i0.wp.com/www.dunyahalleri.com/wp-content/uploads/2016/04/dh-favico-v2.png'
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        """Initialise the recipe, set the time locale and tune Kindle image options."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        try:
            locale.setlocale(locale.LC_TIME, self.locale)
        except locale.Error:
            # A missing system locale should not prevent the download.
            self.log.warn(
                'Locale %s is not available, dates will not be localized' % self.locale)

        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    def preprocess_html(self, soup):
        """Normalise one article page and return the modified ``soup``.

        Inserts the article title and the ``og:image`` picture at the top of
        the review body, flattens image galleries into a paragraph of plain
        images and replaces every iframe with a linked preview image plus a
        caption. Steps whose source element is missing are skipped.
        """
        span = soup.find('span', {'itemprop': 'reviewBody'})
        if span is not None:
            position = 0

            # title insert
            title_tag = soup.title
            if title_tag is not None and title_tag.contents:
                # str.replace returns a new string, so the result must be kept.
                article_title = title_tag.contents[0].replace(
                    u' - Dünya Halleri', '')
                h2 = new_tag(soup, 'h2')
                h2.append(article_title)
                span.insert(position, h2)
                position += 1

            # featured image insert
            meta = soup.find('meta', {'property': 'og:image'})
            if meta is not None and meta.get('content'):
                span.insert(
                    position, new_tag(soup, 'img', [('src', meta['content'])]))

        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for img in div.findAll('img'):
                # Build a clean image that carries only ``src``; going through
                # new_tag avoids depending on how the soup stores attributes.
                src = img.get('src')
                p.append(new_tag(soup, 'img', [('src', src)] if src is not None else []))
            div.replaceWith(p)

        # youtube embedded normalization
        # this block finds the cover image for each embedded youtube video then
        # changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            src = iframe.get('src') or ''
            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                src)
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = src

            a = new_tag(soup, 'a', [('href', a_href), ('target', '_blank')])
            caption = new_tag(soup, 'pre', [('class', 'caption')])
            caption.append('Video: ' + a_href)
            a.append(new_tag(soup, 'img', [('src', img_src)]))
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version
    # https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        """Return the URL of the logo drawn on the cover, or ``None`` if unset."""
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        """Download the logo and store a copy as ``cover_img.jpg`` in ``output_dir``.

        ``cover_url`` is pointed at the logo for the duration of the download
        and restored afterwards, even when the download fails.
        """
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except Exception:
            # Best effort: without a previous value we restore None below.
            self.log.debug('Could not read the current cover url')
        self.cover_url = self.get_cover_img_url()
        try:
            self._download_cover()
            outfile = os.path.join(self.output_dir, 'cover_img.jpg')
            copyfile(self.cover_path, outfile)
        finally:
            self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        """Fetch the logo; on failure log it and reset ``cover_img_path`` to ``None``."""
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except Exception:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        """Draw ``text`` horizontally centred at ``top`` and return its height in pixels."""
        font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        if hasattr(draw, 'textbbox'):
            # ImageDraw.textsize is gone from recent Pillow releases.
            x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
            width, height = x1 - x0, y1 - y0
        else:
            width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        """Render the cover as a JPEG into ``cover_file`` and return ``True``.

        The cover shows the title and date at the top, the site address near
        the bottom and, when it could be downloaded, the logo in the centre.
        """
        title = self.title
        date = strftime('%d %B %Y')
        author = u'www.dunyahalleri.com'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        self.draw_text(draw, date, 32, bottom)
        # The site address sits at a fixed offset from the bottom edge.
        self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True
|
|
@ -1,273 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""www.dunyahalleri.com/haftanin-ozeti"""
|
|
||||||
import locale
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from contextlib import closing
|
|
||||||
from shutil import copyfile
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
|
||||||
from calibre.utils.resources import get_path
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2017, sukru alatas / alatas.org'
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Return a new ``name`` tag owned by ``soup``.

    ``attrs`` is an iterable of ``(key, value)`` pairs. A ``new_tag`` factory on
    ``soup`` is preferred and receives the pairs as a dict; without one, ``Tag``
    is built directly from the pairs (``None`` when there are none).
    """
    make = getattr(soup, 'new_tag', None)
    if make is None:
        # This soup object has no factory method, so build the Tag ourselves.
        return Tag(soup, name, attrs=attrs or None)
    return make(name, attrs=dict(attrs))
|
|
||||||
|
|
||||||
|
|
||||||
class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
    """Recipe for the weekly digest ("Haftanın Özeti") of Dünya Halleri.

    The newest item of the digest RSS feed is the issue; its ordered list of
    sections becomes the feed list. Article markup is normalised and a cover
    is drawn from the issue title, the issue date and the issue image.
    """

    title = 'Dünya Halleri - Haftanın Özeti'
    description = ('Geçen hafta boyunca Türkiye ve dünyadan haber,'
                   ' site, yazılım, donanım, cihaz, video ve trendler...')
    timefmt = ' [%a, %d %b, %Y]'
    publication_type = 'blog'
    language = 'tr'
    locale = 'tr_TR'  # for localized month names
    simultaneous_downloads = 5

    needs_subscription = False
    scale_news_images = True

    remove_tags_before = dict(name='section', attrs={'itemprop': 'articleBody'})
    remove_tags_after = dict(name='div', attrs={'class': 'cb-alert cb-blue'})
    remove_tags = [dict(name=['ol', 'h4', 'script', 'noscript', 'style', 'footer']),
                   dict(name='h1', attrs={
                       'class': 'entry-title cb-entry-title entry-title cb-title'}),
                   dict(attrs={'class': ['cb-alert cb-blue', 'woo-sc-box info ',
                                         'sharedaddy sd-sharing-enabled', 'jp-relatedposts']}),
                   dict(id=['post-pagination', 'plp_inital_pagination'])]
    encoding = 'utf_8'
    no_stylesheets = True
    INDEX = 'https://www.dunyahalleri.com/haftanin-ozeti/feed/'
    extra_css = '.caption {color: #998; font-style: italic; font-size: 8pt}'
    __author__ = 'Sukru Alatas'

    COVER_WIDTH, COVER_HEIGHT = 590, 750
    issue_title = ''
    issue_date = ''
    masthead_url = ''
    cover_url = ''
    cover_img_url = ''
    cover_img_path = ''

    def __init__(self, *args, **kwargs):
        """Initialise the recipe, set the time locale and tune Kindle image options."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # for localized month names
        try:
            locale.setlocale(locale.LC_TIME, self.locale)
        except locale.Error:
            # A missing system locale should not prevent the download.
            self.log.warn(
                'Locale %s is not available, dates will not be localized' % self.locale)

        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn(
                'Kindle Output profile being used, reducing image quality '
                'to keep file size below amazon email threshold')

    # BeautifulSoup xml parser extension
    # If you use index_to_soup with xml or rss, it outputs lots of garbage node,
    # and change the tree for its own.
    # This function very very similar copy of index_to_soup but it uses
    # BeautifulStoneSoup instead of BeautifulSoup
    def xml_to_soup(self, url_or_raw, raw=False):
        """Parse XML given as a URL or as markup and return a BeautifulStoneSoup.

        ``url_or_raw`` is fetched when it starts with a URL scheme, otherwise it
        is used as the document itself. With ``raw=True`` the undecoded data is
        returned instead of a parsed tree. Raises ``RuntimeError`` when the
        fetch yields no data.
        """
        if re.match(r'\w+://', url_or_raw):
            br = self.clone_browser(self.browser)
            open_func = getattr(br, 'open_novisit', br.open)
            with closing(open_func(url_or_raw)) as f:
                _raw = f.read()
            if not _raw:
                raise RuntimeError(
                    'Could not fetch index from %s' % url_or_raw)
        else:
            _raw = url_or_raw

        if raw:
            return _raw

        if not isinstance(_raw, type(u'')) and self.encoding:
            if callable(self.encoding):
                _raw = self.encoding(_raw)
            else:
                _raw = _raw.decode(self.encoding, 'replace')

        from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
        from calibre.utils.cleantext import clean_xml_chars

        if isinstance(_raw, type(u'')):
            _raw = strip_encoding_declarations(_raw)
        else:
            _raw = xml_to_unicode(
                _raw, strip_encoding_pats=True, resolve_entities=True)[0]

        _raw = clean_xml_chars(_raw)
        return BeautifulStoneSoup(_raw)  # <== the difference

    def parse_index(self):
        """Build the feed list from the newest issue of the digest feed.

        Also stores the channel description, masthead URL, issue title, issue
        date and cover image URL on the recipe. Returns a list of
        ``(section title, [article dict, ...])`` tuples in page order.
        """
        from dateutil.parser import parse

        # RSS parsing
        index = self.xml_to_soup(self.INDEX)

        channel = index.rss.channel

        self.description = channel.description.contents[0]
        self.masthead_url = channel.url.contents[0]

        item = channel.item
        self.issue_title = item.title.contents[0]
        self.issue_date = parse(item.pubdate.contents[0])

        base_url = item.link.contents[0]
        cover_img_desc = BeautifulSoup(item.description.contents[0])
        # this is necessary for cover generator
        self.cover_img_url = cover_img_desc.img['src']

        soup = self.index_to_soup(base_url)
        articles = {}
        ans = []

        # Every entry of the issue shares the same date and title.
        pubdate = self.issue_date.strftime('%a, %d %b')
        title = self.issue_title + \
            ' (' + self.issue_date.strftime('%d %B %Y') + ')'

        for li in soup.findNext('ol').findAll('li'):
            a = li.find('a', href=True)

            if not a:
                # An entry without a link refers to the issue page itself.
                url = base_url
                feed = self.tag_to_string(li, use_alt=True).strip()
            else:
                url = base_url + re.sub(r'\.\/', '', a['href'])
                feed = self.tag_to_string(a, use_alt=True).strip()

            if feed not in articles:
                articles[feed] = []
                ans.append(feed)

            articles[feed].append(
                dict(title=title, url=url, date=pubdate, description='', content=''))

        # Pair each section name with its articles; ``ans`` keeps page order.
        return [(k, articles[k]) for k in ans]

    def preprocess_html(self, soup):
        """Normalise one article page and return the modified ``soup``.

        Flattens image galleries into a paragraph of plain images and replaces
        every iframe with a linked preview image plus a caption.
        """
        # gallery normalization
        for div in soup.findAll('div', {'itemtype': 'http://schema.org/ImageGallery'}):
            p = new_tag(soup, 'p')
            for img in div.findAll('img'):
                # Build a clean image that carries only ``src``; going through
                # new_tag avoids depending on how the soup stores attributes.
                src = img.get('src')
                p.append(new_tag(soup, 'img', [('src', src)] if src is not None else []))
            div.replaceWith(p)

        # youtube embedded normalization
        # this block finds the cover image for each embedded youtube video then
        # changes it to "a href" and "img"
        for iframe in soup.findAll('iframe'):
            src = iframe.get('src') or ''
            m = re.match(
                r'https\:\/\/(www\.)?youtube.com\/(embed\/|watch\?v\=)'
                r'(?P<vid>.*?)(([\?\&].*)|$|\n)',
                src)
            if m:
                # youtube
                img_src = 'https://img.youtube.com/vi/' + \
                    m.group('vid') + '/0.jpg'
                a_href = 'https://www.youtube.com/watch?v=' + m.group('vid')
            else:
                # not youtube
                # default cover image for non-youtube embedded pages
                img_src = 'http://www.warnerclassics.com/img_style/default_video_m.jpg'
                a_href = src

            a = new_tag(soup, 'a', [('href', a_href), ('target', '_blank')])
            caption = new_tag(soup, 'pre', [('class', 'caption')])
            caption.append('Video: ' + a_href)
            a.append(new_tag(soup, 'img', [('src', img_src)]))
            a.append(caption)
            iframe.replaceWith(a)
        return soup

    # cover generator
    # original version https://www.mobileread.com/forums/showpost.php?p=866553&postcount=5
    def get_cover_img_url(self):
        """Return the URL of the image drawn on the cover, or ``None`` if unset."""
        return getattr(self, 'cover_img_url', None)

    def _download_cover_img(self):
        """Download the cover image and store a copy as ``cover_img.jpg`` in ``output_dir``.

        ``cover_url`` is pointed at the image for the duration of the download
        and restored afterwards, even when the download fails.
        """
        old_cu = None
        try:
            old_cu = self.get_cover_url()
        except Exception:
            # Best effort: without a previous value we restore None below.
            self.log.debug('Could not read the current cover url')
        self.cover_url = self.get_cover_img_url()
        try:
            self._download_cover()
            outfile = os.path.join(self.output_dir, 'cover_img.jpg')
            copyfile(self.cover_path, outfile)
        finally:
            self.cover_url = old_cu
        self.cover_img_path = outfile

    def download_cover_img(self):
        """Fetch the cover image; on failure log it and reset ``cover_img_path`` to ``None``."""
        try:
            self._download_cover_img()
            self.report_progress(
                1, ('Downloaded cover to %s') % self.cover_img_path)
        except Exception:
            self.log.exception('Failed to download cover img')
            self.cover_img_path = None

    def draw_text(self, draw, text, text_size, top):
        """Draw ``text`` horizontally centred at ``top`` and return its height in pixels."""
        font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf')
        font = ImageFont.truetype(font_path, text_size)
        if hasattr(draw, 'textbbox'):
            # ImageDraw.textsize is gone from recent Pillow releases.
            x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
            width, height = x1 - x0, y1 - y0
        else:
            width, height = draw.textsize(text, font=font)
        left = max(int((self.COVER_WIDTH - width) / 2.), 0)
        draw.text((left, top), text, fill=(0, 0, 0), font=font)
        return height

    def default_cover(self, cover_file):
        """Render the cover as a JPEG into ``cover_file`` and return ``True``.

        The cover shows the issue title and date at the top, the site address
        near the bottom and, when it could be downloaded, the issue image
        scaled to the cover width in the centre.
        """
        title = self.issue_title
        date = self.issue_date.strftime('%d %B %Y')
        if isinstance(date, bytes):
            # Only byte strings need decoding; text strings have no decode().
            date = date.decode('utf8', 'replace')
        author = u'www.dunyahalleri.com/haftanin-ozeti'
        # Texts
        img = Image.new(
            'RGB', (self.COVER_WIDTH, self.COVER_HEIGHT), 'white')
        draw = ImageDraw.Draw(img)
        bottom = 15
        bottom += self.draw_text(draw, title, 42, bottom)
        bottom += 50
        self.draw_text(draw, date, 32, bottom)
        # The site address sits at a fixed offset from the bottom edge.
        self.draw_text(draw, author, 32, self.COVER_HEIGHT - 45)
        # Logo
        self.download_cover_img()
        if getattr(self, 'cover_img_path', None) is not None:
            logo_file = self.cover_img_path
            self.report_progress(
                1, ('using cover img from %s') % logo_file)
            logo = Image.open(logo_file, 'r')
            width, height = logo.size
            # resize() needs integer pixel sizes; true division yields a float.
            logo = logo.resize(
                (self.COVER_WIDTH, int(self.COVER_WIDTH * height / width)),
                Image.Resampling.LANCZOS)
            width, height = logo.size
            left = max(int((self.COVER_WIDTH - width) / 2.), 0)
            top = max(int((self.COVER_HEIGHT - height) / 2.), 0)
            img.paste(logo, (left, top))
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
        img.convert('RGB').save(cover_file, 'JPEG')
        cover_file.flush()
        return True
|
|
@ -1,47 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
    """Recipe for Dagblad van het Noorden (DvhN), one feed per news section."""

    # --- identity and metadata ---
    title = u'DvhN'
    __author__ = 'Reijndert'
    publisher = u'Dagblad van het Noorden'
    category = u'Nieuws'
    description = u'Nieuws uit Noord Nederland'
    language = 'nl'
    country = 'NL'
    version = 1
    cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
    timefmt = ' %Y-%m-%d (%a)'

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 200

    # --- content handling ---
    no_stylesheets = True

    # Keep the lead picture and the article text only.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'fullPicture'}},
        {'name': 'div', 'attrs': {'id': 'articleText'}},
    ]

    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'location'}},
    ]

    # Each pattern is replaced by the empty string: anchor tags are stripped
    # (their text stays) and the two social-media teaser lines are removed.
    preprocess_regexps = [
        (re.compile(r'<a.*?>'), lambda match: ''),
        (re.compile(r'</a>'), lambda match: ''),
        (re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda match: ''),
        (re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda match: ''),
    ]

    # (section title, feed URL)
    feeds = [
        (u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'),
        (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'),
        (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'),
        (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'),
        (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'),
        (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'),
        (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'),
        (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours'),
    ]

    extra_css = '''
    body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
    '''
|
|
@ -1,32 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Adresseavisen Ebeltoft
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class EbeltoftLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Adresseavisen Ebeltoft.

    Downloads the section RSS feeds of ebeltoft.lokalavisen.dk with
    ``auto_cleanup`` enabled.
    """

    # --- identification -------------------------------------------------
    title = 'Adresseavisen Ebeltoft'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Ebeltoft og omegn på ebeltoft.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- download limits and clean-up -----------------------------------
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # --- (section title, RSS URL) pairs ---------------------------------
    feeds = [
        (
            'Seneste nyt fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Adresseavisen Ebeltoft',
            'http://ebeltoft.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
|
||||||
|
|
@ -1,87 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Comment
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EchoDnia(BasicNewsRecipe):
    """Recipe for the Polish regional news portal Echo Dnia (echodnia.eu).

    Besides the plain RSS configuration, the recipe looks up the current
    front-page image for the cover and stitches the pages of paginated photo
    stories into a single article.
    """

    title = u'Echo Dnia'
    __author__ = 'fenuks'
    description = u'Echo Dnia - portal regionalny świętokrzyskiego radomskiego i podkarpackiego. Najnowsze wiadomości z Twojego regionu, galerie, video, mp3.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.echodnia.eu'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # Strip the inline "read also" teasers, up to and including their link.
    preprocess_regexps = [
        (re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),
        (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),
        (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [
        dict(id=[
            'articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
            'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
            'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline',
            'articleZoomText',
        ]),
        dict(attrs={'class': 'articleFunctions'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://www.echodnia.eu/rss.xml'),
        (u'Świętokrzyskie', u'http://www.echodnia.eu/swietokrzyskie.xml'),
        (u'Radomskie', u'http://www.echodnia.eu/radomskie.xml'),
        (u'Podkarpackie', u'http://www.echodnia.eu/podkarpackie.xml'),
        (u'Sport \u015bwi\u0119tokrzyski', u'http://www.echodnia.eu/sport_swi.xml'),
        (u'Sport radomski', u'http://www.echodnia.eu/sport_rad.xml'),
        (u'Sport podkarpacki', u'http://www.echodnia.eu/sport_pod.xml'),
        (u'Pi\u0142ka no\u017cna', u'http://www.echodnia.eu/pilka.xml'),
        (u'Praca', u'http://www.echodnia.eu/praca.xml'),
        (u'Dom', u'http://www.echodnia.eu/dom.xml'),
        (u'Auto', u'http://www.echodnia.eu/auto.xml'),
        (u'Zdrowie', u'http://www.echodnia.eu/zdrowie.xml'),
    ]

    def get_cover_url(self):
        """Return the URL of the current front-page image.

        Follows the first link in the ``covers`` element of the front-page
        listing and reads the image inside the ``cover`` element of the
        linked page. When any of those elements is missing, the existing
        ``cover_url`` attribute (or None) is returned instead of raising.
        """
        fallback = getattr(self, 'cover_url', None)

        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        covers = listing.find(id='covers')
        link = covers.find('a') if covers is not None else None
        if link is None or not link.get('href'):
            return fallback

        page = self.index_to_soup(self.INDEX + link['href'])
        cover = page.find(id='cover')
        image = cover.find(name='img') if cover is not None else None
        if image is None or not image.get('src'):
            return fallback

        self.cover_url = self.INDEX + image['src']
        return self.cover_url

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a paginated photo story.

        :param soup: parsed first page of the article.
        :param appendtag: element that receives the content of pages 2..N.

        Does nothing when the page has no ``photoNavigationPages`` counter,
        when no page count can be read from it, or when the "next" link is
        missing.
        """
        counter = soup.find('span', attrs={'class': 'photoNavigationPages'})
        if counter is None:
            return

        # The page count is whatever follows the last '/' in the counter
        # text; every non-digit (spaces, non-breaking spaces, ...) is dropped.
        digits = re.sub(r'\D', '', self.tag_to_string(counter).rpartition('/')[-1])
        next_link = soup.find(attrs={'class': 'photoNavigationNext'})
        if not digits or next_link is None or not next_link.get('href'):
            return

        number = int(digits)
        # Drop the last character of the "next" href and append the page
        # number instead (presumably the href ends in the page index).
        baseurl = self.INDEX + next_link['href'][:-1]

        for nav in appendtag.findAll(attrs={'class': 'photoNavigation'}):
            nav.extract()

        selectors = (
            {'id': 'photoContainer'},
            {'attrs': {'class': 'photoMeta'}},
            {'attrs': {'class': 'photoStoryText'}},
        )
        for nr in range(2, number + 1):
            soup2 = self.index_to_soup(baseurl + str(nr))
            # Look each part up only after the previous one has been moved
            # out of soup2, exactly as a sequence of find/insert calls would.
            for selector in selectors:
                part = soup2.find(**selector)
                if part is not None:
                    appendtag.insert(len(appendtag.contents), part)

        # Remove HTML comments from the merged content.
        for comment in appendtag.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        """Merge photo-story pages into the article body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
|
@ -1,43 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EclipseOnline(BasicNewsRecipe):
    """Recipe for Eclipse Online, the fiction magazine hosted by Night Shade Books."""

    title = u'Eclipse Online'
    description = u'"Where strange and wonderful things happen, where reality is eclipsed for a little while with something magical and new." Eclipse Online is edited by Jonathan Strahan and published online by Night Shade Books. http://www.nightshadebooks.com/category/eclipse/'  # noqa
    publication_type = 'magazine'
    language = 'en'

    __author__ = u'Jim DeVona'
    __version__ = '1.0'

    # Maximum age, in days, of posts to fetch. 32 is meant to pair with a
    # "days of month = 1" schedule so each download is a "monthly issue".
    # Raising it pulls in older posts, but the RSS feed currently only
    # carries the 10 most recent posts, so that is the effective ceiling.
    oldest_article = 32

    # The Eclipse Online logo doubles as the cover for now. Disable this
    # line to let calibre generate its default cover (which includes the date).
    cover_url = 'http://www.nightshadebooks.com/wp-content/uploads/2012/10/Eclipse-Logo.jpg'

    # Keep only the "post" div that holds the story, minus redundant metadata.
    keep_only_tags = [
        dict(name='div', attrs={'class': lambda css: css and 'post' in css}),
    ]
    remove_tags = [
        dict(name='span', attrs={'class': ['post-author', 'post-category', 'small']}),
    ]

    # Plain markup (like Eclipse's) suits most e-readers: ignore the site's
    # styling rules, but keep illustrations centred.
    auto_cleanup = False
    no_stylesheets = True
    remove_attributes = ['style', 'align']
    extra_css = '.wp-caption {text-align: center;} .wp-caption-text {font-size: small; font-style: italic;}'

    # Where calibre looks for article links; the posts found here are
    # retrieved and formatted according to the rules above.
    feeds = ['http://www.nightshadebooks.com/category/eclipse/feed/']
|
|
@ -1,35 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
EcoGeek.org
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EcoGeek(BasicNewsRecipe):
    """Recipe for the EcoGeek blog, built from its FeedBurner page."""

    title = 'EcoGeek'
    __author__ = 'Darko Miletic'
    description = 'EcoGeek - Technology for the Environment Blog Feed'
    publisher = 'EcoGeek'
    language = 'en'
    no_stylesheets = True

    def parse_index(self):
        """Return a single 'EcoGeek' feed of locally stored articles.

        Every ``div.article`` found on the FeedBurner page is written to its
        own HTML file in a persistent temporary directory, and the article
        entry points at that file through a ``file://`` URL.
        """
        tdir = PersistentTemporaryDirectory('_ecogeek')
        soup = self.index_to_soup('http://feeds2.feedburner.com/EcoGeek')
        entries = soup.findAll('div', attrs={'class': 'article'})

        articles = []
        for index, entry in enumerate(entries):
            path = os.path.join(tdir, '%d.html' % index)
            markup = type(u'')(entry).encode('utf-8')
            with open(path, 'wb') as out:
                out.write(markup)
            headline = self.tag_to_string(entry.find('h2'))
            local_url = 'file://' + path.replace(os.sep, '/')
            articles.append({'title': headline, 'url': local_url})

        return [('EcoGeek', articles)]
|
|
@ -1,41 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
globaleconomicanalysis.blogspot.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalEconomicAnalysis(BasicNewsRecipe):
    """Recipe for Mike Shedlock's blog "Mish's Global Economic Trend Analysis".

    The feed carries the full posts (``use_embedded_content``), so only
    light tag and attribute clean-up is configured.
    """

    title = "Mish's Global Economic Trend Analysis"
    __author__ = 'Darko Miletic'
    description = 'Thoughts on the global economy, housing, gold, silver, interest rates, oil, energy, China, commodities, the dollar, Euro, Renminbi, Yen, inflation, deflation, stagflation, precious metals, emerging markets, and policy decisions that affect the global markets.'  # noqa
    publisher = 'Mike Shedlock'
    category = 'news, politics, economy, banking'
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = True
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'blog'
    # NOTE(review): this logo is hosted on pagina12.com.ar, which looks
    # unrelated to this blog -- confirm the intended masthead.
    masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
    extra_css = """
                body{font-family: Arial,Helvetica,sans-serif }
                img{margin-bottom: 0.4em; display:block}
                """

    # The key is 'comments' (plural), matching the other recipes that pass
    # their description through conversion_options.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [
        dict(name=['meta', 'link', 'iframe', 'object', 'embed']),
        dict(attrs={'class': 'blogger-post-footer'}),
    ]
    remove_attributes = ['border']

    feeds = [
        (u'Articles', u'http://feeds2.feedburner.com/MishsGlobalEconomicTrendAnalysis'),
    ]
|
|
@ -1,45 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
ecuisine.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EcuisineRo(BasicNewsRecipe):
    """Recipe for the Romanian cooking site eCuisine (ecuisine.ro)."""

    title = u'eCuisine'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
    publisher = 'eCuisine'
    oldest_article = 50
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Retete,Bucatarie'
    encoding = 'utf-8'
    cover_url = 'http://www.ecuisine.ro/sites/all/themes/ecuisine/images/logo.gif'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep the page title and the content field only.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'page-title'}),
        dict(name='div', attrs={'class': 'field-item even'}),
    ]

    # Tabs, rating widgets, share buttons and related-recipe boxes.
    # Each selector is listed once.
    remove_tags = [
        dict(name='ul', attrs={'id': ['recipe-tabs']}),
        dict(name='div', attrs={'class': ['recipe-body-rating clearfix']}),
        dict(name='div', attrs={'class': ['recipe-body-flags']}),
        dict(name='div', attrs={'id': ['tweetmeme_button']}),
        dict(name='div', attrs={'class': ['fbshare']}),
        dict(name='a', attrs={'class': ['button-rounded']}),
        dict(name='div', attrs={'class': ['recipe-body-related']}),
        dict(name='div', attrs={'class': ['link-wrapper']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.ecuisine.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return the soup after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
|
@ -1,14 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Recipe for the Turkish site Edebistan, read from its single RSS feed."""

    # Identification
    title = u'Edebistan'
    __author__ = 'asalet_r'
    language = 'tr'

    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 30
    auto_cleanup = True

    feeds = [
        (u'Edebistan', u'http://www.edebistan.com/index.php/feed/'),
    ]
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import AutomaticNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1420467110(AutomaticNewsRecipe):
    """Recipe for the Turkish site Edebiyat Haber, read from its FeedBurner feed."""

    # Identification
    title = 'Edebiyat Haber'
    __author__ = 'asalet_r'
    language = 'tr'

    # Fetch limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (
            'Edebiyat Haber',
            'http://feeds.feedburner.com/feedburner/edebiyathaber',
        ),
    ]
|
|
@ -1,23 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Fetch Edge.org conversations
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EdgeConversationRSS(BasicNewsRecipe):
    """Recipe that fetches Edge.org conversations from the site's RSS feed."""

    title = u'Edge.org Conversations'
    __author__ = 'levien'
    language = 'en'
    # Built from adjacent literals so the text is a single line, and the
    # quotation that opens after "offers" is closed at the end.
    description = (
        'Edge.org offers "open-minded, free ranging, intellectually '
        'playful ... an unadorned pleasure in curiosity, a collective expression of '
        'wonder at the living and inanimate world ... an ongoing and thrilling '
        'colloquium."'
    )
    oldest_article = 60
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Edge RSS', u'http://edge.org/feed')]
|
|
@ -1,47 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010 elsuave'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EandP(BasicNewsRecipe):
    """Recipe for Editor and Publisher, a trade publication about newspapers and journalism."""

    # Identification
    title = u'Editor and Publisher'
    __author__ = u'elsuave (modified from Xanthan Gum)'
    description = 'News about newspapers and journalism.'
    publisher = 'Editor and Publisher'
    category = 'news, journalism, industry'
    language = 'en'
    cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'

    # Fetching and clean-up
    max_articles_per_feed = 25
    encoding = 'utf8'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    # Metadata handed to the LRF and EPUB option sets.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = 'publisher="{}"\ncomments="{}"\ntags="{}"'.format(
        publisher, description, category)

    # Font formatting code borrowed from kwetal
    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                h1{font-size: xx-large;}
                h2{font-size: large;}
                '''

    # Remove commenting/social media links
    remove_tags_after = [dict(name='div', attrs={'class': 'clear'})]

    feeds = [
        (u'Editor & Publisher', u'http://www.editorandpublisher.com/feed/'),
        (u'Comments', u'http://www.editorandpublisher.com/comments/feed/'),
    ]
|
|
@ -1,19 +0,0 @@
|
|||||||
__version__ = 'v1.0'
|
|
||||||
__date__ = '7, April 2012'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1332847053(BasicNewsRecipe):
    """Recipe 'Editoriali': leading articles collected from several Italian sources."""

    # Identification
    title = u'Editoriali'
    __author__ = 'faber1971'
    description = 'Leading articles on Italy by the best Italian editorials'
    language = 'it'
    masthead_url = 'http://folkbulletin.folkest.com/wp-content/uploads/editoriale1.jpg'

    # Only the last day's articles, cleaned automatically, tables linearized.
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True
    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'Micromega', u'http://temi.repubblica.it/micromega-online/feed/'),
        (u'Corriere della Sera', u'http://xml.corriereobjects.it/rss/editoriali.xml'),
        (u'La Stampa', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'),
        (u"Italia dall'estero", u'http://italiadallestero.info/feed'),
    ]
|
|
@ -1,32 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Lokalavisen Egedal
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class EgedalLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Lokalavisen Egedal.

    Downloads the section RSS feeds of egedal.lokalavisen.dk with
    ``auto_cleanup`` enabled.
    """

    # --- identification -------------------------------------------------
    title = 'Lokalavisen Egedal'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale, regionale nyheder, sport og kultur i Egedal, Stenløse, Ølstykke, Ganløse, Gundsø, Slangerup, Roskilde på egedal.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- download limits and clean-up -----------------------------------
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # --- (section title, RSS URL) pairs ---------------------------------
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Egedal',
            'http://egedal.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
egirl.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EgirlRo(BasicNewsRecipe):
    """Recipe for the Romanian magazine site egirl.ro."""

    # Identification
    title = u'egirl'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Necesar pentru tine'
    publisher = u'egirl'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.egirl.ro/images/egirlNou/logo_egirl.gif'

    # Fetching
    oldest_article = 5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Article containers to keep; everything else on the page is dropped.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content_art'}),
        dict(name='div', attrs={'class': 'content_articol'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.egirl.ro/rss/egirl.xml'),
    ]

    def preprocess_html(self, soup):
        """Return the soup after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
|
@ -1,30 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class eioba(BasicNewsRecipe):
    """Recipe for the Polish article portal eioba.pl."""

    # Identification
    title = u'eioba'
    __author__ = 'fenuks'
    description = u'eioba.pl - daj się przeczytać!'
    language = 'pl'
    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'

    # Fetching
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # Keep the topic heading and the article body; render the heading large.
    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]
    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'

    feeds = [
        (u'Wszyskie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
        (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
        (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
        (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
        (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
        (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
        (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
        (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml'),
    ]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every element that has one."""
        styled_elements = soup.findAll(style=True)
        for element in styled_elements:
            del element['style']
        return soup
|
|
@ -1,31 +0,0 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1314326622(BasicNewsRecipe):
    """Recipe for Ekantipur, an English-language news portal from Nepal."""

    # Identification
    title = u'Ekantipur'
    __author__ = 'Manish Bhattarai'
    description = 'News from the No.1 News Portal In Nepal'
    language = 'en_NP'
    masthead_url = 'http://www.ekantipur.com/images/logo.gif'

    # Fetching
    oldest_article = 7
    max_articles_per_feed = 25
    remove_empty_feeds = True

    # Trim the page to the span between the main content and the comments
    # link, then drop the listed widgets, containers and scripts.
    remove_tags_before = dict(id='main-content')
    remove_tags_after = dict(id='view-comments')
    remove_tags = [
        dict(attrs={'class': [
            'lang fltl', 'bdtop', 'ratings', 'news-tool', 'comment',
            'post-ur-comment', 'asideBox', 'commentsbox',
            'related-sidebar-row related-news',
        ]}),
        dict(id=[
            'menu_container', 'top_container', 'news_container',
            'top_right', 'sidebar', 'news-detail-img', 'footer-wrapper',
        ]),
        dict(name=['script']),
    ]

    feeds = [
        (u'Top Stories', u'http://www.ekantipur.com/en/rss/top-stories/'),
        (u'National', u'http://www.ekantipur.com/en/rss/national/1'),
        (u'Capital', u'http://www.ekantipur.com/en/rss/capital/7'),
        (u'Business', u'http://www.ekantipur.com/en/rss/business/3'),
        (u'World', u'http://www.ekantipur.com/en/rss/world/5'),
        (u'Sports', u'http://www.ekantipur.com/en/rss/sports/4'),
        (u'Mixed Bag', u'http://www.ekantipur.com/en/rss/mixed-bag/14'),
        (u'Health & Living', u'http://www.ekantipur.com/en/rss/health-and-living/19'),
        (u'Entertainment', u'http://www.ekantipur.com/en/rss/entertainment/6'),
    ]
|
|
@ -1,59 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
|
|
||||||
class Ekathimerini(BasicNewsRecipe):
    """Recipe for ekathimerini, the English edition of the Greek daily Kathimerini.

    The index is built from a single XML file of latest news whose items are
    grouped into feeds by their ``subcat`` child element.
    """

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]

    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    @staticmethod
    def _local_name(node):
        """Return the lower-cased tag name of an lxml node without its namespace.

        Comments, processing instructions and entities carry a factory
        function instead of a string in ``tag``; '' is returned for those.
        """
        tag = node.tag
        if callable(tag):
            return ''
        return tag.rpartition('}')[2].lower()

    def _child_text(self, node, name):
        """Return the text of the first direct child called *name*, or ''."""
        for child in node:
            if self._local_name(child) == name:
                return self.tag_to_string(child)
        return u''

    def find_articles(self, idx, category):
        """Yield an article dict for every item of *idx* in *category*.

        :param idx: root element of the parsed index (an lxml element, as
            produced by ``parse_index``).
        :param category: ``subcat`` text to match; '' selects the items that
            have no ``subcat`` (or an empty one).
        """
        # The tree comes from lxml, so it is walked with lxml's API. Children
        # are located by name rather than by truth-testing elements, because
        # an lxml element without children evaluates as false.
        for article in idx.iter():
            if self._local_name(article) != 'item':
                continue
            if self._child_text(article, 'subcat') != category:
                continue

            # The description is itself markup; reduce it to plain text.
            desc_html = self._child_text(article, 'description')
            description = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                'title': self._child_text(article, 'title'),
                'url': self._child_text(article, 'link'),
                'description': description,
                # Names are compared lower-cased, so this also finds <pubDate>.
                'date': self._child_text(article, 'pubdate'),
            }

    def parse_index(self):
        """Download the index and return ``[(feed title, [article, ...]), ...]``.

        The first feed, 'News', holds the items without a category; one feed
        per distinct category follows, in sorted order.
        """
        idx_contents = self.browser.open(self.rss_url).read()
        idx = etree.fromstring(idx_contents, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))

        # Empty category names are left out: those items are already
        # collected in the 'News' feed.
        cats = sorted({
            self.tag_to_string(node)
            for node in idx.iter()
            if self._local_name(node) == 'subcat'
        } - {u''})

        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for cat in cats:
            feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))

        return feeds

    def print_version(self, url):
        """Rewrite an article URL to the ``/4Dcgi/4dcgi/`` form of the path."""
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')
|
|
@ -1,34 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EkologiaPl(BasicNewsRecipe):
    """Recipe for the Polish ecology portal Ekologia.pl.

    Articles are downloaded from the print-friendly host
    ``drukuj.ekologia.pl`` whenever the article id can be read from the URL.
    """

    title = u'Ekologia.pl'
    __author__ = 'fenuks'
    description = u'Portal ekologiczny - eko, ekologia, ochrona przyrody, ochrona środowiska, przyroda, środowisko online. Ekologia i ochrona środowiska. Ekologia dla dzieci.'  # noqa
    category = 'ecology'
    language = 'pl'
    cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
    ignore_duplicate_articles = {'title', 'url'}
    extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    use_embedded_content = False
    # The option is spelled ``remove_attributes`` in the other recipes; the
    # original misspelled name is kept as an alias for anything that reads it.
    remove_attributes = ['style']
    remove_attrs = remove_attributes
    keep_only_tags = [dict(attrs={'class': 'contentParent'})]
    remove_tags = [
        dict(attrs={'class': ['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']}),
    ]

    feeds = [
        (u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'),
        (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'),
        (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for *url*.

        The article id is the number between the last comma and ``.html``.
        URLs that do not follow that pattern are returned unchanged instead
        of raising.
        """
        match = re.search(r',(?P<id>\d+)\.html', url)
        if match is None:
            return url
        return 'http://drukuj.ekologia.pl/artykul/' + match.group('id')
|
|
@ -1,51 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ColombiaElEspectador(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Espectador (elespectador.com)."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    # Keep the page from the content element up to the pagination block.
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class': 'paginacion'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'herramientas_nota'}),
        dict(name='div', attrs={'class': 'relpauta'}),
        dict(name='div', attrs={'class': 'recursosrelacionados'}),
        dict(name='div', attrs={'class': 'nav_negocios'}),
        # Disabled selectors, kept for reference:
        # dict(name='div', attrs={'class':'ico-mail2'}),
        # dict(name='div', attrs={'id':'caja-instapaper'}),
        # dict(name='div', attrs={'class':'modulo herramientas'}),
    ]

    extra_css = """
                p{text-align: justify; font-size: 100%}
                body{ text-align: left; font-size:100% }
                h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                """

    # The first entry carries no stray whitespace around its title or URL.
    feeds = [
        (u'Política', u'http://www.elespectador.com/noticias/politica/feed'),
        (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
        (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
        (u'Economía', u'http://www.elespectador.com/economia/feed'),
        (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
        (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
        (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
        (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
        (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
        (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
        (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
        (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
        (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
        (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
        (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
        (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed'),
    ]
|
|
@ -1,28 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# https://github.com/iemejia/calibrecolombia
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.elmalpensante.com/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElMalpensante(BasicNewsRecipe):
    """Recipe for El Malpensante (es_CO), built from the site's RSS feeds.

    Article bodies are taken from the feed entries themselves
    (``use_embedded_content = True``).
    """

    # --- Identification -------------------------------------------------
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    title = u'El Malpensante'
    description = 'El Malpensante'
    language = 'es_CO'
    cover_url = 'http://elmalpensante.com/img/layout/logo.gif'

    # --- Download policy ------------------------------------------------
    max_articles_per_feed = 100
    oldest_article = 30
    simultaneous_downloads = 20
    remove_empty_feeds = True
    use_embedded_content = True

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Artículos', u'http://www.elmalpensante.com/articulosRSS.php'),
        (u'Malpensantías', u'http://www.elmalpensante.com/malpensantiasRSS.php'),
        (u'Margaritas', u'http://www.elmalpensante.com/margaritasRSS.php'),
        # The "Noticias" feed (http://www.elmalpensante.com/noticiasRSS.php)
        # is deliberately left out: it is almost the same as "Artículos".
    ]
|
|
@ -1,34 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
    """Recipe for El Mostrador (es_CL), fetched section by section over RSS."""

    news = True

    # --- Identification -------------------------------------------------
    __author__ = 'Alex Mitrani'
    title = u'El Mostrador'
    publisher = u'La Plaza S.A.'
    description = u'Chilean online newspaper'
    category = 'news, rss'
    language = 'es_CL'
    masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'

    # --- Download policy ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Clean-up -------------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    # Keep only what lies between the article heading and the footer actions.
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'news-heading cf'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'footer-actions cf'}}

    # Page furniture dropped from whatever remains.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'footer-actions cb cf'}},
        {'name': 'div', 'attrs': {'class': 'news-aside fl'}},
        {'name': 'div', 'attrs': {'class': 'footer-actions cf'}},
        {'name': 'div', 'attrs': {'class': 'user-bar', 'id': 'top'}},
        {'name': 'div', 'attrs': {'class': 'indicators'}},
        {'name': 'div', 'attrs': {'id': 'header'}},
    ]

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Temas Destacados', u'http://www.elmostrador.cl/destacado/feed/'),
        (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/'),
        (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/'),
        (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/'),
        (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/'),
        (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/'),
        (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/'),
        (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/'),
    ]
|
|
@ -1,53 +0,0 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ColombiaElMundo02(BasicNewsRecipe):
    """Recipe for the newspaper El Mundo (es_CO), one RSS feed per section.

    Section titles are shown to the reader as-is, so they are kept free of
    stray whitespace and spelled like the section they link to.
    """

    title = u'Periódico El Mundo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo is used for both the cover and the masthead.
    cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
    masthead_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'

    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # Article content sits between the breadcrumb element and the options box.
    remove_tags_before = dict(id='miga_pan')
    remove_tags_after = [
        dict(name='div', attrs={'class': 'cuadro_opciones_new1'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'ruta'}),
        dict(name='div', attrs={'class': 'buscador'}),
        dict(name='div', attrs={'class': 'iconos'}),
        dict(name='div', attrs={'class': 'otros_iconos'}),
        dict(name='div', attrs={'class': 'cuadro_opciones_new1'}),
        dict(name='div', attrs={'class': 'otras_noticias'}),
        dict(name='div', attrs={'class': 'notas_relacionadas'}),
        dict(name='div', attrs={'id': 'lateral_2'}),
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [
        (u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
        (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
        (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
        # Titles below had trailing spaces that leaked into the section names.
        (u'Política', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
        (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
        (u'Nacional', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
        (u'Internacional',
         u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
        (u'Servicios Públicos',
         u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
        (u'Infraestructura',
         u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
        # Was misspelled "Mobilidad"; the feed itself is "movilidad".
        (u'Movilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
        (u'Derechos Humanos',
         u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
        (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
        (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml'),
    ]
|
|
@ -1,65 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
##
|
|
||||||
# Last Edited: 2018-02-13 Carlos Alves <carlosalves90@gmail.com>
|
|
||||||
##
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = '2010, Yuri Alvarez<me at yurialvarez.com>'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
|
|
||||||
'''
|
|
||||||
elobservador.com.uy
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Noticias(BasicNewsRecipe):
    """Recipe for El Observador (es_UY), built from the front-page RSS feed."""

    title = 'El Observador'
    __author__ = 'yrvn'
    description = 'Noticias desde Uruguay'
    # Fixed spelling: was 'entretainment'.
    tags = 'news, sports, entertainment'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    # NOTE(review): the documented BasicNewsRecipe attribute is `recursions`
    # (plural); this singular name is presumably ignored. Left unchanged on
    # purpose, since renaming it would switch on 5-level link following.
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100

    # Only the headline and the article body are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'detail-title newDetailTextChange'}),
        dict(name='div', attrs={'class': 'cuerpo air newDetailTextChange'}),
    ]
    remove_tags = [
        dict(name='div', attrs={
            'class': ['fecha', 'copyright', 'story_right']}),
        dict(name='div', attrs={'class': ['photo', 'social']}),
        dict(name='div', attrs={'id': 'widget'}),
        dict(name=['object', 'link']),
    ]

    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family: Georgia,"Times New Roman",Times,serif}
        h3{font-family: Georgia,"Times New Roman",Times,serif}
        h2{font-family: Georgia,"Times New Roman",Times,serif}
        p{font-family: Verdana,Arial,Helvetica,sans-serif}
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        img{margin-bottom: 0.4em; display:block;}
    '''

    feeds = [
        (u'Portada', u'http://www.elobservador.com.uy/rss/home.xml'),
    ]

    def get_cover_url(self):
        """Return None: this recipe supplies no cover image URL."""
        return None

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
|
@ -1,121 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '04 December 2010, desUBIKado'
|
|
||||||
__author__ = 'desUBIKado'
|
|
||||||
__description__ = 'Daily newspaper from Aragon'
|
|
||||||
__version__ = 'v0.10'
|
|
||||||
__date__ = '09, September 2017'
|
|
||||||
'''
|
|
||||||
elperiodicodearagon.com
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class elperiodicodearagon(BasicNewsRecipe):
    """Recipe for El Periodico de Aragon, one RSS feed per section."""

    # --- Identification -------------------------------------------------
    __author__ = u'desUBIKado'
    title = u'El Periodico de Aragon'
    publisher = u'elperiodicodearagon.com'
    description = u'Noticias desde Aragon'
    category = u'news, politics, Spain, Aragon'
    language = 'es'
    masthead_url = 'http://pdf.elperiodicodearagon.com/img/logotipo.gif'

    # --- Download policy ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    delay = 1
    encoding = 'iso-8859-1'
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Clean-up -------------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'Pagina'}}]
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'Pagina'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'ComentariosNew'}}

    remove_tags = [
        {
            'name': 'nav',
            'attrs': {
                'class': ['Compartir', 'HerramientasConversacion Herramientas'],
            },
        },
        {'name': 'h5', 'attrs': {'class': ['CintilloBox']}},
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'BoxMenu BoxMenuConFoto',
                    'BxGalerias',
                    'ConStick',
                    'HerramientasComentarioNew Herramientas',
                    'NumeroComentarioNew',
                ],
            },
        },
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'BoxPestanas',
                    'Box',
                    'ColumnaDerecha',
                    'NoticiasRelacionadasDeNoticia',
                    'CintilloNoticiasRelacionadasDeNoticia',
                ],
            },
        },
        {'name': 'a', 'attrs': {'class': ['IrA BotonLink']}},
    ]

    extra_css = '''
        h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:28px;}
        h2 {font-family:Arial,Helvetica,sans-serif; font-style:italic;font-size:14px;color:#4D4D4D;}
        h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
    '''

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
        (u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
        (u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
        (u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
        (u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
        (u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
        (u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
        (u'Tecnyconta Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
        (u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
        (u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
        (u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
        (u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
        (u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
        (u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
        (u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
        (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml'),
        (u'Semana Santa', u'http://zetaestaticos.com/aragon/rss/385_es.xml'),
        (u'La crónica de Valdejal\xf3n',
         u'http://zetaestaticos.com/aragon/rss/206_es.xml'),
        (u'La crónica de Campo de Borja',
         u'http://zetaestaticos.com/aragon/rss/208_es.xml'),
        (u'La crónica de Ejea y sus pueblos',
         u'http://zetaestaticos.com/aragon/rss/212_es.xml'),
        (u'La crónica del Bajo Gállego',
         u'http://zetaestaticos.com/aragon/rss/205_es.xml'),
        (u'La crónica del Campo de Cariñena',
         u'http://zetaestaticos.com/aragon/rss/207_es.xml'),
        (u'La crónica de la Ribera Alta del Ebro',
         u'http://zetaestaticos.com/aragon/rss/211_es.xml'),
        (u'La crónica del Campo de Belchite',
         u'http://zetaestaticos.com/aragon/rss/331_es.xml'),
    ]

    def get_cover_url(self):
        """Return the URL of the print-edition cover, or None if not found.

        The edition page is scanned for the first ``<img>`` whose ``src``
        starts with the public image endpoint. (Original author's note: the
        format=1 image has the higher resolution.)
        """
        page = self.index_to_soup('http://pdf.elperiodicodearagon.com/edicion.php')
        matching_sources = (
            img['src']
            for img in page.findAll('img', src=True)
            if img['src'].startswith('/funciones/img-public.php?key=')
        )
        relative = next(matching_sources, None)
        if relative is None:
            return None
        return 'http://pdf.elperiodicodearagon.com' + relative
|
|
@ -1,46 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Gerardo Diez'
|
|
||||||
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
|
|
||||||
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
publico.es
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Publico(BasicNewsRecipe):
    """Recipe for Publico.es (es), one RSS feed per section."""

    title = u'Publico.es'
    __author__ = 'Gerardo Diez'
    publisher = u'Mediapubli Sociedad de Publicaciones y Ediciones S.L.'
    category = 'news, politics, finances, world, spain, science, catalunya'
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 10
    cover_url = u'http://imagenes.publico.es/css/img/logo_publico.gif'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'utf8'
    language = 'es'
    remove_javascript = True
    no_stylesheets = True

    # A list of tag specs, like every other recipe; this was a bare dict.
    keep_only_tags = [dict(id='main')]

    # Ad slots, info boxes and the comment machinery.
    remove_tags = [
        dict(name='div', attrs={
            'class': ['Noticias_642x50', 'contInfo ancho']}),
        dict(name='ul', attrs={'class': ['navComentarios', 'comentarios']}),
        dict(name='div', attrs={
            'id': ['commentsContext', 'toolbar', 'comentarios']}),
        dict(name='h5', attrs={'id': 'comentarios'}),
    ]

    feeds = [
        (u'Internacional', u'http://www.publico.es/estaticos/rss/internacional'),
        (u'Espa\xf1a', u'http://www.publico.es/estaticos/rss/espana'),
        (u'Dinero', u'http://www.publico.es/estaticos/rss/dinero'),
        (u'Ciencias', u'http://www.publico.es/estaticos/rss/ciencias'),
        (u'Culturas', u'http://www.publico.es/estaticos/rss/culturas'),
        (u'Deportes', u'http://www.publico.es/estaticos/rss/deportes'),
        (u'Televisi\xf3n y Gente',
         u'http://www.publico.es/estaticos/rss/televisionygente'),
        (u'Catalu\xf1a', u'http://www.publico.es/estaticos/rss/catalunya'),
        (u'Viajes', u'http://www.publico.es/estaticos/rss/viajes'),
    ]
|
|
@ -1,51 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ColombiaElTiempo02(BasicNewsRecipe):
    """Recipe for the newspaper El Tiempo (es_CO), one RSS feed per section."""

    # --- Identification -------------------------------------------------
    __author__ = 'BIGO-CAVA'
    title = u'Periódico el Tiempo'
    language = 'es_CO'
    publication_type = 'newspaper'
    # The footer logo serves as both cover and masthead.
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'

    # --- Download policy ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Clean-up -------------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [{'name': 'div', 'id': 'contenidoArt'}]
    remove_tags_before = {'id': 'contenidoArt'}
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'modulo reporte'}}]

    # Sharing widgets and tool boxes embedded in the article container.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social-media'}},
        {'name': 'div', 'attrs': {'class': 'recomend-art'}},
        {'name': 'div', 'attrs': {'class': 'caja-facebook'}},
        {'name': 'div', 'attrs': {'class': 'caja-twitter'}},
        {'name': 'div', 'attrs': {'class': 'caja-buzz'}},
        {'name': 'div', 'attrs': {'class': 'ico-mail2'}},
        {'name': 'div', 'attrs': {'id': 'caja-instapaper'}},
        {'name': 'div', 'attrs': {'class': 'modulo herramientas'}},
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]
|
|
@ -1,49 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2016, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
eluniversal.com.mx
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElUniversal(BasicNewsRecipe):
    """Recipe for El Universal (es_MX), using auto-cleanup on each article."""

    # --- Identification -------------------------------------------------
    __author__ = 'Darko Miletic'
    title = 'El Universal'
    publisher = 'El Universal'
    description = (
        'Sitio líder de noticias minuto x minuto de México y el mundo,'
        ' con información sobre política, ciudad;'
        ' videos, interactividad, opinión, blogs'
    )
    category = 'news, politics, Mexico'
    publication_type = 'newspaper'
    language = 'es_MX'

    # --- Download policy ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}

    # --- Clean-up -------------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True
    # XPath union of elements to keep through auto-cleanup: the summary
    # field, the date and time blocks, and the source field.
    auto_cleanup_keep = (
        "//div[contains(concat(' ', normalize-space(@class), ' '), ' field-name-field-resumen ')]"
        " | //div[@class='fechap']"
        " | //div[@class='hora']"
        " | //div[contains(concat(' ', normalize-space(@class), ' '), ' field-name-field-fuente ')]"
    )

    extra_css = '''
        body{font-family: Roboto, sans-serif}
        .h1{font-family: "Duplicate Ionic Bold", serif}
        .field-name-field-resumen{font-family: "Duplicate Ionic Light", serif; display: block; font-size: large;}
    '''

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Nacion', u'http://www.eluniversal.com.mx/seccion/1/rss.xml'),
        (u'Mundo', u'http://www.eluniversal.com.mx/seccion/5/rss.xml'),
        (u'Metropoli', u'http://www.eluniversal.com.mx/seccion/6/rss.xml'),
        (u'Estados', u'http://www.eluniversal.com.mx/seccion/13/rss.xml'),
        (u'Cartera', u'http://www.eluniversal.com.mx/seccion/14/rss.xml'),
        (u'Deportes', u'http://www.eluniversal.com.mx/seccion/15/rss.xml'),
        (u'Espectaculos', u'http://www.eluniversal.com.mx/seccion/133/rss.xml'),
        (u'Cultura', u'http://www.eluniversal.com.mx/seccion/17/rss.xml'),
        (u'Ciencia y salud', u'http://www.eluniversal.com.mx/seccion/16/rss.xml'),
        (u'Techbit', u'http://www.eluniversal.com.mx/seccion/5782/rss.xml'),
        (u'Periodismo de investigacion',
         u'http://www.eluniversal.com.mx/seccion/11363/rss.xml'),
    ]
|
|
||||||
|
|
@ -1,58 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.clubdelebook.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElClubDelEbook(BasicNewsRecipe):
    """Recipe for the blog El club del ebook (es_AR), read from its feed.

    Article bodies come from the feed entries themselves
    (``use_embedded_content = True``); ``preprocess_html`` then flattens
    links and normalises images.
    """

    title = 'El club del ebook'
    __author__ = 'Darko Miletic'
    description = 'El Club del eBook, es la primera fuente de informacion sobre ebooks de Argentina. Aca vas a encontrar noticias, tips, tutoriales, recursos y opiniones sobre el mundo de los libros electronicos.'  # noqa
    tags = 'ebook, libro electronico, e-book, ebooks, libros electronicos, e-books'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'es_AR'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    masthead_url = 'http://dl.dropbox.com/u/2845131/elclubdelebook.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        img{ margin-bottom: 0.8em;
             border: 1px solid #333333;
             padding: 4px; display: block
        }
    """

    # The key is 'comments' (plural), as used by the other recipes; the
    # previous 'comment' key does not name a conversion option.
    conversion_options = {
        'comments': description, 'tags': tags, 'publisher': title, 'language': language
    }

    remove_tags = [dict(attrs={'id': 'crp_related'})]
    remove_tags_after = dict(attrs={'id': 'crp_related'})

    feeds = [(u'Articulos', u'http://feeds.feedburner.com/ElClubDelEbook')]

    def preprocess_html(self, soup):
        """Flatten links and tidy images in *soup*, then return it.

        * every inline ``style`` attribute is removed;
        * an ``<a>`` holding a single string is replaced by that string;
        * an ``<a>`` wrapping an image becomes an attribute-less ``<div>``;
        * any other ``<a>`` is replaced by its text content;
        * images without an ``alt`` attribute get ``alt="image"``.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # Local renamed from `str`, which shadowed the builtin.
                text = item.string
                item.replaceWith(text)
            elif limg:
                item.name = 'div'
                # Tag attributes are a mapping in Beautiful Soup 4; the old
                # `[]` left the tag with an attrs object lacking dict methods.
                item.attrs = {}
            else:
                text = self.tag_to_string(item)
                item.replaceWith(text)
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup
|
|
@ -1,35 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
elcomercio.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElComercio(BasicNewsRecipe):
    """Recipe for El Comercio (es_EC), read from its headlines feed.

    Article bodies come from the feed entries themselves
    (``use_embedded_content = True``).
    """

    title = 'El Comercio '
    __author__ = 'Darko Miletic'
    # Was a copy-pasted Gizmodo blurb unrelated to this publication.
    description = 'News from Ecuador'
    publisher = 'GRUPO EL COMERCIO C.A.'
    category = 'news, Ecuador, politics'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'es_EC'
    masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

    # The key is 'comments' (plural), as used by the other recipes; the
    # previous 'comment' key does not name a conversion option.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_attributes = ['width', 'height']

    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``."""
        return self.adeify_images(soup)
|
|
@ -1,56 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
ele.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Ele(BasicNewsRecipe):
    """Recipe for Ele (ro), read from the site's "must read" feed."""

    # --- Identification -------------------------------------------------
    __author__ = u'Silviu Cotoar\u0103'
    title = u'Ele'
    publisher = u'Ele'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    category = 'Ziare,Femei'
    language = 'ro'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # --- Download policy ------------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Only the article title and the article text are kept.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'article_title'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    feeds = [(u'Feeds', u'http://www.ele.ro/rss_must_read')]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``."""
        cleaned = self.adeify_images(soup)
        return cleaned
|
|
@ -1,61 +0,0 @@
|
|||||||
##########################################################################
|
|
||||||
# Description: http://es.hu/ RSS channel
|
|
||||||
# Author: Bigpapa (bigpapabig@hotmail.com)
|
|
||||||
# Date: 2012.01.20. - V1.2
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class elet_es_irodalom(BasicNewsRecipe):
    """Recipe for Élet és Irodalom (hu), read through feed43.com feeds.

    A subscription is optional; when credentials are supplied the browser
    logs in on www.es.hu before downloading.
    """

    # --- Identification -------------------------------------------------
    __author__ = 'Bigpapa'
    title = u'\u00c9let \u00e9s Irodalom'
    category = 'Cikkek'
    language = 'hu'
    publication_type = 'newsportal'
    masthead_url = 'http://www.es.hu/images/logo.jpg'
    timefmt = ' [%Y %b %d, %a]'

    # --- Download policy ------------------------------------------------
    oldest_article = 7
    # Maximum number of articles per feed stored in the resulting e-book.
    max_articles_per_feed = 30
    encoding = 'iso-8859-2'
    use_embedded_content = False
    needs_subscription = 'optional'

    # --- Clean-up -------------------------------------------------------
    no_stylesheets = True
    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '

    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['doc_author', 'doc_title', 'doc']}},
    ]

    remove_tags = [
        {'name': 'a', 'attrs': {'target': ['_TOP']}},
        {
            'name': 'div',
            'attrs': {
                'style': [
                    'float: right; margin-left: 5px; margin-bottom: 5px;',
                    'float: right; margin-left: 5px; margin-bottom: 5px;',
                ],
            },
        },
    ]

    # --- Sections -------------------------------------------------------
    feeds = [
        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml'),
    ]

    # Do not hard-code the access credentials here; they are supplied at
    # download time.
    def get_browser(self):
        """Return a browser, logged in on www.es.hu when credentials are set.

        The login form is submitted only if both ``self.username`` and
        ``self.password`` are not None.
        """
        browser = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            return browser
        browser.open('http://www.es.hu/')
        browser.select_form(name='userfrmlogin')
        browser['cusername'] = self.username
        browser['cpassword'] = self.password
        browser.submit()
        return browser
|
|
@ -1,90 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010-2014, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
elpais.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElPais_RSS(BasicNewsRecipe):
    """Recipe for El País (elpais.com), assembled from its RSS feeds."""

    title = u'El País'
    __author__ = 'Darko Miletic'
    description = u'Noticias de última hora sobre la actualidad en España y el mundo: política, economía, deportes, cultura, sociedad, tecnología, gente, opinión, viajes, moda, televisión, los blogs y las firmas de EL PAÍS. Además especiales, vídeos, fotos, audios, gráficos, entrevistas, promociones y todos los servicios de EL PAÍS.'  # noqa
    publisher = 'EDICIONES EL PAIS, S.L.'
    category = 'news, politics, finances, world, spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://ep01.epimg.net/iconos/v1.x/v1.0/logos/cabecera_portada.png'
    # The date fields are expanded by strftime() when the class body runs.
    cover_url = strftime(
        'http://srv00.epimg.net/pdf/elpais/1aPagina/%Y/%m/ep-%Y%m%d.pdf')
    extra_css = """
        h1{font-family: Georgia,"Times New Roman",Times,serif }
        #subtitulo_noticia, .firma, .figcaption{font-size: small}
        body{font-family: Arial,Helvetica,Garuda,sans-serif}
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [
        dict(attrs={'id': ['titulo_noticia', 'subtitulo_noticia']}),
        dict(attrs={'class': ['firma', 'columna_texto', 'entrevista_p_r']}),
    ]
    remove_tags = [
        dict(name=['iframe', 'embed', 'object']),
        dict(attrs={'class': 'disposicion_vertical'}),
    ]

    feeds = [
        (u'Lo ultimo', u'http://ep00.epimg.net/rss/tags/ultimas_noticias.xml'),
        (u'America Latina', u'http://elpais.com/tag/rss/latinoamerica/a/'),
        (u'Mexico', u'http://elpais.com/tag/rss/mexico/a/'),
        (u'Europa', u'http://elpais.com/tag/rss/europa/a/'),
        (u'Estados Unidos', u'http://elpais.com/tag/rss/estados_unidos/a/'),
        (u'Oriente proximo', u'http://elpais.com/tag/rss/oriente_proximo/a/'),
        (u'Andalucia', u'http://ep00.epimg.net/rss/ccaa/andalucia.xml'),
        (u'Catalunia', u'http://ep00.epimg.net/rss/ccaa/catalunya.xml'),
        (u'Comunidad Valenciana', u'http://ep00.epimg.net/rss/ccaa/valencia.xml'),
        (u'Madrid', u'http://ep00.epimg.net/rss/ccaa/madrid.xml'),
        (u'Pais Vasco', u'http://ep00.epimg.net/rss/ccaa/paisvasco.xml'),
        (u'Galicia', u'http://ep00.epimg.net/rss/ccaa/galicia.xml'),
        (u'Sociedad', u'http://ep00.epimg.net/rss/sociedad/portada.xml'),
        (u'Deportes', u'http://ep00.epimg.net/rss/deportes/portada.xml'),
        (u'Cultura', u'http://ep00.epimg.net/rss/cultura/portada.xml'),
        (u'Cine', u'http://elpais.com/tag/rss/cine/a/'),
        (u'Economía', u'http://elpais.com/tag/rss/economia/a/'),
        (u'Literatura', u'http://elpais.com/tag/rss/libros/a/'),
        (u'Musica', u'http://elpais.com/tag/rss/musica/a/'),
        (u'Arte', u'http://elpais.com/tag/rss/arte/a/'),
        (u'Medio Ambiente', u'http://elpais.com/tag/rss/medio_ambiente/a/'),
        (u'Tecnologia', u'http://ep01.epimg.net/rss/tecnologia/portada.xml'),
        (u'Ciencia', u'http://ep00.epimg.net/rss/tags/c_ciencia.xml'),
        (u'Salud', u'http://elpais.com/tag/rss/salud/a/'),
        (u'Ocio', u'http://elpais.com/tag/rss/ocio/a/'),
        (u'Justicia y Leyes', u'http://elpais.com/tag/rss/justicia/a/'),
        (u'Guerras y conflictos', u'http://elpais.com/tag/rss/conflictos/a/'),
        (u'Politica', u'http://ep00.epimg.net/rss/politica/portada.xml'),
        (u'Opinion', u'http://ep01.epimg.net/rss/elpais/opinion.xml'),
    ]

    def get_article_url(self, article):
        """Return the article URL, or None for photo albums and match pages."""
        url = BasicNewsRecipe.get_article_url(self, article)
        if url and ('/album/' not in url and '/futbol/partido/' not in url):
            return url
        self.log('Skipping non-article', url)
        return None

    def preprocess_raw_html(self, raw, url):
        """Replace everything up to ``</head>`` with a minimal head stub.

        When the page has no ``</head>`` the markup is returned unchanged;
        slicing from ``find()``'s -1 would otherwise keep only the final
        character of the document.
        """
        head_end = raw.find('</head>')
        if head_end < 0:
            return raw
        return '<html><head><title>Untitled</title>' + raw[head_end:]
|
|
@ -1,64 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
elsevier.nl
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
    """Recipe for Elsevier.nl, fetching the print version of each feed item."""

    title = 'Elsevier.nl'
    __author__ = 'Darko Miletic'
    description = 'News from Holland'
    publisher = 'elsevier.nl'
    category = 'news, politics, Holland'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'nl'
    country = 'NL'
    remove_empty_feeds = True
    masthead_url = 'http://www.elsevier.nl/static/elsevier/stdimg/logo.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = dict(attrs={'id': 'artikel_container'})
    remove_tags_before = dict(attrs={'id': 'breadcrumb_container'})
    remove_tags_after = dict(attrs={'class': 'author_link'})
    remove_tags = [
        dict(attrs={'id': 'breadcrumb_container'}),
        dict(name='div', attrs={'class': 'pullout_vak'}),
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Laatste nieuws', u'http://www.elsevier.nl/web/RSS/Homepage-RSS.htm?output=xml'),
        (u'Nederland', u'http://www.elsevier.nl/web/RSS/Nederland-RSS.htm?output=xml'),
        (u'Politiek', u'http://www.elsevier.nl/web/RSS/Politiek-RSS.htm?output=xml'),
        (u'Europese Unie', u'http://www.elsevier.nl/web/RSS/Europese-Unie-RSS.htm?output=xml'),
        (u'Buitenland', u'http://www.elsevier.nl/web/RSS/Buitenland-RSS.htm?output=xml'),
        (u'Economie', u'http://www.elsevier.nl/web/RSS/Economie-RSS.htm?output=xml'),
        (u'Wetenschap', u'http://www.elsevier.nl/web/RSS/Wetenschap-RSS.htm?output=xml'),
        (u'Cultuur & Televisie', u'http://www.elsevier.nl/web/RSS/Cultuur-Televisie-RSS.htm?output=xml'),
        (u'Society', u'http://www.elsevier.nl/web/RSS/Society-RSS.htm?output=xml'),
        (u'Internet&/Gadgets', u'http://www.elsevier.nl/web/RSS/Internet-Gadgets-RSS.htm?output=xml'),
        (u'Comentaren', u'http://www.elsevier.nl/web/RSS/Commentaren-RSS.htm?output=xml'),
    ]

    def print_version(self, url):
        """Return the URL with ``?print=true`` appended."""
        return url + '?print=true'

    def get_article_url(self, article):
        """Return the entry's guid with its trailing query string removed.

        Returns None when the entry has no guid. A guid without a ``?`` is
        returned unchanged, since ``rpartition('?')[0]`` would be empty.
        """
        guid = article.get('guid', None)
        if not guid:
            return None
        base, separator, _query = guid.rpartition('?')
        return base if separator else guid

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes from every tag and return soup."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
|
@ -1,54 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.tiempo.hn
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* with *attrs* (pairs) for *soup*.

    Uses the soup's own ``new_tag`` method when it has one, otherwise
    constructs a ``Tag`` directly.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
|
||||||
|
|
||||||
|
|
||||||
class ElTiempoHn(BasicNewsRecipe):
    """Recipe for El Tiempo (Honduras), read from the tiempo.hn RSS feed."""

    title = 'El Tiempo - Honduras'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Honduras y mundo'
    publisher = 'El Tiempo'
    category = 'news, politics, Honduras'

    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    language = 'es_HN'

    # Values written onto the <html> element and the language <meta> tag.
    lang = 'es-HN'
    direction = 'ltr'

    remove_tags = [dict(name=['form', 'object', 'embed', 'base'])]
    keep_only_tags = [dict(name='td', attrs={'id': 'mainbodycont'})]

    feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]

    def preprocess_html(self, soup):
        """Set language/direction, add meta tags, and drop inline styles."""
        soup.html['lang'] = self.lang
        soup.html['dir'] = self.direction
        head_metas = (
            ("Content-Language", self.lang),
            ("Content-Type", "text/html; charset=utf-8"),
        )
        metas = [
            new_tag(soup, 'meta', [("http-equiv", equiv), ("content", content)])
            for equiv, content in head_metas
        ]
        for position, meta in enumerate(metas):
            soup.head.insert(position, meta)
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
|
|
@ -1,61 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.eluniversal.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElUniversal(BasicNewsRecipe):
    """Recipe for El Universal (Venezuela), using the print view of articles."""

    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Venezuela y el mundo. Avances informativos de ultimo minuto. Incluye secciones de politica, deportes, economia y mas.'
    publisher = 'El Universal'
    category = 'news, Caracas, Venezuela, world'
    language = 'es_VE'
    publication_type = 'newspaper'
    encoding = 'cp1252'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    masthead_url = 'http://cdn.eluniversal.com/images/eu4/back/logo-eluniversal.gif'
    cover_url = 'http://images.eluniversal.com//pdf/primeraPlana.pdf'
    extra_css = """
        .txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
        .txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
        .txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
        .txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
    """
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    remove_tags_before = dict(attrs={'class': 'header-print MB10'})
    remove_tags_after = dict(attrs={'id': 'SizeText'})
    remove_tags = [
        dict(name=['object', 'link', 'script', 'iframe', 'meta']),
        dict(attrs={'class': 'header-print MB10'}),
    ]

    # Every feed lives at http://www.eluniversal.com/rss/<slug>.xml
    feeds = [
        (section, 'http://www.eluniversal.com/rss/' + slug + '.xml')
        for section, slug in (
            (u'Ultimas Noticias', u'avances'),
            (u'Economia', u'eco_avances'),
            (u'Internacionales', u'int_avances'),
            (u'Deportes', u'dep_avances'),
            (u'Cultura', u'cul_avances'),
            (u'Nacional y politica', u'pol_avances'),
            (u'Ciencia y tecnologia', u'cyt_avances'),
            (u'Universo empresarial', u'uni_avances'),
            (u'Caracas', u'ccs_avances'),
        )
    ]

    def print_version(self, url):
        """Return the URL with the ``-imp`` suffix appended."""
        printable = url + '-imp'
        return printable

    def get_article_url(self, article):
        """Return the feed entry's ``guid`` (None when absent)."""
        guid = article.get('guid', None)
        return guid
|
|
@ -1,61 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
eluniverso.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ElUniverso_Ecuador(BasicNewsRecipe):
    """Recipe for El Universo (Ecuador), read from its section RSS feeds."""

    title = 'El Universo - Ecuador'
    __author__ = 'Darko Miletic'
    description = 'Noticias del Ecuador y el resto del mundo'
    publisher = 'El Universo'
    category = 'news, politics, Ecuador'
    language = 'es_EC'
    publication_type = 'newspaper'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif; color: #333333 }
        h2{font-family: Georgia,"Times New Roman",Times,serif; color: #1B2D60}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(attrs={'class': ['flechs', 'multiBox', 'colRecursos']}),
        dict(name=['meta', 'link', 'embed', 'object', 'iframe', 'base']),
    ]
    keep_only_tags = [dict(attrs={'class': 'Nota'})]
    remove_tags_after = dict(attrs={'id': 'TextoPrint'})
    remove_tags_before = dict(attrs={'id': 'FechaPrint'})

    # Every feed lives at http://www.eluniverso.com/rss/<slug>.xml
    feeds = [
        (section, 'http://www.eluniverso.com/rss/' + slug + '.xml')
        for section, slug in (
            (u'Portada', u'portada'),
            (u'Politica', u'politica'),
            (u'Economia', u'economia'),
            (u'Sucesos', u'sucesos'),
            (u'Migracion', u'migrantes_tema'),
            (u'El Pais', u'elpais'),
            (u'Internacionales', u'internacionales'),
            (u'Deportes', u'deportes'),
            (u'Gran Guayaquill', u'gran_guayaquil'),
            (u'Entretenimiento', u'arteyespectaculos'),
            (u'Vida', u'tuvida'),
            (u'Opinion', u'opinion'),
        )
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
|
@ -1,43 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
emg.rs/en/news
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class emportal_en(BasicNewsRecipe):
    """Recipe for Ekonom:east News (emg.rs), using each article's print page."""

    title = 'Ekonom:east News'
    __author__ = 'Darko Miletic'
    description = 'Daily business news from Serbia.'
    publisher = 'Ekonom:east Media Group'
    category = 'Business, SEE, Serbia, Belgrade, news, Ekonomist, EMportal'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    masthead_url = 'http://www.emg.rs/img/emportal-rss.png'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [dict(attrs={'class': ['text-share']})]
    keep_only_tags = [dict(attrs={'class': 'text'})]
    remove_tags_after = dict(attrs={'class': 'text-share'})
    remove_attributes = ['width', 'height']

    feeds = [(u'Serbia', u'http://www.emg.rs/en/news/serbia/rss.xml')]

    def print_version(self, url):
        """Return the URL with ``.html`` replaced by ``.print.html``."""
        printable = url.replace('.html', '.print.html')
        return printable

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
|
@ -1,64 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1341650280(BasicNewsRecipe):
    """Recipe for Empire Magazine, fed from four feed43.com feeds."""

    title = u'Empire Magazine'
    description = 'Author D.Asbury. Film articles from Empire Mag. '
    language = 'en'
    __author__ = 'Dave Asbury'
    # last updated 7/7/12
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    max_articles_per_feed = 20
    cover_url = 'http://www.empireonline.com/images/magazine/cover.jpg'
    conversion_options = {
        'linearize_tables': True,
    }

    # Each (pattern, replacement) pair is compiled case-insensitively with
    # DOTALL, so ``.*?`` may span lines. The replacement is a constant string.
    preprocess_regexps = [
        (
            re.compile(pattern, re.IGNORECASE | re.DOTALL),
            lambda match, _text=replacement: _text,
        )
        for pattern, replacement in (
            (r'<a href="http://twitter.com/share.*?</a>', ''),
            (r'<head>.*?<!-- CONTENT: START -->',
             '<head></head><!-- CONTENT: START -->'),
            (r'<!-- LATEST NEWS HEADLINES: START -->.*?<!-- LATEST NEWS HEADLINES: END -->',
             '<!-- LATEST NEWS HEADLINES: START --><!-- LATEST NEWS HEADLINES: END -->'),
            (r'<!-- RELATED FUTURE FILMS: START -->.*?<!-- RELATED FUTURE FILMS: END -->',
             '<!-- RELATED FUTURE FILMS: START --><!-- RELATED FUTURE FILMS: END -->'),
            (r'<!-- CURRENT HIGHLIGHTS: START-->.*?<!-- CURRENT HIGHLIGHTS: END -->',
             '<!-- CURRENT HIGHLIGHTS: START--><!-- CURRENT HIGHLIGHTS: END -->'),
            (r'<!-- RELATED REVIEWS: START -->.*?<!-- RELATED REVIEWS: END -->',
             '<!-- RELATED REVIEWS: START --><!-- RELATED REVIEWS: END -->'),
            (r'<!-- RELATED INTERVIEWS -->.*?<!-- RELATED REVIEWS: END -->',
             '<!-- RELATED INTERVIEWS --><!-- RELATED REVIEWS: END -->'),
            (r'<!-- CONTENT: END -->.*?</body>',
             '<!-- CONTENT: END --></body>'),
            (r'<!-- STORY: END -->.*?</body>',
             '<!-- STORY: END --></body>'),
            (r'<!-- RATINGS GUIDE: START-->.*?<!-- RATINGS GUIDE: END-->',
             '<!-- RATINGS GUIDE: START--><!-- RATINGS GUIDE: END-->'),
            (r'<strong>SUBSCRIBE TO EMPIRE</strong>.*?</tbody>', '</tbody>'),
            (r'<!-- USER REVIEWS: START -->.*?<!-- USER REVIEWS: END -->',
             '<!-- USER REVIEWS: START --><!-- USER REVIEWS: END -->'),
            (r'Advertisement', ''),
            (r'<a name="haveyoursay".*?now to have your say.', ''),
        )
    ]

    # Left empty: the h1 / 'mediumblack' filters were commented out upstream.
    keep_only_tags = []

    remove_tags = [
        dict(name='td', attrs={'width': '200', 'valign': 'top'}),
        dict(name='b'),
        dict(name='a', attrs={'name': 'haveyoursay'}),
        dict(attrs={'class': 'newslink'}),
    ]

    feeds = [
        (u'News', u'http://feed43.com/7338478755673147.xml'),
        (u'Recent Features', u'http://feed43.com/4346347750304760.xml'),
        (u'Interviews', u'http://feed43.com/3418350077724081.xml'),
        (u'Film Reviews', u'http://feed43.com/2643703076510627.xml'),
    ]
|
|
@ -1,39 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
|
||||||
'''
|
|
||||||
japan.engadget.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EndgadgetJapan(BasicNewsRecipe):
    """Recipe for Engadget Japan, scraping the front page instead of RSS."""

    title = u'Endgadget\u65e5\u672c\u7248'
    language = 'ja'
    __author__ = 'Hiroshi Miura'
    cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
    masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    index = 'http://japanese.engadget.com/'
    remove_javascript = True

    remove_tags_before = dict(name="header", attrs={'class': "header"})
    remove_tags_after = dict(name='div', attrs={'class': 'post-meta'})

    def parse_index(self):
        """Build a single 'Latest Posts' feed from the index page.

        Each ``<header class="post-header">`` supplies one article: the link
        inside its ``<h2>`` gives the title and URL, and its
        ``<span class="time">`` gives the date. Headers with no linked
        heading are skipped, and a missing time span yields an empty date,
        so one malformed post no longer aborts the whole download.

        Returns a list holding one ``(feed title, articles)`` tuple.
        """
        newsarticles = []
        soup = self.index_to_soup(self.index)
        for post in soup.findAll('header', attrs={'class': 'post-header'}):
            heading = post.find('h2')
            link = heading.find('a', href=True) if heading is not None else None
            if link is None:
                continue
            stamp = post.find('span', attrs={'class': 'time'})
            newsarticles.append({
                'title': self.tag_to_string(link),
                'date': self.tag_to_string(stamp) if stamp is not None else '',
                'url': link['href'],
                'description': '',
            })
        return [('Latest Posts', newsarticles)]
|
|
@ -1,66 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EOSWetenschap(BasicNewsRecipe):
    """Recipe for EOS Wetenschap, read from the site's per-topic RSS feeds."""

    title = u'EOS Wetenschap'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'Wetenschapsnieuws'
    language = 'nl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name=tag_name) for tag_name in ('title', 'h1', 'img', 'p')
    ]

    remove_tags = []

    feeds = [
        (u'Alle nieuwsberichten', u'http://eoswetenschap.eu/rss/artikels/all'),
        (u'Gezondheid', u'http://eoswetenschap.eu/rss/artikels/Gezondheid'),
        # NOTE(review): unlike its neighbours this URL has no '/rss/' segment -- confirm.
        (u'Geneeskunde', u'http://eoswetenschap.eu/artikels/Geneeskunde'),
        (u'Voeding', u'http://eoswetenschap.eu/rss/artikels/Voeding'),
        (u'Sport', u'http://eoswetenschap.eu/rss/artikels/Sport'),
        (u'Natuur & Milieu', u'http://eoswetenschap.eu/rss/artikels/Natuur%20en%20Milieu'),
        (u'Energie', u'http://eoswetenschap.eu/rss/artikels/Energie'),
        (u'Klimaatverandering', u'http://eoswetenschap.eu/rss/artikels/Klimaatverandering'),
        (u'Natuur', u'http://eoswetenschap.eu/rss/artikels/Natuur'),
        (u'Gedrag', u'http://eoswetenschap.eu/rss/artikels/Gedrag'),
        (u'Psychologie', u'http://eoswetenschap.eu/rss/artikels/Psychologie'),
        (u'Hersenwetenschap', u'http://eoswetenschap.eu/rss/artikels/Hersenwetenschap'),
        (u'Sociologie', u'http://eoswetenschap.eu/rss/artikels/Sociologie'),
        (u'Fundamenteel onderzoek', u'http://eoswetenschap.eu/rss/artikels/Onderzoek'),
        (u'Natuur- en wiskunde', u'http://eoswetenschap.eu/rss/artikels/Natuur-%20en%20wiskunde'),
        (u'Genetica', u'http://eoswetenschap.eu/rss/artikels/Genetica'),
        (u'Chemie', u'http://eoswetenschap.eu/rss/artikels/Chemie'),
        (u'Technologie', u'http://eoswetenschap.eu/rss/artikels/Technologie'),
        (u'Biotechnologie', u'http://eoswetenschap.eu/rss/artikels/Biotechnologie'),
        (u'Nanotechnologie', u'http://eoswetenschap.eu/rss/artikels/Nanotechnologie'),
        (u'ICT', u'http://eoswetenschap.eu/rss/artikels/Internet'),
        # NOTE(review): no '/rss/' segment here either -- confirm.
        (u'Mobiliteit', u'http://eoswetenschap.eu/artikels/Mobiliteit'),
        (u'Geschiedenis', u'http://eoswetenschap.eu/rss/artikels/Historisch'),
        (u'Archeologie- en paleontologie', u'http://eoswetenschap.eu/rss/artikels/Archeologie_Paleontologie'),
        (u'Moderne geschiedenis', u'http://eoswetenschap.eu/rss/artikels/Moderne_geschiedenis'),
        (u'Ruimte', u'http://eoswetenschap.eu/rss/artikels/Ruimte'),
        (u'Ruimtevaart', u'http://eoswetenschap.eu/rss/artikels/ruimtevaart'),
        (u'Kosmologie', u'http://eoswetenschap.eu/rss/artikels/Kosmologie'),
    ]
|
|
@ -1,26 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Erhvervs•Avisen: RSS feed: Seneste nyt - erhvervsavisen.dk
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class Erhvervsavisen_dk(BasicNewsRecipe):
    """Recipe for Erhvervs Avisen (erhvervsavisen.dk), one 'latest news' feed."""

    title = 'Erhvervs Avisen'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale, regionale nyheder, bolig, motor og job i Køge, Greve, Ringsted og Stevns på erhvervsavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        (
            'Seneste nyt fra Erhvervs Avisen',
            'http://erhvervsavisen.dk/section/senestenytrss',
        ),
    ]
|
|
@ -1,32 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Lokalavisen Esbjerg: RSS feed: Seneste nyt - esbjerg.lokalavisen.dk
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class EsbjergLokalavisen_dk(BasicNewsRecipe):
    """Recipe for Lokalavisen Esbjerg (esbjerg.lokalavisen.dk) section feeds."""

    title = 'Lokalavisen Esbjerg'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Esbjerg og omegn på esbjerg.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Esbjerg',
         'http://esbjerg.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
|
||||||
|
|
@ -1,127 +0,0 @@
|
|||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.utils.magick import Image, PixelWand
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Return a new *name* tag carrying *attrs*, a sequence of pairs.

    A soup exposing ``new_tag`` builds the tag itself; otherwise the tag is
    created through the ``Tag`` constructor.
    """
    make = getattr(soup, 'new_tag', None)
    if make is None:
        return Tag(soup, name, attrs=attrs or None)
    attr_map = dict(attrs)
    return make(name, attrs=attr_map)
|
|
||||||
|
|
||||||
|
|
||||||
class Estadao(BasicNewsRecipe):
|
|
||||||
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
|
|
||||||
LANGUAGE = 'pt_br'
|
|
||||||
language = 'pt'
|
|
||||||
LANGHTM = 'pt-br'
|
|
||||||
ENCODING = 'utf'
|
|
||||||
ENCHTM = 'utf-8'
|
|
||||||
directionhtm = 'ltr'
|
|
||||||
requires_version = (0, 7, 47)
|
|
||||||
news = True
|
|
||||||
|
|
||||||
title = u'Estad\xe3o'
|
|
||||||
__author__ = 'Euler Alves'
|
|
||||||
description = u'Brazilian news from Estad\xe3o'
|
|
||||||
publisher = u'Estad\xe3o'
|
|
||||||
category = 'news, rss'
|
|
||||||
|
|
||||||
oldest_article = 4
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
summary_length = 1000
|
|
||||||
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_empty_feeds = True
|
|
||||||
timefmt = ' [%d %b %Y (%a)]'
|
|
||||||
|
|
||||||
hoje = datetime.now() - timedelta(days=2)
|
|
||||||
pubdate = hoje.strftime('%a, %d %b')
|
|
||||||
if hoje.hour < 10:
|
|
||||||
hoje = hoje - timedelta(days=1)
|
|
||||||
CAPA = 'http://www.estadao.com.br/estadaodehoje/' + \
|
|
||||||
hoje.strftime('%Y%m%d') + '/img/capadodia.jpg'
|
|
||||||
SCREENSHOT = 'http://estadao.com.br/'
|
|
||||||
cover_margins = (0, 0, 'white')
|
|
||||||
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['bb-md-noticia', 'corpo']})]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div',
|
|
||||||
attrs={'id': [
|
|
||||||
'bb-md-noticia-tabs'
|
|
||||||
]}), dict(name='div',
|
|
||||||
attrs={'class': [
|
|
||||||
'tags', 'discussion', 'bb-gg adsense_container'
|
|
||||||
]}), dict(name='a'), dict(name='iframe'), dict(name='link'), dict(name='script')
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
|
|
||||||
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml'),
|
|
||||||
(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml'),
|
|
||||||
(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml'),
|
|
||||||
(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml'),
|
|
||||||
(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/'),
|
|
||||||
(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml'),
|
|
||||||
(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml'),
|
|
||||||
(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml'),
|
|
||||||
(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'title': title, 'comments': description, 'publisher': publisher, 'tags': category, 'language': LANGUAGE, 'linearize_tables': True
|
|
||||||
}
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
if not soup.find(attrs={'http-equiv': 'Content-Language'}):
|
|
||||||
meta0 = new_tag(soup, 'meta', [
|
|
||||||
("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
|
|
||||||
soup.head.insert(0, meta0)
|
|
||||||
if not soup.find(attrs={'http-equiv': 'Content-Type'}):
|
|
||||||
meta1 = new_tag(soup, 'meta', [
|
|
||||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)])
|
|
||||||
soup.head.insert(0, meta1)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
|
||||||
# process all the images. assumes that the new html has the correct
|
|
||||||
# path
|
|
||||||
for tag in soup.findAll('img', src=True):
|
|
||||||
iurl = tag['src']
|
|
||||||
img = Image()
|
|
||||||
img.open(iurl)
|
|
||||||
width, height = img.size
|
|
||||||
print('img is: ', iurl, 'width is: ', width, 'height is: ', height)
|
|
||||||
if img < 0:
|
|
||||||
raise RuntimeError('Out of memory')
|
|
||||||
pw = PixelWand()
|
|
||||||
if(width > height and width > 590):
|
|
||||||
print('Rotate image')
|
|
||||||
img.rotate(pw, -90)
|
|
||||||
img.save(iurl)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def get_cover_url(self):
    """Return the cover URL, or ``None`` when no Thumbalizr key is set.

    With a key configured, ``self.CAPA`` is fetched through
    ``index_to_soup``.  If the fetched page has a ``body`` element, or if
    anything in that step raises, the Thumbalizr screenshot URL for
    ``self.SCREENSHOT`` is returned; otherwise ``self.CAPA`` itself is.
    """
    if not self.THUMBALIZR_API:
        return None

    def screenshot_url():
        # Built lazily so the attributes are read only when this URL is used.
        return 'http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API + \
            '&url=' + self.SCREENSHOT + '&width=600&quality=90'

    cover_url = self.CAPA
    try:
        page = self.index_to_soup(cover_url)
        if page.find('body'):
            cover_url = screenshot_url()
    except Exception:
        cover_url = screenshot_url()
    return cover_url
|
|
@ -1,60 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
LANGUAGE = 'de'


def feedlink(num, lang=None):
    """Return the europa.eu "QuickRSSAction" feed URL for feed id ``num``.

    ``num`` is converted with ``str()`` and used as the ``id`` query
    parameter.  ``lang`` is the value of the ``lang`` query parameter; when
    it is omitted the module-level ``LANGUAGE`` is used, which is the
    behaviour existing one-argument callers rely on.
    """
    if lang is None:
        lang = LANGUAGE
    return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=' +\
        str(num) + '&lang=' + lang
|
|
||||||
|
|
||||||
|
|
||||||
class EUCommissionPress(BasicNewsRecipe):
    """Recipe with one feed per policy area, each URL built by ``feedlink``."""

    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    __author__ = 'malfi'
    language = LANGUAGE
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'class': 'pressReleaseContentMain'}),
    ]
    remove_tags = []

    # (feed title, numeric feed id) pairs; the id is turned into a URL by
    # feedlink().  Titles are kept verbatim, trailing spaces included.
    feeds = [
        (section, feedlink(feed_id))
        for section, feed_id in (
            (u'Pressemitteilung des Tages', 64),
            (u'Presidency', 137),
            (u'Foreign affairs and security policy', 138),
            (u'Agriculture and rural development', 139),
            (u'Budget and financial programming ', 140),
            (u'Climate action', 141),
            (u'Competition', 142),
            (u'Development', 143),
            (u'Digital agenda', 144),
            (u'Economic and monetary affairs', 145),
            (u'Education, culture, multilingualism and youth ', 146),
            (u'Employment, social Affairs and inclusion ', 147),
            (u'Energy', 148),
            (u'Enlargment and European neighbourhood policy ', 149),
            (u'Environment', 150),
            (u'Health and consumer policy', 151),
            (u'Home affairs', 152),
            (u'Industry and entrepreneurship', 153),
            (u'Inter-Institutional relations and administration', 154),
            (u'Internal market and services', 155),
            (u'International cooperation, humanitarian aid and crisis response', 156),
            (u'Justice, fundamental rights and citizenship', 157),
            (u'Maritime affairs and fisheries', 158),
            (u'Regional policy', 159),
            (u'Research and innovation', 160),
            (u'Taxation and customs union, audit and anti-fraud', 161),
            (u'Trade', 162),
            (u'Transport', 163),
        )
    ]

    # Stylesheet text, one rule per line.
    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )
|
|
@ -1,66 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
europasur.es
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Europasur(BasicNewsRecipe):
    """Recipe for europasur.es, fed from the site's per-section RSS URLs."""

    title = 'Europa Sur'
    __author__ = 'Darko Miletic'
    description = 'News in Spanish'
    publisher = 'Joly Digital'
    category = 'news, politics, Spanish'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    delay = 2
    no_stylesheets = True
    encoding = 'cp1252'
    language = 'es'
    publication_type = 'newspaper'
    extra_css = (
        ' body{font-family: Verdana,Arial,Helvetica,sans-serif}\n'
        'h2{font-family: Georgia,Times New Roman,Times,serif}\n'
        '.subtitle{font-weight:bold}\n'
        '.caption{font-size: small}\n'
        '.body{font-size: 1.1em}\n'
        '.info{color: #848484}\n'
    )

    # The description is passed under 'comments' (it used to be misspelled
    # as 'comment').
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'class': ['titles', 'current']}),
        dict(attrs={'id': 'newsBody'}),
    ]
    remove_tags = [
        dict(name=['iframe', 'base', 'embed', 'object']),
        dict(name='a', attrs={'class': 'zoom thickbox'}),
        dict(name='div', attrs={'class': 'other'}),
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Portada', u'http://www.europasur.es/rss/articles.php'),
        (u'Deportes', u'http://www.europasur.es/rss/articles.php?sec=1224'),
        (u'Economia', u'http://www.europasur.es/rss/articles.php?sec=427'),
        (u'Espana', u'http://www.europasur.es/rss/articles.php?sec=437'),
        (u'Mundo', u'http://www.europasur.es/rss/articles.php?sec=428'),
        (u'Pasarela', u'http://www.europasur.es/rss/articles.php?sec=1958'),
        (u'Ocio y cultura', u'http://www.europasur.es/rss/articles.php?sec=1210'),
        (u'Opinion', u'http://www.europasur.es/rss/articles.php?sec=1195'),
        (u'Tecnologia', u'http://www.europasur.es/rss/articles.php?sec=1681'),
        (u'Salud', u'http://www.europasur.es/rss/articles.php?sec=2379'),
    ]

    def image_url_processor(self, baseurl, url):
        """Rewrite an image URL into a media.grupojoly.com cache URL.

        The text after the last ``&an=`` is used as both dimensions, the
        text after the last ``.`` before it as the extension, and the last
        path segment before that as the article id.  A URL without
        ``&an=`` is returned unchanged, because none of those parts can be
        extracted from it.  ``baseurl`` is not used.
        """
        artl, sep, width = url.rpartition('&an=')
        if not sep:
            # rpartition() found no '&an=': the whole URL would otherwise be
            # treated as the width and produce a meaningless cache URL.
            return url
        artid, sep, ext = artl.rpartition('.')
        article_id = artid.rpartition('/')[2]
        return 'http://media.grupojoly.com/cache/' + article_id + '_' + width + 'x' + width + '_' + ext + '000.' + ext
|
|
@ -1,26 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Evangelizo(BasicNewsRecipe):
    """Recipe for the German Evangelizo.org feed, using embedded content."""

    title = 'Evangelizo.org'
    __author__ = 'Bobus'
    language = 'de'
    oldest_article = 2
    max_articles_per_feed = 30
    use_embedded_content = True

    feeds = [
        ('EvangleliumTagfuerTag',
         'http://www.evangeliumtagfuertag.org/rss/evangelizo_rss-de.xml'),
    ]

    preprocess_regexps = [
        # Unwrap '(digits)' from its <font size="-2"> element.
        (re.compile(r'<font size="-2">([(][0-9]*[)])</font>'), r'\g<1>'),
        # Put a <br /> after a '.' or '!' that is followed by a newline.
        (re.compile(r'([\.!]\n)'), r'\g<1><br />'),
    ]

    def populate_article_metadata(self, article, soup, first):
        """Remove the ``<font size="-2">`` wrapper from the article title.

        Only ``article.title`` is modified; ``soup`` and ``first`` are not
        used and nothing is returned.
        """
        font_wrapper = re.compile(r'<font size="-2">([(][0-9]*[)])</font>')
        article.title = font_wrapper.sub(r'\g<1>', article.title)
|
|
@ -1,50 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
evz.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class EvenimentulZilei(BasicNewsRecipe):
    """Recipe for evz.ro, read from the site's single RSS feed."""

    title = u'Evenimentul Zilei'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = u'Evenimentul Zilei'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = dict([
        ('comments', description),
        ('tags', category),
        ('language', language),
        ('publisher', publisher),
    ])

    keep_only_tags = [
        dict(name='div', attrs={'class': 'single'}),
        dict(name='img', attrs={'id': 'placeholder'}),
        dict(name='a', attrs={'id': 'holderlink'}),
    ]

    remove_tags = [
        dict(name='p', attrs={'class': ['articleInfo']}),
        dict(name='div', attrs={'id': ['bannerAddoceansArticleJos']}),
        dict(name='div', attrs={'id': ['bannerAddoceansArticle']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id': ['bannerAddoceansArticleJos']}),
    ]

    feeds = [(u'Feeds', u'http://www.evz.ro/rss.xml')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        adeified = self.adeify_images(soup)
        return adeified
|
|
@ -1,55 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
exiledonline.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Exiled(BasicNewsRecipe):
    """Recipe for exiledonline.com, read from the site's article feed."""

    title = 'Exiled Online'
    __author__ = 'Darko Miletic'
    description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    publisher = 'Exiled Online'
    category = 'news, politics, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_javascript = True
    language = 'en'
    publication_type = 'newsblog'
    masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
    extra_css = (
        '\n'
        'body{font-family: Arial,Helvetica,sans-serif}\n'
        '#topslug{font-size: xx-large; font-weight: bold; color: red}\n'
    )

    # The description is passed under 'comments' (it used to be misspelled
    # as 'comment').
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]

    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'id': ['comments', 'navig']}),
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        """Drop inline styles and replace simple links with their text.

        Every ``style`` attribute is deleted.  Each ``<a>`` whose
        ``.string`` is not ``None`` is replaced by that string; other
        ``<a>`` elements are left in place.  Returns ``soup``.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup

    def get_article_url(self, article):
        """Return the entry's link with ``all/1/`` appended.

        Returns ``None`` when the entry has no ``link`` instead of failing
        on the string concatenation.
        """
        raw = article.get('link', None)
        if raw is None:
            return None
        return raw + 'all/1/'
|
|
@ -1,57 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Explosm(BasicNewsRecipe):
    """Recipe for the Explosm feed that keeps only the comic image."""

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'

    oldest_article = 24
    max_articles_per_feed = 10
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    feeds = [(u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')]

    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'}),
    ]
    remove_tags = [
        dict(name=tag_name)
        for tag_name in ('div', 'span', 'table', 'br', 'nobr', 'a', 'b')
    ]

    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'
    )

    def get_cover_url(self):
        """Return the fixed cover image URL."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds and drop every article that is not a comic page.

        An article is kept only when its ``url`` matches the explosm.net
        comics pattern.  Each feed's ``articles`` list is filtered in
        place and the list of feeds is returned.
        """
        parsed = BasicNewsRecipe.parse_feeds(self)
        for feed in parsed:
            # Slice assignment keeps the same list object while filtering.
            feed.articles[:] = [
                entry for entry in feed.articles
                if re.search(r'http://www.explosm.net/comics', entry.url) is not None
            ]
        return parsed

    def skip_ad_pages(self, soup):
        """Return ``soup`` when the comic image is absent, else ``None``."""
        # Skip ad pages served before actual article
        comic = soup.find(name='img', attrs={
            'alt': 'Cyanide and Happiness, a daily webcomic'})
        return soup if comic is None else None
|
|
@ -1,80 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Recipe for express.de, read from the site's per-section feeds."""

    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = (
        '\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}\n'
        'h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}\n'
    )
    remove_javascript = True
    # Was spelled 'remove_tags_befor', a name nothing reads, so the rule was
    # silently ignored.
    remove_tags_before = [dict(name='div', attrs={'class': 'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'MoreNews'})]

    remove_tags = [
        dict(id='kalaydo'),
        dict(id='Header'),
        dict(id='Searchline'),
        dict(id='MainNav'),
        dict(id='Logo'),
        dict(id='MainLinkSpacer'),
        dict(id='MainLinks'),
        dict(id='ContainerPfad'),  # new
        dict(title='Diese Seite Bookmarken'),

        dict(name='span'),
        dict(name='div', attrs={'class': 'spacer_leftneu'}),
        dict(name='div', attrs={'class': 'button kalaydologo'}),
        dict(name='div', attrs={'class': 'button stellenneu'}),
        dict(name='div', attrs={'class': 'button autoneu'}),
        dict(name='div', attrs={'class': 'button immobilienneu'}),
        dict(name='div', attrs={'class': 'button kleinanzeigen'}),
        dict(name='div', attrs={'class': 'button tiereneu'}),
        dict(name='div', attrs={'class': 'button ferienwohnungen'}),
        dict(name='div', attrs={'class': 'button inserierenneu'}),
        dict(name='div', attrs={'class': 'spacer_rightneu'}),
        dict(name='div', attrs={'class': 'spacer_rightcorner'}),
        dict(name='div', attrs={'class': 'HeaderMetaNav'}),
        dict(name='div', attrs={'class': 'HeaderSearchOption'}),
        dict(name='div', attrs={'class': 'HeaderSearch'}),
        dict(name='div', attrs={'class': 'sbutton'}),
        dict(name='div', attrs={'class': 'active'}),
        dict(name='div', attrs={'class': 'MoreNews'}),  # new
        dict(name='div', attrs={'class': 'ContentBoxSubline'}),  # new
    ]

    def preprocess_html(self, soup):
        """Replace each ``<a>`` that has a plain ``.string`` with that text.

        Links whose ``.string`` is ``None`` are left untouched.  Returns
        ``soup``.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup

    feeds = [
        (u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
        (u'Regional - Köln',
         u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
        (u'Regional - Bonn',
         u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
        (u'Regional - Düsseldorf',
         u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
        (u'Regional - Region',
         u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
        (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
        (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
        (u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
        (u'Alemannia Aachen News',
         u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
        (u'Borussia M~Gladbach',
         u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
        (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
        (u'Basketball News',
         u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
        (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
    ]
|
|
Binary file not shown.
Before Width: | Height: | Size: 1.1 KiB |
Binary file not shown.
Before Width: | Height: | Size: 738 B |
Binary file not shown.
Before Width: | Height: | Size: 461 B |
Binary file not shown.
Before Width: | Height: | Size: 458 B |
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user