remove dead recipes
These recipes are based on RSS feeds that no longer work.
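For reviewers who want to re-check any of these feeds before (or after) merging, a quick liveness probe over the feed URLs is usually enough. The sketch below is illustrative only and not part of this change; it assumes plain standard-library urllib and a hand-pasted list of feed URLs taken from the recipes being removed (the two URLs shown are just examples from the deleted files).

#!/usr/bin/env python
# Illustrative only -- not part of this change. Probes each feed URL and
# reports ones that no longer return a usable RSS/Atom document.
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

# Sample entries; in practice paste the feed URLs from the recipes below.
FEEDS = [
    'http://balkanist.net/magazine/feed/',
    'http://www.bangkokbiznews.com/home/services/rss/home.xml',
]

def probe(url, timeout=20):
    req = Request(url, headers={'User-Agent': 'calibre feed check'})
    try:
        with urlopen(req, timeout=timeout) as resp:
            head = resp.read(2048).lower()
    except (HTTPError, URLError, OSError) as err:
        return 'dead ({})'.format(err)
    # A live feed should start with an XML/RSS/Atom document, not an HTML error page.
    if b'<rss' in head or b'<feed' in head or b'<?xml' in head:
        return 'alive'
    return 'suspicious (no RSS/Atom markers in response)'

if __name__ == '__main__':
    for url in FEEDS:
        print(probe(url), '-', url)

A feed reported as "suspicious" typically means the domain still resolves but now serves an HTML landing or error page instead of the old RSS endpoint, which is the failure mode for most of the recipes removed here.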
@ -1,50 +0,0 @@
# -*- mode: python -*-
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2017, Darko Miletic <darko.miletic at gmail.com>'
'''
http://balkanist.net/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Pagina12(BasicNewsRecipe):
    title = 'Balkanist'
    __author__ = 'Darko Miletic'
    description = 'Balkanist is an experimental, bilingual platform featuring politics, analysis, culture, and criticism for a smart international audience underwhelmed by what is currently on offer. Our aim is to provide bold, uncompromising coverage of the Balkan region and everything to its East. We are currently entirely independent, self- and reader-funded, and are not affiliated with any organization, company, or government institution.'  # noqa
    publisher = 'Balkanist'
    category = 'news, politics, Balkans'
    oldest_article = 30
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    auto_cleanup = True
    masthead_url = 'http://media.balkanist.net/2013/07/Balkanist-Magazine-cover.png'
    ignore_duplicate_articles = {'url'}
    extra_css = """
        body{font-family: Lora,serif}
        img{margin-top:1em; margin-bottom: 1em; display:block}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }

    remove_tags = [
        dict(name=['meta', 'link']),
    ]

    feeds = [
        (u'Magazine', u'http://balkanist.net/magazine/feed/'),
        (u'News', u'http://balkanist.net/news/feed/'),
        (u'Commentary', u'http://balkanist.net/commentary/feed/'),
        (u'Arts and Culture', u'http://balkanist.net/arts-and-culture/feed/'),
        (u'Politics', u'http://balkanist.net/politics/feed/'),
    ]
@ -1,25 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1290689337(BasicNewsRecipe):
    __author__ = 'Anat R.'
    language = 'th'
    title = u'Bangkok Biz News'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    feeds = [(u'Headlines',
              u'http://www.bangkokbiznews.com/home/services/rss/home.xml'),
             (u'Politics', u'http://www.bangkokbiznews.com/home/services/rss/politics.xml'),
             (u'Business', u'http://www.bangkokbiznews.com/home/services/rss/business.xml'),
             (u'Finance', u' http://www.bangkokbiznews.com/home/services/rss/finance.xml'),
             (u'Technology', u' http://www.bangkokbiznews.com/home/services/rss/it.xml')]
    remove_tags_before = dict(name='div', attrs={'class': 'box-Detailcontent'})
    remove_tags_after = dict(name='p', attrs={'class': 'allTags'})
    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'content-tools'}))
    remove_tags.append(dict(name='p', attrs={'class': 'allTags'}))
    remove_tags.append(dict(name='div', attrs={'id': 'morePic'}))
    remove_tags.append(dict(name='ul', attrs={'class': 'tabs-nav'}))
@ -1,49 +0,0 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
bankier.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe


class bankier(BasicNewsRecipe):
    title = u'Bankier.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'  # noqa
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX = 'http://bankier.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    simultaneous_downloads = 5

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'align': 'left'}))

    remove_tags = []
    remove_tags.append(dict(name='table', attrs={'cellspacing': '2'}))
    remove_tags.append(dict(name='div', attrs={'align': 'center'}))
    remove_tags.append(dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}))

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        segment = url.split('.')
        urlPart = segment[2]
        segments = urlPart.split('-')
        urlPart2 = segments[-1]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2
@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class TheBayCitizen(BasicNewsRecipe):
    title = 'The Bay Citizen'
    language = 'en'
    __author__ = 'noah'
    description = 'The Bay Citizen'
    publisher = 'The Bay Citizen'
    INDEX = u'http://www.baycitizen.org'
    category = 'news'
    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    masthead_url = 'http://media.baycitizen.org/images/layout/logo1.png'
    feeds = [('Main Feed', 'http://www.baycitizen.org/feeds/stories/')]
    keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'socialBar'}),
        dict(name='div', attrs={'id': 'text-resize'}),
        dict(name='div', attrs={'class': 'story relatedContent'}),
        dict(name='div', attrs={'id': 'comment_status_loading'}),
    ]

    def append_page(self, soup, appendtag, position):
        pager = soup.find('a', attrs={'class': 'stry-next'})
        if pager:
            nexturl = self.INDEX + pager['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'body'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        garbage = soup.findAll(id='story-pagination')
        [trash.extract() for trash in garbage]
        garbage = soup.findAll('em', 'cont-from-prev')
        [trash.extract() for trash in garbage]
        return soup
@ -1,16 +0,0 @@
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1327747616(BasicNewsRecipe):
    title = u'Beppe Grillo'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Beppe Grillo', u'http://feeds.feedburner.com/beppegrillo/atom')]
    description = 'Blog of the famous comedian and politician Beppe Grillo - v1.00 (28, January 2012)'
    __author__ = 'faber1971'

    language = 'it'
@ -1,48 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.recipes import BasicNewsRecipe


def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})


class BerlinerZeitung(BasicNewsRecipe):
    title = 'Berliner Zeitung'
    __author__ = 'Kovid Goyal'
    language = 'de'
    description = 'Berliner Zeitung RSS'
    timefmt = ' [%d.%m.%Y]'
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True

    # oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    keep_only_tags = [
        classes('dm_article_body dm_article_header'),
    ]
    remove_tags = [
        classes('dm_article_share'),
    ]

    feeds = [x.split() for x in [
        'Berlin http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699382-asYahooFeed.xml',
        'Brandenburg http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699570-asYahooFeed.xml',
        'Politik http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699614-asYahooFeed.xml',
        'Wirtschaft http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699644-asYahooFeed.xml',
        'Sport http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699874-asYahooFeed.xml',
        'Kultur http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700020-asYahooFeed.xml',
        'Panorama http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700178-asYahooFeed.xml',
        'Wissen http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700222-asYahooFeed.xml',
        'Digital http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700594-asYahooFeed.xml',
        'Ratgeber http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700190-asYahooFeed.xml',
    ]]
@ -1,49 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
berlingske.dk
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Berlingske_dk(BasicNewsRecipe):
    title = 'Berlingske Tidende'
    __author__ = 'Darko Miletic'
    description = 'News from Denmark'
    publisher = 'berlingske.dk'
    category = 'news, politics, Denmark'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    remove_javascript = True
    publication_type = 'newspaper'
    encoding = 'utf8'
    language = 'da'
    auto_cleanup = True
    extra_css = '''
        .manchet {color:#888888;}
        .dateline {font-size: x-small; color:#444444;}
        .manchet,.dateline { font-family: Cambria,Georgia,Times,"Times New Roman",serif }
        .body {font-family: Arial,Helvetica,sans-serif }
    '''

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [

        (u'Breaking news', u'http://www.b.dk/breaking/rss'),
        (u'Seneste nyt', u'http://www.b.dk/seneste/rss'),
        (u'Topnyheder', u'http://www.b.dk/top/rss'),
        (u'Danmark', u'http://www.b.dk/danmark/seneste/rss'),
        (u'Verden', u'http://www.b.dk/verden/seneste/rss'),
        (u'Klima', u'http://www.b.dk/klima/seneste/rss'),
        (u'Debat', u'http://www.b.dk/debat/seneste/rss'),
        (u'Koebenhavn', u'http://www.b.dk/koebenhavn/seneste/rss'),
        (u'Politik', u'http://www.b.dk/politik/seneste/rss'),
        (u'Kultur', u'http://www.b.dk/kultur/seneste/rss')
    ]
@ -1,60 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
bighollywood.breitbart.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class BigHollywood(BasicNewsRecipe):
    title = 'Big Hollywood'
    __author__ = 'Darko Miletic'
    description = 'News and articles from the media world'
    publisher = 'Big Hollywood'
    category = 'news, media, art, literature, movies, politics, USA, Hollywood'
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'blog'
    extra_css = """
        body{font-family: Arial,sans-serif }
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [dict(attrs={'class': 'postcontent'})]
    remove_tags = [
        dict(name=['meta', 'link', 'link', 'iframe', 'embed', 'object']), dict(
            name='p', attrs={'class': ['post_meta_links', 'postfooter']})
    ]
    remove_attributes = ['original', 'onclick']

    feeds = [(u'Articles', u'http://bighollywood.breitbart.com/feed/')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    if limg['src'].endswith('BlogPrintButton.png'):
                        limg.extract()
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup
@ -1,45 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe


class Birgun (BasicNewsRecipe):

    title = u'Birgün Gazetesi'
    __author__ = u'Osman Kaysan'
    oldest_article = 7
    max_articles_per_feed = 150
    use_embedded_content = False
    description = 'Birgun gazatesi haberleri, kose yazarlari'
    publisher = 'Birgün'
    category = 'news,haberler,turkce,gazete,birgun'
    language = 'tr'
    no_stylesheets = True
    publication_type = 'newspaper'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True, 'remove_paragraph_spacing': True,
    }

    cover_img_url = 'http://www.birgun.net/i/birgun.png'
    masthead_url = 'http://www.birgun.net/i/birgun.png'

    remove_attributes = ['width', 'height']

    remove_tags_before = dict(name='h2', attrs={'class': 'storyHeadline'})
    remove_tags_after = dict(name='tr', attrs={'valign': 'top'})
    remove_tags = [dict(name='div', attrs={'id': 'byLine'}), dict(name='div', attrs={'class': 'toollinks'}), dict(name='div', attrs={
        'class': 'main-lead'}), dict(name='div', attrs={'class': 'addthis_toolbox addthis_default_style'}), dict(name='a', attrs={'class': 'addthis_button'})]

    remove_empty_feeds = True

    feeds = [

        (u'Güncel', u'http://www.birgun.net/actuels.xml'),
        (u'Köşe Yazarları', u'http://www.birgun.net/writer.xml'),
        (u'Politika', u'http://www.birgun.net/politics.xml'),
        (u'Ekonomi', u'http://www.birgun.net/economic.xml'),
        (u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml'),
        (u'Dünya', u'http://www.birgun.net/worlds.xml'),
        (u'Yaşam', u'http://www.birgun.net/lifes.xml')
    ]
@ -1,56 +0,0 @@
from __future__ import print_function

import re

import mechanize
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Birmingham post'
    description = 'Author D.Asbury. News for Birmingham UK'
    __author__ = 'Dave Asbury'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
    oldest_article = 2
    max_articles_per_feed = 20
    linearize_tables = True
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    language = 'en_GB'
    compress_news_images = True
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

    masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.birminghampost.net')
        # look for the block containing the sun button and url
        cov = soup.find(attrs={'height': re.compile(
            '3'), 'alt': re.compile('Post')})
        print()
        print('%%%%%%%%%%%%%%%', cov)
        print()
        cov2 = str(cov['src'])
        print('88888888 ', cov2, ' 888888888888')

        # cover_url=cov2
        # return cover_url
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cov2)
            cover_url = cov2
        except:
            cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
        return cover_url

    feeds = [
        (u'West Mids. News',
         u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
        (u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
        (u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml')

    ]
@ -1,57 +0,0 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
bitacora.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe


class General(BasicNewsRecipe):
    title = 'bitacora.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['txt'])]
    remove_tags = [
        dict(name='div', attrs={'class': 'tablafoot'}),
        dict(name=['object', 'h4']),
        dict(name=['object', 'link'])
    ]

    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.bitacora.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'class': 'imgtapa'})
        if link_item:
            cover_url = "http://www.bitacora.com.uy/" + link_item['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@ -1,38 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'This is a recipe of BizPortal.co.il.'
    cover_url = 'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'
    title = u'BizPortal'
    language = 'he'
    __author__ = 'marbs'
    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'  # noqa
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width']
    simultaneous_downloads = 5
    remove_tags = [dict(name='img', attrs={'scr': ['images/bizlogo_nl.gif']})]
    max_articles_per_feed = 100

    feeds = [(u'חדשות שוק ההון', u'http://www.bizportal.co.il/shukhahon/messRssUTF2.xml'),
             (u'חדשות וול סטריט בעברית',
              u'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'),
             (u'שיווק ופרסום', u'http://www.bizportal.co.il/shukhahon/messRssUTF145.xml'),
             (u'משפט', u'http://www.bizportal.co.il/shukhahon/messRssUTF3.xml'),
             (u'ניתוח טכני', u'http://www.bizportal.co.il/shukhahon/messRssUTF5.xml'),
             (u'דיני עבודה ושכר', u'http://www.bizportal.co.il/shukhahon/messRssUTF6.xml'),
             (u'מיסוי', u'http://www.bizportal.co.il/shukhahon/messRssUTF7.xml'),
             (u'טאבו', u'http://www.bizportal.co.il/shukhahon/messRssUTF8.xml'),
             (u'נדל"ן', u'http://www.bizportal.co.il/shukhahon/messRssUTF160.xml'),
             ]

    def print_version(self, url):
        split1 = url.split("=")
        print_url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02print.shtml?mid=' + \
            split1[1]
        return print_url
@ -1,31 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1.04'
__date__ = '27, September 2010'
__docformat__ = 'restructuredtext en'


from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'Nealz Nuze'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Neal Boortz Show Radio Notes'
    publisher = 'Neal Boortz'
    category = 'news, politics, USA, talkshow'
    oldest_article = 1
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = True
    masthead_url = 'http://boortz.com/images/nuze_logo.gif'
    conversion_options = {'linearize_tables': True}
    feeds = [
        ('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml')

    ]
@ -1,34 +0,0 @@
# vim:fileencoding=utf-8
from calibre.web.feeds.recipes import BasicNewsRecipe


class AdvancedUserRecipe1303841067(BasicNewsRecipe):

    title = u'Börse-online'
    __author__ = 'schuster, Armin Geller'  # AGE upd 2013-11-29
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    encoding = 'utf-8'
    timefmt = ' [%a, %d %b %Y]'

    cover_url = 'http://www.wirtschaftsmedien-shop.de/s/media/coverimages/7576_2013107.jpg'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/de/5/56/B%C3%B6rse_Online_Logo.svg'

    feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss'),
             (u'Märkte', u'http://www.boerse-online.de/rss/maerkte'),
             (u'Chartanalyse', u'http://www.boerse-online.de/rss/maerkte/chartanalyse'),
             (u'Aktien', u'http://www.boerse-online.de/rss/aktie'),
             (u'Aktien-Chartanalyse',
              u'http://www.boerse-online.de/rss/aktie/chartanalyse'),
             (u'zertifikate', u'http://www.boerse-online.de/rss/zertifikat')
             ]

    def print_version(self, url):
        s1, s2 = url.rsplit('/', 1)
        return 'http://www.boerse-online.de/nachrichten/drucken/' + s2
@ -1,18 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import AutomaticNewsRecipe


class BasicUserRecipe1501590114(AutomaticNewsRecipe):
    title = 'Boxis'
    oldest_article = 240
    max_articles_per_feed = 100
    auto_cleanup = True
    language = 'sc'
    __author__ = 'tzium'

    feeds = [
        ('Boxis', 'http://www.boxis.it/sc/feed/'),
    ]
@ -1,130 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals

__license__ = 'GPL v3'
__version__ = '0.2'

'''
brand eins.de
'''
from collections import OrderedDict

from calibre.web.feeds.news import BasicNewsRecipe


class BrandEins(BasicNewsRecipe):

    title = u'brand eins'
    __author__ = 'Nikolas Mangold-Takao, Thomas Schlenkhoff'
    language = 'de'
    description = u'brand eins beschreibt den momentanen Wandel in Wirtschaft und Gesellschaft.'
    publisher = u'brand eins Verlag GmbH & Co. oHG'
    category = 'politics, business, wirtschaft, Germany'

    PREFIX = 'http://www.brandeins.de/'
    INDEX = PREFIX + 'archiv/listeansicht.html'

    use_embedded_content = False
    resolve_internal_links = True

    no_stylesheets = True
    needs_subscription = False

    delay = 1
    summary_length = 200
    simultaneous_downloads = 5
    remove_javascript = True

    keep_only_tags = dict(name='div', attrs={'id': 'content'})

    # remove share image from articles
    remove_tags = [dict(name='div', attrs={'id': 'oms_gpt_billboard'}),
                   dict(name='div', attrs={'id': 'oms_gpt_rectangle'}),
                   dict(name='h3', attrs={'class': 'sharing-headline'}),
                   dict(name='div', attrs={'class': 'sharing-links'}),
                   dict(name='aside', attrs={'class': 'articleAside'})]

    remove_tags_before = dict(
        name='div', attrs={'class': 'innerContent typeArticle'})
    remove_tags_after = dict(name='div', attrs={'id': 'socialshareprivacy'})

    extra_css = '''
        body, p {text-align: left;}
        .headline {font-size: x-large;}
        h2 {font-size: medium;}
        h1 {font-size: large;}
        em.Bold {font-weight:bold;font-style:normal;}
        em.Italic {font-style:italic;}
    '''

    def parse_index(self):
        issue = ""

        soup = self.index_to_soup(self.INDEX)
        issue_list = soup.findAll('div', attrs={'class': 'details'})

        issue_map = {}
        i = 0
        for entry in issue_list:
            title = self.tag_to_string(entry.find(
                'h3', attrs={'class': 'like-h1'}))
            issue_string = self.tag_to_string(
                entry.find('span', attrs={'class': 'meta'}))
            year = issue_string[8:]
            month = issue_string[5:-5]
            yyyymm = "{}{}".format(year, month)
            link = entry.findAll('a')[0]
            issue_map[yyyymm] = link.get('href')
            self.log('- ', year, month, title, link.get('href'))

            # Issue 1 (most recent) has only few articles online,
            # Issue 2 and 3 (2nd and 3rd recent) is not completely online.
            # Issue 4 (4th recent) is completely online, hence i == 3

            if issue == "" and i == 3:
                issue = yyyymm
            i += 1

        url = 'http://brandeins.de/' + issue_map[issue]
        self.log('Issue to get: ', issue, title, url)
        self.issue_url = url  # save to extract cover

        return self.parse_issue(url)

    def parse_issue(self, url):
        soup = self.index_to_soup(url)
        feeds = OrderedDict()

        for item in soup.findAll(attrs={'class': lambda x: 'ihv_item' in (x or '').split()}):
            a = item.findParent('a', href=True)
            if a is None:
                continue
            url = self.PREFIX + a['href']
            title = self.tag_to_string(item.find(attrs={'class': 'ihv_title'}))
            sec = self.tag_to_string(
                item.find(attrs={'class': 'ihv_page_category'}).findAll('span')[-1])
            if sec not in feeds:
                feeds[sec] = []
            desc = ''
            for p in item.findAll('p'):
                desc += self.tag_to_string(p)
            feeds[sec].append(
                {'title': title, 'url': url, 'description': desc})
            self.log('Found article:', title, 'at', url)

        return [(st, articles) for st, articles in feeds.items() if articles]

    def get_cover_url(self):
        # the index does not contain a usable cover, but the 'Welt in
        # Zahlen'-article contains it
        cover_article = "{}{}".format(
            self.issue_url, 'die-welt-in-zahlen.html')
        self.log('Cover article URL: %s' % cover_article)
        soup = self.index_to_soup(cover_article)
        img = soup.find('section', 'asideSection no-content').find('img')
        self.log('Found cover image url: %s' % img['src'])
        return (self.PREFIX + img['src'])

    def preprocess_raw_html(self, raw_html, url):
        return raw_html.replace('<p>• ', '<p>')
@ -1,33 +0,0 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe


class BrasilDeFato(BasicNewsRecipe):
    news = True
    title = u'Brasil de Fato'
    __author__ = 'Alex Mitrani'
    description = u'Uma visão popular do Brasil e do mundo.'
    publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO'
    category = 'news, politics, Brazil, rss, Portuguese'
    oldest_article = 10
    max_articles_per_feed = 100
    summary_length = 1000
    language = 'pt_BR'

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg'
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags = [dict(name='div', attrs={'class': 'links'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'links'})]

    feeds = [
        (u'Nacional', u'http://www.brasildefato.com.br/rss_nacional'),
        (u'Internacional', u'http://www.brasildefato.com.br/rss_internacional'),
        (u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista'),
        (u'Cultura', u'http://www.brasildefato.com.br/rss_cultura'),
        (u'Análise', u'http://www.brasildefato.com.br/rss_analise')
    ]
@ -1,81 +0,0 @@
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.brecha.com.uy
'''

try:
    from urllib.parse import quote, urlencode
except ImportError:
    from urllib import quote, urlencode
from calibre.web.feeds.news import BasicNewsRecipe


class Brecha(BasicNewsRecipe):
    title = 'Brecha Digital'
    __author__ = 'Darko Miletic'
    description = 'Brecha , Cultura ,Sociales , Separatas, Lupas, Vueltas de Montevideo y toda la infomacion que caracteriza a este semanario'
    publisher = 'Brecha'
    category = 'brecha, digital, prensa, uruguay, semanario, sociedad, politica, cultura'
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_UY'
    remove_empty_feeds = True
    publication_type = 'magazine'
    auto_cleanup = True
    needs_subscription = 'optional'
    masthead_url = 'http://www.brecha.com.uy/templates/ja_nex/themes/orange/images/logo.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
        if self.username is not None and self.password is not None:
            data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password
                              })
            br.open(
                'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data)
        return br

    remove_tags = [
        dict(name=['meta', 'link']),
        dict(name='div', attrs={'id': 'js_ja'}),
        dict(name='ul', attrs={'class': 'actions'})
    ]
    remove_attributes = ['lang', 'border']

    feeds = [
        (u'Politica', u'http://www.brecha.com.uy/index.php/politica-uruguaya?format=feed&type=rss'),
        (u'Mundo', u'http://www.brecha.com.uy/index.php/mundo?format=feed&type=rss'),
        (u'Mapamundi', u'http://www.brecha.com.uy/index.php/mundo/mapamundi?format=feed&type=rss'),
        (u'Cultura', u'http://www.brecha.com.uy/index.php/cultura?format=feed&type=rss'),
        (u'Vueltas de Montevideo',
         u'http://www.brecha.com.uy/index.php/cultura/vueltas-de-montevideo?format=feed&type=rss'),
        (u'Secos y Mojados', u'http://www.brecha.com.uy/index.php/cultura/secos-y-mojados?format=feed&type=rss'),
        (u'Literarias', u'http://www.brecha.com.uy/index.php/cultura/literarias?format=feed&type=rss'),
        (u'Sociedad', u'http://www.brecha.com.uy/index.php/sociedad?format=feed&type=rss'),
        (u'Especiales', u'http://www.brecha.com.uy/index.php/especiales?format=feed&type=rss'),
        (u'Contratapa', u'http://www.brecha.com.uy/index.php/contratapa?format=feed&type=rss')
    ]

    def print_version(self, url):
        return url + '?tmpl=component&print=1&layout=default&page='

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.brecha.com.uy/index.php')
        for image in soup.findAll('img', alt=True):
            if image['alt'].startswith('Tapa '):
                return 'http://www.brecha.com.uy' + quote(image['src'])
        return None
@ -1,16 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe


class Brhat(BasicNewsRecipe):
    title = 'Brhat'
    __author__ = 'Vishvas Vasuki'
    language = 'en_IN'
    oldest_article = 365
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        ('Main', 'https://brhat.in/feed/'),
    ]
@ -1,63 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


class germanyBSI(BasicNewsRecipe):
    # Title of the Recipe
    # title = 'News des Bundesamt für Sicherheit in der Informationstechnik'
    title = 'BSI News - DE'
    cover_url = 'https://www.bsi.bund.de/SiteGlobals/Frontend/Images/BSI/logo.png'
    # Author
    __author__ = 'Volker Heggemann, VoHe'
    # oldest article to download (in days) ---- can be edit by user
    oldest_article = 7
    # describes itself, ---- can be edit by user
    max_articles_per_feed = 100
    # speed up the download on fast computers be careful (I test max.20)
    # ---- can be edit by user
    simultaneous_downloads = 10
    # description, some Reader show this in titlepage
    description = u'News from BSI'
    # Who published the content?
    publisher = u'Newsfeeds des BSI'
    # What is the content of?
    category = u'Sie erfahren, wenn neue Nachrichten auf der Internetseite des BSI veröffentlicht werden'
    # describes itself, ---- can be edit by user
    use_embedded_content = False
    # describes itself, ---- can be edit by user
    language = 'de'
    # encoding of content. e.g. utf-8, None, ...
    # ---- can be edit by user
    encoding = None  # 'utf-8' doesn't work here
    # Removes javascript- why keep this, we only want static content
    remove_javascript = True
    # Removes empty feeds - why keep them!?
    remove_empty_feeds = True

    # remove the rubbish (in ebook)
    auto_cleanup = True
    # now the content description and URL follows
    # feel free to add, wipe out what you need ---- can be edit by user
    #
    # some of this are double
    #
    #
    # Make some tests, may you first comment all of them out, and step by step you add what you'll need?
    #

    feeds = [
        ('BSI - Germany - Sicherheitshinweise des Buerger-CERT',
         'https://www.bsi-fuer-buerger.de/SiteGlobals/Functions/RSSFeed/RSSNewsfessBSIFB/RSSNewsfeed_BuergerCERT.xml'
         ),
        ('BSI - Germany - Aktuelle Informationen BSI f\xfcr B\xfcrger',
         'https://www.bsi-fuer-buerger.de/SiteGlobals/Functions/RSSFeed/RSSNewsfessBSIFB/RSSNewsfeed_Buerger_aktuelle_Informationen.xml'
         ),
        ('Kurzinformationen des CERT-Bund zu Sicherheitsl\xfccken und Schwachstellen in IT-Systemen',
         'https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed_WID.xml'
         ),
        ('BSI - Germany - RSS-Newsfeed (Presse-, Kurzmitteilungen und Veranstaltungshinweise)',
         'https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed.xml'
         ),
    ]
@ -1,45 +0,0 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''


class Buchreport(BasicNewsRecipe):
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
                 article, articledate, articledescription { text-align: left; } \
                 h1 { text-align: left; font-size: 140%; font-weight: bold; } \
                 h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \
                 h3 { text-align: left; font-size: 100%; font-weight: regular; font-style: italic; } \
                 h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
@ -1,49 +0,0 @@
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe


def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'BuckMasters In The Kitchen'
    language = 'en'
    __author__ = 'TonytheBookworm & Starson17'
    description = 'Learn how to cook all those outdoor varments'
    publisher = 'BuckMasters.com'
    category = 'food,cooking,recipes'
    oldest_article = 365
    max_articles_per_feed = 100
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.buckmasters.com/Portals/_default/Skins/BM_10/images/header_bg.jpg'
    keep_only_tags = [
        dict(name='table', attrs={'class': ['containermaster_black']})
    ]
    remove_tags_after = [dict(name='div', attrs={'align': ['left']})]
    feeds = [
        ('Recipes', 'http://www.buckmasters.com/DesktopModules/DnnForge%20-%20NewsArticles/RSS.aspx?TabID=292&ModuleID=658&MaxCount=25'),
    ]

    def preprocess_html(self, soup):
        item = soup.find('a', attrs={'class': ['MenuTopSelected']})
        if item:
            item.parent.extract()
        for img_tag in soup.findAll('img'):
            parent_tag = img_tag.parent
            if parent_tag.name == 'a':
                ntag = new_tag(soup, 'p')
                ntag.insert(0, img_tag)
                parent_tag.replaceWith(ntag)
            elif parent_tag.name == 'p':
                if not self.tag_to_string(parent_tag) == '':
                    new_div = new_tag(soup, 'div')
                    ntag = new_tag(soup, 'p')
                    ntag.insert(0, img_tag)
                    parent_tag.replaceWith(new_div)
                    new_div.insert(0, ntag)
                    new_div.insert(1, parent_tag)
        return soup
@ -1,53 +0,0 @@
#!/usr/bin/env python
# -*- mode: python -*-
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009-2016, Darko Miletic <darko.miletic at gmail.com>'
'''
www.diariobae.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


class BsAsEconomico(BasicNewsRecipe):
    title = 'Buenos Aires Economico'
    __author__ = 'Darko Miletic'
    description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.'  # noqa
    publisher = 'Diario BAE'
    category = 'news, politics, economy, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'es_AR'
    masthead_url = 'http://static.cronica.com.ar/FileAccessHandler.ashx?code=635959869637084622'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    extra_css = """
        body{font-family: Georgia,"Times New Roman",Times,serif}
        img{display: block; margin-top: 1em}
    """
    conversion_options = {
        'comment' : description,
        'tags' : category,
        'publisher': publisher,
        'language' : language
    }

    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags = [
        dict(name=['meta', 'base', 'iframe', 'link', 'lang'])
        ,dict(attrs={'class':'pdfprnt-bottom-right'})
    ]

    feeds = [(u'Articles', u'http://www.diariobae.com/feed/getfeed')]

    def get_cover_url(self):
        cover = None
        soup = self.index_to_soup('http://www.diariobae.com/')
        tag = soup.find('a', rel='lightbox[tapa]', href=True)
        if tag:
            cover = tag['href']
        return cover
@ -1,49 +0,0 @@
__license__ = 'GPL v3'
__author__ = 'Todd Chapman'
__copyright__ = 'Todd Chapman'
__version__ = 'v0.2'
__date__ = '2 March 2011'

'''
http://www.buffalonews.com/RSS/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class BuffaloNews(BasicNewsRecipe):
    title = u'Buffalo News'
    oldest_article = 2
    language = 'en'
    __author__ = 'ChappyOnIce, Krittika Goyal'
    max_articles_per_feed = 20
    encoding = 'utf-8'
    masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
    auto_cleanup = True
    remove_empty_feeds = True

    feeds = [
        (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Southern Erie County',
         u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'),
        (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'),
        (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944')
    ]
@ -1,36 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.businessworld.in
'''

from calibre.web.feeds.news import BasicNewsRecipe


class BusinessWorldMagazine(BasicNewsRecipe):
    title = 'Business World Magazine'
    __author__ = 'Kovid Goyal'
    description = 'News from India'
    category = 'news, politics, finances, India, Asia'
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'en_IN'
    oldest_article = 2

    keep_only_tags = [
        dict(attrs={'class': ['main-article']}),
    ]
    remove_tags = [
        dict(id='video_n_ad_div'),
        dict(attrs={'class': ['meta-tools', 'social-article']}),
    ]
    remove_tags_after = dict(attrs={'class': 'social-article'})

    feeds = ['http://www.businessworld.in/rss/all-article.xml']

    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'data-original': True}):
            img['src'] = img['data-original']
        for ins in soup.findAll(attrs={'class': 'adsbygoogle'}):
            ins.parent.extract()
        return soup
@ -1,14 +0,0 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe


class BasicUserRecipe1325259641(BasicNewsRecipe):
    language = 'tr'
    __author__ = 'asalet_r'
    title = u'CafCaf Dergisi'
    oldest_article = 7
    max_articles_per_feed = 20
    auto_cleanup = True

    feeds = [(u'CafCaf', u'http://www.cafcafdergisi.net/feed/rss/')]
@ -1,20 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1331729727(BasicNewsRecipe):
    title = u'Camera di Commercio di Bari'
    oldest_article = 7
    __author__ = 'faber1971'
    description = 'News from the Chamber of Commerce of Bari'
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True
    masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png'
    feeds = [(u'Camera di Commercio di Bari',
              u'http://feed43.com/4715147488845101.xml')]


__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '17, April 2012'
@ -1,12 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class CapesnBabesRecipe(BasicNewsRecipe):
    title = u'Capes n Babes'
    language = 'en'
    description = 'The Capes n Babes comic Blog'
    __author__ = 'skyhawker'
    oldest_article = 31
    max_articles_per_feed = 100
    use_embedded_content = True
    feeds = [(u'Capes & Babes', u'feed://www.capesnbabes.com/feed/')]
@ -1,41 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Capital(BasicNewsRecipe):
    title = 'Capital'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Stiri,Romania'
    encoding = 'utf-8'
    remove_javascript = True
    publisher = 'Capital'
    cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [dict(name='div', attrs={'class': 'single one_article'})
                      ]

    remove_tags = [dict(name='div', attrs={'class': 'single_details'}), dict(name='div', attrs={'class': 'tx-addoceansbanners-pi1'})
                   ]

    feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@ -1,84 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
'''
|
|
||||||
capital.de
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
|
|
||||||
title = 'Capital.de'
|
|
||||||
__author__ = 'schuster'
|
|
||||||
description = 'RSS-Feed von Capital.de'
|
|
||||||
publisher = 'Gruner+Jahr GmbH & Co KG'
|
|
||||||
language = 'de'
|
|
||||||
|
|
||||||
oldest_article = 14
|
|
||||||
max_articles_per_feed = 35
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
conversion_options = {'smarten_punctuation': True,
|
|
||||||
'publisher': publisher}
|
|
||||||
|
|
||||||
cover_source = 'http://shop.capital.de/abos/capital/'
|
|
||||||
masthead_url = 'http://www.capital.de/files/capital/layout/logo.png'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('Capital.de', 'http://www.capital.de/partner-feeds/rss.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': 'grid_8 alpha omega layout_full block'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': 'article_header'}),
|
|
||||||
dict(name='br', attrs={'class': 'clear'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_attributes = ['height', 'width']
|
|
||||||
|
|
||||||
extra_css = 'h1 {font-size: 1.6em; text-align: left} \
|
|
||||||
h2 {font-size: 1em; text-align: left} \
|
|
||||||
.copyright {font-size: 0.6em} \
|
|
||||||
.caption {font-size: 0.6em}'
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
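# the subscription shop page shows the current issue; pull the cover image URL from it
|
|
||||||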
soup = self.index_to_soup(self.cover_source)
|
|
||||||
img_span = soup.find('span', {'class': re.compile('coverimage')})
|
|
||||||
self.cover_url = img_span.find('img', src=True)['src']
|
|
||||||
return self.cover_url
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
# remove all articles without relevant content
|
|
||||||
tags = soup.findAll('li', {'class': 'tag-chain-item'})
|
|
||||||
for li in tags:
|
|
||||||
if 'BILDERSTRECKE' in self.tag_to_string(li).upper():
|
|
||||||
self.abort_article()
|
|
||||||
# remove list of tags
|
|
||||||
tags = soup.find('ul', {'class': 'tag-chain'})
|
|
||||||
if tags:
|
|
||||||
tags.extract()
|
|
||||||
# remove all style attributes
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
# remove all local hyperlinks
|
|
||||||
for a in soup.findAll('a', {'href': True}):
|
|
||||||
if a['href'] and 'http' not in a['href']:
|
|
||||||
del a['href']
|
|
||||||
# remove picture(s) of author(s)
|
|
||||||
for div in soup.findAll('div', {'class': 'ce_text block'}):
|
|
||||||
if div.find('hr'):
|
|
||||||
for hr in div.findAll('hr'):
|
|
||||||
hr.extract()
|
|
||||||
for img in div.findAll('img'):
|
|
||||||
img.extract()
|
|
||||||
return soup
|
|
@ -1,116 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
from mechanize import Request
|
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
|
||||||
q = frozenset(classes.split(' '))
|
|
||||||
return dict(
|
|
||||||
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class CaravanMagazineHindi(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = 'Caravan Magazine in Hindi'
|
|
||||||
__author__ = 'Kovid Goyal, Gobelinus, Aareet Mahadevan'
|
|
||||||
description = 'An Indian Journal of politics and culture'
|
|
||||||
language = 'hi'
|
|
||||||
timefmt = ' [%b, %Y]'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
needs_subscription = 'optional'
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
classes('post-title short-desc author-details cover'),
|
|
||||||
dict(itemprop='articleBody'),
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='meta'),
|
|
||||||
dict(attrs={'class': ['share-with', 'img-wrap abs']}),
|
|
||||||
]
|
|
||||||
remove_attributes = ['style']
|
|
||||||
|
|
||||||
def get_browser(self, *args, **kw):
|
|
||||||
br = BasicNewsRecipe.get_browser(self, *args, **kw)
|
|
||||||
if not self.username or not self.password:
|
|
||||||
return br
|
|
||||||
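# post the configured credentials to the site's JSON login endpoint and check the reply
|
|
||||||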
data = json.dumps({
|
|
||||||
'email': self.username,
|
|
||||||
'name': '',
|
|
||||||
'password': self.password
|
|
||||||
})
|
|
||||||
if not isinstance(data, bytes):
|
|
||||||
data = data.encode('utf-8')
|
|
||||||
rq = Request(
|
|
||||||
url='https://caravanmagazine.in/api/users/login',
|
|
||||||
data=data,
|
|
||||||
headers={
|
|
||||||
'Accept': 'application/json, text/plain, */*',
|
|
||||||
'Origin': 'https://caravanmagazine.in',
|
|
||||||
'Referer': 'https://caravanmagazine.in/',
|
|
||||||
'Content-type': 'application/json;charset=UTF-8',
|
|
||||||
},
|
|
||||||
method='POST'
|
|
||||||
)
|
|
||||||
res = br.open(rq).read()
|
|
||||||
res = res.decode('utf-8')
|
|
||||||
self.log('Login request response: {}'.format(res))
|
|
||||||
res = json.loads(res)
|
|
||||||
if res['code'] != 200 or res['message'] != "Login success":
|
|
||||||
raise ValueError('Login failed, check your username and password')
|
|
||||||
return br
|
|
||||||
|
|
||||||
# To parse article toc
|
|
||||||
def parse_index(self):
|
|
||||||
base_url = 'https://www.caravanmagazine.in/'
|
|
||||||
soup = self.index_to_soup('{0}magazine'.format(base_url))
|
|
||||||
|
|
||||||
# find current issue cover
|
|
||||||
feeds = []
|
|
||||||
sections = soup.find(
|
|
||||||
attrs={
|
|
||||||
'class': lambda x: x and 'current-magazine-issue' in x.split()
|
|
||||||
}
|
|
||||||
).find(attrs={'class': lambda x: x and 'sections' in x.split()})
|
|
||||||
for section in sections.findAll(
|
|
||||||
attrs={'class': lambda x: x and 'section' in x.split()}
|
|
||||||
):
|
|
||||||
a = section.find('a')
|
|
||||||
section_title = self.tag_to_string(a)
|
|
||||||
self.log('\nSection:', section_title)
|
|
||||||
articles = []
|
|
||||||
for article in section.findAll('article'):
|
|
||||||
details = article.find(
|
|
||||||
attrs={'class': lambda x: x and 'details' in x.split()}
|
|
||||||
)
|
|
||||||
pre = details.find(
|
|
||||||
attrs={'class': lambda x: x and 'pre-heading' in x.split()}
|
|
||||||
)
|
|
||||||
if pre is not None:
|
|
||||||
pre.extract()
|
|
||||||
a = details.find('a')
|
|
||||||
url = base_url + a['href'].lstrip('/') + '-hindi'
|
|
||||||
title = self.tag_to_string(a)
|
|
||||||
desc = self.tag_to_string(details.find('div'))
|
|
||||||
self.log('\t', title, url)
|
|
||||||
articles.append({'title': title, 'description': desc, 'url': url})
|
|
||||||
if articles:
|
|
||||||
feeds.append((section_title, articles))
|
|
||||||
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for div in soup.findAll(itemprop='image'):
|
|
||||||
for img in div.findAll('img'):
|
|
||||||
img['src'] = div['content']
|
|
||||||
for img in soup.findAll(attrs={'data-src': True}):
|
|
||||||
img['src'] = img['data-src']
|
|
||||||
return soup
|
|
@ -1,51 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
|
|
||||||
|
|
||||||
|
|
||||||
class Carta(BasicNewsRecipe):
|
|
||||||
# Update 2017-09-01
|
|
||||||
# Armin Geller
|
|
||||||
|
|
||||||
title = u'Carta'
|
|
||||||
description = "Authors' blog for politics, economics and digital community"
|
|
||||||
__author__ = 'Armin Geller' # AGe Update 2017-09-01
|
|
||||||
|
|
||||||
timefmt = ' [%a %d %b %Y]'
|
|
||||||
oldest_article = 14
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
remove_empty_feeds = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
language = 'de'
|
|
||||||
|
|
||||||
cover_url = 'http://www.carta.info/wp-content/themes/carta2014/img/carta-logo.svg' # AGe Update 2014-10-05 new cover
|
|
||||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/de/b/ba/Carta_logo.png'
|
|
||||||
# masthead_url ='http://www.carta.info/wp-content/themes/carta2014/img/carta-logo.svg'
|
|
||||||
extra_css = '''
|
|
||||||
h2 {font-size: 1.3em; font-style: italic}
|
|
||||||
.excerpt {font-size: 1.2em; font-style: italic}
|
|
||||||
'''
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['article-text', 'author']}),
|
|
||||||
dict(name='p', attrs={'class': 'tags'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='ul', attrs={'class': 'meta'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'CARTA - Standard', u'http://feeds2.feedburner.com/carta-standard-rss'),
|
|
||||||
(u'CARTA - Homepage', u'http://feeds2.feedburner.com/carta-homepage-rss'),
|
|
||||||
(u'CARTA - Agenda', u'http://feeds2.feedburner.com/carta-agenda-rss'),
|
|
||||||
(u'CARTA - Ökonomie', u'http://feeds2.feedburner.com/carta-oekonomie-rss'),
|
|
||||||
(u'CARTA - Medien', u'http://feeds2.feedburner.com/carta-medien-rss'),
|
|
||||||
]
|
|
@ -1,17 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1328971305(BasicNewsRecipe):
|
|
||||||
title = u'Catholic Daily Readings'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'adoucette'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Daily Readings - USCCB', u'http://www.usccb.org/bible/readings/rss/'),
|
|
||||||
(u'Daily Reflection - One Bread One Body', u'http://www.presentationministries.com/general/rss.asp'),
|
|
||||||
|
|
||||||
(u'Mass Readings - Universalis', u'http://www.universalis.com/atommass3.xml'),
|
|
||||||
(u'Saint Of The Day - CNA', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')]
|
|
@ -1,28 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CD_Action(BasicNewsRecipe):
|
|
||||||
title = u'CD-Action'
|
|
||||||
__author__ = 'fenuks'
|
|
||||||
description = 'Strona CD-Action (CDA), największego w Polsce pisma dla graczy.Pełne wersje gier, newsy, recenzje, zapowiedzi, konkursy, forum, opinie, galerie screenów,trailery, filmiki, patche, teksty. Gry komputerowe (PC) oraz na konsole (PS3, XBOX 360).' # noqa
|
|
||||||
category = 'games'
|
|
||||||
language = 'pl'
|
|
||||||
index = 'http://www.cdaction.pl'
|
|
||||||
oldest_article = 8
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
keep_only_tags = [dict(id='news_content')]
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'tresc'})
|
|
||||||
feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
|
|
||||||
self.cover_url = 'http://www.cdaction.pl' + \
|
|
||||||
soup.find(id='wspolnik').div.a['href']
|
|
||||||
return self.cover_url
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for a in soup.findAll('a', href=True):
|
|
||||||
if 'http://' not in a['href'] and 'https://' not in a['href']:
|
|
||||||
a['href'] = self.index + a['href']
|
|
||||||
return soup
|
|
@ -1,74 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Comment
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class cdrinfo(BasicNewsRecipe):
|
|
||||||
title = u'CDRinfo.pl'
|
|
||||||
__author__ = 'fenuks'
|
|
||||||
description = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt. Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.' # noqa
|
|
||||||
category = 'it, hardware'
|
|
||||||
# publication_type = ''
|
|
||||||
language = 'pl'
|
|
||||||
# encoding = ''
|
|
||||||
# extra_css = ''
|
|
||||||
cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg'
|
|
||||||
# masthead_url = ''
|
|
||||||
use_embedded_content = False
|
|
||||||
oldest_article = 777
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_empty_feeds = True
|
|
||||||
remove_javascript = True
|
|
||||||
remove_attributes = ['style', 'onmouseover']
|
|
||||||
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\\.gravatar\\.com</a>\\.</p>', re.DOTALL), lambda match: ''),
|
|
||||||
(re.compile(u'<p[^>]*?>.{,2}</p>', re.DOTALL), lambda match: '')]
|
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='input', attrs={'name': 'ref'}), dict(id=['text', 'text2'])]
|
|
||||||
remove_tags = [dict(attrs={'class': ['navigation', 'sociable', 'last6news']}), dict(
|
|
||||||
name=['hr', 'br']), dict(id='respond')]
|
|
||||||
remove_tags_after = dict(id='artnawigacja')
|
|
||||||
feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'),
|
|
||||||
(u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
|
|
||||||
(u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
|
|
||||||
(u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
if soup.find(id='artnawigacja'):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def append_page(self, soup, appendtag):
|
|
||||||
baseurl = 'http://cdrinfo.pl' + \
|
|
||||||
soup.find(name='input', attrs={'name': 'ref'})['value'] + '/'
|
|
||||||
if baseurl[-2] == '/':
|
|
||||||
baseurl = baseurl[:-1]
|
|
||||||
tag = soup.find(id='artnawigacja')
|
|
||||||
div = tag.find('div', attrs={'align': 'right'})
|
|
||||||
while div:
|
|
||||||
counter = 0
|
|
||||||
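# retry the fetch of the next page up to five times before giving up
|
|
||||||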
while counter < 5:
|
|
||||||
try:
|
|
||||||
soup2 = self.index_to_soup(baseurl + div.a['href'])
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
counter += 1
|
|
||||||
tag2 = soup2.find(id='artnawigacja')
|
|
||||||
div = tag2.find('div', attrs={'align': 'right'})
|
|
||||||
pagetext = soup2.find(attrs={'class': 'art'})
|
|
||||||
comments = pagetext.findAll(
|
|
||||||
text=lambda text: isinstance(text, Comment))
|
|
||||||
for comment in comments:
|
|
||||||
comment.extract()
|
|
||||||
for r in soup2.findAll(attrs={'class': 'star-rating'}):
|
|
||||||
r.extract()
|
|
||||||
for r in soup2.findAll(attrs={'class': 'star-rating2'}):
|
|
||||||
r.extract()
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
tag.extract()
|
|
@ -1,70 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ceskaPoziceRecipe(BasicNewsRecipe):
|
|
||||||
__author__ = 'bubak'
|
|
||||||
title = u'Česká pozice'
|
|
||||||
description = 'Česká pozice'
|
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
|
|
||||||
(u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
|
|
||||||
(u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
|
|
||||||
(u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
|
|
||||||
]
|
|
||||||
|
|
||||||
language = 'cs'
|
|
||||||
cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
domain = u'http://www.ceskapozice.cz'
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class': ['block-ad', 'region region-content-ad']}),
|
|
||||||
dict(name='ul', attrs={'class': 'links'}),
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'id': ['comments', 'back-to-top']}),
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': ['next-page', 'region region-content-ad']}),
|
|
||||||
dict(name='cite')]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
|
|
||||||
|
|
||||||
visited_urls = {}
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
|
||||||
if url in self.visited_urls:
|
|
||||||
self.log.debug('Ignoring duplicate: ' + url)
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
self.visited_urls[url] = True
|
|
||||||
self.log.debug('Accepting: ' + url)
|
|
||||||
return url
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body, 3)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
|
||||||
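# follow the 'next page' link, strip ads and citations, and append the remaining text to the article
|
|
||||||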
pager = soup.find('div', attrs={'class': 'paging-bottom'})
|
|
||||||
if pager:
|
|
||||||
nextbutton = pager.find('li', attrs={'class': 'pager-next'})
|
|
||||||
if nextbutton:
|
|
||||||
nexturl = self.domain + nextbutton.a['href']
|
|
||||||
soup2 = self.index_to_soup(nexturl)
|
|
||||||
texttag = soup2.find('div', attrs={'class': 'main-body'})
|
|
||||||
for it in texttag.findAll('div', attrs={'class': 'region region-content-ad'}):
|
|
||||||
it.extract()
|
|
||||||
for it in texttag.findAll('cite'):
|
|
||||||
it.extract()
|
|
||||||
newpos = len(texttag.contents)
|
|
||||||
self.append_page(soup2, texttag, newpos)
|
|
||||||
texttag.extract()
|
|
||||||
appendtag.insert(position, texttag)
|
|
||||||
pager.extract()
|
|
@ -1,27 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class cro6Recipe(BasicNewsRecipe):
|
|
||||||
__author__ = 'bubak'
|
|
||||||
title = u'Český rozhlas 6'
|
|
||||||
description = 'Český rozhlas 6'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/')
|
|
||||||
]
|
|
||||||
|
|
||||||
language = 'cs'
|
|
||||||
cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
remove_attributes = []
|
|
||||||
remove_tags = [dict(name='div', attrs={'class': ['audio-play-all', 'poradHeaders', 'actions']}),
|
|
||||||
dict(name='p', attrs={'class': ['para-last']})]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
|
|
@ -1,34 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
chetnixploitation.blogspot.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Chetnixploitation(BasicNewsRecipe):
|
|
||||||
title = 'Chetnixploitation'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'Filmski blog'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
language = 'sr'
|
|
||||||
publication_type = 'blog'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = True
|
|
||||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description, 'tags': 'film, blog, cetnici, srbija, ex-yu', 'publisher': 'Son of Man', 'language': language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
|
||||||
feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,48 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CGM(BasicNewsRecipe):
|
|
||||||
title = u'CGM'
|
|
||||||
oldest_article = 7
|
|
||||||
__author__ = 'fenuks'
|
|
||||||
description = u'Codzienna Gazeta Muzyczna'
|
|
||||||
masthead_url = 'http://www.cgm.pl/img/header/logo.gif'
|
|
||||||
cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
|
|
||||||
category = 'music'
|
|
||||||
language = 'pl'
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_empty_feeds = True
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;} img {display: block;} ul.galleryImagesList {list-style: none;} li.item {float: left;} .calibrenavbar {clear: both;}' # noqa
|
|
||||||
remove_tags_before = dict(id='mainContent')
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'fbContainer'})
|
|
||||||
remove_tags = [dict(name='div', attrs={'class': ['fbContainer', 'socials']}),
|
|
||||||
dict(name='p', attrs={
|
|
||||||
'class': ['tagCloud', 'galleryAuthor']}),
|
|
||||||
dict(id=['movieShare', 'container']), dict(name='br')]
|
|
||||||
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
|
||||||
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
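# turn the Flash gallery placeholder into a plain <img> pointing at the image URL from its inline style
|
|
||||||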
gallery = soup.find('div', attrs={'class': 'galleryFlash'})
|
|
||||||
if gallery and gallery.div:
|
|
||||||
img = gallery.div
|
|
||||||
gallery.img.extract()
|
|
||||||
if img:
|
|
||||||
img = img['style']
|
|
||||||
img = 'http://www.cgm.pl' + \
|
|
||||||
img[img.find('url(') + 4:img.find(')')]
|
|
||||||
gallery.contents[1].name = 'img'
|
|
||||||
gallery.contents[1]['src'] = img
|
|
||||||
pos = len(gallery.contents)
|
|
||||||
gallery.insert(pos, BeautifulSoup('<br />'))
|
|
||||||
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
ad = soup.findAll('a', href=True)
|
|
||||||
for r in ad:
|
|
||||||
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
|
|
||||||
r.extract()
|
|
||||||
return soup
|
|
@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
chicagobreakingnews.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ChicagoBreakingNews(BasicNewsRecipe):
|
|
||||||
title = 'Chicago Breaking News'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'Breaking News from Chicago'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = True
|
|
||||||
publisher = 'Chicago Breaking News'
|
|
||||||
category = 'news, politics, USA, Chicago'
|
|
||||||
encoding = 'utf8'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description, '--category', category, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + \
|
|
||||||
'"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll('a', href=True):
|
|
||||||
if item['href'].find('http://feedads.googleadservices.com') > -1:
|
|
||||||
item.extract()
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
for item in soup.findAll(color=True):
|
|
||||||
del item['color']
|
|
||||||
for item in soup.findAll(size=True):
|
|
||||||
del item['size']
|
|
||||||
return soup
|
|
@ -1,56 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1278162597(BasicNewsRecipe):
|
|
||||||
__author__ = 'rty'
|
|
||||||
title = u'China Economic Net'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
publisher = 'www.ce.cn - China Economic net - Beijing'
|
|
||||||
description = 'China Economic Net Magazine'
|
|
||||||
category = 'Economic News Magazine, Chinese, China'
|
|
||||||
|
|
||||||
recipe_specific_options = {
|
|
||||||
'days': {
|
|
||||||
'short': 'Oldest article to download from this news source. In days ',
|
|
||||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
|
||||||
'default': str(oldest_article)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
|
||||||
d = self.recipe_specific_options.get('days')
|
|
||||||
if d and isinstance(d, str):
|
|
||||||
self.oldest_article = float(d)
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
|
|
||||||
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
|
|
||||||
(u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
|
|
||||||
(u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
|
|
||||||
(u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
|
|
||||||
]
|
|
||||||
masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
|
|
||||||
extra_css = '''
|
|
||||||
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
|
|
||||||
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
|
|
||||||
h1 {font-family: 'DroidFont', serif;}\n
|
|
||||||
.articledescription {font-family: 'DroidFont', serif;}
|
|
||||||
'''
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
|
||||||
no_stylesheets = True
|
|
||||||
language = 'zh_CN'
|
|
||||||
encoding = 'gb2312'
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
|
|
||||||
dict(name='h1', attrs={'id': 'articleTitle'}),
|
|
||||||
dict(name='div', attrs={'class': 'laiyuan'}),
|
|
||||||
dict(name='div', attrs={'id': 'articleText'}),
|
|
||||||
]
|
|
@ -1,41 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
|
|
||||||
title = u'中時電子報'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
feeds = [(u'焦點要聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-focus'),
|
|
||||||
(u'生活新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-life'),
|
|
||||||
(u'社會新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-society'),
|
|
||||||
(u'兩岸國際', u'http://feeds.feedburner.com/chinatimes/chinatimes-international'),
|
|
||||||
(u'時論廣場', u'http://feeds.feedburner.com/chinatimes/chinatimes-comment'),
|
|
||||||
(u'藝文副刊', u'http://feeds.feedburner.com/chinatimes/chinatimes-philology'),
|
|
||||||
(u'地方新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-local'),
|
|
||||||
(u'財經焦點', u'http://feeds.feedburner.com/chinatimes/chinatimes-finance'),
|
|
||||||
(u'運動天地', u'http://feeds.feedburner.com/chinatimes/chinatimes-sport'),
|
|
||||||
(u'娛樂新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-showbiz'),
|
|
||||||
(u'時尚消費', u'http://feeds.feedburner.com/chinatimes/chinatimes-fashion'),
|
|
||||||
# (u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
|
|
||||||
# (u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
|
|
||||||
]
|
|
||||||
|
|
||||||
__author__ = 'einstuerzende, updated by Eddie Lau'
|
|
||||||
__version__ = '1.1'
|
|
||||||
language = 'zh'
|
|
||||||
publisher = 'China Times Group'
|
|
||||||
description = 'China Times (Taiwan)'
|
|
||||||
category = 'News, Chinese, Taiwan'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
|
|
||||||
cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
|
|
@ -1,46 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
chip.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ChipRo(BasicNewsRecipe):
|
|
||||||
title = u'Chip Online'
|
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
|
||||||
description = 'Chip Online'
|
|
||||||
publisher = 'Chip Online'
|
|
||||||
oldest_article = 5
|
|
||||||
language = 'ro'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
category = 'Ziare,Reviste,IT'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
cover_url = 'http://www.chip.ro/images/logo.png'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='h2', attrs={'class': 'contentheading clearfix'}), dict(name='span', attrs={
|
|
||||||
'class': 'createby'}), dict(name='div', attrs={'class': 'article-content'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['sharemecompactbutton']}), dict(name='div', attrs={'align': ['left']}), dict(name='div', attrs={
|
|
||||||
'align': ['center']}), dict(name='th', attrs={'class': ['pagenav_prev']}), dict(name='table', attrs={'class': ['pagenav']})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Feeds', u'http://www.chip.ro/index.php?option=com_ninjarsssyndicator&feed_id=9&format=raw')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,29 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
|
||||||
'''
|
|
||||||
Profile to download Chosun.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ChosunDotcom(BasicNewsRecipe):
|
|
||||||
language = 'ko'
|
|
||||||
title = u'조선일보'
|
|
||||||
description = u'조선닷컴 기사'
|
|
||||||
__author__ = 'Hoje Lee'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 10
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'정치', 'http://www.chosun.com/site/data/rss/politics.xml'),
|
|
||||||
(u'조선비즈', 'http://biz.chosun.com/site/data/rss/rss.xml'),
|
|
||||||
(u'사회', 'http://www.chosun.com/site/data/rss/national.xml'),
|
|
||||||
(u'문화', 'http://www.chosun.com/site/data/rss/culture.xml'),
|
|
||||||
(u'국제', 'http://www.chosun.com/site/data/rss/international.xml'),
|
|
||||||
(u'오피니언', 'http://www.chosun.com/site/data/rss/editorials.xml'),
|
|
||||||
(u'스포츠', 'http://www.chosun.com/site/data/rss/sports.xml'),
|
|
||||||
(u'연예', 'http://www.chosun.com/site/data/rss/ent.xml'),
|
|
||||||
]
|
|
@ -1,71 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Luis Hernandez'
|
|
||||||
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
|
|
||||||
__version__ = 'v1.2'
|
|
||||||
__date__ = '31 January 2011'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.cincodias.com/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = u'Cinco Dias'
|
|
||||||
publisher = u'Grupo Prisa'
|
|
||||||
|
|
||||||
__author__ = 'Luis Hernandez'
|
|
||||||
description = 'Spanish website about money and business, free edition'
|
|
||||||
|
|
||||||
cover_url = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'
|
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
language = 'es'
|
|
||||||
remove_empty_feeds = True
|
|
||||||
encoding = 'ISO-8859-1'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['cab_articulo cab_noticia', 'pos_3', 'txt_noticia', 'mod_despiece']}), dict(
|
|
||||||
name='p', attrs={'class': ['cintillo']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_before = dict(name='div', attrs={'class': ['publi_h']})
|
|
||||||
remove_tags_after = dict(
|
|
||||||
name='div', attrs={'class': ['tab_util util_estadisticas']})
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['util-1', 'util-2', 'util-3', 'inner estirar', 'inner1', 'inner2', 'inner3', 'cont', 'tab_util util_estadisticas', 'tab_util util_enviar', 'mod_list_inf', 'mod_similares', 'mod_divisas', 'mod_sectores', 'mod_termometro', 'mod post', 'mod_img', 'mod_txt', 'nivel estirar', 'barra estirar', 'info_brujula btnBrujula', 'utilidad_brujula estirar']}), dict(name='li', attrs={'class': ['lnk-fcbook', 'lnk-retweet', 'lnk-meneame', 'desplegable', 'comentarios', 'list-options', 'estirar']}), dict(name='ul', attrs={'class': ['lista-izquierda', 'list-options', 'estirar']}), dict(name='p', attrs={'class': ['autor']}) # noqa
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = """
|
|
||||||
p{text-align: justify; font-size: 100%}
|
|
||||||
body{ text-align: left; font-size:100% }
|
|
||||||
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
|
||||||
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
|
|
||||||
"""
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
|
|
||||||
(u'Ultima Hora', u'http://www.cincodias.com/rss/feed.html?feedId=17029'),
|
|
||||||
(u'Empresas', u'http://www.cincodias.com/rss/feed.html?feedId=19'),
|
|
||||||
(u'Mercados', u'http://www.cincodias.com/rss/feed.html?feedId=20'),
|
|
||||||
(u'Economia', u'http://www.cincodias.com/rss/feed.html?feedId=21'),
|
|
||||||
(u'Tecnorama', u'http://www.cincodias.com/rss/feed.html?feedId=17230'),
|
|
||||||
(u'Tecnologia', u'http://www.cincodias.com/rss/feed.html?feedId=17106'),
|
|
||||||
(u'Finanzas Personales', u'http://www.cincodias.com/rss/feed.html?feedId=22'),
|
|
||||||
(u'Fiscalidad', u'http://www.cincodias.com/rss/feed.html?feedId=17107'),
|
|
||||||
(u'Vivienda', u'http://www.cincodias.com/rss/feed.html?feedId=17108'),
|
|
||||||
(u'Tendencias', u'http://www.cincodias.com/rss/feed.html?feedId=17109'),
|
|
||||||
(u'Empleo', u'http://www.cincodias.com/rss/feed.html?feedId=17110'),
|
|
||||||
(u'IBEX 35', u'http://www.cincodias.com/rss/feed.html?feedId=17125'),
|
|
||||||
(u'Sectores', u'http://www.cincodias.com/rss/feed.html?feedId=17126'),
|
|
||||||
(u'Opinion', u'http://www.cincodias.com/rss/feed.html?feedId=17105')
|
|
||||||
]
|
|
@ -1,46 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
|
|
||||||
'''
|
|
||||||
cinebel.be
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Cinebel(BasicNewsRecipe):
|
|
||||||
title = u'Cinebel'
|
|
||||||
__author__ = u'Lionel Bergeret'
|
|
||||||
description = u'Cinema news from Belgium in French'
|
|
||||||
publisher = u'cinebel.be'
|
|
||||||
category = 'news, cinema, movie, Belgium'
|
|
||||||
oldest_article = 15
|
|
||||||
language = 'fr'
|
|
||||||
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
timefmt = ' [%d %b %Y]'
|
|
||||||
filterDuplicates = True
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='span', attrs={'class': 'movieMainTitle'}), dict(name='div', attrs={'id': 'filmPoster'}), dict(
|
|
||||||
name='div', attrs={'id': 'filmDefinition'}), dict(name='div', attrs={'id': 'synopsis'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
|
|
||||||
(u'Les sorties de la semaine', u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=0'),
|
|
||||||
(u'Top 10', u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for alink in soup.findAll('a', href=True):
|
|
||||||
tstr = "Site officiel: " + alink['href']
|
|
||||||
alink.replaceWith(tstr)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif'
|
|
||||||
return cover_url
|
|
@ -1,131 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Lorenzo Vigentini'
|
|
||||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
|
||||||
__version__ = 'v1.01'
|
|
||||||
__date__ = '14, January 2010'
|
|
||||||
__description__ = "CIO is the leading information brand for today's busy chief information officer."
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.cio.co.uk/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class cio(BasicNewsRecipe):
|
|
||||||
__author__ = 'Lorenzo Vigentini'
|
|
||||||
description = 'CIO is the leading information brand for today\'s busy chief information officer.'
|
|
||||||
cover_url = 'http://media.cio.co.uk/graphics/shared/cio-logo.gif'
|
|
||||||
|
|
||||||
title = 'CIO '
|
|
||||||
publisher = 'IDG Communication'
|
|
||||||
category = 'IT, technology, business, industry'
|
|
||||||
|
|
||||||
language = 'en'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 10
|
|
||||||
use_embedded_content = False
|
|
||||||
recursion = 10
|
|
||||||
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
temp_files = []
|
|
||||||
articles_are_obfuscated = True
|
|
||||||
|
|
||||||
def get_obfuscated_article(self, url):
|
|
||||||
br = self.get_browser()
|
|
||||||
br.open(url)
|
|
||||||
response = br.follow_link(url_regex='&print&intcmp=ROSATT2$', nr=0)
|
|
||||||
html = response.read()
|
|
||||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
|
||||||
self.temp_files[-1].write(html)
|
|
||||||
self.temp_files[-1].close()
|
|
||||||
return self.temp_files[-1].name
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': 'mainContent'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'News', u'http://www.cio.co.uk/rss/feeds/cio-news.xml'),
|
|
||||||
(u'Debate', u'http://www.cio.co.uk/rss/feeds/cio-debate.xml'),
|
|
||||||
(u'Analysis', u'http://www.cio.co.uk/rss/feeds/cio-analysis.xml'),
|
|
||||||
(u'Opinion', u'http://www.cio.co.uk/rss/feeds/cio-opinion.xml'),
|
|
||||||
(u'In-Depth', u'http://www.cio.co.uk/rss/feeds/cio-in-depth.xml'),
|
|
||||||
(u'Change management',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-change-management-management.xml'),
|
|
||||||
(u'Regulatory compliance',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-regulatory-compliance-management.xml'),
|
|
||||||
(u'Business strategy',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-business-strategy-management.xml'),
|
|
||||||
(u'Technology', u'http://www.cio.co.uk/rss/feeds/cio-technology-management.xml'),
|
|
||||||
(u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-management.xml'),
|
|
||||||
(u'Soft skills', u'http://www.cio.co.uk/rss/feeds/cio-soft-skills-management.xml'),
|
|
||||||
(u'The CIO career',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-cio-career-management.xml'),
|
|
||||||
(u'Budgets', u'http://www.cio.co.uk/rss/feeds/cio-budgets-management.xml'),
|
|
||||||
(u'Supplier management',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-supplier-management-management.xml'),
|
|
||||||
(u'Board politics',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-board-politics-management.xml'),
|
|
||||||
(u'Enterprise software',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-enterprise-software-technology.xml'),
|
|
||||||
(u'Mobile and wireless',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-mobile-wireless-technology.xml'),
|
|
||||||
(u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-technology.xml'),
|
|
||||||
(u'Storage', u'http://www.cio.co.uk/rss/feeds/cio-storage-technology.xml'),
|
|
||||||
(u'Desktop and client',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-desktop-client-technology.xml'),
|
|
||||||
(u'Outsourcing', u'http://www.cio.co.uk/rss/feeds/cio-outsourcing-technology.xml'),
|
|
||||||
(u'Internet and e-commerce',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-internet-technology.xml'),
|
|
||||||
(u'Database management',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-database-management-technology.xml'),
|
|
||||||
(u'Communications and networking ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-communication-networking-technology.xml'),
|
|
||||||
(u'Grid computing',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-grid-computing-cloud-technology.xml'),
|
|
||||||
(u'Enterprise search',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-enterprise-search-technology.xml'),
|
|
||||||
(u'CRM ', u'http://www.cio.co.uk/rss/feeds/cio-crm-technology.xml'),
|
|
||||||
(u'Ade McCormack ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-ade-mccormack.xml'),
|
|
||||||
(u'Andy Hayler ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-andy-hayler.xml'),
|
|
||||||
(u'CEB ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-ceb.xml'),
|
|
||||||
(u'CIO Staff ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-cio-staff.xml'),
|
|
||||||
(u'Dave Pepperell ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-dave-pepperell.xml'),
|
|
||||||
(u'Elliot Limb ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-elliot-limb.xml'),
|
|
||||||
(u'Freeform Dynamics ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-freeform-dynamics.xml'),
|
|
||||||
(u'Giles Nelson ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-giles-nelson.xml'),
|
|
||||||
(u'Mark Chillingworth ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-mark-chillingworth.xml'),
|
|
||||||
(u'Martin Veitch ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-martin-veitch.xml'),
|
|
||||||
(u'Mike Altendorf ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-mike-altendorf.xml'),
|
|
||||||
(u'Richard Steel ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-steel.xml'),
|
|
||||||
(u'Richard Sykes ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-sykes.xml'),
|
|
||||||
(u'Rob Llewellyn ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-opinion-rob-llewellyn.xml'),
|
|
||||||
(u'Free thinking ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-blog-free-thinking.xml'),
|
|
||||||
(u'Leading CIOs ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-blog-leading-cios.xml'),
|
|
||||||
(u'CIO News View ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-news-view.xml'),
|
|
||||||
(u'CIO Blog ', u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-blog.xml'),
|
|
||||||
(u'Transformation CIO ',
|
|
||||||
u'http://www.cio.co.uk/rss/feeds/cio-blog-transformation-cio.xml')
|
|
||||||
]
|
|
@ -1,147 +0,0 @@
|
|||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
# To convert the article time
|
|
||||||
import string
|
|
||||||
|
|
||||||
# sys is not needed... I tried to use it to write to stderr
|
|
||||||
from calibre import strftime
|
|
||||||
|
|
||||||
# The first comments are the difficulties I have had with Python
|
|
||||||
# When you get a UTF8 error, check the comments (accents). In Notepad++: Search, Go to, position and you will see it.
|
|
||||||
# Edit with Notepad++. If it puts - where it should not, the indentation is wrong... Edit - Blank operations - tab to space
|
|
||||||
# I have understood what the 'from' means... they are paths inside pylib.zip...
|
|
||||||
# With 'from' only one symbol is imported... with 'import', the whole library
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
# To use regular expressions
|
|
||||||
# Seen in pylib.zip... the first letter is upper case
|
|
||||||
# These last two were a vague attempt to set a cookie (not
|
|
||||||
# used)
|
|
||||||
|
|
||||||
|
|
||||||
class CIO_Magazine(BasicNewsRecipe):
|
|
||||||
title = 'CIO Magazine'
|
|
||||||
oldest_article = 14
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
__author__ = 'Julio Map'
|
|
||||||
description = "CIO is the leading information brand for today's busy Chief Information Officer - CIO Magazine bi-monthly"
|
|
||||||
language = 'en'
|
|
||||||
encoding = 'utf8'
|
|
||||||
cover_url = 'http://www.cio.com/homepage/images/hp-cio-logo-linkedin.png'
|
|
||||||
|
|
||||||
remove_tags_before = dict(name='div', attrs={'id': 'container'})
|
|
||||||
# Absolutely unnecessary... in the end I found a print_version (see
|
|
||||||
# further below)
|
|
||||||
|
|
||||||
# Within a given magazine issue...
|
|
||||||
# issue_details contains the title and the sections of this issue
|
|
||||||
# DetailModule is inside issue_details and contains the urls and summaries
|
|
||||||
# Within a given article...
|
|
||||||
# Article-default-body contains the text. But as I said, I found
|
|
||||||
# a print_version
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
# This function is called by the system... you must not call it yourself (it would be called twice)
|
|
||||||
# A printable version of the articles exists, obtained by changing
|
|
||||||
# http://www.cio.com/article/<num>/<title> into
|
|
||||||
# http://www.cio.com/article/print/<num>, which contains all the pages
|
|
||||||
# inside the div id=container
|
|
||||||
if url.startswith('/'):
|
|
||||||
url = 'http://www.cio.com' + url
|
|
||||||
segments = url.split('/')
|
|
||||||
printURL = '/'.join(segments[0:4]) + '/print/' + segments[4] + '#'
|
|
||||||
return printURL
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
#######################################################################
|
|
||||||
# This method should be implemented in recipes that parse a website
|
|
||||||
# instead of feeds to generate a list of articles. Typical uses are for
|
|
||||||
# news sources that have a Print Edition webpage that lists all the
|
|
||||||
# articles in the current print edition. If this function is implemented,
|
|
||||||
# it will be used in preference to BasicNewsRecipe.parse_feeds().
|
|
||||||
#
|
|
||||||
# It must return a list. Each element of the list must be a 2-element
|
|
||||||
# tuple of the form ('feed title', list of articles).
|
|
||||||
#
|
|
||||||
# Each list of articles must contain dictionaries of the form:
|
|
||||||
#
|
|
||||||
# {
|
|
||||||
# 'title' : article title,
|
|
||||||
# 'url' : URL of print version,
|
|
||||||
# 'date' : The publication date of the article as a string,
|
|
||||||
# 'description' : A summary of the article
|
|
||||||
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# For an example, see the recipe for downloading The Atlantic.
|
|
||||||
# In addition, you can add 'author' for the author of the article.
|
|
||||||
#######################################################################
|
|
||||||
|
|
||||||
# First we look for the latest issue that has been created
|
|
||||||
soupinicial = self.index_to_soup('http://www.cio.com/magazine')
|
|
||||||
# It is the first link in the DIV with class content_body
|
|
||||||
a = soupinicial.find(
|
|
||||||
True, attrs={'class': 'content_body'}).find('a', href=True)
|
|
||||||
INDEX = re.sub(r'\?.*', '', a['href'])
|
|
||||||
# Since cio.com uses relative links, we prepend the domain name.
|
|
||||||
if INDEX.startswith('/'):  # protecting ourselves in case they stop using them
|
|
||||||
INDEX = 'http://www.cio.com' + INDEX
|
|
||||||
# And we make sure in the logs that we are doing it right
|
|
||||||
print("INDEX en parse_index: ", INDEX)
|
|
||||||
|
|
||||||
# Now we know which issue it is... let us process it.
|
|
||||||
soup = self.index_to_soup(INDEX)
|
|
||||||
|
|
||||||
articles = {}
|
|
||||||
key = None
|
|
||||||
feeds = []
|
|
||||||
# To start with, we keep only two DIVs, 'heading' and ' issue_item'
|
|
||||||
# From the first we take the categories (key) and from the second the urls and
|
|
||||||
# summaries
|
|
||||||
for div in soup.findAll(True,
|
|
||||||
attrs={'class': ['heading', 'issue_item']}):
|
|
||||||
|
|
||||||
if ''.join(div['class']) == 'heading':
|
|
||||||
key = string.capwords(self.tag_to_string(div.span))
|
|
||||||
print("Key: ", key) # Esto es para depurar
|
|
||||||
articles[key] = []
|
|
||||||
feeds.append(key)
|
|
||||||
|
|
||||||
elif ''.join(div['class']) == 'issue_item':
|
|
||||||
a = div.find('a', href=True)
|
|
||||||
if not a:
|
|
||||||
continue
|
|
||||||
url = re.sub(r'\?.*', '', a['href'])
|
|
||||||
print("url: ", url) # Esto es para depurar
|
|
||||||
# For extra credit, strip the last two words at the end
|
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
|
||||||
# This is not the publication date but the date it was collected
|
|
||||||
pubdate = strftime('%a, %d %b')
|
|
||||||
# Inside the 'issue_item' div the only paragraph there is the
|
|
||||||
# summary
|
|
||||||
summary = div.find('p')
|
|
||||||
# If there is a summary, the description will be the summary... if not, we
|
|
||||||
# leave it blank
|
|
||||||
description = ''
|
|
||||||
|
|
||||||
if summary:
|
|
||||||
description = self.tag_to_string(summary, use_alt=False)
|
|
||||||
print("Description = ", description)
|
|
||||||
|
|
||||||
# This is copied from the NY Times
|
|
||||||
feed = key if key is not None else 'Uncategorized'
|
|
||||||
if feed not in articles:
|
|
||||||
articles[feed] = []
|
|
||||||
if 'podcasts' not in url:
|
|
||||||
articles[feed].append(
|
|
||||||
dict(title=title, url=url, date=pubdate,
|
|
||||||
description=description,
|
|
||||||
content=''))
|
|
||||||
feeds = [(k, articles[k]) for k in feeds if k in articles]
|
|
||||||
return feeds
|
|
@ -1,28 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
City Avisen
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class CityAvisen_dk(BasicNewsRecipe):
|
|
||||||
__author__ = 'CoderAllan.github.com'
|
|
||||||
title = 'City Avisen'
|
|
||||||
|
|
||||||
category = 'newspaper, news, localnews, sport, culture, Denmark'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
auto_cleanup = True
|
|
||||||
language = 'da'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('City Avisen', 'http://minby.dk/city-avisen/feed/'),
|
|
||||||
('Kommentarer til City Avisen', 'http://minby.dk/city-avisen/comments/feed/'),
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CJR(BasicNewsRecipe):
|
|
||||||
title = u'Columbia Journalism Review'
|
|
||||||
__author__ = u'Xanthan Gum'
|
|
||||||
description = 'News about journalism.'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url + '?page=all&print=true'
|
|
@ -1,28 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ClarionLedger(BasicNewsRecipe):
|
|
||||||
title = u'Clarion Ledger'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'cr4zyd'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Local News', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS01'),
|
|
||||||
(u'Breaking News', u'http://www.clarionledger.com/apps/pbcs.dll/section?Category=RSS'),
|
|
||||||
|
|
||||||
(u'Sports', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS02'),
|
|
||||||
(u'Business', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS03')]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'article-headline'}),
|
|
||||||
dict(name='div', attrs={'class': 'article-bodytext'})]
|
|
||||||
remove_tags = [dict(name=['img', 'script', 'li']),
|
|
||||||
dict(name='p', attrs={'class': 'ratingbyline'}),
|
|
||||||
dict(name='div', attrs={'class': 'article-tools'}),
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': 'article-pagination article-pagination-top'}),
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': 'article-pagination article-pagination-bottom'}),
|
|
||||||
dict(name='div', attrs={'class': 'articleflex-container'})]
|
|
@ -1,53 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ClicRBS(BasicNewsRecipe):
|
|
||||||
title = u'ClicRBS'
|
|
||||||
language = 'pt'
|
|
||||||
__author__ = 'arvoredo'
|
|
||||||
oldest_article = 3
|
|
||||||
max_articles_per_feed = 9
|
|
||||||
cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': ['clic-barra-inner', 'botao-versao-mobile ']})
|
|
||||||
]
|
|
||||||
|
|
||||||
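# the repeated assignments below overwrite each other; only the last remove_tags_before and remove_tags_after take effect
|
|
||||||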
remove_tags_before = dict(name='div ', attrs={'class': 'descricao'})
|
|
||||||
remove_tags_before = dict(name='div', attrs={'id': 'glb-corpo'})
|
|
||||||
remove_tags_before = dict(name='div', attrs={'class': 'descricao'})
|
|
||||||
remove_tags_before = dict(name='div', attrs={'class': 'coluna'})
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'extra'})
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id': 'links-patrocinados'})
|
|
||||||
remove_tags_after = dict(name='h4', attrs={'class': 'tipo-c comente'})
|
|
||||||
remove_tags_after = dict(name='ul', attrs={'class': 'lista'})
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
|
|
||||||
(u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13'),
|
|
||||||
(u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67'),
|
|
||||||
(u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml'),
|
|
||||||
(u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1'),
|
|
||||||
(u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13'),
|
|
||||||
(u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13'),
|
|
||||||
(u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1'),
|
|
||||||
(u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1'),
|
|
||||||
(u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1'),
|
|
||||||
(u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2'),
|
|
||||||
(u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1'),
|
|
||||||
(u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13'),
|
|
||||||
(u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2'),
|
|
||||||
(u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18'),
|
|
||||||
(u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2'),
|
|
||||||
(u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
cite{color:#007BB5; font-size:xx-small; font-style:italic;}
|
|
||||||
body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
|
|
||||||
h3{font-size:large; color:#082963; font-weight:bold;}
|
|
||||||
#ident{color:#0179B4; font-size:xx-small;}
|
|
||||||
p{color:#000000;font-weight:normal;}
|
|
||||||
.commentario p{color:#007BB5; font-style:italic;}
|
|
||||||
'''
|
|
@ -1,63 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
climateprogress.org
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
|
|
||||||
impl = getattr(soup, 'new_tag', None)
|
|
||||||
if impl is not None:
|
|
||||||
return impl(name, attrs=dict(attrs))
|
|
||||||
return Tag(soup, name, attrs=attrs or None)
|
|
||||||
|
|
||||||
|
|
||||||
class ClimateProgress(BasicNewsRecipe):
|
|
||||||
title = 'Climate Progress'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = "An insider's view of climate science, politics and solutions"
|
|
||||||
publisher = 'Climate Progress'
|
|
||||||
category = 'news, ecology, climate, blog'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
lang = 'en'
|
|
||||||
direction = 'ltr'
|
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description, '--category', category, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + \
|
|
||||||
'"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h2{color:#003366;font-size: large ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
|
|
||||||
h3{color:#003366;font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
|
|
||||||
h4{color:#003366;font-size: x-small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
|
|
||||||
.date{color:#333333; font-size:xx-small; font-family:Arial,Helvetica,sans-serif; font-style:italic}
|
|
||||||
a{color:#339966;}
|
|
||||||
body{font-family:Georgia,Times New Roman,Times,serif; font-size:x-small;color:#333333;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
feeds = [(u'Posts', u'http://feeds.feedburner.com/climateprogress/lCrX')]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
soup.html['lang'] = self.lang
|
|
||||||
soup.html['dir'] = self.direction
|
|
||||||
mlang = new_tag(soup, 'meta', [
|
|
||||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
|
||||||
mcharset = new_tag(soup, 'meta', [
|
|
||||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
|
||||||
soup.head.insert(0, mlang)
|
|
||||||
soup.head.insert(1, mcharset)
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,33 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.codinghorror.com/blog/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CodingHorror(BasicNewsRecipe):
|
|
||||||
title = 'Coding Horror'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'programming and human factors - Jeff Atwood'
|
|
||||||
category = 'blog, programming'
|
|
||||||
publisher = 'Jeff Atwood'
|
|
||||||
language = 'en'
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = True
|
|
||||||
encoding = 'utf8'
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'authors': publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name=['object', 'link']), dict(
|
|
||||||
name='div', attrs={'class': 'feedflare'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror')]
|
|
@ -1,54 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ColumbusDispatchRecipe(BasicNewsRecipe):
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'kwetal'
|
|
||||||
language = 'en'
|
|
||||||
version = 1
|
|
||||||
|
|
||||||
title = u'The Columbus Dispatch'
|
|
||||||
publisher = u'The Columbus Dispatch'
|
|
||||||
category = u'News, Newspaper'
|
|
||||||
description = u'Daily newspaper from central Ohio'
|
|
||||||
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_empty_feeds = True
|
|
||||||
oldest_article = 1.2
|
|
||||||
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
# Feeds from http://www.dispatch.com/live/content/rss/index.html
|
|
||||||
feeds = [
|
|
||||||
('Local',
|
|
||||||
'http://www.dispatch.com/content/syndication/news_local-state.xml'),
|
|
||||||
('National',
|
|
||||||
'http://www.dispatch.com/content/syndication/news_national.xml'),
|
|
||||||
('Business',
|
|
||||||
'http://www.dispatch.com/content/syndication/news_business.xml'),
|
|
||||||
('Editorials',
|
|
||||||
'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
|
|
||||||
('Columnists',
|
|
||||||
'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
|
|
||||||
('Life and Arts',
|
|
||||||
'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
|
|
||||||
('OSU Sports',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_osu.xml'),
|
|
||||||
('Auto Racing',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
|
|
||||||
('Outdoors',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
|
|
||||||
('Bengals',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
|
|
||||||
('Indians',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_indians.xml'),
|
|
||||||
('Clippers',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
|
|
||||||
('Crew',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_crew.xml'),
|
|
||||||
('Reds',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_reds.xml'),
|
|
||||||
('Blue Jackets',
|
|
||||||
'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
|
|
||||||
]
|
|
@ -1,27 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Computerworld_pl(BasicNewsRecipe):
|
|
||||||
title = u'Computerworld.pl'
|
|
||||||
__author__ = 'fenuks'
|
|
||||||
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
|
|
||||||
category = 'IT'
|
|
||||||
language = 'pl'
|
|
||||||
masthead_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
|
|
||||||
cover_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
|
|
||||||
no_stylesheets = True
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
|
|
||||||
(re.compile(u'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
|
|
||||||
keep_only_tags = [dict(name='article')]
|
|
||||||
remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
|
|
||||||
dict(name='ul',attrs={'class':'tags'}),
|
|
||||||
dict(name='ol'),
|
|
||||||
dict(id=['topComment', 'bottom_tools'])]
|
|
||||||
|
|
||||||
feeds = [(u'Wiadomo\u015bci', u'https://www.computerworld.pl/news?rss')]
|
|
@ -1,78 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
##
|
|
||||||
# Title: Consortium News
|
|
||||||
##
|
|
||||||
# License: GNU General Public License v3 -
|
|
||||||
# http://www.gnu.org/copyleft/gpl.html
|
|
||||||
|
|
||||||
# Feb 2012: Initial release
|
|
||||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
|
||||||
'''
|
|
||||||
consortiumnews.com
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ConsortiumNews(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = u'Consortium News'
|
|
||||||
publisher = 'Copyright © 2012 Consortiumnews. All Rights Reserved.'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'kiavash'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
|
|
||||||
# Flattens all the tables to make it compatible with Nook
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
|
|
||||||
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
|
||||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
|
|
||||||
|
|
||||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
|
||||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
|
||||||
.introduction, .first { font-weight: bold; } \
|
|
||||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
|
||||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
|
||||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
|
||||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
|
||||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
|
||||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
|
||||||
.story-date, .published { font-size: 80%; } \
|
|
||||||
table { width: 100%; } \
|
|
||||||
td img { display: block; margin: 5px auto; } \
|
|
||||||
ul { padding-top: 10px; } \
|
|
||||||
ol { padding-top: 10px; } \
|
|
||||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
|
||||||
h1 { font-size: 175%; font-weight: bold; } \
|
|
||||||
h2 { font-size: 150%; font-weight: bold; } \
|
|
||||||
h3 { font-size: 125%; font-weight: bold; } \
|
|
||||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
|
||||||
|
|
||||||
# Remove the line breaks and float left/right and picture width/height.
|
|
||||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
|
||||||
(re.compile(r'<br[ ]*clear.*/>',
|
|
||||||
re.IGNORECASE), lambda m: ''),
|
|
||||||
(re.compile(r'float:.*?'), lambda m: ''),
|
|
||||||
(re.compile(r'width:.*?px'), lambda m: ''),
|
|
||||||
(re.compile(r'height:.*?px'), lambda m: ''),
|
|
||||||
(re.compile(r'<a.*?>'), lambda h1: ''),
|
|
||||||
(re.compile(r'</a>'), lambda h2: ''),
|
|
||||||
]
|
|
||||||
|
|
||||||
# Main article is inside this tag
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': lambda x: x and 'post-' in x})]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
# remove 'Share this Article'
|
|
||||||
dict(name='div', attrs={'class': 'sociable'}),
|
|
||||||
dict(name='p', attrs={'class': 'tags'}), # remove 'Tags: ... '
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Consortium News', u'http://feeds.feedburner.com/Consortiumnewscom')]
|
|
@ -1,33 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
contemporaryargentinewriters.wordpress.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class contemporaryargentinewriters(BasicNewsRecipe):
|
|
||||||
title = 'Contemporary Argentine Writers'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'Short stories by Argentine writers (and others) translated into English'
|
|
||||||
publisher = 'Dario Bard'
|
|
||||||
category = 'fiction, literature, Argentina, english'
|
|
||||||
oldest_article = 25
|
|
||||||
max_articles_per_feed = 200
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf8'
|
|
||||||
use_embedded_content = True
|
|
||||||
language = 'en_AR'
|
|
||||||
remove_empty_feeds = True
|
|
||||||
publication_type = 'blog'
|
|
||||||
extra_css = """
|
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
|
||||||
img{margin-bottom: 0.4em; display:block}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
|
||||||
}
|
|
||||||
|
|
||||||
feeds = [(u'Posts', u'http://contemporaryargentinewriters.wordpress.com/feed/')]
|
|
@ -1,95 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
|
||||||
__version__ = 'v1.02'
|
|
||||||
__date__ = '14, March 2010'
|
|
||||||
__description__ = 'Italian daily newspaper (english version)'
|
|
||||||
# NOTE: the feed URLs are broken on the main site as the permalink structure has been changed erroneously, i.e.:
|
|
||||||
# actual link in feed http://www.corriere.it/english/10_marzo_11/legitimate_impediment_approved_de9ba480-2cfd-11df-a00c-00144f02aabe.shtml
|
|
||||||
# this needs to be changed to the
|
|
||||||
# real feed URL
|
|
||||||
# http://www.corriere.it/International/english/articoli/2010/03/11/legitimate_impediment_approved.shtml
|
|
||||||
'''
|
|
||||||
http://www.corriere.it/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ilCorriereEn(BasicNewsRecipe):
|
|
||||||
author = 'Lorenzo Vigentini, based on Darko Miletic'
|
|
||||||
description = 'Italian daily newspaper (english version)'
|
|
||||||
|
|
||||||
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
|
|
||||||
title = u'Il Corriere della sera (english) '
|
|
||||||
publisher = 'RCS Digital'
|
|
||||||
category = 'News, politics, culture, economy, general interest'
|
|
||||||
|
|
||||||
language = 'en'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
oldest_article = 5
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
recursion = 10
|
|
||||||
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
recipe_specific_options = {
|
|
||||||
'days': {
|
|
||||||
'short': 'Oldest article to download from this news source. In days ',
|
|
||||||
'long': 'For example, 0.5 gives you articles from the past 12 hours',
|
|
||||||
'default': str(oldest_article)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
|
||||||
d = self.recipe_specific_options.get('days')
|
|
||||||
if d and isinstance(d, str):
|
|
||||||
self.oldest_article = float(d)
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
articleUrl = article.get('link')
|
|
||||||
segments = articleUrl.split('/')
|
|
||||||
basename = '/'.join(segments[:3]) + '/' + \
|
|
||||||
'International/english/articoli/'
|
|
||||||
|
|
||||||
# the date has to be redone with the url structure
|
|
||||||
mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
|
|
||||||
'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre']
|
|
||||||
mlist2 = ['01', '02', '03', '04', '05',
|
|
||||||
'06', '07', '08', '09', '10', '11', '12']
|
|
||||||
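# per the NOTE above, segments[4] looks like '10_marzo_11': two-digit year, Italian month name, day of month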
myDate = segments[4].split('_')
|
|
||||||
x = 0
|
|
||||||
for x in range(12):  # check all 12 Italian month names (mlist1 has 12 entries)
|
|
||||||
if myDate[1] == mlist1[x]:
|
|
||||||
noMonth = mlist2[x]
|
|
||||||
break
|
|
||||||
|
|
||||||
newDateUrl = '20' + myDate[0] + '/' + noMonth + '/' + myDate[2] + '/'
|
|
||||||
|
|
||||||
# clean the article title
|
|
||||||
articleURLseg = segments[5].split('-')
|
|
||||||
myArticle = (articleURLseg[0])[:-9] + '.shtml'
|
|
||||||
|
|
||||||
myURL = basename + newDateUrl + myArticle
|
|
||||||
# print myURL
|
|
||||||
return myURL
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['news-dettaglio article', 'article']})]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name=['base', 'object', 'link', 'embed']),
|
|
||||||
dict(name='div', attrs={'class': 'news-goback'}),
|
|
||||||
dict(name='ul', attrs={'class': 'toolbar'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = dict(name='p', attrs={'class': 'footnotes'})
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'News', u'http://www.corriere.it/rss/english.xml')
|
|
||||||
]
|
|
@ -1,55 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
|
||||||
__version__ = ' '
|
|
||||||
__date__ = '14-06-2010'
|
|
||||||
__description__ = 'Italian daily newspaper'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.corrieredellosport.it/
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ilCorrieredelloSport(BasicNewsRecipe):
|
|
||||||
__author__ = 'Gabriele Marini'
|
|
||||||
description = 'Italian daily newspaper'
|
|
||||||
|
|
||||||
cover_url = 'http://edicola.corrieredellosport.it/newsmem/corsport/prima/nazionale_prima.jpg'
|
|
||||||
|
|
||||||
title = u'Il Corriere dello Sport'
|
|
||||||
publisher = 'CORRIERE DELLO SPORT s.r.l. '
|
|
||||||
category = 'Sport'
|
|
||||||
|
|
||||||
language = 'it'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
oldest_article = 10
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
recursion = 10
|
|
||||||
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables'
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \
|
|
||||||
description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='h1', attrs={'class': ['tit_Article']}),
|
|
||||||
dict(name='h1', attrs={'class': ['tit_Article_mondiali']}),
|
|
||||||
dict(name='div', attrs={'class': ['box_Img']}),
|
|
||||||
dict(name='p', attrs={'class': ['summary', 'text']})]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Primo Piano', u'http://www.corrieredellosport.it/rss/primo_piano.xml'),
|
|
||||||
(u'Calcio', u'http://www.corrieredellosport.it/rss/Calcio-3.xml'),
|
|
||||||
(u'Formula 1', u'http://www.corrieredellosport.it/rss/Formula-1-7.xml'),
|
|
||||||
(u'Moto', u'http://www.corrieredellosport.it/rss/Moto-8.xml'),
|
|
||||||
(u'Piu visti', u'http://www.corrieredellosport.it/rss/piu_visti.xml')
|
|
||||||
]
|
|
@ -1,66 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
|
||||||
'''
|
|
||||||
Muy Interesante
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class General(BasicNewsRecipe):
|
|
||||||
title = 'Cosmopolitan'
|
|
||||||
__author__ = 'Gustavo Azambuja'
|
|
||||||
description = 'Revista Cosmopolitan, Edicion Espanola'
|
|
||||||
language = 'es'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
use_embedded_content = False
|
|
||||||
recursion = 1
|
|
||||||
encoding = 'utf8'
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
|
|
||||||
oldest_article = 180
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(id=['contenido']),
|
|
||||||
dict(name='td', attrs={'class': ['contentheading', 'txt_articulo']})
|
|
||||||
]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['breadcrumb', 'bloque1', 'article', 'bajo_title',
|
|
||||||
'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
|
|
||||||
dict(name='div', attrs={'id': 'comment'}),
|
|
||||||
dict(name='table', attrs={'class': 'pagenav'}),
|
|
||||||
dict(name=['object', 'link'])
|
|
||||||
]
|
|
||||||
remove_attributes = ['width', 'height', 'style', 'font', 'color']
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
|
||||||
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
|
|
||||||
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
|
|
||||||
img {float:left; clear:both; margin:10px}
|
|
||||||
p {font-family:Arial,Helvetica,sans-serif;}
|
|
||||||
'''
|
|
||||||
feeds = [
|
|
||||||
(u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ] # noqa
|
|
||||||
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
|
|
||||||
item.name = 'div'
|
|
||||||
for attrib in attribs:
|
|
||||||
item[attrib] = ''
|
|
||||||
del item[attrib]
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
index = 'http://www.cosmohispano.com/revista'
|
|
||||||
soup = self.index_to_soup(index)
|
|
||||||
link_item = soup.find('img', attrs={'class': 'img_portada'})
|
|
||||||
if link_item:
|
|
||||||
cover_url = "http://www.cosmohispano.com" + link_item['src']
|
|
||||||
return cover_url if link_item else None
|
|
@ -1,36 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
|
|
||||||
title = u'Cosmopolitan.de'
|
|
||||||
__author__ = 'schuster'
|
|
||||||
oldest_article = 7
|
|
||||||
language = 'de'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
cover_url = 'http://www.cosmopolitan.com/cm/shared/site_images/print_this/cosmopolitan_logo.gif'
|
|
||||||
remove_tags_before = dict(name='h1', attrs={'class': 'artikel'})
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'morePages'})
|
|
||||||
extra_css = '''
|
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
|
||||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
|
||||||
'''
|
|
||||||
remove_tags = [dict(id='strong'),
|
|
||||||
dict(title='strong'),
|
|
||||||
dict(name='span'),
|
|
||||||
dict(name='li', attrs={'class': 'large'}),
|
|
||||||
dict(name='ul', attrs={
|
|
||||||
'class': 'articleImagesPortrait clearfix'}),
|
|
||||||
dict(name='p', attrs={'class': 'external'}),
|
|
||||||
dict(name='a', attrs={'target': '_blank'}), ]
|
|
||||||
feeds = [(u'Komplett', u'http://www.cosmopolitan.de/rss/allgemein.xml'),
|
|
||||||
(u'Mode', u'http://www.cosmopolitan.de/rss/mode.xml'),
|
|
||||||
(u'Beauty', u'http://www.cosmopolitan.de/rss/beauty.xml'),
|
|
||||||
(u'Liebe&Sex', u'http://www.cosmopolitan.de/rss/liebe.xml'),
|
|
||||||
(u'Psychologie', u'http://www.cosmopolitan.de/rss/psychologie.xml'),
|
|
||||||
(u'Job&Karriere', u'http://www.cosmopolitan.de/rss/job.xml'),
|
|
||||||
(u'Lifestyle', u'http://www.cosmopolitan.de/rss/lifestyle.xml'),
|
|
||||||
(u'Shopping', u'http://www.cosmopolitan.de/rss/shopping.xml'),
|
|
||||||
(u'Bildergalerien', u'http://www.cosmopolitan.de/rss/bildgalerien.xml')]
|
|
@ -1,41 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1390635673(BasicNewsRecipe):
|
|
||||||
title = u'Cosmopolitan UK'
|
|
||||||
description = "Women's fashion, beauty and gossip from Cosmopolitan UK"
|
|
||||||
language = 'en_GB'
|
|
||||||
|
|
||||||
__author__ = 'Dave Asbury'
|
|
||||||
# 2/2/14
|
|
||||||
oldest_article = 28
|
|
||||||
max_articles_per_feed = 10
|
|
||||||
compress_news_images = True
|
|
||||||
compress_news_images_max_size = 20
|
|
||||||
auto_cleanup_keep = '//div[@class="articleHeading"]'
|
|
||||||
auto_cleanup = True
|
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
|
||||||
no_stylesheets = True
|
|
||||||
masthead_url = 'http://www.cosmopolitan.co.uk//cm/cosmopolitanuk/site_images/site_logo.gif'
|
|
||||||
cover_url = 'http://www.natmagnewsletters.co.uk/CIRCULES/CosmoXXLCover.jpg'
|
|
||||||
# Kovid's code
|
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
|
||||||
for pat, f in [
|
|
||||||
(re.compile(r':: [\w].+</title>',
|
|
||||||
re.DOTALL), lambda m: '</title>'),
|
|
||||||
|
|
||||||
]:
|
|
||||||
raw_html = pat.sub(f, raw_html)
|
|
||||||
return raw_html
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'),
|
|
||||||
(u'Men', u'http://cosmopolitan.co.uk/men/rss/'),
|
|
||||||
(u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'),
|
|
||||||
(u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'),
|
|
||||||
(u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'),
|
|
||||||
(u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'),
|
|
||||||
(u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
|
|
@ -1,66 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
cotidianul.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Cotidianul(BasicNewsRecipe):
|
|
||||||
title = u'Cotidianul'
|
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
|
||||||
description = u''
|
|
||||||
publisher = u'Cotidianul'
|
|
||||||
oldest_article = 25
|
|
||||||
language = 'ro'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
category = 'Ziare,Stiri'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
|
||||||
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
|
||||||
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
|
||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
|
|
||||||
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': 'titlu'}), dict(name='div', attrs={
|
|
||||||
'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['space']}), dict(
|
|
||||||
name='div', attrs={'id': ['title_desc']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(name='div', attrs={'class': ['space']}), dict(
|
|
||||||
name='span', attrs={'class': ['date']})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,11 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Counterpunch(BasicNewsRecipe):
|
|
||||||
title = u'Counterpunch'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
feeds = [(u'Counterpunch', u'http://www.counterpunch.org/category/article/feed/')]
|
|
@ -1,44 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre import browser
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|
||||||
title = u'Countryfile.com'
|
|
||||||
__author__ = 'Dave Asbury'
|
|
||||||
description = 'The official website of Countryfile Magazine'
|
|
||||||
# last updated 24.10.14
|
|
||||||
language = 'en_GB'
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 25
|
|
||||||
remove_empty_feeds = True
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
compress_news_images = True
|
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
soup = self.index_to_soup('http://www.countryfile.com/magazine')
|
|
||||||
cov = soup.find(attrs={'class': re.compile(
|
|
||||||
'imagecache imagecache-250px')}) # 'width' : '160',
|
|
||||||
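# crude string slicing: render the <img> tag as text and trim it down to just the src URL for the cover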
cov = str(cov)
|
|
||||||
cov = cov[10:]
|
|
||||||
cov = cov[:-135]
|
|
||||||
# print '++++ ',cov,' ++++'
|
|
||||||
br = browser()
|
|
||||||
|
|
||||||
br.set_handle_redirect(False)
|
|
||||||
try:
|
|
||||||
br.open_novisit(cov)
|
|
||||||
cover_url = cov
|
|
||||||
except Exception:
|
|
||||||
cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
|
||||||
feeds = [
|
|
||||||
(u'Country News', u'http://www.feed43.com/7204505705648666.xml'),
|
|
||||||
(u'Articles', u'http://www.feed43.com/8542080013204443.xml'),
|
|
||||||
]
|
|
@ -1,33 +0,0 @@
|
|||||||
import datetime
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Politics(BasicNewsRecipe):
|
|
||||||
title = u'The Courier-Mail'
|
|
||||||
description = 'Breaking news headlines for Brisbane and Queensland, Australia. The Courier-Mail is owned by News Corp Australia.'
|
|
||||||
language = 'en_AU'
|
|
||||||
__author__ = 'Krittika Goyal, James Cridland'
|
|
||||||
oldest_article = 3 # days
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
d = datetime.datetime.today()
|
|
||||||
cover_url='http://mfeeds.news.com.au/smedia/NCCOURIER/NCCM_1_' + d.strftime('%Y_%m_%d') + '_thumb_big.jpg'
|
|
||||||
masthead_url='https://couriermail.digitaleditions.com.au/images/couriermail-logo.jpg'
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
handle_gzip = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('Top Stories', 'http://www.couriermail.com.au/rss'),
|
|
||||||
('Breaking', 'https://www.couriermail.com.au/news/breaking-news/rss'),
|
|
||||||
('Queensland', 'https://www.couriermail.com.au/news/queensland/rss'),
|
|
||||||
('Technology', 'https://www.couriermail.com.au/technology/rss'),
|
|
||||||
('Entertainment', 'https://www.couriermail.com.au/entertainment/rss'),
|
|
||||||
('Finance','https://www.couriermail.com.au/business/rss'),
|
|
||||||
('Sport', 'https://www.couriermail.com.au/sport/rss'),
|
|
||||||
]
|
|
||||||
|
|
||||||
# This isn't perfect, but works rather better than it once did. TODO: remove links to subscription content.
|
|
@ -1,28 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CourierPress(BasicNewsRecipe):
|
|
||||||
title = u'Courier Press'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'Krittika Goyal'
|
|
||||||
oldest_article = 1 # days
|
|
||||||
max_articles_per_feed = 25
|
|
||||||
|
|
||||||
remove_stylesheets = True
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='iframe'),
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('Courier Press',
|
|
||||||
'http://www.courierpress.com/rss/headlines/news/'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
story = soup.find(name='div', attrs={'id': 'article_body'})
|
|
||||||
soup = BeautifulSoup(
|
|
||||||
'<html><head><title>t</title></head><body></body></html>')
|
|
||||||
body = soup.find(name='body')
|
|
||||||
body.insert(0, story)
|
|
||||||
return soup
|
|
@ -1,21 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CraigsList(BasicNewsRecipe):
|
|
||||||
title = u'craigslist - Best Of'
|
|
||||||
oldest_article = 365
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
__author__ = 'kiodane'
|
|
||||||
|
|
||||||
feeds = [(u'Best of craigslist',
|
|
||||||
u'http://www.craigslist.org/about/best/all/index.rss'), ]
|
|
@ -1,45 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = 'zotzo'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CreditSlips(BasicNewsRecipe):
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'zotzot'
|
|
||||||
version = 2
|
|
||||||
title = u'Credit Slips.org'
|
|
||||||
publisher = u'Bankr-L'
|
|
||||||
category = u'Economic blog'
|
|
||||||
description = u'A discussion on credit and bankruptcy'
|
|
||||||
cover_url = 'http://bit.ly/eAKNCB'
|
|
||||||
oldest_article = 15
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = True
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comments': description,
|
|
||||||
'tags': category,
|
|
||||||
'language': 'en',
|
|
||||||
'publisher': publisher,
|
|
||||||
}
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
.author {font-family:Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
|
||||||
h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
|
||||||
p {font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
|
||||||
body {font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
|
||||||
h2 = soup.find('h2')
|
|
||||||
h2.replaceWith(h2.prettify() + '<p><em>Posted by ' +
|
|
||||||
article.author + '</em></p>')
|
|
@ -1,79 +0,0 @@
|
|||||||
# -*- mode: python -*-
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2018, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.cronica.com.ar
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Veintitres(BasicNewsRecipe):
|
|
||||||
title = 'Cronica'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'Últimas noticias'
|
|
||||||
publisher = 'Grupo Crónica'
|
|
||||||
category = 'politica, noticias generales, Argentina'
|
|
||||||
oldest_article = 15
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = False
|
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'utf-8'
|
|
||||||
masthead_url = 'https://www.cronica.com.ar/export/sites/cronica/arte/logos/logoCronica.svg_799932565.svg'
|
|
||||||
language = 'es_AR'
|
|
||||||
remove_javascript = True
|
|
||||||
publication_type = 'magazine'
|
|
||||||
remove_empty_feeds = True
|
|
||||||
auto_cleanup = True
|
|
||||||
auto_cleanup_keep = '//h1'
|
|
||||||
resolve_internal_links = True
|
|
||||||
INDEX = "https://www.cronica.com.ar"
|
|
||||||
extra_css = """
|
|
||||||
img{margin-bottom: 0.8em}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description,
|
|
||||||
'tags': category,
|
|
||||||
'publisher': publisher,
|
|
||||||
'language': language
|
|
||||||
}
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Policiales', u'https://www.cronica.com.ar/seccion/policiales/'),
|
|
||||||
(u'Politica', u'https://www.cronica.com.ar/seccion/politica/'),
|
|
||||||
(u'General', u'https://www.cronica.com.ar/seccion/info-general/'),
|
|
||||||
(u'Mundo', u'https://www.cronica.com.ar/seccion/mundo/'),
|
|
||||||
(u'Opinion', u'https://www.cronica.com.ar/seccion/opinion/'),
|
|
||||||
(u'Deportes', u'https://www.cronica.com.ar/seccion/deportes/'),
|
|
||||||
(u'Cosa de locos', u'https://www.cronica.com.ar/seccion/cosa-de-locos/'),
|
|
||||||
(u'Espectaculos', u'https://www.diarioshow.com/seccion/espectaculos/'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
totalfeeds = []
|
|
||||||
lfeeds = self.get_feeds()
|
|
||||||
for feedobj in lfeeds:
|
|
||||||
feedtitle, feedurl = feedobj
|
|
||||||
self.report_progress(
|
|
||||||
0,
|
|
||||||
_('Fetching feed') + ' %s...' %
|
|
||||||
(feedtitle if feedtitle else feedurl)
|
|
||||||
)
|
|
||||||
articles = []
|
|
||||||
soup = self.index_to_soup(feedurl)
|
|
||||||
for item in soup.findAll('a', attrs={'class': 'cover-link'}):
|
|
||||||
url = self.INDEX + item['href']
|
|
||||||
if feedtitle == 'Espectaculos':
|
|
||||||
url = 'https://www.diarioshow.com' + item['href']
|
|
||||||
title = item['title']
|
|
||||||
articles.append({
|
|
||||||
'title': title,
|
|
||||||
'date': '',
|
|
||||||
'url': url,
|
|
||||||
'description': ''
|
|
||||||
})
|
|
||||||
totalfeeds.append((feedtitle, articles))
|
|
||||||
return totalfeeds
|
|
@ -1,50 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
curierulnational.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CurierulNal(BasicNewsRecipe):
|
|
||||||
title = u'Curierul Na\u0163ional'
|
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
|
||||||
description = ''
|
|
||||||
publisher = 'Curierul Na\u0163ional'
|
|
||||||
oldest_article = 5
|
|
||||||
language = 'ro'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
category = 'Ziare,Stiri'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
cover_url = 'http://www.curierulnational.ro/logo.gif'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': 'col1'}), dict(
|
|
||||||
name='img', attrs={'id': 'placeholder'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='p', attrs={'id': ['alteArticole']}), dict(name='div', attrs={'id': ['textSize']}), dict(
|
|
||||||
name='ul', attrs={'class': ['unit-rating']}), dict(name='div', attrs={'id': ['comments']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(name='ul', attrs={'class': 'unit-rating'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Feeds', u'http://www.curierulnational.ro/feed.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,56 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Cyberpresse(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = u'Cyberpresse'
|
|
||||||
__author__ = 'balok and Sujata Raman'
|
|
||||||
description = 'Canadian news in French'
|
|
||||||
language = 'fr'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
html2lrf_options = ['--left-margin=0', '--right-margin=0',
|
|
||||||
'--top-margin=0', '--bottom-margin=0']
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'article-page'}),
|
|
||||||
dict(name='div', attrs={'id': 'articlePage'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
.photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; }
|
|
||||||
h1{font-family:Georgia,Times,serif ; font-size: large; }
|
|
||||||
.amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;}
|
|
||||||
.article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
|
|
||||||
#articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
|
|
||||||
.auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;}
|
|
||||||
.bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
|
|
||||||
.byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;}
|
|
||||||
.entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
|
|
||||||
.minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; }
|
|
||||||
a{color:#003399; font-weight:bold; }
|
|
||||||
'''
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={
|
|
||||||
'class': ['centerbar', 'colspan', 'share-module']}),
|
|
||||||
dict(name='p', attrs={'class': ['zoom']}),
|
|
||||||
dict(name='ul', attrs={'class': ['stories']}),
|
|
||||||
dict(name='h4', attrs={'class': ['general-cat']}),
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
|
|
||||||
(u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
|
|
||||||
(u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
|
|
||||||
(u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
|
||||||
|
|
||||||
for tag in soup.findAll(name=['i', 'strong']):
|
|
||||||
tag.name = 'div'
|
|
||||||
|
|
||||||
return soup
|
|
@ -1,118 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class CyNewsLiveRecipe(BasicNewsRecipe):
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'kwetal'
|
|
||||||
language = 'en_CY'
|
|
||||||
version = 1
|
|
||||||
|
|
||||||
title = u'Cyprus News Live'
|
|
||||||
publisher = u'The Cyprus Weekly'
|
|
||||||
category = u'News, Newspaper'
|
|
||||||
description = u'News from Cyprus'
|
|
||||||
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_empty_feeds = True
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
|
|
||||||
pubTime = None
|
|
||||||
minTime = None
|
|
||||||
articleCount = 0
|
|
||||||
|
|
||||||
INDEX = 'http://www.cynewslive.com'
|
|
||||||
|
|
||||||
feeds = []
|
|
||||||
feeds.append(
|
|
||||||
('News: Cyprus', 'http://www.cynewslive.com/main/92,0,0,0-CYPRUS.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('News: World', 'http://www.cynewslive.com/main/78,0,0,0-UKWORLD.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Sport: Football', 'http://www.cynewslive.com/main/82,0,0,0-FOOTBALL.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Sport: Rugby', 'http://www.cynewslive.com/main/83,0,0,0-RUGBY.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Sport: Cricket', 'http://www.cynewslive.com/main/85,0,0,0-CRICKET.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Sport: Tennis', 'http://www.cynewslive.com/main/84,0,0,0-TENNIS.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Sport: Other', 'http://www.cynewslive.com/main/86,0,0,0-OTHER.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Business: Local', 'http://www.cynewslive.com/main/100,0,0,0-LOCAL.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Business: Foreign', 'http://www.cynewslive.com/main/101,0,0,0-FOREIGN.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Environment', 'http://www.cynewslive.com/main/93,0,0,0-ENVIRONMENT.aspx'))
|
|
||||||
feeds.append(
|
|
||||||
('Culture', 'http://www.cynewslive.com/main/208,0,0,0-CULTURE.aspx'))
|
|
||||||
|
|
||||||
keep_only_tags = []
|
|
||||||
keep_only_tags.append(
|
|
||||||
dict(name='div', attrs={'class': 'ArticleCategories'}))
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
answer = []
|
|
||||||
for feed in self.feeds:
|
|
||||||
self.articleCount = 0
|
|
||||||
articles = []
|
|
||||||
soup = self.index_to_soup(feed[1])
|
|
||||||
|
|
||||||
table = soup.find('table', attrs={'id': 'ctl00_cp_ctl01_listp'})
|
|
||||||
if table:
|
|
||||||
self.pubTime = datetime.now()
|
|
||||||
self.minTime = self.pubTime - \
|
|
||||||
timedelta(days=self.oldest_article)
|
|
||||||
|
|
||||||
self.find_articles(table, articles)
|
|
||||||
|
|
||||||
answer.append((feed[0], articles))
|
|
||||||
|
|
||||||
return answer
|
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
|
||||||
for el in soup.findAll(attrs={'style': True}):
|
|
||||||
del el['style']
|
|
||||||
|
|
||||||
for el in soup.findAll('font'):
|
|
||||||
el.name = 'div'
|
|
||||||
for attr in list(el.attrs):  # el.attrs is a dict; copy the keys so attributes can be deleted while iterating
|
|
||||||
del el[attr]
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def find_articles(self, table, articles):
|
|
||||||
for div in table.findAll('div', attrs={'class': 'ListArticle'}):
|
|
||||||
el = div.find('div', attrs={'class': 'ListArticle_T'})
|
|
||||||
title = self.tag_to_string(el.a)
|
|
||||||
url = self.INDEX + el.a['href']
|
|
||||||
|
|
||||||
description = self.tag_to_string(
|
|
||||||
div.find('div', attrs={'class': 'ListArticle_BODY300'}))
|
|
||||||
|
|
||||||
el = div.find('div', attrs={'class': 'ListArticle_D'})
|
|
||||||
if el:
|
|
||||||
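# the listing date text is split into [day, month name, year, 'HH:MM'] and rebuilt as a datetime below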
dateParts = self.tag_to_string(el).split(' ')
|
|
||||||
monthNames = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
|
|
||||||
'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11,
|
|
||||||
'December': 12}
|
|
||||||
timeParts = dateParts[3].split(':')
|
|
||||||
self.pubTime = datetime(year=int(dateParts[2]), month=int(monthNames[dateParts[1]]),
|
|
||||||
day=int(dateParts[0]), hour=int(timeParts[0]),
|
|
||||||
minute=int(timeParts[1]))
|
|
||||||
|
|
||||||
if self.pubTime >= self.minTime and self.articleCount <= self.max_articles_per_feed:
|
|
||||||
articles.append(
|
|
||||||
{'title': title, 'date': self.pubTime, 'url': url, 'description': description})
|
|
||||||
self.articleCount += 1
|
|
||||||
else:
|
|
||||||
return
|
|
[26 recipe icon images removed]