Merge branch 'master' of https://github.com/unkn0w7n/calibre
@ -1,40 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class f1ultra(BasicNewsRecipe):
    """Recipe for f1ultra.pl (Polish Formula 1 news), driven by one RSS feed."""

    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    # Article body lives in <div id="main">; everything after the inline
    # styled element is dropped.
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags_after = [
        dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'}),
    ]
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]

    # Strip layout attributes and table wrappers from the raw HTML.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # The previous pattern (width=\"*\") only matched `width=` followed by
        # bare quote characters, so real attributes such as width="100" were
        # never removed.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'<table .*?>'), lambda match: ''),
    ]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
    img { display: block; clear: both;}
    '''
    remove_attributes = ['width', 'height', 'position', 'float',
                         'padding-left', 'padding-right', 'padding', 'text-align']

    feeds = [
        (u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245'),
    ]
|
@ -1,23 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    """Recipe for the F-Secure weblog RSS feed."""

    title = u'F-Secure Weblog'
    language = 'en'
    __author__ = 'louhike'
    description = u'All the news from the weblog of F-Secure'
    publisher = u'F-Secure'
    timefmt = ' [%a, %d %b, %Y]'
    encoding = 'ISO-8859-1'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Keep only the post container and drop horizontal rules inside it.
    keep_only_tags = [dict(name='div', attrs={'class': 'modSectionTd2'})]
    remove_tags = [dict(name='hr')]

    feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]

    def get_cover_url(self):
        """Return the fixed company-logo URL used as the cover image."""
        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'
|
@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Favrskov Avisen
|
||||
'''
|
||||
|
||||
|
||||
class FavrskovAvisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Favrskov Avisen (single RSS feed)."""

    __author__ = 'CoderAllan.github.com'
    title = 'Favrskov Avisen'
    description = 'Lokale og regionale nyheder'
    category = 'newspaper, news, localnews, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Nyheder', 'http://dinby.dk/favrskov-avisen/rss'),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Favrskovposten
|
||||
'''
|
||||
|
||||
|
||||
class FavrskovLokalavisen_dk(BasicNewsRecipe):
    """Recipe for Favrskovposten (favrskov.lokalavisen.dk), one feed per section."""

    __author__ = 'CoderAllan.github.com'
    title = 'Favrskovposten'
    description = 'Lokale og regionale nyheder, sport og kultur fra Favrskov og omegn på favrskov.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,134 +0,0 @@
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
|
||||
|
||||
|
||||
class FazNet(BasicNewsRecipe):
    """Recipe for FAZ.NET (Frankfurter Allgemeine Zeitung).

    Downloads the section RSS feeds, stitches multi-page articles together
    and rewrites lazily loaded image attributes to ``src``.
    """
    # Version 9.1
    # Update 2022-05-29
    # Armin Geller
    # new page layout

    title = 'FAZ.NET'
    __author__ = 'Kovid Goyal, Darko Miletic, Armin Geller'
    description = 'Frankfurter Allgemeine Zeitung'
    publisher = 'Frankfurter Allgemeine Zeitung GmbH'
    category = 'news, politics, Germany'

    encoding = 'utf-8'
    language = 'de'

    max_articles_per_feed = 30
    no_stylesheets = True
    remove_javascript = True

    extra_css = '''
        .atc-headlineemphasis, h1, h2 {font-size:1.6em; text-align:left}
        .atc-HeadlineEmphasisText {font-size:0.6em; text-align:left; display:block; text-transform:uppercase;}
        .atc-IntroText {font-size:1em; font-style:italic; font-weight:bold;margin-bottom:1em}
        h3 {font-size:1.3em;text-align:left}
        h4, h5, h6 {font-size:1em;text-align:left}
        .textbox-wide {font-size:1.3em; font-style:italic}
        .atc-ImageDescriptionText, .atc-ImageDescriptionCopyright {font-size: 0.75em; font-style:italic; font-weight:normal}
        .atc-MetaItem {
            font-size:0.6em; font-weight:normal; margin-bottom:0.75em; text-align:left;
            list-style-type:none; text-transform:uppercase; display:inline-block}
        .aut-Teaser_Avatar {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; text-align:left}
        .aut-Teaser_Name {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; float:left; text-align:left}
        .aut-Teaser_Description {font-size:0.6em; font-weight: normal; margin-bottom:0.75em; text-align:left; display:block}
        .atc-Footer{font-size:0.6em; font-weight: normal; margin-bottom:0.75em; display:block}
    '''

    keep_only_tags = [
        dict(name='article', attrs={'class': 'atc'}),
        dict(name='div', attrs={'id': 'FAZContent'}),
    ]

    remove_tags_after = [dict(name='article', attrs={'class': 'atc'})]

    remove_tags = [
        dict(name='div', attrs={'class': [
            'atc-ContainerSocialMedia',
            # NOTE(review): this entry carries a trailing space while
            # append_page() uses the same name without it -- confirm which
            # spelling actually matches the site markup.
            'atc-ContainerFunctions_Interaction ',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium ctn-PlaceholderContent-has-centered-content',
            'ctn-PlaceholderBox ctn-PlaceholderBox-is-in-article-text-right',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-text-left ctn-PlaceholderContent-is-in-article-small',
            'aut-Follow aut-Follow-is-small-teaser',
            'aut-Follow aut-Follow-is-teaser',
            'js-ctn-PaywallTeasers ctn-PaywallTeasers',
            'ctn-PaywallInfo_TeaserImageContainer',
            'ctn-PaywallInfo_OfferContainer',
        ]}),
        dict(name='aside', attrs={'class': [
            'atc-ContainerMore',
            'atc-ContainerMoreOneTeaser',
        ]}),
        dict(name='span', attrs={'class': [
            'data-button',
            'o-VisuallyHidden',
        ]}),
        dict(name='a', attrs={'class': 'btn-Base_Link'}),
    ]

    feeds = [
        ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
        ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
        ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
        ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
        ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
        ('Lebensstil', 'http://www.faz.net/aktuell/lebensstil/?rssview=1'),
        ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
        ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
        ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
        ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
        ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
        ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1'),
        ('Rhein-Main', 'http://www.faz.net/aktuell/rhein-main/?rssview=1'),
    ]

    # For multipages:

    # Prefix prepended to the "next page" href; empty, so the href is used as-is.
    INDEX = ''

    def append_page(self, soup, appendtag, position):
        """Fetch the follow-up page of a paginated article and splice it in.

        Looks for the "next page" paginator item in ``soup``. When present,
        the next page is downloaded, its ``<article class="atc">`` is stripped
        of header/teaser/follow boxes, further pages are appended recursively,
        and the resulting article is inserted into ``appendtag`` at
        ``position``. Does nothing when there is no usable paginator link or
        the next page has no article element.
        """
        pager = soup.find('li', attrs={'class': 'nvg-Paginator_Item nvg-Paginator_Item-to-next-page'})
        # Guard against a paginator item without a link; the previous code
        # raised on `pager.a['href']` in that case.
        if not pager or pager.a is None:
            return
        nexturl = self.INDEX + pager.a['href']
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('article', attrs={'class': 'atc'})
        if texttag is None:
            # Next page has no article container: nothing to append.
            return
        for cls in (
            'atc-Header',
            'atc-ContainerMore',
            'atc-ContainerFunctions_Interaction',
            'aut-Follow aut-Follow-is-small-teaser',
            'aut-Follow aut-Follow-is-teaser',
        ):
            div = texttag.find(attrs={'class': cls})
            if div is not None:
                div.extract()
        # Append any further pages at the end of the page just fetched.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        pager.extract()
        appendtag.insert(position, texttag)

    # Find images

    def preprocess_html(self, soup):
        """Merge follow-up pages and copy lazy-load image URLs into ``src``."""
        self.append_page(soup, soup.body, 3)
        for img in soup.findAll('img', attrs={'data-retina-src': True}):
            img['src'] = img['data-retina-src']
        # Runs second, so `data-src` wins when an image carries both attributes.
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        return self.adeify_images(soup)

    # Some last cleanup

    def postprocess_html(self, soup, first_fetch):
        """Remove leftover function/placeholder containers and return ``soup``."""
        leftovers = [
            'atc-ContainerFunctions js-som-Abbinder',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium',
        ]
        for div in soup.findAll('div', attrs={'class': leftovers}):
            div.extract()
        return soup
|
@ -1,21 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1347706704(BasicNewsRecipe):
    """Recipe for the Dutch comic strip FC Knudde, using the feed's embedded content."""

    title = u'FC Knudde'
    __author__ = u'DrMerry'
    description = u'FC Knudde de populaire sport strip van Toon van Driel (http://www.toonvandriel.nl)'
    language = u'nl'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    cover_url = 'http://a1.mzstatic.com/us/r1000/035/Purple/be/33/70/mzl.qkvshinq.320x480-75.jpg'
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    remove_tags_before = dict(id='title')
    remove_tags_after = dict(attrs={'class': 'entry-content rich-content'})
    use_embedded_content = True
    # Let the strip images fill the page width.
    extra_css = 'img{border:0;padding:0;margin:0;width:100%}'

    feeds = [(u'FC Knudde', u'http://www.nusport.nl/feeds/rss/fc-knudde.rss')]
|
@ -1,47 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class FDBPl(BasicNewsRecipe):
    """Recipe for fdb.pl film news; the article list is scraped from listing pages."""

    title = u'Fdb.pl'
    __author__ = 'fenuks'
    description = u'Wiadomości ze świata filmu, baza danych filmowych, recenzje, zwiastuny, boxoffice.'
    category = 'film'
    language = 'pl'
    extra_css = '.options-left > li {display: inline;} em {display: block;}'
    cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg'
    use_embedded_content = False
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})]
    remove_tags = [
        dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']}),
    ]
    # No RSS feeds: parse_index() builds the article list instead.
    feeds = []

    def parse_index(self):
        """Return a single 'Wiadomości' section built from the first two listing pages."""
        feeds = []
        feeds.append((u'Wiadomości', self.get_articles(
            'https://fdb.pl/wiadomosci?page={0}', 2)))
        return feeds

    def get_articles(self, url, pages=1):
        """Collect article entries from paginated listing pages.

        :param url: URL template with a ``{0}`` placeholder for the page number.
        :param pages: number of listing pages to scan, starting at page 1.
        :return: list of dicts with ``title``, ``url``, ``date`` and
            ``description`` keys (``date`` and ``description`` are empty).
        """
        articles = []
        for nr in range(1, pages + 1):
            soup = self.index_to_soup(url.format(nr))
            for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}):
                node = tag.find('h5')
                # Skip tiles without a linked heading instead of crashing.
                if node is None or node.a is None:
                    continue
                # Use a separate name for the article link: assigning it to
                # `url` overwrote the page template, so every page after the
                # first requested the last article's URL instead of the next
                # listing page.
                articles.append({
                    'title': node.a.string,
                    'url': node.a['href'],
                    'date': '',
                    'description': '',
                })
        return articles
|
@ -1,75 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
financialexpress.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a tag-matching spec that matches any of the given CSS classes.

    :param classes: space-separated class names.
    :return: ``dict(attrs={'class': matcher})`` where ``matcher(value)`` is
        truthy when the whitespace-split ``value`` shares at least one name
        with ``classes`` and falsy for ``None``/empty values.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class FE_India(BasicNewsRecipe):
    """Recipe for The Financial Express (India), built from its print and web feeds."""

    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_IN'
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}
    publication_type = 'magazine'

    # NOTE(review): other recipes in this file spell the first key 'comments';
    # confirm whether 'comment' is honoured by the conversion pipeline.
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [classes('wp-block-post-title wp-block-post-excerpt ie-network-post-meta-wrapper wp-block-post-featured-image wp-block-post-content')]
    remove_tags = [classes('parent_also_read')]
    remove_attributes = ['width', 'height']

    feeds = [
        # https://www.financialexpress.com/syndication/
        # Print feeds
        ('Front Page', 'https://www.financialexpress.com/print/front-page/feed/'),
        ('Corporate Markets', 'https://www.financialexpress.com/print/corporate-markets/feed/'),
        ('Economy', 'https://www.financialexpress.com/print/economy-print/feed/'),
        ('Opinion', 'https://www.financialexpress.com/print/edits-columns/feed/'),
        ('personal Finance', 'https://www.financialexpress.com/print/personal-finance-print/feed/'),
        # ('Brandwagon', 'https://www.financialexpress.com/print/brandwagon/feed/'),
        # Other Feeds
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'),
        ('Opinion', 'https://www.financialexpress.com/opinion/feed/'),
        ('Editorial', 'https://www.financialexpress.com/editorial/feed/'),
        ('Budget', 'https://www.financialexpress.com/budget/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        ('Market', 'https://www.financialexpress.com/market/feed/'),
        ('Jobs', 'https://www.financialexpress.com/jobs/feed/'),
        ('SME', 'https://www.financialexpress.com/industry/sme/feed/'),
        ('Mutual Funds', 'https://www.financialexpress.com/money/mutual-funds/feed/'),
        ('Health', 'https://www.financialexpress.com/lifestyle/health/feed'),
        # ('Health Care','https://www.financialexpress.com/healthcare/feed'),
        ('Science', 'https://www.financialexpress.com/lifestyle/science/feed'),
        ('Infrastructure', 'https://www.financialexpress.com/infrastructure/feed'),
        ('Money', 'https://www.financialexpress.com/money/feed'),
    ]

    def get_cover_url(self):
        """Return the cover image URL scraped from the Magzter listing.

        Picks the first ``<meta>`` tag whose ``content`` ends with
        ``view/3.jpg``; returns ``None`` when no such tag exists.
        """
        soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/')
        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
            return citem['content']
        return None

    def preprocess_html(self, soup, *a):
        """Copy lazy-load ``data-src`` values into ``src`` and return ``soup``."""
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup
|
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
revistafelicia.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Felicia(BasicNewsRecipe):
    """Recipe for the Romanian magazine Revista Felicia (single RSS feed)."""

    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'
    publisher = u'Revista Felicia'
    oldest_article = 25
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste'
    encoding = 'utf-8'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'header'}),
        dict(name='div', attrs={'id': 'contentArticol'}),
    ]

    remove_tags = [
        dict(name='img', attrs={'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}),
        dict(name='div', attrs={'class': ['content']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.revistafelicia.ro/rss')
    ]

    def preprocess_html(self, soup):
        """Pass the page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
@ -1,45 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """Recipe for FHM UK, built from three feedsportal RSS feeds."""

    title = u'FHM UK'
    description = 'Good News for Men.'
    cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    # last updated 7/10/12
    language = 'en_GB'
    oldest_article = 31
    max_articles_per_feed = 15
    remove_empty_feeds = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='h1'),
        dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}),
        dict(name='div', attrs={
            'id': ['profileLeft', 'articleLeft', 'profileRight', 'profileBody']}),
        dict(name='div', attrs={
            'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody', ]}),
    ]

    remove_tags = [
        dict(attrs={'id': ['ctl00_Body_divSlideShow']}),
    ]

    feeds = [
        # repeatable search = </div>{|}<a href="{%}"
        # class="{*}">{%}</a>{|}<p>{*}</p>
        (u'Homepage', u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
        (u'Funny', u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
        (u'Girls', u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
fhm.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class FHMro(BasicNewsRecipe):
    """Recipe for FHM Romania (single RSS feed)."""

    title = u'FHM Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Pentru c\u0103 noi putem'
    publisher = 'FHM'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Reviste'
    encoding = 'utf-8'
    cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'contentMainTitle'}),
        dict(name='div', attrs={'class': 'entry'}),
    ]

    # NOTE(review): 'ratingblock ' ends with a space in both lists below;
    # confirm it still matches the site's markup with the current parser.
    remove_tags_after = [
        dict(name='div', attrs={'class': ['ratingblock ']}),
        dict(name='a', attrs={'rel': ['tag']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['ratingblock ']}),
        dict(name='div', attrs={'class': ['socialize-containter']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.fhm.ro/feed')
    ]

    def preprocess_html(self, soup):
        """Pass the page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
@ -1,30 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||
|
||||
|
||||
class fiftytwo(BasicNewsRecipe):
    """Recipe for fiftytwo.in long-form essays (single feed)."""

    title = u'Fifty Two'
    description = ('Every week, 52 publishes an essay that dives deep into an aspect of India’s history,'
                   ' politics and culture. Each story will explain, recall or establish something interesting '
                   'about life on our subcontinent, and tell readers why it matters to them.')
    language = 'en_IN'
    __author__ = 'unkn0wn'
    oldest_article = 30  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    masthead_url = 'https://fiftytwo.in//img/52-logo.png'
    # https://fiftytwo.in/img/favicon.png
    ignore_duplicate_articles = {'url'}
    extra_css = '.story-info, .story-notes, .story-intro {font-size:small; font-style:italic;}'

    # Keep the banner, metadata, body slices and end notes of each story.
    keep_only_tags = [
        classes(
            'story-banner__container story-info story-slices story-notes'
        ),
    ]

    feeds = [
        ('Articles', 'https://fiftytwo.in/feed.xml'),
    ]
|
@ -1,27 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'faber1971'
|
||||
description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
    """Recipe for Fisco Oggi (fiscooggi.it), one feed per taxonomy term."""

    title = u'Fisco Oggi'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 7
    max_articles_per_feed = 100
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'),
        (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'),
        (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'),
        (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'),

        (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'),
        (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'),
        (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'),
        (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed'),
    ]
|
@ -1,39 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, NA'
|
||||
'''
|
||||
fleshbot.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Fleshbot(BasicNewsRecipe):
    """Recipe for fleshbot.com, using the content embedded in its RSS feed."""

    title = 'Fleshbot'
    __author__ = 'NA'
    description = "Fleshbot, Pure Filth."
    publisher = 'Fleshbot.com'
    category = 'news, sex, sex industry, celebs, nudes, adult, adult toys, sex toys'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://fbassets.s3.amazonaws.com/images/uploads/2012/01/fleshbot-logo.png'
    extra_css = '''
        body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
        img{margin-bottom: 1em}
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
    '''
    # NOTE(review): other recipes in this file spell the first key 'comments';
    # confirm whether 'comment' is honoured by the conversion pipeline.
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [(u'Articles', u'http://fleshbot.com/?feed=rss2')]

    remove_tags = [
        {'class': 'feedflare'},
    ]

    def preprocess_html(self, soup):
        """Pass the page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Folkebladet
|
||||
'''
|
||||
|
||||
|
||||
class Folkebladet_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Folkebladet (articles and comments feeds)."""

    __author__ = 'CoderAllan.github.com'
    title = 'Folkebladet'
    description = 'Dine lokale nyheder på nettet'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Folkebladet', 'http://folkebladet.dk/feed/'),
        ('Kommentarer', 'http://folkebladet.dk/comments/feed/'),
    ]
|
||||
|
@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Folkebladet Djursland
|
||||
'''
|
||||
|
||||
|
||||
class FolkebladetDjursland_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Folkebladet Djursland (single RSS feed)."""

    __author__ = 'CoderAllan.github.com'
    title = 'Folkebladet Djursland'
    description = 'Lokale og regionale nyheder'
    category = 'newspaper, news, localnews, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Nyheder', 'http://dinby.dk/folkebladet-djursland/rss'),
    ]
|
||||
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
folketidende.dk
|
||||
'''
|
||||
|
||||
|
||||
class Folketidende_dk(BasicNewsRecipe):
    """Recipe for folketidende.dk, local news for Lolland and Falster (single feed)."""

    __author__ = 'CoderAllan.github.com'
    title = 'folketidende.dk'
    description = 'Lokalt nyhedssite, med nyheder og lokalstof om Lolland og Falster'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No hand-written tag filters: article clean-up is left to auto_cleanup.
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('folketidende.dk - Lolland & Falster samlet på et sted', 'http://folketidende.dk/rss-nyhedsbrev.xml'),
    ]
|
||||
|
@ -1,67 +0,0 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a tag-matching spec that matches any of the given CSS classes.

    :param classes: space-separated class names.
    :return: ``dict(attrs={'class': matcher})`` where ``matcher(value)`` is
        truthy when the whitespace-split ``value`` shares at least one name
        with ``classes`` and falsy for ``None``/empty values.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class Forbes(BasicNewsRecipe):
    """Recipe for forbes.com, built from its section RSS feeds."""

    title = u'Forbes'
    description = 'Business and Financial News'
    __author__ = 'Kovid Goyal'
    oldest_article = 30
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True

    extra_css = '''
    div.fb-captioned-img {
        font-size: smaller;
        margin-top: 1em; margin-bottom: 1em;
    }
    div.fb-captioned-img img {
        display:block;
        margin-left: auto; margin-right: auto;
    }
    '''
    feeds = [
        (u'Latest', u'https://www.forbes.com/news/index.xml'),
        (u'Most Popular', u'https://www.forbes.com/feeds/popstories.xml'),
        (u'Technology', u'https://www.forbes.com/technology/index.xml'),
        (u'Business', u'https://www.forbes.com/business/index.xml'),
        (u'Sports Money', u'https://www.forbes.com/sportsmoney/index.xml'),
        (u'Leadership', u'https://www.forbes.com/leadership/index.xml'),
    ]

    keep_only_tags = [
        classes('article-headline-container hero-image-block article-body bottom-contrib-block')
    ]

    remove_tags = [
        classes('article-sharing'),
        dict(name='button'),
    ]

    def preprocess_html(self, soup):
        """Move the hero image block to the end of the headline container.

        The image is only relocated when both elements exist; otherwise the
        document is returned unchanged.
        """
        hero = soup.find(**classes('hero-image-block'))
        if hero is not None:
            headline = soup.find(**classes('article-headline-container'))
            # Previously the hero block was extracted before checking the
            # headline, so a missing headline raised on `None.append` and the
            # image was lost.
            if headline is not None:
                hero.extract()
                headline.append(hero)
        return soup

    def get_browser(self):
        """Return a browser with the welcome-page cookies for .forbes.com pre-set."""
        br = BasicNewsRecipe.get_browser(self)
        br.set_cookie('dailyWelcomeCookie', 'true', '.forbes.com')
        br.set_cookie('welcomeAd', 'true', '.forbes.com')
        return br

    # Debugging aid: uncomment to fetch a single fixed article.
    # def parse_index(self):
    #     return [('Articles', [{'title':'Test', 'url':
    #         'http://www.forbes.com/sites/hamdiraini/2016/04/25/bazin-seeks-startups-to-accelerate-accorhotels-transformation/'}])]
|
@ -1,55 +0,0 @@
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1276934715(BasicNewsRecipe):
    """Recipe for Forbes India; articles are fetched via their print-view page."""

    title = u'Forbes India'
    __author__ = 'rty'
    description = 'India Edition Forbes'
    publisher = 'Forbes India'
    category = 'Business News, Economy, India'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_IN'
    # Holds every temp file created by get_obfuscated_article().
    temp_files = []
    articles_are_obfuscated = True
    conversion_options = {'linearize_tables': True}
    feeds = [
        (u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
    ]
    # 'font-syle' in the third rule was a typo, so the italic declaration was
    # ignored; corrected to 'font-style'.
    extra_css = '''
        .t-10-gy-l{font-style: italic; font-size: small}
        .t-30-b-d{font-weight: bold; font-size: xx-large}
        .t-16-gy-l{font-weight: bold; font-size: x-large; font-style: italic}
        .storycontent{font-size: 4px;font-family: Times New Roman;}
    '''

    remove_tags_before = dict(name='div', attrs={'class': 'pdl10 pdr15'})

    def get_obfuscated_article(self, url):
        """Download the print version of ``url`` into a temp file.

        Opens the article, follows the first link matching
        ``/printcontent/<digits>`` and writes that response to a persistent
        temporary file.

        :return: path of the temporary HTML file.
        """
        br = self.get_browser()
        br.open(url)
        response = br.follow_link(url_regex=r'/printcontent/[0-9]+', nr=0)
        html = response.read()
        tmp = PersistentTemporaryFile('_fa.html')
        # Keep a reference on the recipe, as the original code did.
        self.temp_files.append(tmp)
        tmp.write(html)
        tmp.close()
        return tmp.name

    def get_cover_url(self):
        """Return the href of the first cover link on the magazine index, or None."""
        index = 'http://business.in.com/magazine/'
        soup = self.index_to_soup(index)
        for image in soup.findAll('a', {"class": "lbOn a-9-b-d"}):
            return image['href']
            # return image['href'] + '.jpg'
        return None

    def preprocess_html(self, soup):
        """Strip inline ``style`` and ``width`` attributes and return ``soup``."""
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(width=True):
            del item['width']
        return soup
|
@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class forbes_pl(BasicNewsRecipe):
    """Feed-based recipe for Forbes.pl (Polish business news)."""

    title = u'Forbes.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finansowe i strategiczne.'
    oldest_article = 1
    index = 'http://www.forbes.pl'
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
    # Drop the "Czytaj/Zobacz też/także: ..." paragraphs and the bold
    # "Zobacz: ..." teasers from the article markup.
    preprocess_regexps = [
        (re.compile(u'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
         lambda match: ''),
        (re.compile(u'<strong>Zobacz:.*?</strong>', re.DOTALL),
         lambda match: ''),
    ]
    remove_javascript = True
    no_stylesheets = True
    # NOTE: evaluated once, when the class body is executed.
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(hours=24)
    yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
    pages_count = 4
    keep_only_tags = [dict(attrs={'class': [
        'Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
    remove_tags = [dict(attrs={'class': [
        'Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

    # The multi-page stitching helpers (preprocess_html/append_page) that
    # were kept here inside a string literal have been removed: they were
    # never executed and referenced an undefined name (Comment).
|
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
formula-as.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class FormulaAS(BasicNewsRecipe):
    """Feed-based recipe for the Romanian magazine Formula AS."""

    # Publication metadata.
    title = u'Formula AS'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Formula AS'
    description = u'Formula AS'
    category = 'Ziare,Romania'
    language = 'ro'
    cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'

    # Download behaviour.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page clean-up rules.
    keep_only_tags = [dict(name='div', attrs={'class': 'item padded'})]
    remove_tags = [dict(name='ul', attrs={'class': 'subtitle lower'})]
    remove_tags_after = [
        dict(name='ul', attrs={'class': 'subtitle lower'}),
        dict(name='div', attrs={'class': 'item-brief-options'}),
    ]

    feeds = [(u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml')]

    def preprocess_html(self, soup):
        """Return the soup after passing it through adeify_images()."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
@ -1,51 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ForsalPL(BasicNewsRecipe):
    """Feed-based recipe for Forsal.pl that downloads the print pages."""

    title = u'Forsal.pl'
    __author__ = 'fenuks'
    description = u'Na portalu finansowym Forsal.pl znajdziesz najświeższe wiadomości finansowe i analizy. Kliknij i poznaj aktualne kursy walut, notowania giełdowe oraz inne wiadomości ze świata finansów.'  # noqa
    category = 'economy, finance'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg'
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'class': 'related'}),
        dict(name='img', attrs={'title': 'Forsal'}),
    ]
    feeds = [
        (u'Najnowsze', u'http://forsal.pl/atom/najnowsze'),
        (u'Tylko na forsal.pl', u'http://forsal.pl/atom/tagi/forsal'),
        (u'Publicystyka', u'http://forsal.pl/atom/tagi/opinia'),
        (u'Bloomberg', u'http://forsal.pl/atom/tagi/bloomberg'),
        (u'Financial Times', u'http://forsal.pl/atom/tagi/financial_times'),
        (u'Gie\u0142da', u'http://forsal.pl/atom/tagi/gielda'),
        (u'Waluty', u'http://forsal.pl/atom/tagi/waluty'),
        (u'Surowce', u'http://forsal.pl/atom/tagi/surowce'),
        # Section title was misspelled 'Komenarze finasnowe'.
        (u'Komentarze finansowe', u'http://forsal.pl/atom/tagi/komentarz'),
        (u'Komentarze gie\u0142dowe', u'http://forsal.pl/atom/tagi/komentarz;gielda'),
        (u'Komentarze walutowe', u'http://forsal.pl/atom/tagi/komentarz;waluty'),

        (u'Makroekonomia', u'http://forsal.pl/atom/tagi/makroekonomia'),
        (u'Handel', u'http://forsal.pl/atom/tagi/handel'),
        (u'Nieruchomo\u015bci', u'http://forsal.pl/atom/tagi/nieruchomosci'),
        (u'Motoryzacja', u'http://forsal.pl/atom/tagi/motoryzacja'),
        (u'Finanse', u'http://forsal.pl/atom/tagi/finanse'),
        (u'Transport', u'http://forsal.pl/atom/tagi/transport'),
        (u'Media', u'http://forsal.pl/atom/tagi/media'),
        (u'Telekomunikacja', u'http://forsal.pl/atom/tagi/telekomunikacja'),
        (u'Energetyka', u'http://forsal.pl/atom/tagi/energetyka'),
        (u'Przemys\u0142', u'http://forsal.pl/atom/tagi/przemysl'),
        (u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma'),
    ]

    def print_version(self, url):
        """Map an article URL to its print page.

        The numeric id is the first run of digits that follows a '/' and is
        terminated by a ','. When the URL contains no such id it is returned
        unchanged.
        """
        match = re.search(r'/([0-9]+),', url)
        if match is None:
            return url
        return 'http://forsal.pl/drukowanie/' + match.group(1)
|
@ -1,18 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Fotoblogia_pl(BasicNewsRecipe):
    """Feed-based recipe for the Polish photography blog Fotoblogia.pl."""

    # Publication metadata.
    title = u'Fotoblogia.pl'
    __author__ = 'fenuks'
    description = u'Jeden z największych polskich blogów o fotografii.'
    category = 'photography'
    language = 'pl'
    masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
    cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Keep only the <article> element and drop the related-articles box.
    keep_only_tags = [
        dict(name='article'),
    ]
    remove_tags = [
        dict(attrs={'class': 'article-related'}),
    ]

    feeds = [
        (u'Wszystko', u'http://fotoblogia.pl/feed/rss2'),
    ]
|
@ -1,75 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Christian Schmitt'
|
||||
|
||||
'''
|
||||
fr-online.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class FROnlineRecipe(BasicNewsRecipe):
    """Feed-based recipe for Frankfurter Rundschau (fr-online.de)."""

    title = 'Frankfurter Rundschau'
    __author__ = 'maccs'
    description = 'Nachrichten aus D und aller Welt'
    encoding = 'utf-8'
    masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
    category = 'news, germany, world'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    # Raise this value to include older articles.
    oldest_article = 1
    max_articles_per_feed = 50
    extra_css = '''
    body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
    .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
    .p--heading-1 {font-weight: bold;}
    .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
    '''
    remove_tags = [dict(name='div', attrs={'id': 'Logo'})]
    cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    cover_margins = (100, 150, '#ffffff')

    # (section title, feed URL) pairs, in display order.
    feeds = [
        ('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'),
        ('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'),
        ('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'),
        ('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'),
        ('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'),
        ('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'),
        ('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'),
        ('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'),
        ('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'),
        ('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'),
        ('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'),
        ('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'),
        ('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'),
        ('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'),
        ('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'),
        ('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'),
        ('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'),
    ]

    def print_version(self, url):
        """Return *url* with 'index.html' swapped for the print-view path."""
        print_suffix = 'view/printVersion/-/index.html'
        return url.replace('index.html', print_suffix)
|
@ -1,59 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
'''
|
||||
fr-online.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a find() keyword dict matching tags that carry any given class.

    *classes* is a string of class names separated by single spaces. The
    returned dict has one key, 'attrs', whose 'class' entry is a predicate:
    called with a tag's class string it returns the (possibly empty)
    frozenset of names shared with *classes*; a falsy class value is
    returned unchanged.
    """
    wanted = frozenset(classes.split(' '))

    def shared_names(value):
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': shared_names}}
|
||||
|
||||
|
||||
class FR(BasicNewsRecipe):
    """Feed-based recipe for Frankfurter Rundschau (fr.de)."""

    title = 'Frankfurter Rundschau'
    __author__ = 'Kovid Goyal'
    description = 'Nachrichten aus D und aller Welt'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 1  # Increase this number if you're interested in older articles
    max_articles_per_feed = 50  # Seems a reasonable number to me
    encoding = 'cp1252'

    keep_only_tags = [
        dict(id='fcms_page_main'),
    ]
    remove_tags = [
        dict(name='footer'),
        dict(id='comments'),
    ]

    feeds = [
        ('Startseite', u'http://www.fr.de/?_XML=rss'),
        ('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'),
        ('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'),
        ('Politik', 'https://www.fr.de/politik/?_XML=rss'),
        ('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'),
        ('Sport', 'https://www.fr.de/sport/?_XML=rss'),
        ('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'),
        ('Kultur', 'https://www.fr.de/kultur/?_XML=rss'),
        ('Wissen', 'https://www.fr.de/wissen/?_XML=rss'),
        ('Leben', 'https://www.fr.de/leben/?_XML=rss'),
        ('Panorama', 'https://www.fr.de/panorama/?_XML=rss'),
    ]

    def preprocess_html(self, soup):
        """Resolve lazy-loaded images and unwrap the main container.

        Every <img> with a 'data-src' attribute gets it copied to 'src'.
        The element with id 'fcms_page_main', when present, is replaced by
        its first child that is a tag. Returns the modified soup.
        """
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        main = soup.find(id='fcms_page_main')
        # Pages without the main container are returned as they are
        # instead of failing when iterating over None.
        if main is not None:
            # Iterate over a snapshot: replaceWith() changes the tree.
            for child in tuple(main):
                if getattr(child, 'name', None):
                    main.replaceWith(child)
                    break
        return soup
|
@ -1,35 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Starson17'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Freakonomics(BasicNewsRecipe):
    """Feed-based recipe for the Freakonomics blog."""

    # Publication metadata.
    title = 'Freakonomics Blog'
    description = 'The Hidden side of everything'
    __author__ = 'Starson17'
    __version__ = '1.02'
    __date__ = '11 July 2011'
    language = 'en'
    cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg'

    # Download behaviour.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    oldest_article = 30
    max_articles_per_feed = 50

    feeds = [
        (u'Freakonomics Blog', u'http://www.freakonomics.com/feed/'),
    ]

    # Keep the content div; cut everything from the social-media bar on.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['content']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': ['simple_socialmedia']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['simple_socialmedia', 'single-fb-share', 'wp-polls']}),
    ]

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Uge-Nyt
|
||||
'''
|
||||
|
||||
|
||||
class FredensborgLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Uge-Nyt (fredensborg.lokalavisen.dk)."""

    # Publication metadata.
    title = 'Uge-Nyt'
    __author__ = 'CoderAllan.github.com'
    description = 'Uge-Nyt: Lokale og regionale nyheder, sport og kultur fra Fredensborg, Kokkedal og Humlebæk på fredensborg.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Uge-Nyt',
         'http://fredensborg.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Fredericia
|
||||
'''
|
||||
|
||||
|
||||
class FredericiaLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Lokalavisen Fredericia (fredericia.lokalavisen.dk)."""

    # Publication metadata.
    title = 'Lokalavisen Fredericia'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Fredericia og omegn på fredericia.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Fredericia',
         'http://fredericia.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Frederiksberg Bladet
|
||||
'''
|
||||
|
||||
|
||||
class FrederiksbergBladet_dk(BasicNewsRecipe):
    """Feed-based recipe for Frederiksberg Bladet (minby.dk)."""

    # Publication metadata.
    title = 'Frederiksberg Bladet'
    __author__ = 'CoderAllan.github.com'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # Article feed followed by the comments feed.
    feeds = [
        ('Frederiksberg Bladet',
         'http://minby.dk/frederiksberg-bladet/feed/'),
        ('Kommentarer til Frederiksberg Bladet',
         'http://minby.dk/frederiksberg-bladet/comments/feed/'),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Frederikssund
|
||||
'''
|
||||
|
||||
|
||||
class FrederikssundLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Lokalavisen Frederikssund (frederikssund.lokalavisen.dk)."""

    # Publication metadata.
    title = 'Lokalavisen Frederikssund'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale, regionale nyheder, sport og kultur fra Frederikssund, Jægerspris og omegn på frederikssund.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Frederikssund',
         'http://frederikssund.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,92 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://freeway.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class General(BasicNewsRecipe):
    """Index-scraping recipe for the magazine at freeway.com.uy."""

    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class': 'titulo_art_ppal'}),
        dict(name='img', attrs={'class': 'recuadro'}),
        dict(name='td', attrs={'class': 'txt_art_ppal'}),
    ]
    remove_tags = [
        dict(name=['object', 'link']),
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
    h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
    h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
    h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
    img {float:left; clear:both; margin:10px}
    p {font-family:Arial,Helvetica,sans-serif;}
    '''

    def parse_index(self):
        """Return a list of (section title, articles) for non-empty sections."""
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def art_parse_section(self, url):
        """Collect the articles listed on the index page at *url*.

        Returns a list of dicts with the keys 'title', 'url', 'description'
        and 'date' (always ''). The list is empty when the page has no
        element with id 'tbl_1'.
        """
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})

        current_articles = []
        if div is None:
            # Layout changed or page empty: report no articles instead of
            # failing on None.findAllNext().
            self.log('\t\tNo tbl_1 element found at', url)
            return current_articles
        for tag in div.findAllNext(attrs={'class': 'ancho_articulos'}):
            # NOTE(review): every tag here was selected by class
            # 'ancho_articulos', so this comparison looks like it can
            # never be true -- confirm before removing.
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs={'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                # Separate name so the *url* parameter is not overwritten.
                article_url = a.get('href', False)
                if not article_url or not title:
                    continue
                if article_url.startswith('/'):
                    article_url = 'http://freeway.com.uy' + article_url
                p = td.find('p', attrs={'class': 'txt_articulos'})
                description = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', article_url)
                self.log('\t\t\t', description)
                current_articles.append(
                    {'title': title, 'url': article_url, 'description': description, 'date': ''})

        return current_articles

    def preprocess_html(self, soup):
        """Turn every table-related element into a plain <div>.

        Presentation attributes are removed from the converted elements.
        Returns the modified soup.
        """
        attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border']  # noqa
        for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
            item.name = 'div'
            for attrib in attribs:
                # Assign first so the delete cannot fail on an attribute
                # the element never had.
                item[attrib] = ''
                del item[attrib]
        return soup

    def get_cover_url(self):
        """Return the fixed cover image URL."""
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'
|
@ -1,75 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class FIELDSTREAM(BasicNewsRecipe):
    """Index-scraping recipe for the Field and Stream blogs."""

    title = 'Field and Stream'
    __author__ = 'Starson17 and Tonythebookworm'
    description = 'Hunting and Fishing and Gun Talk'
    language = 'en'
    no_stylesheets = True
    publisher = 'Starson17 and Tonythebookworm'
    category = 'food recipes, hunting, fishing, guns'
    use_embedded_content = False
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    cover_url = 'http://www.arrowheadflyangler.com/Portals/1/Articles/FieldStream/Field%20and%20Stream%20March%20Fishing%20Edition%20Article%20Cover.jpg'  # noqa
    max_articles_per_feed = 10
    INDEX = 'http://www.fieldandstream.com'

    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-wrapper']}),
    ]
    remove_tags = [
        dict(name='div', attrs={
            'class': lambda x: x and 'content-main-bottom' in x.split()}),
        dict(name='div', attrs={
            'class': lambda x: x and 'pw-widget' in x.split()}),
    ]

    def preprocess_html(self, soup):
        """Resolve lazy-loaded images and drop the parent of every form."""
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        for form in soup.findAll('form'):
            form.parent.extract()
        return soup

    def parse_index(self):
        """Return (section title, articles) for each blog that has articles.

        When self.test is set, its first element caps the number of
        sections collected; otherwise the cap is 100.
        """
        feeds = []
        num = self.test[0] if self.test else 100
        for title, url in [
            ('Field Test', 'http://www.fieldandstream.com/blogs/field-test'),
            (u"Wild Chef", u"http://www.fieldandstream.com/blogs/wild-chef"),
            (u"The Gun Nuts", u"http://www.fieldandstream.com/blogs/gun-nut"),
            (u"Whitetail 365", u"http://www.fieldandstream.com/blogs/whitetail-365"),
            ('Field Notes', 'http://www.fieldandstream.com/blogs/field-notes'),
            (u"Fly Talk", u"http://www.fieldandstream.com/blogs/flytalk"),
            (u"The Conservationist",
             u"http://www.fieldandstream.com/blogs/conservationist"),
            ('The Lateral Line', 'http://www.fieldandstream.com/blogs/lateral-line'),
            ('Total Outdoorsman',
             'http://www.fieldandstream.com/blogs/total-outdoorsman'),
            ('A Sportsman\'s Life',
             'http://www.fieldandstream.com/blogs/a-sportsmans-life'),
        ]:
            self.log('Section:', title)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            # '>=' stops as soon as the cap is reached; the previous '>'
            # fetched one section more than requested.
            if len(feeds) >= num:
                break
        return feeds

    def make_links(self, url):
        """Return the articles linked from <h2> headings on the page at *url*.

        Each article is a dict with 'title', 'url', 'description' ('') and
        'date' (''). Headings without a link, or whose link has no href,
        are skipped.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('h2'):
            link = item.find('a')
            href = link.get('href') if link else None
            if not href:
                continue
            # Site-relative hrefs get the site prefix; absolute ones are
            # used as they are instead of being glued onto INDEX.
            if href.startswith(('http://', 'https://')):
                article_url = href
            else:
                article_url = self.INDEX + href
            title = self.tag_to_string(link)
            self.log('\t', title, 'at', article_url)
            current_articles.append(
                {'title': title, 'url': article_url, 'description': '', 'date': ''})
        return current_articles
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Furesø Avis
|
||||
'''
|
||||
|
||||
|
||||
class FuresoeLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Furesø Avis (furesoe.lokalavisen.dk)."""

    # Publication metadata.
    title = 'Furesø Avis'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Farum, Værløse og Furesø på furesoe.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Download behaviour.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Furesø Avis',
         'http://furesoe.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,39 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
gamasutra.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gamasutra(BasicNewsRecipe):
    """Feed-based recipe for Gamasutra feature articles (print pages)."""

    title = 'Gamasutra Featured articles'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Discard everything before the first 'page_item' div.
    remove_tags_before = dict(name="div", attrs={'class': 'page_item'})
    remove_tags = [
        dict(name='meta'),
        dict(name='link'),
        dict(name='hr'),
        dict(name='div', attrs={'class': 'hide-phone'}),
        dict(name='div', attrs={'class': 'nav_links'}),
        dict(name='div', attrs={'class': 'superfooter'}),
        dict(name='span', attrs={'class': 'comment_text'}),
        dict(name='a', attrs={'type': 'button'}),
    ]
    remove_attributes = ['width', 'height', 'name']

    feeds = [
        (u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles'),
    ]

    def print_version(self, url):
        """Return *url* with its query string replaced by '?print=1'."""
        base, _sep, _query = url.partition('?')
        return base + '?print=1'
|
@ -1,40 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
gamasutra.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gamasutra(BasicNewsRecipe):
    """Feed-based recipe for Gamasutra news (print pages)."""

    title = 'Gamasutra News'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Discard everything before the first 'page_item' div.
    remove_tags_before = dict(name="div", attrs={'class': 'page_item'})
    remove_tags = [
        dict(name='meta'),
        dict(name='link'),
        dict(name='hr'),
        dict(name='div', attrs={'class': 'hide-phone'}),
        dict(name='div', attrs={'class': 'nav_links'}),
        dict(name='div', attrs={'class': 'superfooter'}),
        dict(name='span', attrs={'class': 'comment_text'}),
        dict(name='a', attrs={'type': 'button'}),
    ]
    remove_attributes = ['width', 'height', 'name']

    feeds = [
        (u'News', u'http://feeds.feedburner.com/GamasutraNews'),
    ]

    def print_version(self, url):
        """Return *url* with its query string replaced by '?print=1'."""
        base, _sep, _query = url.partition('?')
        return base + '?print=1'
|
@ -1,47 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'Marc Toensing'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GamespotCom(BasicNewsRecipe):
    """Recipe that collects review articles from the gamespot.com RSS feeds.

    Articles are requested in their ``?print=1`` form (see
    ``get_article_url``).
    """

    title = u'Gamespot.com Reviews'
    description = 'review articles from gamespot.com'
    language = 'en'
    __author__ = u'Marc T\xf6nsing'

    oldest_article = 7
    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
    # The original set ``no_javascript``; the switch used by the other
    # recipes (and documented for BasicNewsRecipe) is ``remove_javascript``.
    remove_javascript = True

    # (feed title, feed URL); one feed per platform plus the combined feed.
    feeds = [
        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
        ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
        ('XBOX 360 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
        ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
        ('PlayStation 3 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'),
        ('PlayStation 2 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'),
        ('PlayStation Portable Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'),
        ('Nintendo DS Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'),
        ('iPhone Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'top_bar'}),
        dict(name='div', attrs={'class': 'video_embed'}),
    ]

    def get_cover_url(self):
        """Return the fixed logo image used as the cover."""
        return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif'

    def get_article_url(self, article):
        """Return the print URL for a feed entry, or None if it has no link.

        The missing-link case previously raised ``TypeError`` from
        ``None + str``.
        """
        link = article.get('link')
        if not link:
            return None
        return link + '?print=1'
|
@ -1,45 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
gandul.info
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gandul(BasicNewsRecipe):
    """Recipe for the front-page news feed of gandul.info."""

    # Publication metadata.
    title = u'G\u00E2ndul'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Gandul'
    description = 'Cotidian Online'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'

    # Download behaviour.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep only the article container, minus photo links and ad blocks.
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]
    remove_tags = [
        dict(name='a', attrs={'class': 'photo'}),
        dict(name='div', attrs={'class': 'ad'}),
    ]

    feeds = [
        (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return it."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,69 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GazetaLubuska(BasicNewsRecipe):
    """Recipe for the regional newspaper Gazeta Lubuska (gazetalubuska.pl).

    Article URLs are decoded from their feedsportal form and fetched via the
    ``printpicart`` print template; the cover is scraped from the site's
    front-page ("JEDYNKI") section.
    """

    title = u'Gazeta Lubuska'
    __author__ = 'fenuks'
    description = u'Gazeta Lubuska - portal regionalny województwa lubuskiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.gazetalubuska.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # (feed title, feed URL).  The second title used to read 'Dreznenko',
    # a typo for the town served by drezdenko.xml.
    feeds = [
        (u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'),
        (u'Drezdenko', u'http://www.gazetalubuska.pl/drezdenko.xml'),
        (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'),
        (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'),
        (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'),
        (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'),
        (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'),
        (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'),
        (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'),
        (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'),
        (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'),
        (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'),
        (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'),
        (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'),
        (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'),
        (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'),
        (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'),
        (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'),
        (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'),
        (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'),
        (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'),
        (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'),
        (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'),
        (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'),
        (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'),
        (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml'),
    ]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape the current cover image URL, store it and return it.

        Follows the first link inside the ``covers`` element of the JEDYNKI
        section, then takes the image inside the ``cover`` element there.
        """
        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        issue_href = listing.find(id='covers').find('a')['href']
        issue_page = self.index_to_soup(self.INDEX + issue_href)
        cover_src = issue_page.find(id='cover').find(name='img')['src']
        self.cover_url = self.INDEX + cover_src
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Turn a feedsportal-encoded URL back into a plain ``http://`` URL.

        Takes the text after the last ``l/0L0S`` marker, drops its final 12
        characters and expands the two-character escape codes in order.
        """
        encoded = url.rpartition('l/0L0S')[2]
        link = encoded[:-12]
        escapes = (('0B', '.'), ('0C', '/'), ('0H', ','),
                   ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for code, char in escapes:
            link = link.replace(code, char)
        return 'http://' + link

    def print_version(self, url):
        """Return the decoded article URL with the print template appended."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'
|
@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Comment
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class gw_bydgoszcz(BasicNewsRecipe):
    """Recipe for the Bydgoszcz edition of Gazeta Wyborcza (gazeta.pl).

    Feed links are decoded from their feedsportal form and multi-page
    articles are stitched together in ``append_page``.
    """

    title = u'Gazeta Wyborcza Bydgoszcz'
    __author__ = 'fenuks'
    language = 'pl'
    description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    INDEX = 'http://bydgoszcz.gazeta.pl'
    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # rules for gazeta.pl
    # Cut the page off at the "Czytaj więcej" (read more) heading.
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [
        dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']),
        dict(attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']}),
    ]
    remove_tags_after = dict(id='gazeta_article_body')

    feeds = [
        (u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')]

    def print_version(self, url):
        """Decode a feedsportal link into an article URL under ``INDEX``.

        URLs that do not mention ``feedsportal.com`` are returned unchanged.
        """
        if 'feedsportal.com' not in url:
            return url
        head, _sep, tail = url.rpartition('gazeta0Bpl')
        if not head:
            # Marker absent: fall back to the wyborcza.pl host marker.
            tail = url.rpartition('wyborcza0Bpl')[2]
        # Order matters: later replacements act on earlier results.
        substitutions = (
            ('/l/', '/'),
            ('/ia1.htm', ''),
            ('0Dbo0F1', ''),
            ('/story01.htm', ''),
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0H', ','),
            ('0I', '_'),
            ('0B', '.'),
        )
        for old, new in substitutions:
            tail = tail.replace(old, new)
        return self.INDEX + tail

    def preprocess_html(self, soup):
        """Reject pages carrying ``piano_btn_1``; merge multi-page articles.

        Returns None when an element with class ``piano_btn_1`` is present
        (presumably a paywall button -- confirm), otherwise the soup, with
        follow-up pages appended when the ``Str`` pager contains links.
        """
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None
        pager = soup.find(id='Str')
        if pager and pager.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of an article to *appendtag*.

        Starts from the last link of the ``Str`` pager div and keeps loading
        pages while the last pager link on each loaded page contains
        'następne'.  HTML comments are stripped from every appended
        ``artykul`` node, and the pager is removed from *soup* at the end.

        Returns 1 without appending anything when the ``og:url`` meta tag or
        the pager links are missing; otherwise returns None.
        """
        pager = soup.find('div', attrs={'id': 'Str'})
        meta = soup.find(name='meta', attrs={'property': 'og:url'})
        try:
            baseurl = meta['content']
        except (TypeError, KeyError):
            # TypeError: no such <meta> tag (meta is None);
            # KeyError: the tag has no ``content`` attribute.
            return 1
        anchors = pager.findAll('a') if pager is not None else []
        if not anchors:
            return 1
        link = anchors[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            # Continue only if this page's last pager link is a "next" link.
            link = ''
            next_pager = soup2.find('div', attrs={'id': 'Str'})
            next_anchors = next_pager.findAll('a') if next_pager is not None else []
            if next_anchors and u'następne' in (next_anchors[-1].string or u''):
                link = next_anchors[-1]
            pagetext = soup2.find(id='artykul')
            if pagetext is None:
                # No article body on this page; skip it.
                continue
            for comment in pagetext.findAll(
                    text=lambda text: isinstance(text, Comment)):
                comment.extract()
            appendtag.insert(len(appendtag.contents), pagetext)
        pager.extract()

    def image_url_processor(self, baseurl, url):
        """Strip whitespace from image URLs that start with a space."""
        return url.strip() if url.startswith(' ') else url
|
@ -1,90 +0,0 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Comment
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GazetaPlSzczecin(BasicNewsRecipe):
    """Recipe for the Szczecin edition of Gazeta Wyborcza (gazeta.pl).

    Feed links are decoded from their feedsportal form and multi-page
    articles are stitched together in ``append_page``.
    """

    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Agora S.A.'
    category = 'news, szczecin'
    INDEX = 'http://szczecin.gazeta.pl'
    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # rules for gazeta.pl
    # Cut the page off at the "Czytaj więcej" (read more) heading.
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [
        dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']),
        dict(attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']}),
    ]
    remove_tags_after = dict(id='gazeta_article_body')
    feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]

    def print_version(self, url):
        """Decode a feedsportal link into an article URL under ``INDEX``.

        URLs that do not mention ``feedsportal.com`` are returned unchanged.
        """
        if 'feedsportal.com' not in url:
            return url
        head, _sep, tail = url.rpartition('gazeta0Bpl')
        if not head:
            # Marker absent: fall back to the wyborcza.pl host marker.
            tail = url.rpartition('wyborcza0Bpl')[2]
        # Order matters: later replacements act on earlier results.
        substitutions = (
            ('/l/', '/'),
            ('/ia1.htm', ''),
            ('/story01.htm', ''),
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0H', ','),
            ('0I', '_'),
            ('0B', '.'),
        )
        for old, new in substitutions:
            tail = tail.replace(old, new)
        return self.INDEX + tail

    def preprocess_html(self, soup):
        """Reject pages carrying ``piano_btn_1``; merge multi-page articles.

        Returns None when an element with class ``piano_btn_1`` is present
        (presumably a paywall button -- confirm), otherwise the soup, with
        follow-up pages appended when the ``Str`` pager contains links.
        """
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None
        pager = soup.find(id='Str')
        if pager and pager.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of an article to *appendtag*.

        Starts from the last link of the ``Str`` pager div and keeps loading
        pages while the last pager link on each loaded page contains
        'następne'.  HTML comments are stripped from every appended
        ``artykul`` node, and the pager is removed from *soup* at the end.

        Returns 1 without appending anything when the ``og:url`` meta tag or
        the pager links are missing; otherwise returns None.
        """
        pager = soup.find('div', attrs={'id': 'Str'})
        meta = soup.find(name='meta', attrs={'property': 'og:url'})
        try:
            baseurl = meta['content']
        except (TypeError, KeyError):
            # TypeError: no such <meta> tag (meta is None);
            # KeyError: the tag has no ``content`` attribute.
            return 1
        anchors = pager.findAll('a') if pager is not None else []
        if not anchors:
            return 1
        link = anchors[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            # Continue only if this page's last pager link is a "next" link.
            link = ''
            next_pager = soup2.find('div', attrs={'id': 'Str'})
            next_anchors = next_pager.findAll('a') if next_pager is not None else []
            if next_anchors and u'następne' in (next_anchors[-1].string or u''):
                link = next_anchors[-1]
            pagetext = soup2.find(id='artykul')
            if pagetext is None:
                # No article body on this page; skip it.
                continue
            for comment in pagetext.findAll(
                    text=lambda text: isinstance(text, Comment)):
                comment.extract()
            appendtag.insert(len(appendtag.contents), pagetext)
        pager.extract()

    def image_url_processor(self, baseurl, url):
        """Strip whitespace from image URLs that start with a space."""
        return url.strip() if url.startswith(' ') else url
|
@ -1,72 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GazetaPomorska(BasicNewsRecipe):
    """Recipe for the regional newspaper Gazeta Pomorska (pomorska.pl).

    Article URLs are decoded from their feedsportal form and fetched via the
    ``printpicart`` print template; the cover is scraped from the site's
    front-page ("JEDYNKI") section.
    """

    title = u'Gazeta Pomorska'
    __author__ = 'Richard z forum.eksiazki.org, fenuks'
    description = u'Gazeta Pomorska - portal regionalny'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.pomorska.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # (feed title, feed URL).
    feeds = [
        (u'Wszystkie', u'http://www.pomorska.pl/rss.xml'),
        (u'Region', u'http://www.pomorska.pl/region.xml'),
        (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
        (u'Nakło', u'http://www.pomorska.pl/naklo.xml'),
        (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'),
        (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'),
        (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'),
        (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'),
        (u'Toruń', u'http://www.pomorska.pl/torun.xml'),
        (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'),
        (u'Aleksandrów Kujawski',
         u'http://www.pomorska.pl/aleksandrow.xml'),
        (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'),
        (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'),
        (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'),
        (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'),
        (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'),
        (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'),
        (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'),
        (u'Rypin', u'http://www.pomorska.pl/rypin.xml'),
        (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'),
        (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'),
        (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'),
        (u'Żnin', u'http://www.pomorska.pl/znin.xml'),
        (u'Sport', u'http://www.pomorska.pl/sport.xml'),
        (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'),
        (u'Auto', u'http://www.pomorska.pl/moto.xml'),
        (u'Dom', u'http://www.pomorska.pl/dom.xml'),
        # Disabled feed: (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
        (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml'),
    ]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape the current cover image URL, store it and return it.

        Follows the first link inside the ``covers`` element of the JEDYNKI
        section, then takes the image inside the ``cover`` element there.
        """
        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        issue_href = listing.find(id='covers').find('a')['href']
        issue_page = self.index_to_soup(self.INDEX + issue_href)
        cover_src = issue_page.find(id='cover').find(name='img')['src']
        self.cover_url = self.INDEX + cover_src
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Turn a feedsportal-encoded URL back into a plain ``http://`` URL.

        Takes the text after the last ``l/0L0S`` marker, drops its final 12
        characters and expands the two-character escape codes in order.
        """
        encoded = url.rpartition('l/0L0S')[2]
        link = encoded[:-12]
        escapes = (('0B', '.'), ('0C', '/'), ('0H', ','),
                   ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for code, char in escapes:
            link = link.replace(code, char)
        return 'http://' + link

    def print_version(self, url):
        """Return the decoded article URL with the print template appended."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'
|
@ -1,46 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GazetaWroclawska(BasicNewsRecipe):
    """Recipe for the regional newspaper Gazeta Wrocławska.

    Articles are fetched in their print ("drukuj") form; interstitial
    advertisement pages are skipped and the cover comes from prasa24.pl.
    """

    title = u'Gazeta Wroc\u0142awska'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Gazeta Wrocławska. Najnowsze Wiadomości Wrocław, Informacje Wrocław. Czytaj!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetawroclawska.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # The dz.png element marks the end of the content and is itself removed.
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    remove_tags = [
        dict(id='mat-podobne'),
        dict(name='a', attrs={'class': 'czytajDalej'}),
        dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}),
    ]

    # (feed title, feed URL).
    feeds = [
        (u'Fakty24', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533775/index.rss?201302'),
        (u'Region', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_region.xml?201302'),
        (u'Kultura', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533777/index.rss?201302'),
        (u'Sport', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533776/index.rss?201302'),
        (u'Z archiwum', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_zarchiwum.xml?201302'),
        (u'M\xf3j reporter', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_mojreporter.xml?201302'),
        (u'Historia', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_historia.xml?201302'),
        (u'Listy do redakcji', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_listydoredakcji.xml?201302'),
        (u'Na drogach', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_nadrogach.xml?201302'),
    ]

    def print_version(self, url):
        """Return the print URL: every 'artykul' becomes 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """Return the raw target page when *soup* is an advertisement page.

        A page counts as an advertisement when 'Advertisement' is found in
        its ``<title>`` element; the page's first link is then followed.
        Returns None for ordinary pages.  Pages without a ``<title>``, or ad
        pages whose first anchor is missing or has no ``href``, also return
        None; they previously raised ``TypeError`` / ``KeyError``.
        """
        title = soup.title
        if title is None or 'Advertisement' not in title:
            return None
        anchor = soup.find('a')
        if anchor is None or not anchor.get('href'):
            return None
        return self.index_to_soup(anchor['href'], raw=True)

    def get_cover_url(self):
        """Scrape the cover image URL from prasa24.pl, store and return it."""
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/gazeta-wroclawska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return self.cover_url
|
@ -1,68 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GazetaWspolczesna(BasicNewsRecipe):
    """Recipe for the regional newspaper Gazeta Współczesna (wspolczesna.pl).

    Article URLs are decoded from their feedsportal form and fetched via the
    ``printpicart`` print template; the cover is scraped from the site's
    front-page ("JEDYNKI") section.
    """

    title = u'Gazeta Wsp\xf3\u0142czesna'
    __author__ = 'fenuks'
    description = u'Gazeta Współczesna - portal regionalny.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.wspolczesna.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # (feed title, feed URL).
    feeds = [
        (u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'),
        (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'),
        (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'),
        (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'),
        (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'),
        (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'),
        (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'),
        (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'),
        (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'),
        (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'),
        (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'),
        (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'),
        (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'),
        (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'),
        (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'),
        (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'),
        (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'),
        (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'),
        (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'),
        (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'),
        (u'Sport', u'http://www.wspolczesna.pl/sport.xml'),
        (u'Praca', u'http://www.wspolczesna.pl/praca.xml'),
        (u'Dom', u'http://www.wspolczesna.pl/dom.xml'),
        (u'Auto', u'http://www.wspolczesna.pl/auto.xml'),
        (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml'),
    ]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape the current cover image URL, store it and return it.

        Follows the first link inside the ``covers`` element of the JEDYNKI
        section, then takes the image inside the ``cover`` element there.
        """
        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        issue_href = listing.find(id='covers').find('a')['href']
        issue_page = self.index_to_soup(self.INDEX + issue_href)
        cover_src = issue_page.find(id='cover').find(name='img')['src']
        self.cover_url = self.INDEX + cover_src
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Turn a feedsportal-encoded URL back into a plain ``http://`` URL.

        Takes the text after the last ``l/0L0S`` marker, drops its final 12
        characters and expands the two-character escape codes in order.
        """
        encoded = url.rpartition('l/0L0S')[2]
        link = encoded[:-12]
        escapes = (('0B', '.'), ('0C', '/'), ('0H', ','),
                   ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for code, char in escapes:
            link = link.replace(code, char)
        return 'http://' + link

    def print_version(self, url):
        """Return the decoded article URL with the print template appended."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'
|
@ -1,126 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Comment
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
    """Recipe for the national edition of Gazeta Wyborcza (wyborcza.pl).

    Feed links are decoded from their feedsportal form, multi-page articles
    are stitched together in ``append_page`` and the cover is scraped from a
    gallery page on wyborcza.pl.
    """

    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
    # encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # rules for gazeta.pl
    # Cut the page off at the "Czytaj więcej" (read more) heading.
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [
        dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']),
        dict(attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']}),
    ]
    remove_tags_after = dict(id='gazeta_article_body')

    # rules for wyborcza.biz
    # Remove inline "Czytaj także/też: ..." cross-reference links.
    preprocess_regexps.append((re.compile(
        u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\\.?<br>', re.DOTALL), lambda m: ''))

    # (feed title, feed URL).
    feeds = [
        (u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
        (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
        (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
        (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
        (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
        (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
        (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
        (u'Gazeta \u015awi\u0105teczna',
         u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
        (u'Du\u017cy Format',
         u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
        (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
        (u'M\u0119ska Muzyka',
         u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
        (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
        (u'Solidarni z Tybetem',
         u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
        (u'W pon. - \u017bakowski',
         u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
        (u'We wt. - Kolenda-Zalewska',
         u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
        (u'\u015aroda w \u015brod\u0119',
         u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
        (u'W pi\u0105tek - Olejnik',
         u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
        (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss'),
    ]

    def print_version(self, url):
        """Decode a feedsportal link into an article URL under ``INDEX``.

        URLs that do not mention ``feedsportal.com`` are returned unchanged.
        """
        if 'feedsportal.com' not in url:
            return url
        head, _sep, tail = url.rpartition('wyborcza0Bpl')
        if not head:
            # Marker absent: fall back to the gazeta.pl host marker.
            tail = url.rpartition('gazeta0Bpl')[2]
        # Order matters: later replacements act on earlier results.
        substitutions = (
            ('/l/', '/'),
            ('/ia1.htm', ''),
            ('/story01.htm', ''),
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0H', ','),
            ('0I', '_'),
            ('0B', '.'),
        )
        for old, new in substitutions:
            tail = tail.replace(old, new)
        return self.INDEX + tail

    def preprocess_html(self, soup):
        """Reject pages carrying ``piano_btn_1``; merge multi-page articles.

        Returns None when an element with class ``piano_btn_1`` is present
        (presumably a paywall button -- confirm), otherwise the soup, with
        follow-up pages appended when the ``Str`` pager contains links.
        """
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None
        pager = soup.find(id='Str')
        if pager and pager.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of an article to *appendtag*.

        Starts from the last link of the ``Str`` pager div and keeps loading
        pages while the last pager link on each loaded page contains
        'następne'.  HTML comments are stripped from every appended
        ``artykul`` node, and the pager is removed from *soup* at the end.

        Returns 1 without appending anything when the ``og:url`` meta tag or
        the pager links are missing; otherwise returns None.
        """
        pager = soup.find('div', attrs={'id': 'Str'})
        meta = soup.find(name='meta', attrs={'property': 'og:url'})
        try:
            baseurl = meta['content']
        except (TypeError, KeyError):
            # TypeError: no such <meta> tag (meta is None);
            # KeyError: the tag has no ``content`` attribute.
            return 1
        anchors = pager.findAll('a') if pager is not None else []
        if not anchors:
            return 1
        link = anchors[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            # Continue only if this page's last pager link is a "next" link.
            link = ''
            next_pager = soup2.find('div', attrs={'id': 'Str'})
            next_anchors = next_pager.findAll('a') if next_pager is not None else []
            if next_anchors and u'następne' in (next_anchors[-1].string or u''):
                link = next_anchors[-1]
            pagetext = soup2.find(id='artykul')
            if pagetext is None:
                # No article body on this page; skip it.
                continue
            for comment in pagetext.findAll(
                    text=lambda text: isinstance(text, Comment)):
                comment.extract()
            appendtag.insert(len(appendtag.contents), pagetext)
        pager.extract()

    def get_cover_url(self):
        """Scrape the cover image URL from the gallery page and return it.

        The 'P.jpg' suffix of the gallery image is replaced by '.jpg'
        (presumably the full-size variant -- confirm).
        """
        soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
        cover = soup.find(attrs={'class': 'gallerycontent'})
        self.cover_url = cover.ul.li.a.img['src'].replace('P.jpg', '.jpg')
        return self.cover_url

    def image_url_processor(self, baseurl, url):
        """Strip whitespace from image URLs that start with a space."""
        return url.strip() if url.startswith(' ') else url
|
@ -1,64 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GCN(BasicNewsRecipe):
    """Recipe for Gazeta Codzienna Nowiny (nowiny24.pl).

    Article URLs are decoded from their feedsportal form and fetched via the
    ``printpicart`` print template; the cover is scraped from the site's
    front-page ("JEDYNKI") section.
    """

    title = u'Gazeta Codziennej Nowiny'
    __author__ = 'fenuks'
    description = u'nowiny24.pl - portal regionalny województwa podkarpackiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.nowiny24.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style']
    use_embedded_content = False

    # (feed title, feed URL).
    feeds = [
        (u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'),
        (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'),
        (u'Bieszczady', u'http://www.nowiny24.pl/bieszczady.xml'),
        (u'Rzeszów', u'http://www.nowiny24.pl/rzeszow.xml'),
        (u'Przemyśl', u'http://www.nowiny24.pl/przemysl.xml'),
        (u'Leżajsk', u'http://www.nowiny24.pl/lezajsk.xml'),
        (u'Łańcut', u'http://www.nowiny24.pl/lancut.xml'),
        (u'Dębica', u'http://www.nowiny24.pl/debica.xml'),
        (u'Jarosław', u'http://www.nowiny24.pl/jaroslaw.xml'),
        (u'Krosno', u'http://www.nowiny24.pl/krosno.xml'),
        (u'Mielec', u'http://www.nowiny24.pl/mielec.xml'),
        (u'Nisko', u'http://www.nowiny24.pl/nisko.xml'),
        (u'Sanok', u'http://www.nowiny24.pl/sanok.xml'),
        (u'Stalowa Wola', u'http://www.nowiny24.pl/stalowawola.xml'),
        (u'Tarnobrzeg', u'http://www.nowiny24.pl/tarnobrzeg.xml'),
        (u'Sport', u'http://www.nowiny24.pl/sport.xml'),
        (u'Dom', u'http://www.nowiny24.pl/dom.xml'),
        (u'Auto', u'http://www.nowiny24.pl/auto.xml'),
        (u'Praca', u'http://www.nowiny24.pl/praca.xml'),
        (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'),
        (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml'),
    ]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape the current cover image URL, store it and return it.

        Follows the first link inside the ``covers`` element of the JEDYNKI
        section, then takes the image inside the ``cover`` element there.
        """
        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        issue_href = listing.find(id='covers').find('a')['href']
        issue_page = self.index_to_soup(self.INDEX + issue_href)
        cover_src = issue_page.find(id='cover').find(name='img')['src']
        self.cover_url = self.INDEX + cover_src
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Turn a feedsportal-encoded URL back into a plain ``http://`` URL.

        Takes the text after the last ``l/0L0S`` marker, drops its final 12
        characters and expands the two-character escape codes in order.
        """
        encoded = url.rpartition('l/0L0S')[2]
        link = encoded[:-12]
        escapes = (('0B', '.'), ('0C', '/'), ('0H', ','),
                   ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for code, char in escapes:
            link = link.replace(code, char)
        return 'http://' + link

    def print_version(self, url):
        """Return the decoded article URL with the print template appended."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'
|
@ -1,80 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.utils.magick import Image, create_canvas
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
|
||||
title = u'Geek and Poke'
|
||||
__author__ = u'DrMerry'
|
||||
description = u'Geek and Poke Cartoons'
|
||||
publisher = u'Oliver Widder'
|
||||
author = u'Oliver Widder, DrMerry (calibre-code), calibre'
|
||||
oldest_article = 31
|
||||
max_articles_per_feed = 100
|
||||
language = u'en'
|
||||
simultaneous_downloads = 1
|
||||
timefmt = ' [%a, %d %B, %Y]'
|
||||
summary_length = -1
|
||||
no_stylesheets = True
|
||||
category = 'News.IT, Cartoon, Humor, Geek'
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'blog'
|
||||
masthead_url = None
|
||||
conversion_options = {
|
||||
'comments': '', 'tags': category, 'language': language, 'publisher': publisher, 'author': author
|
||||
}
|
||||
|
||||
remove_tags_before = dict(name='p', attrs={'class': 'content-nav'})
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'entry-content'})
|
||||
remove_tags = [dict(name='div', attrs={'class': 'entry-footer'}),
|
||||
dict(name='div', attrs={'id': 'alpha'}),
|
||||
dict(name='div', attrs={'id': 'gamma'}),
|
||||
dict(name='iframe'),
|
||||
dict(name='p', attrs={'class': 'content-nav'})]
|
||||
|
||||
filter_regexps = [(r'feedburner\.com'),
|
||||
(r'pixel.quantserve\.com'),
|
||||
(r'googlesyndication\.com'),
|
||||
(r'yimg\.com'),
|
||||
(r'scorecardresearch\.com')]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'(<p>( |\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL | re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'( |\s\s)+\s*', re.DOTALL |
|
||||
re.IGNORECASE), lambda match: ' '),
|
||||
(re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL |
|
||||
re.IGNORECASE), lambda match: match.group(1) + match.group(2) + '</h3>'),
|
||||
(re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL | re.IGNORECASE),
|
||||
lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
|
||||
(re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL |
|
||||
re.IGNORECASE), lambda match: '<br>'),
|
||||
]
|
||||
|
||||
extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Re-frame every image referenced by the page on a taller white canvas.

    For each ``<img>`` that has a ``src``, the file at that location is
    opened, passed through ``trim(0)``, composed at the top-left corner of a
    new canvas 1.17 times the image height, and saved back to the same
    location. The soup object is returned.
    """
    origin_x, origin_y = 0, 0
    background = '#ffffff'
    stretch = 1.17  # the canvas is this many times taller than the image
    for image_tag in soup.findAll('img', src=True):
        path = image_tag['src']
        picture = Image()
        picture.open(path)
        picture.trim(0)
        w, h = picture.size
        frame = create_canvas(w, h * stretch, background)
        frame.compose(picture, origin_x, origin_y)
        frame.save(path)
    return soup
|
||||
|
||||
feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Villabyerne
|
||||
'''
|
||||
|
||||
|
||||
class GentofteLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Villabyerne (gentofte.lokalavisen.dk)."""

    # Identity
    title = 'Villabyerne'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = 'Lokale og regionale nyheder, sport og kultur fra Gentofte, Hellerup og Charlottenlund på gentofte.lokalavisen.dk'

    # Download limits and clean-up
    max_articles_per_feed = 50
    oldest_article = 7
    auto_cleanup = True

    # (feed title, feed URL) pairs
    feeds = [
        (
            'Seneste nyt fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Villabyerne',
            'http://gentofte.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,31 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GermanGovernmentPress(BasicNewsRecipe):
    """Recipe for the press releases of the German federal government."""

    title = u'Pressemitteilungen der Bundesregierung'
    __author__ = 'malfi'
    language = 'de'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'

    # Only the headline and the three text containers are kept.
    keep_only_tags = [
        dict(name='h2'),
        dict(name='div', attrs={'class': 'textblack'}),
        dict(name='div', attrs={'class': 'subtitle'}),
        dict(name='div', attrs={'class': 'text'}),
    ]
    remove_tags = []

    feeds = [
        (u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')]  # noqa

    extra_css = '''
                h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''

    def print_version(self, url):
        """Return the print-layout URL for an article URL.

        ``<prefix>.html`` becomes
        ``<prefix>,layoutVariant=Druckansicht.html``. A URL that does not end
        in ``.html`` is returned unchanged instead of raising
        ``AttributeError`` on a failed match.
        """
        # The dot is escaped so only a literal ".html" suffix is accepted.
        m = re.search(r'^(.*)\.html$', url)
        if m is None:
            return url
        return m.group(1) + ',layoutVariant=Druckansicht.html'
|
@ -1,14 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Single-feed recipe for Gezgin Dergi."""

    title = u'Gezgin Dergi'
    language = 'tr'
    __author__ = 'asalet_r'

    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 7

    # One (title, URL) pair: the site's main feed.
    feeds = [
        (u'Gezgin Dergi', u'http://www.gezgindergi.com/feed/'),
    ]
|
@ -1,72 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gildia(BasicNewsRecipe):
    """Recipe for the Gildia.pl culture portal and its themed sub-sites."""

    title = u'Gildia.pl'
    __author__ = 'fenuks'
    description = u'Fantastyczny Portal Kulturalny - newsy, recenzje, galerie, wywiady. Literatura, film, gry komputerowe i planszowe, komiks, RPG, sklep. Nie lekceważ potęgi wyobraźni!'  # noqa
    category = 'culture'
    # The original assigned cover_url twice; only the later value (kept here)
    # ever took effect.
    cover_url = 'http://portal.gildia.pl/images/logo-main.png'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    preprocess_regexps = [(re.compile(u'</?sup>'), lambda match: '')]
    remove_tags = [dict(name='div', attrs={'class': [
        'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
    keep_only_tags = [dict(name='div', attrs={'class': 'widetext'}), dict(name='article', attrs={'id': re.compile(r'post-\d+')})]
    feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'),
             (u'Literatura', u'http://www.literatura.gildia.pl/rss'),
             (u'Film', u'http://www.film.gildia.pl/rss'),
             (u'Horror', u'http://www.horror.gildia.pl/rss'),
             (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
             (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
             (u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
             (u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
             (u'Techno', u'http://www.techno.gildia.pl/rss'),
             (u'Historia', u'http://www.historia.gildia.pl/rss'),
             (u'Magia', u'http://www.magia.gildia.pl/rss'),
             (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
             (u'RPG', u'http://www.rpg.gildia.pl/rss'),
             (u'LARP', u'http://www.larp.gildia.pl/rss'),
             (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
             (u'Nauka', u'http://www.nauka.gildia.pl/rss'),
             ]

    def skip_ad_pages(self, soup):
        """Follow a teaser page through to the full article, if there is one.

        Returns the raw content of the linked page when the ``div.news`` block
        holds a link matching one of the title keywords, or a "Więcej..." link
        under ``/publicystyka/``. Returns ``None`` otherwise.
        """
        content = soup.find('div', attrs={'class': 'news'})
        if content is None:
            return

        words = ('recenzj', 'zapowied', 'fragmen',
                 'relacj', 'wywiad', 'nominacj')
        document_title = soup.title.renderContents().decode('utf-8').lower()
        for word in words:
            if word not in document_title:
                continue
            for link in content.findAll(name='a'):
                # Anchors without an href (e.g. named anchors) used to raise
                # KeyError here; treat a missing href as an empty string.
                href = link.get('href', '')
                if not href:
                    continue
                if word in href or (link.string and word in link.string):
                    return self.index_to_soup(href, raw=True)
        for tag in content.findAll(name='a', href=re.compile('/publicystyka/')):
            if 'Więcej...' == tag.string:
                return self.index_to_soup(tag['href'], raw=True)

    def preprocess_html(self, soup):
        """Turn relative links into absolute ones on the matching sub-site."""
        title = soup.title.renderContents().decode('utf-8').lower()
        for a in soup('a', href=True):
            href = a['href']
            if href.startswith('http'):
                continue
            if '/gry/' in href:
                base = 'http://www.gry.gildia.pl'
            elif u'książk' in title or u'komiks' in title:
                # Book and comic pages both live on the literature sub-site.
                # The original had a second, unreachable 'komiks' branch with
                # the same target.
                base = 'http://www.literatura.gildia.pl'
            else:
                base = 'http://www.gildia.pl'
            a['href'] = base + href
        return soup
|
@ -1,36 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
gizmodo.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gizmodo(BasicNewsRecipe):
    """Recipe for Gizmodo, using the content embedded in the feed."""

    # Identity and metadata
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    language = 'en'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'

    # Download behaviour
    encoding = 'utf-8'
    use_embedded_content = True
    no_stylesheets = True
    max_articles_per_feed = 100
    oldest_article = 2

    # NOTE(review): other recipes in this file use the key 'comments' --
    # confirm whether 'comment' is intentional.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        (u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml'),
    ]

    remove_tags = [{'class': 'feedflare'}]

    def preprocess_html(self, soup):
        """Hand the soup to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)
|
@ -1,36 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    """Feed-based recipe for Glamour (US)."""

    title = u'Glamour (US)'
    __author__ = 'Anonymous'
    language = 'en'
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    auto_cleanup = True

    # (feed title, feed URL) pairs. 'Save the Date' was listed twice in the
    # original, which made the recipe request the same feed twice; it now
    # appears once.
    feeds = [
        (u'All Fashion',
         u'http://feeds.glamour.com/glamour/all_fashion'),
        (u'All Beauty',
         u'http://feeds.glamour.com/glamour/all_beauty'),
        (u'All Sex, Love & Life',
         u'http://feeds.glamour.com/glamour/sex_love_life'),
        (u'All Health & Fitness',
         u'http://feeds.glamour.com/glamour/health_fitness'),
        (u'Slaves to Fashion blog',
         u'http://feeds.glamour.com/glamour/slavestofashion'),
        (u'The Girls in the Beauty Department',
         u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
        (u'Smitten blog',
         u'http://feeds.glamour.com/glamour/smitten'),
        (u'Save the Date',
         u'http://feeds.feedburner.com/glamour/save-the-date'),
        (u'Vitamin G blog',
         u'http://feeds.glamour.com/glamour/vitamin-g'),
    ]
|
@ -1,100 +0,0 @@
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* belonging to *soup*.

    When *soup* exposes a ``new_tag`` factory it is called with *attrs*
    converted to a dict; otherwise ``Tag`` is constructed directly, with
    ``None`` substituted for empty *attrs*.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
||||
|
||||
|
||||
class GlennBeckRecipe(BasicNewsRecipe):
    """Recipe for Glenn Beck articles; rebuilds each page into a clean document."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Glenn Beck'
    publisher = u'Premiere Radio Networks'
    category = u'News, Opinion'
    description = u'The fusion of entertainment and enlightenment'

    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    feeds = [(u'Glenn Beck', u'http://feeds.feedburner.com/GlennBeckArticles')]

    def preprocess_html(self, soup):
        """Copy the usable parts of *soup* into a fresh document and return it.

        Their html is horribly broken; searching for the content div returns
        only the headline because of illegal tag nesting, so the content is
        collected piece by piece: the headline, then every ``<p>``, then (if
        that found too little) the bare strings directly under ``<body>``, and
        as a last resort the whole ``news-detail`` div.
        """
        # We can find this one, and we don't want it.
        div = soup.find('div', attrs={'id': 'extraInfo'})
        if div:
            div.extract()

        # Don't want these either.
        for iframe in soup.findAll('iframe'):
            iframe.extract()

        # Get empty document.
        freshSoup = self.getFreshSoup()

        # This is the broken div; but we can find the headline.
        newsDiv = soup.find('div', attrs={'class': 'news-detail'})
        if newsDiv and newsDiv.h1:
            freshSoup.body.append(newsDiv.h1)

        # The content is wrapped in <p></p> tags, most of the time anyway.
        counter = 0
        for p in soup.findAll('p'):
            # Depending on the parser, 'class' comes back as a string or as a
            # list of class names; accept both.
            classes = p.get('class') or []
            if isinstance(classes, str):
                classes = classes.split()
            if 'smalltextwhite' in classes:
                # But we don't want this one.
                continue

            freshSoup.body.append(p)
            counter += 1

        # In some articles the content is not wrapped in <p></p> tags. In that
        # case the counter is low. 2 is the magic number that seems to work.
        if counter <= 2:
            # So they are playing hard-to-get: first throw out all comments.
            for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
                comment.extract()

            # Find all unwrapped strings.
            for txt in soup.findAll(text=True):
                raw = txt.strip()
                # Skip empty strings and the literal six-character entity.
                # The original compared a 6-character string against a single
                # character, which could never match.
                if txt.parent.name == 'body' and raw and raw != '&nbsp;':
                    # This is our content; ignore the rest.
                    para = new_tag(freshSoup, 'p')
                    para.append(raw)
                    freshSoup.body.append(para)
                    counter += 1

            # Now if the counter is still 0 or 1 they did something completely
            # different and we still have an empty article. In a last attempt,
            # add the whole content div, just in case -- but only when it was
            # actually found; appending None would fail.
            if counter < 2 and newsDiv is not None:
                freshSoup.body.append(newsDiv)

        return freshSoup

    def getFreshSoup(self, title=None):
        """Return an empty HTML document, with *title* in ``<title>`` if given."""
        heading = str(title) if title else ''
        return BeautifulSoup('<html><head><title>' + heading + '</title></head><body></body></html>')
|
@ -1,45 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GlosWielkopolski(BasicNewsRecipe):
    """Recipe for the regional daily Głos Wielkopolski (print-view pages)."""

    title = u'G\u0142os Wielkopolski'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Głos Wielkopolski. Najnowsze Wiadomości Poznań. Czytaj Informacje Poznań!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gloswielkopolski.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    remove_tags = [
        dict(id='mat-podobne'),
        dict(name='a', attrs={'class': 'czytajDalej'}),
        dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}),
        # The original wrote {'href', '...'} -- a set literal, not an
        # attribute mapping -- so this filter never described an href match.
        dict(name='a', attrs={'href': 'http://www.gloswielkopolski.pl/newsletter/'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'),
        (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'),
        (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'),
        (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'),
        (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'),
        (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'),
        (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'),
        (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'),
        (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]

    def print_version(self, url):
        """Return the print URL: 'artykul' in *url* is replaced by 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """On an 'Advertisement' page, fetch the page behind its first link.

        Returns the raw content of that page, or ``None`` when the page is not
        recognised as an advertisement.
        """
        # NOTE(review): `in soup.title` is a membership test on the title tag,
        # not necessarily a substring search of its text -- confirm intent.
        if 'Advertisement' in soup.title:
            nexturl = soup.find('a')['href']
            return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        """Look up the current cover image, store it in ``cover_url``, return it."""
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/glos-wielkopolski/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        # The attribute was set on the line above, so the original
        # getattr(self, 'cover_url', self.cover_url) fallback was redundant.
        return self.cover_url
|
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
go4it.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Go4ITro(BasicNewsRecipe):
    """Recipe for go4it.ro."""

    # Identity and metadata
    title = u'go4it'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'go4it'
    description = 'Gadgeturi, Lifestyle, Tehnologie'
    category = 'Reviste,Ziare,IT'
    language = 'ro'
    cover_url = 'http://www.go4it.ro/images/logo.png'

    # Download behaviour
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    max_articles_per_feed = 100
    oldest_article = 5

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep the sub-title strip and the story body ...
    keep_only_tags = [
        dict(name='div', attrs={'class': 'subTitle clearfix'}),
        dict(name='div', attrs={'class': 'story'}),
    ]

    # ... and drop the date span and the comments link inside them.
    remove_tags = [
        dict(name='span', attrs={'class': ['data']}),
        dict(name='a', attrs={'class': ['comments']}),
    ]

    feeds = [(u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri')]

    def preprocess_html(self, soup):
        """Hand the soup to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)
|
@ -1,13 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
    """Single-feed recipe for Goal (Italian edition)."""

    title = u'Goal'
    __author__ = 'faber1971'
    description = 'Sports news from Italy'
    language = 'it'

    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True

    # Everything after the element with id 'article_content' is dropped.
    remove_tags_after = [dict(id='article_content')]

    feeds = [
        (u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss'),
    ]
|
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
gofin.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class gofin(BasicNewsRecipe):
    """Feed-based recipe for gofin.pl."""

    # Identity
    title = u'Gofin'
    __author__ = 'teepel <teepel44@gmail.com>'
    description = u'Portal Podatkowo-Księgowy'
    language = 'pl'
    INDEX = 'http://gofin.pl'

    # Download limits
    simultaneous_downloads = 5
    max_articles_per_feed = 100
    oldest_article = 7
    remove_empty_feeds = True

    # Clean-up
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # (feed title, feed URL) pairs
    feeds = [
        (u'Podatki', u'http://www.rss.gofin.pl/podatki.xml'),
        (u'Prawo Pracy', u'http://www.rss.gofin.pl/prawopracy.xml'),
        (u'Rachunkowo\u015b\u0107', u'http://www.rss.gofin.pl/rachunkowosc.xml'),
        (u'Sk\u0142adki, zasi\u0142ki, emerytury', u'http://www.rss.gofin.pl/zasilki.xml'),
        (u'Firma', u'http://www.rss.gofin.pl/firma.xml'),
        (u'Prawnik radzi', u'http://www.rss.gofin.pl/prawnikradzi.xml'),
    ]
|
@ -1,34 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    """Recipe for Good to Know (UK), downloading the print view of each article."""

    title = u'Good to Know (uk)'
    __author__ = 'Anonymous'
    language = 'en_GB'

    oldest_article = 14
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    remove_tags = [
        dict(name='div', attrs={'class': ['articles_footer', 'printoptions']}),
    ]

    # (feed title, feed URL) pairs
    feeds = [
        (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
        (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
        (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
        (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
        (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
        (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
        (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
        (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
    ]

    def print_version(self, url):
        """Return the print URL: *url* with '/print/1' appended."""
        return url + '/print/1'

    def preprocess_html(self, soup):
        """Replace every link that has a single string child by that string."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # No single string child; leave the link as it is.
                continue
            anchor.replaceWith(label)
        return soup
|
@ -1,13 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BasicUserRecipe1318572445(BasicNewsRecipe):
    """Single-feed recipe for the Google Mobile Blog."""

    title = u'Google Mobile Blog'
    language = 'en'

    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 7

    # One (title, URL) pair: the blog's Atom feed.
    feeds = [
        (u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml'),
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Grenaa
|
||||
'''
|
||||
|
||||
|
||||
class GrenaaLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Lokalavisen Grenaa (grenaa.lokalavisen.dk)."""

    # Identity
    title = 'Lokalavisen Grenaa'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = 'Lokale og regionale nyheder, sport, kultur fra Grenå og omegn på grenaa.lokalavisen.dk'

    # Download limits and clean-up
    max_articles_per_feed = 50
    oldest_article = 7
    auto_cleanup = True

    # (feed title, feed URL) pairs
    feeds = [
        (
            'Seneste nyt fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Grenaa',
            'http://grenaa.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Ugeposten Gribskov
|
||||
'''
|
||||
|
||||
|
||||
class GribskovLokalavisen_dk(BasicNewsRecipe):
    """Feed-based recipe for Ugeposten Gribskov (gribskov.lokalavisen.dk)."""

    # Identity
    title = 'Ugeposten Gribskov'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = 'Lokale og regionale nyheder, sport og kultur fra Gribskov og omegn på gribskov.lokalavisen.dk'

    # Download limits and clean-up
    max_articles_per_feed = 50
    oldest_article = 7
    auto_cleanup = True

    # (feed title, feed URL) pairs
    feeds = [
        (
            'Seneste nyt fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Ugeposten Gribskov',
            'http://gribskov.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,81 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheGrid(BasicNewsRecipe):
    """Recipe for The Grid (Toronto); builds its index from the latest issue page."""

    #: The title to use for the ebook
    title = u'The Grid'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                   'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class': 'right-content pull-right'}),
        dict(name='div', attrs={'class': 'right-content'}),
        dict(name='div', attrs={'class': 'ftr-line'}),
        dict(name='div', attrs={'class': 'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]

    cover_margins = (0, 0, '#ffffff')

    #: Home page; the issue URL is derived from it without modifying it.
    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        """Return the ``src`` of the image in the home page's latest-issue block."""
        soup = self.index_to_soup(self.INDEX)
        latest_issue = soup.find(attrs={'class': 'article-block latest-issue'})
        return latest_issue.find('img')['src']

    def parse_index(self):
        """Return ``[(section, articles), ...]`` for the latest issue.

        Each article is a dict with ``title``, ``url``, ``description`` and
        ``date`` keys (the last two are empty strings).
        """
        # Get the latest issue: the third link in the footer block.
        home = self.index_to_soup(self.INDEX)
        issue_link = home.find(
            'div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue. The URL is kept in a local
        # variable: the original overwrote self.INDEX, so any later reader of
        # self.INDEX (such as get_cover_url) or a second call to this method
        # started from the issue URL instead of the home page.
        issue_url = self.INDEX + issue_link['href']
        soup = self.index_to_soup(issue_url)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})
            if div is None:
                # Section missing from this issue; skip it instead of raising
                # AttributeError on None.
                continue

            articles = [
                {'title': self.tag_to_string(post), 'url': post['href'],
                 'description': '', 'date': ''}
                for post in div.findAll(attrs={'class': 'post-title'})
            ]
            feeds.append((section, articles))
        return feeds
|
@ -1,32 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
grrm.livejournal.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NotABlog(BasicNewsRecipe):
    """Recipe for George R.R. Martin's "Not A Blog", using embedded feed content."""

    # Identity
    title = 'Not A Blog - George R.R. Martin'
    __author__ = 'Darko Miletic'
    description = 'George R.R. Martin'
    language = 'en'
    publication_type = 'blog'

    # Download behaviour
    encoding = 'utf-8'
    use_embedded_content = True
    no_stylesheets = True
    max_articles_per_feed = 100
    oldest_article = 15

    # NOTE(review): other recipes in this file use the key 'comments' --
    # confirm whether 'comment' is intentional.
    conversion_options = {
        'comment': description,
        'tags': 'sf, fantasy, game of thrones',
        'publisher': 'George R.R. Martin',
        'language': language,
    }

    feeds = [
        (u'Posts', u'http://grrm.livejournal.com/data/rss'),
    ]

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes, then apply ``adeify_images``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
|
@ -1,46 +0,0 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
    """Recipe for GS24.pl (Głos Szczeciński); decodes obfuscated feed links."""

    title = u'GS24.pl (Głos Szczeciński)'
    description = u'Internetowy serwis Głosu Szczecińskiego'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Media Regionalne sp. z o.o.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = "http://www.gs24.pl/images/top_logo.png"

    feeds = [
        # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
        (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
        (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
        (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
        (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
        (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
        (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
        (u'Police', u'http://www.gs24.pl/police.xml'),
        (u'Region', u'http://www.gs24.pl/region.xml'),
        (u'Sport', u'http://www.gs24.pl/sport.xml'),
    ]

    def get_article_url(self, article):
        """Decode the real gs24 URL from the feed's encoded link.

        The part of ``article.link`` between ``/0L0S`` and ``/story01.htm`` is
        taken and its two-character escape codes are translated back
        (``0B`` -> ``.``, ``0C`` -> ``/``, ...), with ``0A`` -> ``0`` applied
        last. Returns ``None`` when the link does not have that shape.
        """
        match = re.search("""/0L0S(gs24.*)/story01.htm""", article.link)
        if match is None:
            # NOTE(review): the original raised AttributeError here; confirm
            # that the framework skips articles whose URL is None.
            return None
        s = match.group(1)
        replacements = {"0B": ".", "0C": "/",
                        "0H": ",", "0I": "_", "0D": "?", "0F": "="}
        # str.replace instead of string.replace(): the module-level function
        # does not exist in Python 3.
        for code, char in replacements.items():
            s = s.replace(code, char)
        s = s.replace("0A", "0")
        return "http://" + s

    def print_version(self, url):
        """Return *url* with the print-template query parameter appended."""
        return url + "&Template=printpicart"
|
@ -1,61 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
gulfnews.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class GulfNews(BasicNewsRecipe):
    """Recipe for Gulf News (gulfnews.com)."""

    # Identity and metadata
    title = 'Gulf News'
    __author__ = 'Darko Miletic'
    publisher = 'Al Nisr Publishing LLC'
    description = 'News from United Arab Emirrates, persian gulf and rest of the world'
    category = 'news, politics, UAE, world'
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://gulfnews.com/media/img/gulf_news_logo.jpg'

    # Download behaviour
    encoding = 'utf8'
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True
    max_articles_per_feed = 200
    oldest_article = 2

    extra_css = """
                    body{font-family: Arial,Helvetica,sans-serif }
                    img{margin-bottom: 0.4em; display:block}
                    h1{font-family: Georgia, 'Times New Roman', Times, serif}
                    ol,ul{list-style: none}
                    .synopsis{font-size: small}
                    .details{font-size: x-small}
                    .image{font-size: xx-small}
                """

    # NOTE(review): other recipes in this file use the key 'comments' --
    # confirm whether 'comment' is intentional.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep the headline plus the synopsis/details/image/article containers.
    keep_only_tags = [
        dict(name='h1'),
        dict(attrs={'class': ['synopsis', 'details', 'image', 'article']}),
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'object', 'embed']),
        dict(attrs={'class': ['quickLinks', 'ratings']}),
        dict(attrs={'id': 'imageSelector'}),
    ]
    remove_attributes = ['lang']

    # (feed title, feed URL) pairs
    feeds = [
        (u'UAE News', u'http://gulfnews.com/cmlink/1.446094'),
        (u'Business', u'http://gulfnews.com/cmlink/1.446098'),
        (u'Entertainment', u'http://gulfnews.com/cmlink/1.446095'),
        (u'Sport', u'http://gulfnews.com/cmlink/1.446096'),
        (u'Life', u'http://gulfnews.com/cmlink/1.446097'),
    ]

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes from every element."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
@ -1,26 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1259599587(BasicNewsRecipe):
    """Recipe for the German-language "gulli:news" ticker feed."""

    # Recipe metadata
    __author__ = 'posativ'
    title = u'Gulli'
    description = 'News from Germany'
    language = 'de'

    # Fetch settings
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [
        (u'gulli:news', u'http://ticker.gulli.com/rss/'),
    ]

    # Page clean-up: keep the left content column, drop floats and the forum
    # box, and cut everything that follows the bookmark block.
    keep_only_tags = [dict(name='div', attrs={'id': ['_contentLeft']})]
    remove_tags = [dict(name='div', attrs={'class': ['FloatL', '_forumBox']})]
    remove_tags_after = [dict(name='div', attrs={'class': ['_bookmark']})]

    extra_css = '''
        .byline {color:#666;margin-bottom:0;font-size:12px}
        .blockquote {color:#030303;font-style:italic;padding-left:15px;}
        img {align:center;}
        .li {list-style-type: none}
    '''
|
@ -1,38 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class H3(BasicNewsRecipe):
    """Recipe for hirszerzo.hu, with one RSS feed per site section."""

    __author__ = 'Ezmegaz'
    title = u'H\xedrszerz\u0151'
    language = 'hu'

    oldest_article = 5
    max_articles_per_feed = 50

    # (section title, feed URL) pairs
    feeds = [
        (u'Belf\xf6ld', u'http://www.hirszerzo.hu/rss.belfold.xml'),
        (u'K\xfclf\xf6ld', u'http://www.hirszerzo.hu/rss.kulfold.xml'),
        (u'Profit', u'http://www.hirszerzo.hu/rss.profit.xml'),
        (u'Shake', u'http://www.hirszerzo.hu/rss.shake.xml'),
        (u'Publicisztika', u'http://www.hirszerzo.hu/rss.publicisztika.xml'),
        (u'Elemz\xe9s', u'http://www.hirszerzo.hu/rss.elemzes.xml'),
        (u'Sorok k\xf6z\xf6tt', u'http://www.hirszerzo.hu/rss.sorok_kozott.xml'),
        (u'Gal\xe9ria', u'http://www.hirszerzo.hu/rss.galeria.xml'),
        (u'Patro', u'http://www.hirszerzo.hu/rss.patro.xml'),
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Haderslev
|
||||
'''
|
||||
|
||||
|
||||
class HaderslevLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Lokalavisen Haderslev."""

    # Recipe metadata
    title = 'Lokalavisen Haderslev'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Haderslev og omegn på haderslev.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Fetch settings; article clean-up is left to calibre's auto_cleanup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        (
            'Seneste nyt fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Haderslev',
            'http://haderslev.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,36 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Hankyoreh
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Hankyoreh(BasicNewsRecipe):
    """Recipe for The Hankyoreh (hani.co.kr) section feeds."""

    language = 'ko'
    title = u'한겨례'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 7
    max_articles_per_feed = 10
    no_stylesheets = True
    remove_javascript = True

    # Keep only the article header and body containers.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-head']}),
        dict(name='div', attrs={'class': ['article-text']}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': ['category']}),
    ]
    # The id was previously given as a one-element set literal
    # ({'ad_box01'}); a plain string states the intended single-id match.
    remove_tags_after = dict(id='ad_box01')

    feeds = [
        (u'정치', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_politics.xml'),
        (u'사회', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_society.xml'),
        (u'문화', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_culture.xml'),
        (u'스포츠', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_sports.xml'),
        (u'사설·칼럼', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_opinion.xml'),
        (u'만화만평', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_cartoon.xml'),
    ]
|
@ -1,23 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    """Recipe for the heavy-metal.it feed."""

    # Recipe metadata
    __author__ = 'faber1971'
    title = u'Heavy Metal'
    description = 'An Heavy metal Italian magazine'
    language = 'it'
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'

    # Fetch settings; clean-up is done by the explicit tag rules below.
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = False

    feeds = [
        (u'Heavy Metal', u'http://www.heavy-metal.it/feed/'),
    ]

    # Keep the entry container and cut everything after the "sociable" block.
    keep_only_tags = [dict(name='div', attrs={'class': 'entry'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'sociable'})]
|
||||
|
||||
|
||||
__version__ = 'v1.0'
|
||||
__date__ = '6, May 2012'
|
@ -1,37 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'
|
||||
|
||||
'''
|
||||
Fetch Heise Open.
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HeiseOpenDe(BasicNewsRecipe):
    """Recipe for the Heise Open Atom feed, fetched through the print view."""

    title = 'Heise Open'
    description = 'Opensource news from Germany'
    __author__ = 'Anton Gillert'
    use_embedded_content = False
    language = 'de'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True

    feeds = [('Heise Open', 'http://www.heise.de/open/news/news-atom.xml')]

    def print_version(self, url):
        """Return *url* with the ``view=print`` query parameter added.

        A link that already carries a query string gets the parameter
        appended with ``&``; blindly adding ``?view=print`` would otherwise
        yield a URL with two ``?`` separators.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'view=print'

    # Navigation, logo and date/footer elements stripped from the print view.
    remove_tags = [
        dict(id='navi_top'),
        dict(id='navi_bottom'),
        dict(name='div', attrs={'class': 'navi_top_logo'}),
        dict(name='img', attrs={'src': '/open/icons/open_logo_2009_weiss.gif'}),
        dict(name='h5', attrs={'style': 'margin: 0.5em 0;'}),
        dict(name='p', attrs={'class': 'news_datum'}),
        dict(name='p', attrs={'class': 'size80'}),
    ]
    remove_tags_after = [dict(name='p', attrs={'class': 'size80'})]

    def get_cover_url(self):
        """Return the fixed logo image URL used as the cover."""
        return 'http://www.heise.de/open/icons/open_logo_2009_weiss.gif'
|
@ -1,34 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a tag matcher that selects on any of the given CSS class names.

    *classes* is a string of class names separated by single spaces. The
    result is a ``dict`` of the form ``{'attrs': {'class': predicate}}``.
    The predicate receives a tag's class string and returns the set of
    names it shares with *classes*; a falsy class value is returned as-is.
    """
    wanted = frozenset(classes.split(' '))

    def shared_names(value):
        # Short-circuit keeps None / '' untouched, exactly like ``x and ...``.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shared_names}}
|
||||
|
||||
|
||||
class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    """Recipe for the Helsingin Sanomat (hs.fi) news feed."""

    # Recipe metadata
    __author__ = 'oneillpt'
    title = u'Helsingin Sanomat'
    language = 'fi'

    # Fetch settings
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Uutiset - HS.fi', u'https://www.hs.fi/uutiset/rss/'),
    ]

    # Keep the title and article containers; drop hidden/paywall elements
    # and anything whose inline style contains "height: 0".
    keep_only_tags = [classes('article-title single-article')]
    remove_tags = [
        dict(attrs={'class': ['hidden print-url', 'article-paywall']}),
        dict(style=lambda x: x and 'height: 0' in x),
    ]

    def preprocess_html(self, soup):
        """Rewrite every tag carrying ``data-mfp-src`` into a block ``<img>``.

        The tag is renamed to ``img``, its ``src`` is copied from
        ``data-mfp-src`` and its style is set to ``display:block``.
        """
        for node in soup.findAll(attrs={'data-mfp-src': True}):
            node.name = 'img'
            node['src'] = node['data-mfp-src']
            node['style'] = 'display:block'
        return soup
|
@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Return ``{'attrs': {'class': predicate}}`` matching any listed class.

    *classes* holds class names separated by single spaces. The predicate
    yields the names a tag's class string has in common with them, or the
    class value itself when that value is falsy.
    """
    names = frozenset(classes.split(' '))
    predicate = lambda x: x and frozenset(x.split()) & names  # noqa: E731
    return dict(attrs={'class': predicate})
|
||||
|
||||
|
||||
class HinduHumanRights(BasicNewsRecipe):
    """Recipe for the hindusforhumanrights.org RSS feed."""

    # Recipe metadata
    __author__ = 'Vishvas Vasuki'
    title = 'Hindu Human Rights'
    language = 'en_IN'

    # Fetch settings; article clean-up is left to calibre's auto_cleanup.
    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 30

    feeds = [
        (
            'HHR main',
            'https://www.hindusforhumanrights.org/en/home?format=rss',
        ),
    ]
|
@ -1,79 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HNonlineRecipe(BasicNewsRecipe):
    """Recipe for HNonline plus its FinWeb and HNstyle feeds."""

    __license__ = 'GPL v3'
    __author__ = 'lacike'
    language = 'sk'
    version = 1

    title = u'HNonline'
    publisher = u'HNonline'
    category = u'News, Newspaper'
    description = u'News from Slovakia'
    cover_url = u'http://hnonline.sk/img/sk/_relaunch/logo2.png'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://rss.hnonline.sk, for listing see
    # http://rss.hnonline.sk/prehlad
    feeds = [
        # HNonline
        (u'HNonline|Ekonomika a firmy', u'http://rss.hnonline.sk/?p=kC1000'),
        (u'HNonline|Slovensko', u'http://rss.hnonline.sk/?p=kC2000'),
        (u'HNonline|Svet', u'http://rss.hnonline.sk/?p=kC3000'),
        (u'HNonline|\u0160port', u'http://rss.hnonline.sk/?p=kC4000'),
        (u'HNonline|Online rozhovor', u'http://rss.hnonline.sk/?p=kCR000'),
        # FinWeb
        (u'FinWeb|Spr\u00E1vy zo sveta financi\u00ED', u'http://rss.finweb.hnonline.sk/spravodajstvo'),
        (u'FinWeb|Koment\u00E1re a anal\u00FDzy', u'http://rss.finweb.hnonline.sk/?p=kPC200'),
        (u'FinWeb|Invest\u00EDcie', u'http://rss.finweb.hnonline.sk/?p=kPC300'),
        (u'FinWeb|Svet akci\u00ED', u'http://rss.finweb.hnonline.sk/?p=kPC400'),
        (u'FinWeb|Rozhovory', u'http://rss.finweb.hnonline.sk/?p=kPC500'),
        (u'FinWeb|T\u00E9ma t\u00FD\u017Ed\u0148a', u'http://rss.finweb.hnonline.sk/?p=kPC600'),
        (u'FinWeb|Rebr\u00ED\u010Dky', u'http://rss.finweb.hnonline.sk/?p=kPC700'),
        # HNstyle
        (u'HNstyle|Kult\u00FAra', u'http://style.hnonline.sk/?p=kTC100'),
        (u'HNstyle|Auto-moto', u'http://style.hnonline.sk/?p=kTC200'),
        (u'HNstyle|Digit\u00E1l', u'http://style.hnonline.sk/?p=kTC300'),
        (u'HNstyle|Veda', u'http://style.hnonline.sk/?p=kTCV00'),
        (u'HNstyle|Dizajn', u'http://style.hnonline.sk/?p=kTC400'),
        (u'HNstyle|Cestovanie', u'http://style.hnonline.sk/?p=kTCc00'),
        (u'HNstyle|V\u00EDkend', u'http://style.hnonline.sk/?p=kTC800'),
        (u'HNstyle|Gastro', u'http://style.hnonline.sk/?p=kTC600'),
        (u'HNstyle|M\u00F3da', u'http://style.hnonline.sk/?p=kTC700'),
        (u'HNstyle|Modern\u00E1 \u017Eena', u'http://style.hnonline.sk/?p=kTCA00'),
        (u'HNstyle|Pre\u010Do nie?!', u'http://style.hnonline.sk/?p=k7C000'),
    ]

    # Article parts kept, in this order: title, subtitle, lead, body text.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'detail-titulek'}),
        dict(name='div', attrs={'class': 'detail-podtitulek'}),
        dict(name='div', attrs={'class': 'detail-perex'}),
        dict(name='div', attrs={'class': 'detail-text'}),
    ]

    extra_css = '''
                @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
                body {font-family: sans1, serif1;}
                '''
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Ugebladet
|
||||
'''
|
||||
|
||||
|
||||
class HoersholmLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Ugebladet (hoersholm.lokalavisen.dk)."""

    # Recipe metadata
    title = 'Ugebladet'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale, regionale nyheder, sport og kultur i Hørsholm, Rungsted, Fredensborg og Humlebæk på hoersholm.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Fetch settings; article clean-up is left to calibre's auto_cleanup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        (
            'Seneste nyt fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Ugebladet',
            'http://hoersholm.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,93 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '30 June 2012, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__description__ = 'Diario de actualidad, moda y belleza'
|
||||
__version__ = 'v0.03'
|
||||
__date__ = '28, Jul 2016'
|
||||
'''
|
||||
http://www.hola.com/
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class hola_es(BasicNewsRecipe):
    """Recipe for the ¡Hola! (hola.com) section feeds."""

    # ``__author__`` is the attribute name the other recipes use for the
    # recipe author; the original ``author`` attribute is kept for
    # backward compatibility.
    __author__ = 'desUBIKado'
    author = 'desUBIKado'
    description = 'Diario de actualidad, moda y belleza'
    title = u'¡Hola!'
    publisher = 'Hola S.L.'
    category = 'Spanish celebrities, Entertainment News, Royalty, Daily Variety, Hollywood'
    language = 'es'
    masthead_url = 'http://imagenes.hola.com/comunes/2008/logo-holacom.gif'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 7
    delay = 1
    encoding = 'utf-8'
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Famosos', u'http://www.hola.com/famosos/rss.xml'),
        (u'Realeza', u'http://www.hola.com/realeza/rss.xml'),
        (u'Cine', u'http://www.hola.com/cine/rss.xml'),
        (u'M\xfasica', u'http://www.hola.com/musica/rss.xml'),
        (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Ni\xf1os', u'http://www.hola.com/ninos/rss.xml'),
    ]

    keep_only_tags = [
        dict(name='article', attrs={'class': ['body col-md-8 col-xs-12']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['comments', 'news-share', 'sponsored-news']}),
        dict(name='div', attrs={'itemprop': ['logo']}),
        dict(name='span', attrs={'class': ['hidden']}),
        dict(name='p', attrs={'class': ['hidden']}),
        dict(name='section', attrs={'class': ['news-tags']}),
    ]

    remove_tags_after = dict(name='div', attrs={'class': 'comments'})

    # Each pair of patterns below turns an opening fragment into "<!--" and
    # the matching closing fragment into "-->", commenting out what lies
    # between them.
    # Example of the gallery marker:
    # <span>VER GALERÍA<i data-icon="1" class="icon"></i></span>
    preprocess_regexps = [
        # Remove the "VER GALERÍA" (view gallery) marker
        (re.compile(r'<span>VER GALER', re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'class="icon"></i></span>',
                    re.DOTALL | re.IGNORECASE), lambda m: '-->'),
        # Remove assorted links
        (re.compile(r'<p><a href="http://www.hola.com',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'<p style="text-align: center;">',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'<p style="line-height: 20.8px;"><a href="http://www.hola.com',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'</strong></a></p>',
                    re.DOTALL | re.IGNORECASE), lambda m: '-->'),
    ]

    # Fetch the print-edition cover (the "520" image has the higher
    # resolution), e.g.
    # http://www.hola.com/imagenes/revista/3727/portada-revista-hola-520.jpg
    def get_cover_url(self):
        """Return the URL of the current print cover, or None if not found.

        Scans the digital-edition page for an image whose ``src`` ends with
        ``portada-revista-hola-520.jpg``. A site-relative ``src`` gets the
        host prepended; an already absolute ``src`` is returned unchanged
        (prefixing it would produce an invalid URL).
        """
        index = 'http://www.hola.com/abono/ediciondigital/'
        soup = self.index_to_soup(index)
        for image in soup.findAll('img', src=True):
            src = image['src']
            if src.endswith('portada-revista-hola-520.jpg'):
                if src.startswith(('http://', 'https://')):
                    return src
                return 'http://www.hola.com' + src
        return None

    def get_article_url(self, article):
        """Return the article URL, taken from the feed entry's ``guid``.

        Falls back to the entry's ``link`` when no guid is present, so such
        an entry is not left without a URL.
        """
        return article.get('guid', None) or article.get('link', None)

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
                '''
|
@ -1,55 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.hollywoodreporter.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class THR_En(BasicNewsRecipe):
    """Recipe for The Hollywood Reporter's FeedBurner feeds."""

    # Recipe metadata
    __author__ = 'Darko Miletic'
    title = 'The Hollywood Reporter'
    publisher = 'The Hollywood Reporter'
    category = 'Entertainment news, Hollywood news, celebrity news, latest Hollywood news'
    description = 'Read about the latest in Hollywood and entertainment news from The Hollywood Reporter, your source for detailed movie reviews, celebrity styles, and industry blogs.'  # noqa
    language = 'en'
    publication_type = 'newsportal'

    # Fetch settings
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 200
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True

    # Automatic clean-up, with an XPath naming the div elements that have
    # the "image" class as content to keep.
    auto_cleanup = True
    auto_cleanup_keep = "//div[contains(concat(' ', normalize-space(@class), ' '), ' image ')]"

    extra_css = """
                    body{font-family: Georgia,Times,serif}
                    h1,h2,h3{font-family: "Vonness-Bold-Compressed",Helvetica,sans-serif}
                    .credit,.caption{font-family: Arial,sans-serif;}
                    .credit,.caption,.submitted{font-size: small; color: gray;}
                    .main_media_credit{clear: left; font-size: x-small; text-align: right; color: gray;}
                    img{margin-top: 0.5em; margin-bottom: 0.4em; display:block}
                """

    # Built from the attributes above, so these must stay after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        (u'Movies', u'http://feeds.feedburner.com/thr/film'),
        (u'TV', u'http://feeds.feedburner.com/thr/television'),
        (u'Style&Culture', u'http://feeds.feedburner.com/thr/style'),
        (u'International', u'http://feeds.feedburner.com/thr/international'),
        (u'Music', u'http://feeds.feedburner.com/thr/music'),
        (u'Tech', u'http://feeds.feedburner.com/TheHollywoodReporter-Technology'),
        (u'Awards', u'http://feeds.feedburner.com/thr/awards'),
        (u'Business', u'http://feeds.feedburner.com/thr/business'),
        (u'Asia', u'http://feeds.feedburner.com/HollywoodReporterAsia'),
        (u'Guilds and Labor', u'http://feeds.feedburner.com/thr/labor'),
        (u'Box Office', u'http://feeds.feedburner.com/thr/boxoffice'),
        (u'Real Estate', u'http://feeds.feedburner.com/thr/RealEstate'),
        (u'Politics', u'http://feeds.feedburner.com/thr/politics'),
    ]
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Hornsherred Avis
|
||||
'''
|
||||
|
||||
|
||||
class Hornsherredavis_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Hornsherred Avis."""

    # Recipe metadata
    title = 'Hornsherred Avis'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale nyheder fra Jægerspis, Skibby og Bramsnæs'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Fetch settings; article clean-up is left to calibre's auto_cleanup.
    oldest_article = 30
    max_articles_per_feed = 50
    auto_cleanup = True

    # Feeds are found here: http://hornsherredavis.dk/
    feeds = [
        (
            'Hornsherred Avis',
            'http://hornsherredavis.dk/?feed=rss2',
        ),
        (
            'Kommentarer til Hornsherred Avis',
            'http://hornsherredavis.dk/?feed=comments-rss2',
        ),
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Lokalavisen Hornsherred
|
||||
'''
|
||||
|
||||
|
||||
class HornsherredLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Lokalavisen Hornsherred."""

    # Recipe metadata
    title = 'Lokalavisen Hornsherred'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Hornsherred og omegn på hornsherred.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # Fetch settings; article clean-up is left to calibre's auto_cleanup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        (
            'Seneste nyt fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Lokalavisen Hornsherred',
            'http://hornsherred.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,41 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
hotcity.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HotcityRo(BasicNewsRecipe):
    """Recipe for the hotcity.ro RSS feed."""

    # Recipe metadata
    __author__ = u'Silviu Cotoar\u0103'
    title = u'Hotcity'
    description = u'Cultura urban\u0103 feminin\u0103'
    publisher = 'Hotcity'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.hotcity.ro/i/bg_header.gif'

    # Fetch settings
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Built from the attributes above, so these must stay after them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article title and text containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'articol_title'}),
        dict(name='div', attrs={'class': 'text'}),
    ]

    feeds = [(u'Feeds', u'http://www.hotcity.ro/rss')]

    def preprocess_html(self, soup):
        """Return the soup after passing it through ``adeify_images``."""
        adeified = self.adeify_images(soup)
        return adeified
|
@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
hotnews.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Hotnews(BasicNewsRecipe):
    """Recipe for the hotnews.ro Romanian and English feeds."""

    # Recipe metadata
    __author__ = u'Silviu Cotoar\u0103'
    title = 'Hotnews'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Hotnews'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://www.hotnews.ro/images/new/logo.gif'

    # Fetch settings
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Built from the attributes above, so these must stay after them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the headline and the article content container are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'title'}),
        dict(name='div', attrs={'id': 'articleContent'}),
    ]

    feeds = [
        (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate'),
        (u'English', u'http://www.hotnews.ro/rss/english'),
    ]

    def preprocess_html(self, soup):
        """Return the soup after passing it through ``adeify_images``."""
        adeified = self.adeify_images(soup)
        return adeified
|
@ -1,31 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012-2015, Eddie Lau'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
    """Recipe for The House News Bloggers (thehousenewsbloggers.net) feed."""

    title = u'The House News Bloggers 主場博客'
    __author__ = 'Eddie Lau'
    publisher = 'The House News Bloggers'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'utf-8'
    description = 'http://thehousenewsbloggers.net'
    category = 'Chinese, Blogs, Opinion, News, Hong Kong'
    masthead_url = 'http://thehousenewsbloggers.files.wordpress.com/2014/09/screen-shot-2014-09-11-at-8-55-13.png'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'  # noqa
    feeds = [(u'Latest', u'http://thehousenewsbloggers.net/feed/')]

    # Keep the title, author, date and entry body; drop the post-flair block.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['title']}),
        dict(name='span', attrs={'class': ['author vcard']}),
        dict(name='time', attrs={'class': ['entry-date']}),
        dict(name='section', attrs={'class': ['entry']}),
    ]
    remove_tags = [dict(name='div', attrs={'id': ['jp-post-flair']})]

    def populate_article_metadata(self, article, soup, first):
        """Use the article's first image as its table-of-contents thumbnail.

        Does nothing unless *first* is true and this recipe object has an
        ``add_toc_thumbnail`` method. Only images that actually have a
        ``src`` attribute are considered, so an ``<img>`` without one no
        longer raises ``KeyError``.
        """
        if not first or not hasattr(self, 'add_toc_thumbnail'):
            return
        picture = soup.find('img', src=True)
        if picture is not None:
            self.add_toc_thumbnail(article, picture['src'])
|
@ -1,81 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
www.hrt.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def new_tag(soup, name, attrs=()):
    """Create a tag called *name* for *soup* with the given attributes.

    *attrs* is an iterable of ``(key, value)`` pairs. When *soup* provides
    a ``new_tag`` method it is called with the pairs converted to a dict;
    otherwise the tag is built directly with the ``Tag`` constructor,
    passing ``None`` in place of empty attributes.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory method on the soup object: construct the Tag directly.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
||||
|
||||
|
||||
class HRT(BasicNewsRecipe):
    """Recipe for the HRT (hrt.hr) section feeds."""

    # Recipe metadata
    __author__ = 'Darko Miletic'
    title = 'HRT: Vesti'
    description = 'News from Croatia'
    publisher = 'HRT'
    category = 'news, politics, Croatia, HRT'
    language = 'hr'
    # Language tag written into the page and the conversion options.
    lang = 'hr-HR'

    # Fetch settings
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = False

    extra_css = '''@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                   body{font-family: serif1, serif} .article_description{font-family: serif1, serif}
                   .news-single-timedata{color:#20558A; font-size:x-small;}
                   .nsTitle{color:#20558A; font-size:large; font-weight:bold;}
                   a{color:#20558A;}
                   .external-link-new-window{color:#20558A;}

                '''

    # Built from the attributes above, so these must stay after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Replace U+0110 with U+00D0 in the raw page text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Keep the main story container, drop embedded objects and links, and
    # cut everything after the author block.
    keep_only_tags = [dict(name='div', attrs={'class': 'bigVijest'})]
    remove_tags = [dict(name=['object', 'link', 'embed'])]
    remove_tags_after = dict(name='div', attrs={'class': 'nsAuthor'})

    feeds = [
        (u'Vijesti', u'http://www.hrt.hr/?id=316&type=100&rss=vijesti'),
        (u'Sport', u'http://www.hrt.hr/?id=316&type=100&rss=sport'),
        (u'Zabava', u'http://www.hrt.hr/?id=316&type=100&rss=zabava'),
        (u'Filmovi i serije', u'http://www.hrt.hr/?id=316&type=100&rss=filmovi'),
        (u'Dokumentarni program', u'http://www.hrt.hr/?id=316&type=100&rss=dokumentarci'),
        (u'Glazba', u'http://www.hrt.hr/?id=316&type=100&rss=glazba'),
        (u'Kultura', u'http://www.hrt.hr/?id=316&type=100&rss=kultura'),
        (u'Mladi', u'http://www.hrt.hr/?id=316&type=100&rss=mladi'),
        (u'Manjine', u'http://www.hrt.hr/?id=316&type=100&rss=manjine'),
        (u'Radio', u'http://www.hrt.hr/?id=316&type=100&rss=radio'),
    ]

    def preprocess_html(self, soup):
        """Tag the page with its language and charset, then strip styles.

        Sets ``xml:lang`` and ``lang`` on ``<html>``, inserts
        Content-Language and Content-Type ``<meta>`` tags at the start of
        ``<head>``, deletes every inline ``style`` attribute and returns
        the result of ``adeify_images``.
        """
        root = soup.html
        root['xml:lang'] = self.lang
        root['lang'] = self.lang

        language_meta = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"),
            ("content", self.lang),
        ])
        charset_meta = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"),
            ("content", "text/html; charset=UTF-8"),
        ])
        soup.head.insert(0, language_meta)
        soup.head.insert(1, charset_meta)

        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
|
@ -1,127 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import print_function
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Return a tag matcher dict keyed on CSS class membership.

    *classes* is a string of class names separated by single spaces. The
    returned ``{'attrs': {'class': test}}`` dict carries a callable that,
    for a truthy class string, gives the set of its names that also occur
    in *classes*; a falsy class value is handed back unchanged.
    """
    targets = frozenset(classes.split(' '))

    def test(candidate):
        if not candidate:
            # Mirror ``x and ...``: the falsy value itself is the result.
            return candidate
        return frozenset(candidate.split()).intersection(targets)

    return dict(attrs={'class': test})
|
||||
|
||||
|
||||
class HuffingtonPostRecipe(BasicNewsRecipe):
    """News recipe for The Huffington Post, built from its RSS feeds."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal and Archana Raman'
    language = 'en'
    version = 2

    title = u'The Huffington Post'
    publisher = u'huffingtonpost.com'
    category = u'News, Politics'
    description = u'Political Blog'

    oldest_article = 1.1
    max_articles_per_feed = 100

    encoding = 'utf-8'
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # The default shown for 'days' is the class-level oldest_article above,
    # so this must stay below that assignment.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base recipe, then apply a 'days' override if present.

        After the base initialiser runs, the 'days' entry of
        ``recipe_specific_options`` is read; when it is a non-empty string
        it is converted to ``float`` and stored as ``oldest_article``.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # NOTE(review): presumably the base initialiser replaces the option
        # definitions above with the values chosen by the user; confirm.
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    keep_only_tags = [
        classes('entry__header entry__body')
    ]
    remove_tags = [
        classes('app-download-interstitial share-bar top-media--video advertisement extra-content'
                ' below-entry entry-inline-subscription-module related-articles')
    ]

    # Feeds from: http://www.huffingtonpost.com/syndication/
    feeds = [
        (u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'),
        (u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'),
        (u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'),
        (u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'),
        (u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'),
        (u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'),
        (u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'),
        (u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'),
        (u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'),
        (u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'),
        (u'Original Reporting',
         u'http://www.huffingtonpost.com/tag/huffpolitics/feed'),
    ]

    extra_css = '''
                h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
                h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
                h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                #title_permalink{color:black;font-size:large;}
                .date{color:#858585;font-family:"Times New Roman",sans-serif;}
                .comments_datetime v05{color:#696969;}
                .teaser_permalink{font-style:italic;font-size:xx-small;}
                .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
                '''
    # a[href]{color: blue; text-decoration: none; cursor: pointer;}

    def get_article_url(self, article):
        """Return the URL of a feed entry, or a falsy value if it has none.

        Workaround for Feedparser behaviour: if an item has more than one
        <link/> element, article.link is empty and article.links contains a
        list of dictionaries. In that case the first non-empty ``href``
        found in ``article.links`` is used.

        :param article: feed entry supporting ``get('link')`` and
            ``get('links')``.
        :return: the resolved URL; otherwise whatever falsy value
            ``article.get('link')`` returned (``None`` or ``''``).
        """
        link = article.get('link')
        if not link:
            # Search the whole list instead of indexing [0] and [1], so a
            # short list cannot raise IndexError.
            for candidate in article.get('links') or ():
                href = candidate.get('href')
                if href:
                    link = href
                    break

        # Log only once a URL is known: concatenating a missing link
        # (None) to the prefix would raise TypeError before the fallback.
        if link:
            print("Link:" + link)
        return link

    def postprocess_html(self, soup, first_fetch):
        """Remove the reaction widget matches and all blockquotes from ``soup``.

        Extracts every result of searching for a ``div`` with the text
        "What's Your Reaction?", then every ``blockquote`` element, and
        returns the same ``soup`` object. ``first_fetch`` is not used.
        """
        for tag in soup.findAll('div', text="What's Your Reaction?"):
            tag.extract()

        for tg in soup.findAll('blockquote'):
            tg.extract()

        return soup
|
@ -1,20 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Hvidovre_Avis_dk(BasicNewsRecipe):
    """Danish-language recipe for Hvidovre avis, driven by five RSS feeds."""

    # Identification.
    title = 'Hvidovre avis'
    language = 'da'

    # Download limits and clean-up.
    max_articles_per_feed = 50
    oldest_article = 7
    auto_cleanup = True

    # (section title, feed URL) pairs.
    feeds = [
        (
            'Lokale nyheder',
            'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Sport',
            'http://hvidovre.lokalavisen.dk/section/senestesportrss',
        ),
        (
            '112',
            'http://hvidovre.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Kultur',
            'http://hvidovre.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Læserbreve',
            'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Hvidovre Avis
|
||||
'''
|
||||
|
||||
|
||||
class HvidovreLokalavisen_dk(BasicNewsRecipe):
    """Danish-language recipe for Hvidovre Avis, driven by six RSS feeds."""

    # Identification and catalogue metadata.
    title = 'Hvidovre Avis'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = 'Lokale og regionale nyheder, sport og kultur fra Hvidovre på hvidovre.lokalavisen.dk'

    # Download limits and clean-up.
    max_articles_per_feed = 50
    oldest_article = 7
    auto_cleanup = True

    # (section title, feed URL) pairs.
    feeds = [
        (
            'Seneste nyt fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Hvidovre Avis',
            'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,25 +0,0 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1293122276(BasicNewsRecipe):
    """Recipe for the Smarter Planet Tumblr blog, read from its mobile RSS feed."""

    title = u'Smarter Planet | Tumblr'
    __author__ = 'Jack Mason'
    author = 'IBM Global Business Services'
    publisher = 'IBM'
    language = 'en'
    category = 'news, technology, IT, internet of things, analytics'
    oldest_article = 14
    max_articles_per_feed = 30
    no_stylesheets = True
    use_embedded_content = False
    masthead_url = 'http://www.hellercd.com/wp-content/uploads/2010/09/hero.jpg'

    # Both boundaries use the same id='item' selector.
    remove_tags_before = dict(id='item')
    remove_tags_after = dict(id='item')

    # 'description' and 'disqus' are separate class names. The comma used
    # to sit inside the first literal, so the two adjacent strings were
    # concatenated into the single name 'description,disqus'.
    remove_tags = [
        dict(attrs={'class': ['sidebar', 'about', 'footer', 'description',
                              'disqus', 'nav', 'notes', 'disqus_thread']}),
        dict(id=['sidebar', 'footer', 'disqus', 'nav', 'notes',
                 'likes_container', 'description', 'disqus_thread', 'about']),
        dict(name=['script', 'noscript', 'style']),
    ]

    feeds = [(u'Smarter Planet Tumblr',
              u'http://smarterplanet.tumblr.com/mobile/rss')]
|
Before Width: | Height: | Size: 242 B |
Before Width: | Height: | Size: 783 B |
Before Width: | Height: | Size: 230 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 5.4 KiB |
Before Width: | Height: | Size: 238 B |
Before Width: | Height: | Size: 162 B |
Before Width: | Height: | Size: 753 B |
Before Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 640 B |
Before Width: | Height: | Size: 169 B |
Before Width: | Height: | Size: 739 B |