This commit is contained in:
Kovid Goyal 2024-09-28 11:22:49 +05:30
commit 065bafd0c5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
207 changed files with 7 additions and 5067 deletions

View File

@ -1,40 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class f1ultra(BasicNewsRecipe):
    '''F1 ultra (f1ultra.pl) — Polish motorsport news: F1, F3, GP2 etc.'''
    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'

    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    # Keep only the main article column.
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags_after = [
        dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})]
    # Buttons, media players, date stamps, print/PDF/email links,
    # comment forms and rule separators.
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]
    # Strip hard-coded alignment/width markup embedded in article HTML.
    # NOTE(review): r'width=\"*\"' only matches empty width attributes
    # (quote repetition) — presumably width=".*?" was intended; left
    # unchanged to preserve behaviour.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda m: ''),
        (re.compile(r'align="right"'), lambda m: ''),
        (re.compile(r'width=\"*\"'), lambda m: ''),
        (re.compile(r'\<table .*?\>'), lambda m: ''),
    ]
    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
img { display: block; clear: both;}
'''
    remove_attributes = ['width', 'height', 'position', 'float', 'padding-left',
                         'padding-right', 'padding', 'text-align']
    feeds = [
        (u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]

View File

@ -1,23 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    '''F-Secure's corporate weblog (www.f-secure.com/weblog).'''
    title = u'F-Secure Weblog'
    language = 'en'
    __author__ = 'louhike'
    description = u'All the news from the weblog of F-Secure'
    publisher = u'F-Secure'
    timefmt = ' [%a, %d %b, %Y]'
    encoding = 'ISO-8859-1'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    # The post body lives in this container; drop horizontal rules.
    keep_only_tags = [dict(name='div', attrs={'class': 'modSectionTd2'})]
    remove_tags = [dict(name='hr')]
    feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]

    def get_cover_url(self):
        # Fixed cover: the company logo hosted with the blog archives.
        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'

View File

@ -1,26 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Favrskov Avisen
'''
class FavrskovAvisen_dk(BasicNewsRecipe):
    '''Favrskov Avisen — Danish local newspaper (auto-cleaned RSS recipe).'''
    __author__ = 'CoderAllan.github.com'
    title = 'Favrskov Avisen'
    description = 'Lokale og regionale nyheder'  # "Local and regional news"
    category = 'newspaper, news, localnews, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Nyheder', 'http://dinby.dk/favrskov-avisen/rss'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Favrskovposten
'''
class FavrskovLokalavisen_dk(BasicNewsRecipe):
    '''Favrskovposten — Danish local newspaper (favrskov.lokalavisen.dk).'''
    __author__ = 'CoderAllan.github.com'
    title = 'Favrskovposten'
    description = 'Lokale og regionale nyheder, sport og kultur fra Favrskov og omegn på favrskov.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    language = 'da'
    # One feed per section of the paper.
    feeds = [
        ('Seneste nyt fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,134 +0,0 @@
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
class FazNet(BasicNewsRecipe):
    '''FAZ.NET — Frankfurter Allgemeine Zeitung online edition.

    Handles multi-page articles by recursively fetching continuation
    pages and splicing them into the first page (see append_page).
    '''
    # Version 9.1
    # Update 2022-05-29
    # Armin Geller
    # new page layout
    title = 'FAZ.NET'
    __author__ = 'Kovid Goyal, Darko Miletic, Armin Geller'
    description = 'Frankfurter Allgemeine Zeitung'
    publisher = 'Frankfurter Allgemeine Zeitung GmbH'
    category = 'news, politics, Germany'
    encoding = 'utf-8'
    language = 'de'
    max_articles_per_feed = 30
    no_stylesheets = True
    remove_javascript = True
    extra_css = '''
.atc-headlineemphasis, h1, h2 {font-size:1.6em; text-align:left}
.atc-HeadlineEmphasisText {font-size:0.6em; text-align:left; display:block; text-transform:uppercase;}
.atc-IntroText {font-size:1em; font-style:italic; font-weight:bold;margin-bottom:1em}
h3 {font-size:1.3em;text-align:left}
h4, h5, h6 {font-size:1em;text-align:left}
.textbox-wide {font-size:1.3em; font-style:italic}
.atc-ImageDescriptionText, .atc-ImageDescriptionCopyright {font-size: 0.75em; font-style:italic; font-weight:normal}
.atc-MetaItem {
font-size:0.6em; font-weight:normal; margin-bottom:0.75em; text-align:left;
list-style-type:none; text-transform:uppercase; display:inline-block}
.aut-Teaser_Avatar {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; text-align:left}
.aut-Teaser_Name {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; float:left; text-align:left}
.aut-Teaser_Description {font-size:0.6em; font-weight: normal; margin-bottom:0.75em; text-align:left; display:block}
.atc-Footer{font-size:0.6em; font-weight: normal; margin-bottom:0.75em; display:block}
'''
    # The article element plus the FAZContent wrapper hold the story.
    keep_only_tags = [dict(name='article', attrs={'class':'atc'}),
                      dict(name='div', attrs={'id':'FAZContent'})
                      ]
    remove_tags_after = [dict(name='article', attrs={'class':'atc'})]
    # Social-media widgets, placeholders, author-follow boxes and
    # paywall teasers/offers.
    remove_tags = [
        dict(name='div', attrs={'class':[
            'atc-ContainerSocialMedia',
            'atc-ContainerFunctions_Interaction ',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium ctn-PlaceholderContent-has-centered-content',
            'ctn-PlaceholderBox ctn-PlaceholderBox-is-in-article-text-right',
            'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-text-left ctn-PlaceholderContent-is-in-article-small',
            'aut-Follow aut-Follow-is-small-teaser',
            'aut-Follow aut-Follow-is-teaser',
            'js-ctn-PaywallTeasers ctn-PaywallTeasers',
            'ctn-PaywallInfo_TeaserImageContainer',
            'ctn-PaywallInfo_OfferContainer'
        ]}),
        dict(name='aside', attrs={'class':['atc-ContainerMore',
                                           'atc-ContainerMoreOneTeaser'
                                           ]}),
        dict(name='span', attrs={'class':['data-button',
                                          'o-VisuallyHidden'
                                          ]}),
        dict(name='a', attrs={'class':'btn-Base_Link'})
    ]
    feeds = [
        ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
        ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
        ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
        ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
        ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
        ('Lebensstil', 'http://www.faz.net/aktuell/lebensstil/?rssview=1'),
        ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
        ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
        ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
        ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
        ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
        ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1'),
        ('Rhein-Main', 'http://www.faz.net/aktuell/rhein-main/?rssview=1')
    ]
    # For multipages: prefix prepended to the paginator's relative href.
    INDEX = ''

    def append_page(self, soup, appendtag, position):
        # Follow the paginator's next-page link, strip header/teaser
        # chrome from the continuation article, recurse for further
        # pages, then splice the continuation into `appendtag` at
        # `position`.  Statement order matters: recursion happens before
        # the continuation is extracted and inserted.
        pager = soup.find('li',attrs={'class':'nvg-Paginator_Item nvg-Paginator_Item-to-next-page'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('article', attrs={'class':'atc'})
            for cls in (
                'atc-Header',
                'atc-ContainerMore',
                'atc-ContainerFunctions_Interaction',
                'aut-Follow aut-Follow-is-small-teaser',
                'aut-Follow aut-Follow-is-teaser'
            ):
                div = texttag.find(attrs={'class':cls})
                if div is not None:
                    div.extract()
            newpos = len(texttag.contents)
            self.append_page(soup2,texttag,newpos)
            texttag.extract()
            pager.extract()
            appendtag.insert(position,texttag)

    # Find images
    def preprocess_html(self, soup):
        # Stitch continuation pages in, then promote lazy-load image
        # URLs (data-retina-src / data-src) to plain src attributes.
        self.append_page(soup, soup.body, 3)
        for img in soup.findAll('img', attrs={'data-retina-src':True}):
            img['src'] = img['data-retina-src']
        for img in soup.findAll('img', attrs={'data-src':True}):
            img['src'] = img['data-src']
        return self.adeify_images(soup)

    # Some last cleanup
    def postprocess_html(self, soup, first_fetch):
        for div in soup.findAll('div',attrs={'class':['atc-ContainerFunctions js-som-Abbinder',
                                                      'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium'
                                                      ]}):
            div.extract()
        return soup

View File

@ -1,21 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1347706704(BasicNewsRecipe):
    '''FC Knudde — Toon van Driel's popular Dutch sports comic strip.'''
    title = u'FC Knudde'
    __author__ = u'DrMerry'
    description = u'FC Knudde de populaire sport strip van Toon van Driel (http://www.toonvandriel.nl)'
    language = u'nl'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    cover_url = 'http://a1.mzstatic.com/us/r1000/035/Purple/be/33/70/mzl.qkvshinq.320x480-75.jpg'
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    remove_tags_before = dict(id='title')
    remove_tags_after = dict(attrs={'class': 'entry-content rich-content'})
    # The strip is delivered inside the feed entries themselves.
    use_embedded_content = True
    # Render the strip image full-width without borders or padding.
    extra_css = 'img{border:0;padding:0;margin:0;width:100%}'
    feeds = [(u'FC Knudde', u'http://www.nusport.nl/feeds/rss/fc-knudde.rss')]

View File

@ -1,47 +0,0 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class FDBPl(BasicNewsRecipe):
    '''Fdb.pl — Polish film database: film news, reviews, trailers, box office.'''
    title = u'Fdb.pl'
    __author__ = 'fenuks'
    description = u'Wiadomości ze świata filmu, baza danych filmowych, recenzje, zwiastuny, boxoffice.'
    category = 'film'
    language = 'pl'
    extra_css = '.options-left > li {display: inline;} em {display: block;}'
    cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg'
    use_embedded_content = False
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    ignore_duplicate_articles = {'title', 'url'}
    keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})]
    remove_tags = [
        dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})]
    # Articles are discovered by scraping listing pages, not via RSS.
    feeds = []

    def parse_index(self):
        # Single "Wiadomości" (news) section built from two listing pages.
        feeds = []
        feeds.append((u'Wiadomości', self.get_articles(
            'https://fdb.pl/wiadomosci?page={0}', 2)))
        return feeds

    def get_articles(self, url, pages=1):
        '''Scrape article metadata from consecutive listing pages.

        url: listing URL template containing a {0} placeholder for the
             1-based page number.
        pages: how many consecutive pages to fetch.
        Returns a list of calibre article dicts.
        '''
        articles = []
        for nr in range(1, pages + 1):
            soup = self.index_to_soup(url.format(nr))
            for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}):
                node = tag.find('h5')
                title = node.a.string
                # BUG FIX: the article link was previously assigned to
                # `url`, clobbering the listing-URL template so every
                # page after the first was fetched from the wrong address.
                article_url = node.a['href']
                articles.append({'title': title,
                                 'url': article_url,
                                 'date': '',
                                 'description': ''
                                 })
        return articles

View File

@ -1,75 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
financialexpress.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    '''Build a BeautifulSoup attrs query that matches any tag whose class
    attribute shares at least one name with the space-separated *classes*.'''
    wanted = frozenset(classes.split(' '))

    def class_matches(value):
        # Falsy value (None / '') short-circuits, mirroring `x and ...`.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': class_matches})
class FE_India(BasicNewsRecipe):
    '''The Financial Express — Indian business daily (financialexpress.com).'''
    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    language = 'en_IN'
    encoding = 'utf-8'
    publication_type = 'magazine'

    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # WordPress block layout: title, excerpt, byline, lead image, body.
    keep_only_tags = [classes('wp-block-post-title wp-block-post-excerpt ie-network-post-meta-wrapper wp-block-post-featured-image wp-block-post-content')]
    remove_tags = [classes('parent_also_read')]
    remove_attributes = ['width', 'height']
    feeds = [
        # https://www.financialexpress.com/syndication/
        # Print feeds
        ('Front Page','https://www.financialexpress.com/print/front-page/feed/'),
        ('Corporate Markets','https://www.financialexpress.com/print/corporate-markets/feed/'),
        ('Economy','https://www.financialexpress.com/print/economy-print/feed/'),
        ('Opinion','https://www.financialexpress.com/print/edits-columns/feed/'),
        ('personal Finance','https://www.financialexpress.com/print/personal-finance-print/feed/'),
        # ('Brandwagon', 'https://www.financialexpress.com/print/brandwagon/feed/'),
        # Other Feeds
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'),
        ('Opinion', 'https://www.financialexpress.com/opinion/feed/'),
        ('Editorial', 'https://www.financialexpress.com/editorial/feed/'),
        ('Budget', 'https://www.financialexpress.com/budget/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        ('Market', 'https://www.financialexpress.com/market/feed/'),
        ('Jobs', 'https://www.financialexpress.com/jobs/feed/'),
        ('SME', 'https://www.financialexpress.com/industry/sme/feed/'),
        ('Mutual Funds', 'https://www.financialexpress.com/money/mutual-funds/feed/'),
        ('Health','https://www.financialexpress.com/lifestyle/health/feed'),
        # ('Health Care','https://www.financialexpress.com/healthcare/feed'),
        ('Science','https://www.financialexpress.com/lifestyle/science/feed'),
        ('Infrastructure','https://www.financialexpress.com/infrastructure/feed'),
        ('Money','https://www.financialexpress.com/money/feed'),
    ]

    def get_cover_url(self):
        # Pull the current issue cover from the magazine's Magzter page.
        soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/')
        for meta in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
            return meta['content']

    def preprocess_html(self, soup, *a):
        # Lazy-loaded images carry the real URL in data-src; promote it.
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
revistafelicia.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Felicia(BasicNewsRecipe):
    '''Revista Felicia — Romanian women's magazine (revistafelicia.ro).'''
    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'  # "A magazine for your soul"
    publisher = u'Revista Felicia'
    oldest_article = 25
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste'
    encoding = 'utf-8'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    # Article header plus the main content container.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'header'}), dict(
            name='div', attrs={'id': 'contentArticol'})
    ]
    remove_tags = [
        dict(name='img', attrs={'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}), dict(
            name='div', attrs={'class': ['content']})
    ]
    feeds = [
        (u'Feeds', u'http://www.revistafelicia.ro/rss')
    ]

    def preprocess_html(self, soup):
        # Make images Adobe Digital Editions compatible.
        return self.adeify_images(soup)

View File

@ -1,45 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    '''FHM UK — men's lifestyle magazine.'''
    title = u'FHM UK'
    description = 'Good News for Men.'
    cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    # last updated 7/10/12
    language = 'en_GB'
    oldest_article = 31
    max_articles_per_feed = 15
    remove_empty_feeds = True
    no_stylesheets = True
    # Headline, lead image and the article/profile body containers.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}),
        dict(name='div', attrs={
            'id': ['profileLeft', 'articleLeft', 'profileRight', 'profileBody']}),
        dict(name='div', attrs={
            'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody', ]}),
    ]
    remove_tags = [
        dict(attrs={'id': ['ctl00_Body_divSlideShow']}),
    ]
    feeds = [
        # repeatable search = </div>{|}<a href="{%}"
        # class="{*}">{%}</a>{|}<p>{*}</p>
        (u'Homepage', u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
        (u'Funny', u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
        (u'Girls', u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
    ]
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -1,51 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
fhm.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FHMro(BasicNewsRecipe):
    '''FHM Romania (fhm.ro).'''
    title = u'FHM Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Pentru c\u0103 noi putem'  # "Because we can"
    publisher = 'FHM'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Reviste'
    encoding = 'utf-8'
    cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    keep_only_tags = [
        dict(name='div', attrs={'class': 'contentMainTitle'}), dict(
            name='div', attrs={'class': 'entry'})
    ]
    # NOTE: 'ratingblock ' (trailing space) is the literal class on the site.
    remove_tags_after = [
        dict(name='div', attrs={'class': ['ratingblock ']}), dict(
            name='a', attrs={'rel': ['tag']})
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['ratingblock ']}), dict(
            name='div', attrs={'class': ['socialize-containter']})
    ]
    feeds = [
        (u'Feeds', u'http://www.fhm.ro/feed')
    ]

    def preprocess_html(self, soup):
        # Make images Adobe Digital Editions compatible.
        return self.adeify_images(soup)

View File

@ -1,30 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes
class fiftytwo(BasicNewsRecipe):
    '''Fifty Two (fiftytwo.in) — weekly long-form essays on India.'''
    title = u'Fifty Two'
    description = ('Every week, 52 publishes an essay that dives deep into an aspect of Indias history,'
                   ' politics and culture. Each story will explain, recall or establish something interesting '
                   'about life on our subcontinent, and tell readers why it matters to them.')
    language = 'en_IN'
    __author__ = 'unkn0wn'
    oldest_article = 30  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    masthead_url = 'https://fiftytwo.in//img/52-logo.png'
    # https://fiftytwo.in/img/favicon.png
    ignore_duplicate_articles = {'url'}
    extra_css = '.story-info, .story-notes, .story-intro {font-size:small; font-style:italic;}'
    # `classes` helper (imported from calibre) matches any of these CSS classes.
    keep_only_tags = [
        classes(
            'story-banner__container story-info story-slices story-notes'
        ),
    ]
    feeds = [
        ('Articles', 'https://fiftytwo.in/feed.xml'),
    ]

View File

@ -1,27 +0,0 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
    '''Fisco Oggi — news site of the Italian revenue agency (fiscooggi.it).'''
    title = u'Fisco Oggi'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 7
    max_articles_per_feed = 100
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    remove_javascript = True
    no_stylesheets = True
    # One feed per taxonomy term (news, legislation, case law, stats, ...).
    feeds = [
        (u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'),
        (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'),
        (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'),
        (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'),
        (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'),
        (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'),
        (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'),
        (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]

View File

@ -1,39 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
fleshbot.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Fleshbot(BasicNewsRecipe):
    '''Fleshbot — adult-entertainment news, full content embedded in the feed.'''
    title = 'Fleshbot'
    __author__ = 'NA'
    description = "Fleshbot, Pure Filth."
    publisher = 'Fleshbot.com'
    category = 'news, sex, sex industry, celebs, nudes, adult, adult toys, sex toys'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    # Articles are read directly from the RSS entries, not fetched.
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://fbassets.s3.amazonaws.com/images/uploads/2012/01/fleshbot-logo.png'
    extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    feeds = [(u'Articles', u'http://fleshbot.com/?feed=rss2')]
    # FeedBurner "flare" footers.
    remove_tags = [
        {'class': 'feedflare'},
    ]

    def preprocess_html(self, soup):
        # Make images Adobe Digital Editions compatible.
        return self.adeify_images(soup)

View File

@ -1,27 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Folkebladet
'''
class Folkebladet_dk(BasicNewsRecipe):
    '''Folkebladet — Danish local newspaper (folkebladet.dk).'''
    __author__ = 'CoderAllan.github.com'
    title = 'Folkebladet'
    description = 'Dine lokale nyheder på nettet'  # "Your local news on the web"
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Folkebladet', 'http://folkebladet.dk/feed/'),
        ('Kommentarer', 'http://folkebladet.dk/comments/feed/'),
    ]

View File

@ -1,26 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Folkebladet Djursland
'''
class FolkebladetDjursland_dk(BasicNewsRecipe):
    '''Folkebladet Djursland — Danish local newspaper.'''
    __author__ = 'CoderAllan.github.com'
    title = 'Folkebladet Djursland'
    description = 'Lokale og regionale nyheder'  # "Local and regional news"
    category = 'newspaper, news, localnews, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Nyheder', 'http://dinby.dk/folkebladet-djursland/rss'),
    ]

View File

@ -1,27 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
folketidende.dk
'''
class Folketidende_dk(BasicNewsRecipe):
    '''folketidende.dk — local news for Lolland and Falster, Denmark.'''
    __author__ = 'CoderAllan.github.com'
    title = 'folketidende.dk'
    description = 'Lokalt nyhedssite, med nyheder og lokalstof om Lolland og Falster'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # Rely on calibre's heuristic article extraction.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('folketidende.dk - Lolland &amp; Falster samlet på et sted', 'http://folketidende.dk/rss-nyhedsbrev.xml'),
    ]

View File

@ -1,67 +0,0 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    '''Soup attrs matcher: truthy for tags that share at least one CSS
    class with the space-separated names in *classes*.'''
    targets = frozenset(classes.split(' '))
    return {
        'attrs': {
            'class': lambda value: value and targets.intersection(frozenset(value.split())),
        }
    }
class Forbes(BasicNewsRecipe):
    '''Forbes — business and financial news (forbes.com).'''
    title = u'Forbes'
    description = 'Business and Financial News'
    __author__ = 'Kovid Goyal'
    oldest_article = 30
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    extra_css = '''
div.fb-captioned-img {
font-size: smaller;
margin-top: 1em; margin-bottom: 1em;
}
div.fb-captioned-img img {
display:block;
margin-left: auto; margin-right: auto;
}
'''
    feeds = [
        (u'Latest', u'https://www.forbes.com/news/index.xml'),
        (u'Most Popular', u'https://www.forbes.com/feeds/popstories.xml'),
        (u'Technology', u'https://www.forbes.com/technology/index.xml'),
        (u'Business', u'https://www.forbes.com/business/index.xml'),
        (u'Sports Money', u'https://www.forbes.com/sportsmoney/index.xml'),
        (u'Leadership', u'https://www.forbes.com/leadership/index.xml'),
    ]
    # Headline, hero image, body and contributor footer.
    keep_only_tags = [
        classes('article-headline-container hero-image-block article-body bottom-contrib-block')
    ]
    remove_tags = [
        classes('article-sharing'),
        dict(name='button'),
    ]

    def preprocess_html(self, soup):
        # Re-home the hero image inside the headline container so it is
        # rendered directly below the title.
        hero = soup.find(**classes('hero-image-block'))
        if hero is not None:
            headline = soup.find(**classes('article-headline-container'))
            hero.extract()
            headline.append(hero)
        return soup

    def get_browser(self):
        # Pre-seed cookies that suppress the welcome interstitial/ad.
        br = BasicNewsRecipe.get_browser(self)
        br.set_cookie('dailyWelcomeCookie', 'true', '.forbes.com')
        br.set_cookie('welcomeAd', 'true', '.forbes.com')
        return br

    # def parse_index(self):
    #     return [('Articles', [{'title':'Test', 'url':
    #     'http://www.forbes.com/sites/hamdiraini/2016/04/25/bazin-seeks-startups-to-accelerate-accorhotels-transformation/'}])]

View File

@ -1,55 +0,0 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276934715(BasicNewsRecipe):
    '''Forbes India — fetched via the site's print-content pages.'''
    title = u'Forbes India'
    __author__ = 'rty'
    description = 'India Edition Forbes'
    publisher = 'Forbes India'
    category = 'Business News, Economy, India'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_IN'
    # Temp files created by get_obfuscated_article; kept alive on the class.
    temp_files = []
    # Each article is fetched indirectly via its print page.
    articles_are_obfuscated = True
    conversion_options = {'linearize_tables': True}
    feeds = [
        (u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
    ]
    extra_css = '''
.t-10-gy-l{font-style: italic; font-size: small}
.t-30-b-d{font-weight: bold; font-size: xx-large}
.t-16-gy-l{font-weight: bold; font-size: x-large; font-syle: italic}
.storycontent{font-size: 4px;font-family: Times New Roman;}
'''
    remove_tags_before = dict(name='div', attrs={'class': 'pdl10 pdr15'})

    def get_obfuscated_article(self, url):
        # Open the article page, follow its "/printcontent/<id>" link and
        # save that print version to a temp file for calibre to parse.
        br = self.get_browser()
        br.open(url)
        response = br.follow_link(url_regex=r'/printcontent/[0-9]+', nr=0)
        html = response.read()
        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name

    def get_cover_url(self):
        # First magazine-cover lightbox link on the magazine index page.
        index = 'http://business.in.com/magazine/'
        soup = self.index_to_soup(index)
        for image in soup.findAll('a', {"class": "lbOn a-9-b-d"}):
            return image['href']
            # return image['href'] + '.jpg'
        return None

    def preprocess_html(self, soup):
        # Drop inline styles and explicit widths so content reflows.
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(width=True):
            del item['width']
        return soup

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
import datetime
import re
from calibre.web.feeds.news import BasicNewsRecipe
class forbes_pl(BasicNewsRecipe):
    '''Forbes.pl — Polish edition of Forbes: business and finance news.'''
    title = u'Forbes.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.'
    oldest_article = 1
    index = 'http://www.forbes.pl'
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
    # Strip "Czytaj też / Zobacz także" (read-also) cross-links from bodies.
    preprocess_regexps = [(re.compile(u'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
                           lambda match: ''), (re.compile(u'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
    remove_javascript = True
    no_stylesheets = True
    # Timestamp for "24 hours ago", computed once at class-definition time.
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(hours=24)
    yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
    pages_count = 4
    keep_only_tags = [dict(attrs={'class': [
        'Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
    remove_tags = [dict(attrs={'class': [
        'Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]
    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

    # Disabled multipage-stitching code, deliberately kept as a string
    # literal (dead code).  NOTE(review): if re-enabled it would need
    # `Comment` imported, and `pagetext` is a ResultSet so the findAll
    # call on it would fail — verify before reviving.
    '''def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup
def append_page(self, soup, appendtag):
cleanup = False
nexturl = appendtag.find('a', attrs={'class':'next'})
if nexturl:
cleanup = True
while nexturl:
soup2 = self.index_to_soup(self.index + nexturl['href'])
nexturl = soup2.find('a', attrs={'class':'next'})
pagetext = soup2.findAll(id='article-body-wrapper')
if not pagetext:
pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if cleanup:
for r in appendtag.findAll(attrs={'class':'paginator'}):
r.extract()'''

View File

@ -1,48 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
formula-as.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FormulaAS(BasicNewsRecipe):
    '''Formula AS — Romanian weekly magazine (formula-as.ro).'''
    title = u'Formula AS'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Formula AS'
    description = u'Formula AS'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Romania'
    encoding = 'utf-8'
    cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png'
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    keep_only_tags = [
        dict(name='div', attrs={'class': 'item padded'})
    ]
    remove_tags = [
        dict(name='ul', attrs={'class': 'subtitle lower'})
    ]
    remove_tags_after = [
        dict(name='ul', attrs={'class': 'subtitle lower'}),
        dict(name='div', attrs={'class': 'item-brief-options'})
    ]
    # "Știri" = news
    feeds = [
        (u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml')
    ]

    def preprocess_html(self, soup):
        # Make images Adobe Digital Editions compatible.
        return self.adeify_images(soup)

View File

@ -1,51 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ForsalPL(BasicNewsRecipe):
title = u'Forsal.pl'
__author__ = 'fenuks'
description = u'Na portalu finansowym Forsal.pl znajdziesz najświeższe wiadomości finansowe i analizy. Kliknij i poznaj aktualne kursy walut, notowania giełdowe oraz inne wiadomości ze świata finansów.' # noqa
category = 'economy, finance'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg'
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class': 'related'}), dict(
name='img', attrs={'title': 'Forsal'})]
feeds = [
(u'Najnowsze', u'http://forsal.pl/atom/najnowsze'),
(u'Tylko na forsal.pl', u'http://forsal.pl/atom/tagi/forsal'),
(u'Publicystyka', u'http://forsal.pl/atom/tagi/opinia'),
(u'Bloomberg', u'http://forsal.pl/atom/tagi/bloomberg'),
(u'Financial Times', u'http://forsal.pl/atom/tagi/financial_times'),
(u'Gie\u0142da', u'http://forsal.pl/atom/tagi/gielda'),
(u'Waluty', u'http://forsal.pl/atom/tagi/waluty'),
(u'Surowce', u'http://forsal.pl/atom/tagi/surowce'),
(u'Komenarze finasnowe', u'http://forsal.pl/atom/tagi/komentarz'),
(u'Komentarze gie\u0142dowe', u'http://forsal.pl/atom/tagi/komentarz;gielda'),
(u'Komentarze walutowe', u'http://forsal.pl/atom/tagi/komentarz;waluty'),
(u'Makroekonomia', u'http://forsal.pl/atom/tagi/makroekonomia'),
(u'Handel', u'http://forsal.pl/atom/tagi/handel'),
(u'Nieruchomo\u015bci', u'http://forsal.pl/atom/tagi/nieruchomosci'),
(u'Motoryzacja', u'http://forsal.pl/atom/tagi/motoryzacja'),
(u'Finanse', u'http://forsal.pl/atom/tagi/finanse'),
(u'Transport', u'http://forsal.pl/atom/tagi/transport'),
(u'Media', u'http://forsal.pl/atom/tagi/media'),
(u'Telekomunikacja', u'http://forsal.pl/atom/tagi/telekomunikacja'),
(u'Energetyka', u'http://forsal.pl/atom/tagi/energetyka'),
(u'Przemys\u0142', u'http://forsal.pl/atom/tagi/przemysl'),
(u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')]
def print_version(self, url):
    """Map an article URL to its printer-friendly counterpart.

    Articles carry a numeric id of the form '/123456,' in their URL;
    URLs without such an id are returned unchanged.
    """
    match = re.search(u'/[0-9]+,', url)
    if match is None:
        return url
    # match.group(0) looks like '/123456,'; drop the trailing comma.
    return 'http://forsal.pl/drukowanie' + match.group(0)[:-1]

View File

@ -1,18 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Fotoblogia_pl(BasicNewsRecipe):
    """Calibre news recipe for Fotoblogia.pl, a Polish photography blog."""
    title = u'Fotoblogia.pl'
    __author__ = 'fenuks'
    description = u'Jeden z największych polskich blogów o fotografii.'
    category = 'photography'
    language = 'pl'
    # Logo shown at the top of the periodical and the cover image.
    masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
    cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    # Keep only the <article> element; drop the related-articles box.
    keep_only_tags = [dict(name='article')]
    remove_tags = [dict(attrs={'class': 'article-related'})]
    feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]

View File

@ -1,75 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Christian Schmitt'
'''
fr-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class FROnlineRecipe(BasicNewsRecipe):
    """Calibre recipe for fr-online.de (Frankfurter Rundschau)."""
    title = 'Frankfurter Rundschau'
    __author__ = 'maccs'
    description = 'Nachrichten aus D und aller Welt'
    encoding = 'utf-8'
    masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
    category = 'news, germany, world'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 1  # Increase this number if you're interested in older articles
    max_articles_per_feed = 50  # Seems a reasonable number to me
    extra_css = '''
    body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
    .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
    .p--heading-1 {font-weight: bold;}
    .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
    '''
    remove_tags = [dict(name='div', attrs={'id': 'Logo'})]
    cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
    cover_margins = (100, 150, '#ffffff')

    # One (section title, feed URL) pair per section. A single literal list
    # replaces the original chain of feeds.append() calls.
    feeds = [
        ('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'),
        ('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'),
        ('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'),
        ('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'),
        ('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'),
        ('Eintracht Frankfurt',
         u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'),
        ('Kultur und Medien',
         u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'),
        ('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'),
        ('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'),
        ('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'),
        ('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'),
        ('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'),
        ('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'),
        ('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'),
        ('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'),
        ('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'),
        ('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'),
    ]

    def print_version(self, url):
        """Fetch the printer-friendly view of each article."""
        return url.replace('index.html', 'view/printVersion/-/index.html')

View File

@ -1,59 +0,0 @@
#!/usr/bin/env python
'''
fr-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
def classes(classes):
    """Return a soup attrs-matcher dict selecting tags whose ``class``
    attribute shares at least one name with the space-separated *classes*."""
    wanted = frozenset(classes.split(' '))

    def has_any(value):
        # value is the candidate tag's class string (or None); truthy
        # exactly when it contains one of the wanted class names.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': has_any})
class FR(BasicNewsRecipe):
    """Calibre recipe for fr.de (Frankfurter Rundschau)."""
    title = 'Frankfurter Rundschau'
    __author__ = 'Kovid Goyal'
    description = 'Nachrichten aus D und aller Welt'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 1  # Increase this number if you're interested in older articles
    max_articles_per_feed = 50  # Seems a reasonable number to me
    encoding = 'cp1252'
    keep_only_tags = [
        dict(id='fcms_page_main'),
    ]
    remove_tags = [
        dict(name='footer'),
        dict(id='comments'),
    ]
    feeds = [
        ('Startseite', u'http://www.fr.de/?_XML=rss'),
        ('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'),
        ('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'),
        ('Politik', 'https://www.fr.de/politik/?_XML=rss'),
        ('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'),
        ('Sport', 'https://www.fr.de/sport/?_XML=rss'),
        ('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'),
        ('Kultur', 'https://www.fr.de/kultur/?_XML=rss'),
        ('Wissen', 'https://www.fr.de/wissen/?_XML=rss'),
        ('Leben', 'https://www.fr.de/leben/?_XML=rss'),
        ('Panorama', 'https://www.fr.de/panorama/?_XML=rss'),
    ]

    def preprocess_html(self, soup):
        """Fix lazy-loaded images and unwrap the main page container."""
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        main = soup.find(id='fcms_page_main')
        # Replace the wrapper with its first child that is a real tag
        # (getattr skips bare NavigableStrings, which have no .name).
        # The original iterated with enumerate() but never used the index.
        for tag in tuple(main):
            if getattr(tag, 'name', None):
                main.replaceWith(tag)
                break
        return soup

View File

@ -1,35 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Starson17'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class Freakonomics(BasicNewsRecipe):
    """Calibre recipe for the Freakonomics blog."""
    title = 'Freakonomics Blog'
    description = 'The Hidden side of everything'
    __author__ = 'Starson17'
    __version__ = '1.02'
    __date__ = '11 July 2011'
    language = 'en'
    cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 30
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 50
    feeds = [(u'Freakonomics Blog', u'http://www.freakonomics.com/feed/')]
    # Keep the main content div; cut everything from the social-media
    # widgets onwards, and drop share buttons / poll widgets inside it.
    keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
    remove_tags_after = [
        dict(name='div', attrs={'class': ['simple_socialmedia']})]
    remove_tags = [dict(name='div', attrs={
        'class': ['simple_socialmedia', 'single-fb-share', 'wp-polls']})]
    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Uge-Nyt
'''
class FredensborgLokalavisen_dk(BasicNewsRecipe):
    """Calibre recipe for the Danish local paper Uge-Nyt
    (fredensborg.lokalavisen.dk)."""
    __author__ = 'CoderAllan.github.com'
    title = 'Uge-Nyt'
    description = 'Uge-Nyt: Lokale og regionale nyheder, sport og kultur fra Fredensborg, Kokkedal og Humlebæk på fredensborg.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No explicit keep/remove rules: rely on calibre's heuristic cleanup.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Seneste nyt fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Fredericia
'''
class FredericiaLokalavisen_dk(BasicNewsRecipe):
    """Calibre recipe for the Danish local paper Lokalavisen Fredericia
    (fredericia.lokalavisen.dk)."""
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Fredericia'
    description = 'Lokale og regionale nyheder, sport, kultur fra Fredericia og omegn på fredericia.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No explicit keep/remove rules: rely on calibre's heuristic cleanup.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Seneste nyt fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,28 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Frederiksberg Bladet
'''
class FrederiksbergBladet_dk(BasicNewsRecipe):
    """Calibre recipe for the Danish local paper Frederiksberg Bladet
    (minby.dk)."""
    __author__ = 'CoderAllan.github.com'
    title = 'Frederiksberg Bladet'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No explicit keep/remove rules: rely on calibre's heuristic cleanup.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Frederiksberg Bladet', 'http://minby.dk/frederiksberg-bladet/feed/'),
        ('Kommentarer til Frederiksberg Bladet', 'http://minby.dk/frederiksberg-bladet/comments/feed/'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Frederikssund
'''
class FrederikssundLokalavisen_dk(BasicNewsRecipe):
    """Calibre recipe for the Danish local paper Lokalavisen Frederikssund
    (frederikssund.lokalavisen.dk)."""
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Frederikssund'
    description = 'Lokale, regionale nyheder, sport og kultur fra Frederikssund, Jægerspris og omegn på frederikssund.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No explicit keep/remove rules: rely on calibre's heuristic cleanup.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Seneste nyt fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,92 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
    """Calibre recipe for Revista Freeway (freeway.com.uy, Uruguay).

    The site has no RSS feed, so articles are scraped from the magazine
    index page in parse_index/art_parse_section.
    """
    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}
    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class': 'titulo_art_ppal'}),
        dict(name='img', attrs={'class': 'recuadro'}),
        dict(name='td', attrs={'class': 'txt_art_ppal'})
    ]
    remove_tags = [
        dict(name=['object', 'link'])
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']
    extra_css = '''
    h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
    h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
    h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
    img {float:left; clear:both; margin:10px}
    p {font-family:Arial,Helvetica,sans-serif;}
    '''

    def parse_index(self):
        """Build the feed list by scraping each section index page."""
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def art_parse_section(self, url):
        """Collect article dicts (title/url/description) from one section page."""
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})
        current_articles = []
        for tag in div.findAllNext(attrs={'class': 'ancho_articulos'}):
            # Stop at the next section heading.
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs={'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a.get('href', False)
                if not url or not title:
                    continue
                # Make site-relative links absolute.
                if url.startswith('/'):
                    url = 'http://freeway.com.uy' + url
                p = td.find('p', attrs={'class': 'txt_articulos'})
                description = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                self.log('\t\t\t', description)
                current_articles.append(
                    {'title': title, 'url': url, 'description': description, 'date': ''})
        return current_articles

    def preprocess_html(self, soup):
        """Turn table markup into divs and strip presentational attributes,
        so the linearize_tables output renders cleanly."""
        attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ]  # noqa
        for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
            item.name = 'div'
            for attrib in attribs:
                # Assign-then-delete removes the attribute without raising
                # when it was absent.
                item[attrib] = ''
                del item[attrib]
        return soup

    def get_cover_url(self):
        # Static cover image (last updated for the November 2010 issue).
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'

View File

@ -1,75 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class FIELDSTREAM(BasicNewsRecipe):
    """Calibre recipe for the Field & Stream blogs (hunting, fishing, guns)."""
    title = 'Field and Stream'
    __author__ = 'Starson17 and Tonythebookworm'
    description = 'Hunting and Fishing and Gun Talk'
    language = 'en'
    publisher = 'Starson17 and Tonythebookworm'
    category = 'food recipes, hunting, fishing, guns'
    use_embedded_content = False
    no_stylesheets = True  # the original listed this attribute twice
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    cover_url = 'http://www.arrowheadflyangler.com/Portals/1/Articles/FieldStream/Field%20and%20Stream%20March%20Fishing%20Edition%20Article%20Cover.jpg'  # noqa
    max_articles_per_feed = 10
    INDEX = 'http://www.fieldandstream.com'
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-wrapper']}),
    ]
    remove_tags = [
        dict(name='div', attrs={
            'class': lambda x: x and 'content-main-bottom' in x.split()}),
        dict(name='div', attrs={
            'class': lambda x: x and 'pw-widget' in x.split()}),
    ]

    def preprocess_html(self, soup):
        """Fix lazy-loaded images and drop signup/search forms."""
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        for form in soup.findAll('form'):
            form.parent.extract()
        return soup

    def parse_index(self):
        """Scrape each blog index page and build the feed list."""
        feeds = []
        # In test mode only fetch as many sections as the test harness asks for.
        num = self.test[0] if self.test else 100
        for title, url in [
            ('Field Test', 'http://www.fieldandstream.com/blogs/field-test'),
            (u"Wild Chef", u"http://www.fieldandstream.com/blogs/wild-chef"),
            (u"The Gun Nuts", u"http://www.fieldandstream.com/blogs/gun-nut"),
            (u"Whitetail 365", u"http://www.fieldandstream.com/blogs/whitetail-365"),
            ('Field Notes', 'http://www.fieldandstream.com/blogs/field-notes'),
            (u"Fly Talk", u"http://www.fieldandstream.com/blogs/flytalk"),
            (u"The Conservationist",
             u"http://www.fieldandstream.com/blogs/conservationist"),
            ('The Lateral Line', 'http://www.fieldandstream.com/blogs/lateral-line'),
            ('Total Outdoorsman',
             'http://www.fieldandstream.com/blogs/total-outdoorsman'),
            ('A Sportsman\'s Life',
             'http://www.fieldandstream.com/blogs/a-sportsmans-life'),
        ]:
            self.log('Section:', title)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            if len(feeds) > num:
                break
        return feeds

    def make_links(self, url):
        """Collect article entries (title/url) from one blog index page."""
        current_articles = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('h2'):
            link = item.find('a')
            if link:
                url = self.INDEX + link['href']
                title = self.tag_to_string(link)
                self.log('\t', title, 'at', url)
                current_articles.append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
        return current_articles

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Furesø Avis
'''
class FuresoeLokalavisen_dk(BasicNewsRecipe):
    """Calibre recipe for the Danish local paper Furesø Avis
    (furesoe.lokalavisen.dk)."""
    __author__ = 'CoderAllan.github.com'
    title = 'Furesø Avis'
    description = 'Lokale og regionale nyheder, sport og kultur fra Farum, Værløse og Furesø på furesoe.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    # No explicit keep/remove rules: rely on calibre's heuristic cleanup.
    auto_cleanup = True
    language = 'da'
    feeds = [
        ('Seneste nyt fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,39 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gamasutra(BasicNewsRecipe):
    """Calibre recipe for Gamasutra's featured-articles feed."""
    title = 'Gamasutra Featured articles'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
    }
    # Article body starts at the first page_item div; strip navigation,
    # footer, comments and misc chrome around it.
    remove_tags_before = dict(name="div", attrs={'class': 'page_item'})
    remove_tags = [
        dict(name='meta'), dict(name='link'), dict(name='hr'), dict(name='div', attrs={'class': 'hide-phone'}), dict(name='div', attrs={'class': 'nav_links'}),
        dict(name='div', attrs={'class': 'superfooter'}), dict(name='span', attrs={'class': 'comment_text'}), dict(name='a', attrs={'type': 'button'})
    ]
    remove_attributes = ['width', 'height', 'name']
    feeds = [
        (u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]

    def print_version(self, url):
        # Strip any existing query string, then request the print view.
        return url.partition('?')[0] + '?print=1'

View File

@ -1,40 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gamasutra(BasicNewsRecipe):
    """Calibre recipe for Gamasutra's news feed."""
    title = 'Gamasutra News'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
    }
    # Article body starts at the first page_item div; strip navigation,
    # footer, comments and misc chrome around it.
    remove_tags_before = dict(name="div", attrs={'class': 'page_item'})
    remove_tags = [
        dict(name='meta'), dict(name='link'),
        dict(name='hr'), dict(name='div', attrs={'class': 'hide-phone'}),
        dict(name='div', attrs={'class': 'nav_links'}), dict(name='div', attrs={'class': 'superfooter'}),
        dict(name='span', attrs={'class': 'comment_text'}), dict(name='a', attrs={'type': 'button'})
    ]
    remove_attributes = ['width', 'height', 'name']
    feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]

    def print_version(self, url):
        # Strip any existing query string, then request the print view.
        return url.partition('?')[0] + '?print=1'

View File

@ -1,47 +0,0 @@
__license__ = 'GPL v3'
__author__ = u'Marc Toensing'
from calibre.web.feeds.news import BasicNewsRecipe
class GamespotCom(BasicNewsRecipe):
    """Calibre recipe for gamespot.com game reviews, one feed per platform."""
    title = u'Gamespot.com Reviews'
    description = 'review articles from gamespot.com'
    language = 'en'
    __author__ = u'Marc T\xf6nsing'
    oldest_article = 7
    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
    # NOTE(review): 'no_javascript' is not a standard BasicNewsRecipe
    # attribute (cf. remove_javascript) -- verify it has any effect.
    no_javascript = True
    feeds = [
        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
        ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
        ('XBOX 360 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
        ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
        ('PlayStation 3 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'),
        ('PlayStation 2 Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'),
        ('PlayStation Portable Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'),
        ('Nintendo DS Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'),
        ('iPhone Reviews',
         'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'top_bar'}),
        dict(name='div', attrs={'class': 'video_embed'})
    ]

    def get_cover_url(self):
        # Static GameSpot logo used as the periodical cover.
        return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif'

    def get_article_url(self, article):
        # Append ?print=1 to fetch the printer-friendly article view.
        return article.get('link') + '?print=1'

View File

@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
    """Calibre recipe for the Romanian daily Gandul (gandul.info)."""
    title = u'G\u00E2ndul'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Gandul'
    description = 'Cotidian Online'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Stiri,Romania'
    encoding = 'utf-8'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }
    # Keep the article container; drop photo links and advertising blocks.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'})
    ]
    remove_tags = [
        dict(name='a', attrs={'class': 'photo'}), dict(
            name='div', attrs={'class': 'ad'})
    ]
    feeds = [
        (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
    ]

    def preprocess_html(self, soup):
        # Normalize image tags via calibre's helper so they convert cleanly.
        return self.adeify_images(soup)

View File

@ -1,69 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaLubuska(BasicNewsRecipe):
    """Calibre recipe for Gazeta Lubuska, the regional paper of the
    Lubusz voivodeship (gazetalubuska.pl)."""
    title = u'Gazeta Lubuska'
    __author__ = 'fenuks'
    description = u'Gazeta Lubuska - portal regionalny województwa lubuskiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.gazetalubuska.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    feeds = [
        (u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'),
        (u'Dreznenko', u'http://www.gazetalubuska.pl/drezdenko.xml'),
        (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'),
        (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'),
        (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'),
        (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'),
        (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'),
        (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'),
        (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'),
        (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'),
        (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'),
        (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'),
        (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'),
        (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'),
        (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'),
        (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'),
        (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'),
        (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'),
        (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'),
        (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'),
        (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'),
        (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'),
        (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'),
        (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'),
        (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'),
        (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml')]
    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape the front-page cover image from the JEDYNKI section."""
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        # The attribute was assigned just above, so return it directly; the
        # original getattr(self, 'cover_url', self.cover_url) was a no-op
        # (its default argument eagerly evaluated the same attribute).
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Decode a feedportal.com redirector URL into the real article URL.

        The redirector escapes URL punctuation as two-character codes
        ('0B' -> '.', '0C' -> '/', ...); reverse that mapping here.
        """
        link = url.rpartition('l/0L0S')[2][:-12]
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        """Request the printer-friendly template of the decoded article URL."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'

View File

@ -1,95 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
import re
from calibre.ebooks.BeautifulSoup import Comment
from calibre.web.feeds.news import BasicNewsRecipe
class gw_bydgoszcz(BasicNewsRecipe):
    """Calibre recipe for the Bydgoszcz local edition of Gazeta Wyborcza
    (bydgoszcz.gazeta.pl)."""
    title = u'Gazeta Wyborcza Bydgoszcz'
    __author__ = 'fenuks'
    language = 'pl'
    description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    INDEX = 'http://bydgoszcz.gazeta.pl'
    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    # rules for gazeta.pl
    # Cut everything from the "Czytaj więcej" (read more) teaser onwards.
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(
        attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
    remove_tags_after = dict(id='gazeta_article_body')
    feeds = [
        (u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')]

    def print_version(self, url):
        """Decode feedsportal.com redirector URLs back into article URLs.

        The redirector escapes URL punctuation as two-character codes
        ('0B' -> '.', '0C' -> '/', ...); non-redirector URLs pass through.
        """
        if 'feedsportal.com' in url:
            s = url.rpartition('gazeta0Bpl')
            u = s[2]
            if not s[0]:
                u = url.rpartition('wyborcza0Bpl')[2]
            u = u.replace('/l/', '/')
            u = u.replace('/ia1.htm', '')
            u = u.replace('0Dbo0F1', '')
            u = u.replace('/story01.htm', '')
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace('0E', '-')
            u = u.replace('0H', ',')
            u = u.replace('0I', '_')
            u = u.replace('0B', '.')
            u = self.INDEX + u
            return u
        else:
            return url

    def preprocess_html(self, soup):
        """Skip paywalled articles and stitch multi-page ones together."""
        tag = soup.find(id='Str')
        # Articles showing the "piano" paywall button are dropped entirely.
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None
        elif tag and tag.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Follow the pager ('Str') links and append each page's article
        body (minus HTML comments) to *appendtag*."""
        tag = soup.find('div', attrs={'id': 'Str'})
        try:
            baseurl = soup.find(name='meta', attrs={
                'property': 'og:url'})['content']
        except Exception:  # og:url meta missing; was a bare except originally
            return 1
        link = tag.findAll('a')[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
            if u'następne' not in link.string:
                # Last pager link is not "next": stop after this page.
                link = ''
            pagetext = soup2.find(id='artykul')
            comments = pagetext.findAll(
                text=lambda text: isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        tag.extract()

    def image_url_processor(self, baseurl, url):
        # Some image URLs come back with stray leading whitespace.
        if url.startswith(' '):
            return url.strip()
        else:
            return url

View File

@ -1,90 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
from calibre.ebooks.BeautifulSoup import Comment
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
    """Calibre recipe for the Szczecin local edition of Gazeta Wyborcza
    (szczecin.gazeta.pl)."""
    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Agora S.A.'
    category = 'news, szczecin'
    INDEX = 'http://szczecin.gazeta.pl'
    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    # rules for gazeta.pl
    # Cut everything from the "Czytaj więcej" (read more) teaser onwards.
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(
        attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
    remove_tags_after = dict(id='gazeta_article_body')
    feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]

    def print_version(self, url):
        """Decode feedsportal.com redirector URLs back into article URLs.

        The redirector escapes URL punctuation as two-character codes
        ('0B' -> '.', '0C' -> '/', ...); non-redirector URLs pass through.
        """
        if 'feedsportal.com' in url:
            s = url.rpartition('gazeta0Bpl')
            u = s[2]
            if not s[0]:
                u = url.rpartition('wyborcza0Bpl')[2]
            u = u.replace('/l/', '/')
            u = u.replace('/ia1.htm', '')
            u = u.replace('/story01.htm', '')
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace('0E', '-')
            u = u.replace('0H', ',')
            u = u.replace('0I', '_')
            u = u.replace('0B', '.')
            u = self.INDEX + u
            return u
        else:
            return url

    def preprocess_html(self, soup):
        """Skip paywalled articles and stitch multi-page ones together."""
        tag = soup.find(id='Str')
        # Articles showing the "piano" paywall button are dropped entirely.
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None
        elif tag and tag.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Follow the pager ('Str') links and append each page's article
        body (minus HTML comments) to *appendtag*."""
        tag = soup.find('div', attrs={'id': 'Str'})
        try:
            baseurl = soup.find(name='meta', attrs={
                'property': 'og:url'})['content']
        except Exception:  # og:url meta missing; was a bare except originally
            return 1
        link = tag.findAll('a')[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
            if u'następne' not in link.string:
                # Last pager link is not "next": stop after this page.
                link = ''
            pagetext = soup2.find(id='artykul')
            comments = pagetext.findAll(
                text=lambda text: isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        tag.extract()

    def image_url_processor(self, baseurl, url):
        # Some image URLs come back with stray leading whitespace.
        if url.startswith(' '):
            return url.strip()
        else:
            return url

View File

@ -1,72 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPomorska(BasicNewsRecipe):
    """Calibre recipe for Gazeta Pomorska, a Polish regional newspaper.

    Fetches one RSS feed per covered town/topic and rewrites feedsportal
    redirect URLs into direct print-version article URLs.
    """
    title = u'Gazeta Pomorska'
    __author__ = 'Richard z forum.eksiazki.org, fenuks'
    description = u'Gazeta Pomorska - portal regionalny'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.pomorska.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # One feed per town or topic section of the portal.
    feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'),
             (u'Region', u'http://www.pomorska.pl/region.xml'),
             (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
             (u'Nakło', u'http://www.pomorska.pl/naklo.xml'),
             (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'),
             (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'),
             (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'),
             (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'),
             (u'Toruń', u'http://www.pomorska.pl/torun.xml'),
             (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'),
             (u'Aleksandrów Kujawski',
              u'http://www.pomorska.pl/aleksandrow.xml'),
             (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'),
             (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'),
             (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'),
             (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'),
             (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'),
             (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'),
             (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'),
             (u'Rypin', u'http://www.pomorska.pl/rypin.xml'),
             (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'),
             (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'),
             (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'),
             (u'Żnin', u'http://www.pomorska.pl/znin.xml'),
             (u'Sport', u'http://www.pomorska.pl/sport.xml'),
             (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'),
             (u'Auto', u'http://www.pomorska.pl/moto.xml'),
             (u'Dom', u'http://www.pomorska.pl/dom.xml'),
             # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
             (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape today's cover image from the JEDYNKI front-pages section."""
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        # Follow the first cover link, then grab the image on that page.
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        return getattr(self, 'cover_url', self.cover_url)

    def decode_feedportal_url(self, url):
        """Decode a feedsportal redirect into the real article URL.

        feedsportal encodes the target URL after an 'l/0L0S' marker using
        two-character escape pairs; undo them in order.
        """
        link = url.rpartition('l/0L0S')[2][:-12]
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        """Return the printer-friendly version of an article URL."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'

View File

@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaWroclawska(BasicNewsRecipe):
    """Calibre recipe for Gazeta Wrocławska, a Wrocław regional daily."""
    title = u'Gazeta Wroc\u0142awska'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Gazeta Wrocławska. Najnowsze Wiadomości Wrocław, Informacje Wrocław. Czytaj!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetawroclawska.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={
        'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})]

    feeds = [
        (u'Fakty24', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533775/index.rss?201302'),
        (u'Region', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_region.xml?201302'),
        (u'Kultura', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533777/index.rss?201302'),
        (u'Sport', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533776/index.rss?201302'),
        (u'Z archiwum', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_zarchiwum.xml?201302'),
        (u'M\xf3j reporter', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_mojreporter.xml?201302'),
        (u'Historia', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_historia.xml?201302'),
        (u'Listy do redakcji', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_listydoredakcji.xml?201302'),
        (u'Na drogach', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_nadrogach.xml?201302')]

    def print_version(self, url):
        # Print pages live under /drukuj/ instead of /artykul/.
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        # Interstitial ad pages contain only a link to the real article;
        # follow it and re-parse.
        if 'Advertisement' in soup.title:
            nexturl = soup.find('a')['href']
            return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        # Cover image is scraped from the prasa24 kiosk page.
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/gazeta-wroclawska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return getattr(self, 'cover_url', self.cover_url)

View File

@ -1,68 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaWspolczesna(BasicNewsRecipe):
    """Calibre recipe for Gazeta Współczesna, a Polish regional portal.

    Fetches one RSS feed per town/topic and rewrites feedsportal
    redirect URLs into direct print-version article URLs.
    """
    title = u'Gazeta Wsp\xf3\u0142czesna'
    __author__ = 'fenuks'
    description = u'Gazeta Współczesna - portal regionalny.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.wspolczesna.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    # One feed per town or topic section of the portal.
    feeds = [
        (u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'),
        (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'),
        (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'),
        (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'),
        (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'),
        (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'),
        (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'),
        (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'),
        (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'),
        (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'),
        (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'),
        (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'),
        (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'),
        (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'),
        (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'),
        (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'),
        (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'),
        (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'),
        (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'),
        (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'),
        (u'Sport', u'http://www.wspolczesna.pl/sport.xml'),
        (u'Praca', u'http://www.wspolczesna.pl/praca.xml'),
        (u'Dom', u'http://www.wspolczesna.pl/dom.xml'),
        (u'Auto', u'http://www.wspolczesna.pl/auto.xml'),
        (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape today's cover image from the JEDYNKI front-pages section."""
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        # Follow the first cover link, then grab the image on that page.
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        return getattr(self, 'cover_url', self.cover_url)

    def decode_feedportal_url(self, url):
        """Decode a feedsportal redirect into the real article URL.

        feedsportal encodes the target URL after an 'l/0L0S' marker using
        two-character escape pairs; undo them in order.
        """
        link = url.rpartition('l/0L0S')[2][:-12]
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        """Return the printer-friendly version of an article URL."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'

View File

@ -1,126 +0,0 @@
# -*- coding: utf-8 -*-
import re
from calibre.ebooks.BeautifulSoup import Comment
from calibre.web.feeds.news import BasicNewsRecipe
class Gazeta_Wyborcza(BasicNewsRecipe):
    """Calibre recipe for Gazeta Wyborcza (wyborcza.pl).

    Decodes feedsportal redirect URLs into direct article URLs, skips
    paywalled articles, and stitches multi-page articles into a single
    document.
    """
    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
    # encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    # rules for gazeta.pl
    preprocess_regexps = [
        (re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(
        attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
    remove_tags_after = dict(id='gazeta_article_body')
    # rules for wyborcza.biz
    preprocess_regexps.append((re.compile(
        u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\\.?<br>', re.DOTALL), lambda m: ''))

    feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
             (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
             (u'Gazeta \u015awi\u0105teczna',
              u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
             (u'Du\u017cy Format',
              u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
             (u'M\u0119ska Muzyka',
              u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
             (u'Solidarni z Tybetem',
              u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
             (u'W pon. - \u017bakowski',
              u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
             (u'We wt. - Kolenda-Zalewska',
              u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
             (u'\u015aroda w \u015brod\u0119',
              u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
             (u'W pi\u0105tek - Olejnik',
              u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
             ]

    def print_version(self, url):
        """Turn a feedsportal redirect URL into a direct article URL."""
        if 'feedsportal.com' not in url:
            return url
        s = url.rpartition('wyborcza0Bpl')
        u = s[2]
        if not s[0]:
            # Marker not found: the link points at the gazeta.pl domain.
            u = url.rpartition('gazeta0Bpl')[2]
        # Undo feedsportal's escape pairs; order matters ('A' removal
        # must happen between the path cleanup and the 0X pairs, as in
        # the original sequence of replace() calls).
        for old, new in (('/l/', '/'), ('/ia1.htm', ''), ('/story01.htm', ''),
                         ('0C', '/'), ('A', ''), ('0E', '-'), ('0H', ','),
                         ('0I', '_'), ('0B', '.')):
            u = u.replace(old, new)
        return self.INDEX + u

    def preprocess_html(self, soup):
        """Skip paywalled articles; join multi-page articles."""
        tag = soup.find(id='Str')
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None  # paywalled article: discard
        elif tag and tag.findAll('a'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Follow the 'Str' pager and append every further page's body.

        Returns 1 (legacy sentinel, ignored by callers) when the page
        lacks the ``og:url`` meta tag needed to build follow-up URLs.
        """
        tag = soup.find('div', attrs={'id': 'Str'})
        try:
            baseurl = soup.find(name='meta', attrs={
                'property': 'og:url'})['content']
        # Narrowed from a bare except: find() -> None raises TypeError,
        # a tag without 'content' raises KeyError.
        except (TypeError, KeyError):
            return 1
        link = tag.findAll('a')[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            pager = soup2.find('div', attrs={'id': 'Str'})
            # Guard: the last page may have no pager, and an anchor with
            # nested markup has .string == None (the original crashed on
            # ``in None``).
            link = pager.findAll('a')[-1] if pager else None
            if link is not None and (not link.string or u'następne' not in link.string):
                link = None
            pagetext = soup2.find(id='artykul')
            comments = pagetext.findAll(
                text=lambda text: isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        tag.extract()

    def get_cover_url(self):
        """Scrape today's cover thumbnail and upscale it to full size."""
        soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
        cover = soup.find(attrs={'class': 'gallerycontent'})
        self.cover_url = cover.ul.li.a.img['src'].replace('P.jpg', '.jpg')
        return getattr(self, 'cover_url', self.cover_url)

    def image_url_processor(self, baseurl, url):
        """Trim whitespace from image URLs that start with a stray space."""
        if url.startswith(' '):
            return url.strip()
        else:
            return url

View File

@ -1,64 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GCN(BasicNewsRecipe):
    """Calibre recipe for Gazeta Codziennej Nowiny (nowiny24.pl).

    Fetches one RSS feed per town/topic and rewrites feedsportal
    redirect URLs into direct print-version article URLs.
    """
    title = u'Gazeta Codziennej Nowiny'
    __author__ = 'fenuks'
    description = u'nowiny24.pl - portal regionalny województwa podkarpackiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.nowiny24.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style']
    use_embedded_content = False

    # One feed per town or topic section of the portal.
    feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'),
             (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'),
             (u'Bieszczady', u'http://www.nowiny24.pl/bieszczady.xml'),
             (u'Rzeszów', u'http://www.nowiny24.pl/rzeszow.xml'),
             (u'Przemyśl', u'http://www.nowiny24.pl/przemysl.xml'),
             (u'Leżajsk', u'http://www.nowiny24.pl/lezajsk.xml'),
             (u'Łańcut', u'http://www.nowiny24.pl/lancut.xml'),
             (u'Dębica', u'http://www.nowiny24.pl/debica.xml'),
             (u'Jarosław', u'http://www.nowiny24.pl/jaroslaw.xml'),
             (u'Krosno', u'http://www.nowiny24.pl/krosno.xml'),
             (u'Mielec', u'http://www.nowiny24.pl/mielec.xml'),
             (u'Nisko', u'http://www.nowiny24.pl/nisko.xml'),
             (u'Sanok', u'http://www.nowiny24.pl/sanok.xml'),
             (u'Stalowa Wola', u'http://www.nowiny24.pl/stalowawola.xml'),
             (u'Tarnobrzeg', u'http://www.nowiny24.pl/tarnobrzeg.xml'),
             (u'Sport', u'http://www.nowiny24.pl/sport.xml'),
             (u'Dom', u'http://www.nowiny24.pl/dom.xml'),
             (u'Auto', u'http://www.nowiny24.pl/auto.xml'),
             (u'Praca', u'http://www.nowiny24.pl/praca.xml'),
             (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'),
             (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Scrape today's cover image from the JEDYNKI front-pages section."""
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        # Follow the first cover link, then grab the image on that page.
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        return getattr(self, 'cover_url', self.cover_url)

    def decode_feedportal_url(self, url):
        """Decode a feedsportal redirect into the real article URL.

        feedsportal encodes the target URL after an 'l/0L0S' marker using
        two-character escape pairs; undo them in order.
        """
        link = url.rpartition('l/0L0S')[2][:-12]
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        """Return the printer-friendly version of an article URL."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'

View File

@ -1,80 +0,0 @@
import re
from calibre.utils.magick import Image, create_canvas
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    """Calibre recipe for the Geek and Poke webcomic.

    Cleans up the blog markup with regexes and post-processes each
    comic image: trims the surrounding whitespace, then re-adds a small
    white margin below by compositing onto a slightly taller canvas.
    """
    title = u'Geek and Poke'
    __author__ = u'DrMerry'
    description = u'Geek and Poke Cartoons'
    publisher = u'Oliver Widder'
    author = u'Oliver Widder, DrMerry (calibre-code), calibre'
    oldest_article = 31
    max_articles_per_feed = 100
    language = u'en'
    simultaneous_downloads = 1
    timefmt = ' [%a, %d %B, %Y]'
    summary_length = -1
    no_stylesheets = True
    category = 'News.IT, Cartoon, Humor, Geek'
    use_embedded_content = False
    cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'blog'
    masthead_url = None
    conversion_options = {
        'comments': '', 'tags': category, 'language': language, 'publisher': publisher, 'author': author
    }

    remove_tags_before = dict(name='p', attrs={'class': 'content-nav'})
    remove_tags_after = dict(name='div', attrs={'class': 'entry-content'})
    remove_tags = [dict(name='div', attrs={'class': 'entry-footer'}),
                   dict(name='div', attrs={'id': 'alpha'}),
                   dict(name='div', attrs={'id': 'gamma'}),
                   dict(name='iframe'),
                   dict(name='p', attrs={'class': 'content-nav'})]

    # Block tracker/ad hosts from being fetched.
    filter_regexps = [(r'feedburner\.com'),
                      (r'pixel.quantserve\.com'),
                      (r'googlesyndication\.com'),
                      (r'yimg\.com'),
                      (r'scorecardresearch\.com')]

    # Strip empty paragraphs, tweet links, comments and stray headings;
    # wrap each image with its alt text as a caption.
    preprocess_regexps = [
        (re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL | re.IGNORECASE), lambda match: ''),
        (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL |
                    re.IGNORECASE), lambda match: ' '),
        (re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL |
                    re.IGNORECASE), lambda match: match.group(1) + match.group(2) + '</h3>'),
        (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL | re.IGNORECASE),
         lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
        (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL |
                    re.IGNORECASE), lambda match: '<br>'),
    ]

    extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'

    def postprocess_html(self, soup, first):
        """Trim each comic image and re-add a bottom white margin.

        Each downloaded image is trimmed in place and then composited
        onto a slightly taller white canvas (17% extra height), so the
        strip does not butt up against the following content.
        """
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            # print '***img is: ', iurl, '\n****width is: ', width, 'height is:
            # ', height
            img.trim(0)
            # print '***TRIMMED img width is: ', width, 'height is: ', height
            left = 0
            top = 0
            border_color = '#ffffff'
            width, height = img.size
            # print '***retrieved img width is: ', width, 'height is: ', height
            height_correction = 1.17
            canvas = create_canvas(
                width, height * height_correction, border_color)
            canvas.compose(img, left, top)
            # Overwrite the original file with the padded version.
            canvas.save(iurl)
        return soup

    feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Villabyerne
'''
class GentofteLokalavisen_dk(BasicNewsRecipe):
    """Calibre recipe for Villabyerne (gentofte.lokalavisen.dk).

    Purely declarative: relies on calibre's auto_cleanup to extract
    article bodies from the Danish local-news RSS feeds.
    """
    __author__ = 'CoderAllan.github.com'
    title = 'Villabyerne'
    description = 'Lokale og regionale nyheder, sport og kultur fra Gentofte, Hellerup og Charlottenlund på gentofte.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,31 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GermanGovernmentPress(BasicNewsRecipe):
    """Press releases of the German federal government (bundesregierung.de)."""
    title = u'Pressemitteilungen der Bundesregierung'
    oldest_article = 14
    __author__ = 'malfi'
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'

    # Keep only the headline and the article text containers.
    keep_only_tags = [
        dict(name='h2'),
        dict(name='div', attrs={'class': 'textblack'}),
        dict(name='div', attrs={'class': 'subtitle'}),
        dict(name='div', attrs={'class': 'text'}),
    ]
    remove_tags = []

    feeds = [
        (u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')]  # noqa

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        # Swap the '.html' suffix for the print-layout variant.
        match = re.search(r'^(.*).html$', url)
        return str(match.group(1)) + ',layoutVariant=Druckansicht.html'

View File

@ -1,14 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1390492898(BasicNewsRecipe):
    """Calibre recipe for Gezgin Dergi, a Turkish travel magazine.

    Purely declarative: a single RSS feed with calibre's auto_cleanup.
    """
    title = u'Gezgin Dergi'
    __author__ = 'asalet_r'
    language = 'tr'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Gezgin Dergi', u'http://www.gezgindergi.com/feed/')]

View File

@ -1,72 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Gildia(BasicNewsRecipe):
    """Calibre recipe for Gildia.pl, a Polish culture/fandom portal.

    Aggregates the per-topic sub-portal RSS feeds, follows teaser pages
    through to the full article, and absolutizes relative links against
    the matching sub-portal.
    """
    title = u'Gildia.pl'
    __author__ = 'fenuks'
    description = u'Fantastyczny Portal Kulturalny - newsy, recenzje, galerie, wywiady. Literatura, film, gry komputerowe i planszowe, komiks, RPG, sklep. Nie lekceważ potęgi wyobraźni!'  # noqa
    category = 'culture'
    # The original assigned cover_url twice; only this value took effect.
    cover_url = 'http://portal.gildia.pl/images/logo-main.png'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    # (was declared twice in the original; once is enough)
    ignore_duplicate_articles = {'title', 'url'}
    preprocess_regexps = [(re.compile(u'</?sup>'), lambda match: '')]
    remove_tags = [dict(name='div', attrs={'class': [
        'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
    keep_only_tags = [dict(name='div', attrs={'class': 'widetext'}), dict(name='article', attrs={'id': re.compile(r'post-\d+')})]

    feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'),
             (u'Literatura', u'http://www.literatura.gildia.pl/rss'),
             (u'Film', u'http://www.film.gildia.pl/rss'),
             (u'Horror', u'http://www.horror.gildia.pl/rss'),
             (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
             (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
             (u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
             (u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
             (u'Techno', u'http://www.techno.gildia.pl/rss'),
             (u'Historia', u'http://www.historia.gildia.pl/rss'),
             (u'Magia', u'http://www.magia.gildia.pl/rss'),
             (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
             (u'RPG', u'http://www.rpg.gildia.pl/rss'),
             (u'LARP', u'http://www.larp.gildia.pl/rss'),
             (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
             (u'Nauka', u'http://www.nauka.gildia.pl/rss'),
             ]

    def skip_ad_pages(self, soup):
        """Follow teaser pages through to the full review/article page."""
        content = soup.find('div', attrs={'class': 'news'})
        if content is None:
            return
        words = ('recenzj', 'zapowied', 'fragmen',
                 'relacj', 'wywiad', 'nominacj')
        document_title = soup.title.renderContents().decode('utf-8').lower()
        # Teaser pages for reviews/previews/etc. link to the real page.
        for word in words:
            if word in document_title:
                for link in content.findAll(name='a'):
                    if word in link['href'] or (link.string and word in link.string):
                        return self.index_to_soup(link['href'], raw=True)
        # Fallback: an explicit 'more...' link into /publicystyka/.
        for tag in content.findAll(name='a', href=re.compile('/publicystyka/')):
            if 'Wi&#281;cej...' == tag.string:
                return self.index_to_soup(tag['href'], raw=True)

    def preprocess_html(self, soup):
        """Absolutize relative links against the matching sub-portal."""
        title = soup.title.renderContents().decode('utf-8').lower()
        for a in soup('a', href=True):
            if not a['href'].startswith('http'):
                if '/gry/' in a['href']:
                    a['href'] = 'http://www.gry.gildia.pl' + a['href']
                elif u'książk' in title or u'komiks' in title:
                    # (the original had a second, unreachable 'komiks'
                    # branch here with the same target; removed)
                    a['href'] = 'http://www.literatura.gildia.pl' + a['href']
                else:
                    a['href'] = 'http://www.gildia.pl' + a['href']
        return soup

View File

@ -1,36 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gizmodo.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gizmodo(BasicNewsRecipe):
    """Calibre recipe for Gizmodo's full-content (embedded) feed."""
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')]

    # Strip the feedburner 'flare' footer links.
    remove_tags = [
        {'class': 'feedflare'},
    ]

    def preprocess_html(self, soup):
        # Normalize images so they render in the e-book output.
        return self.adeify_images(soup)

View File

@ -1,36 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    """Calibre recipe for Glamour (US) feeds, using auto cleanup."""
    title = u'Glamour (US)'
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    remove_javascript = True
    __author__ = 'Anonymous'
    auto_cleanup = True

    # Note: 'Save the Date' was listed twice in the original recipe,
    # causing the same feed to be fetched twice; the duplicate entry
    # has been removed.
    feeds = [
        (u'All Fashion',
         u'http://feeds.glamour.com/glamour/all_fashion'),
        (u'All Beauty',
         u'http://feeds.glamour.com/glamour/all_beauty'),
        (u'All Sex, Love & Life',
         u'http://feeds.glamour.com/glamour/sex_love_life'),
        (u'All Health & Fitness',
         u'http://feeds.glamour.com/glamour/health_fitness'),
        (u'Slaves to Fashion blog',
         u'http://feeds.glamour.com/glamour/slavestofashion'),
        (u'The Girls in the Beauty Department',
         u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
        (u'Smitten blog',
         u'http://feeds.glamour.com/glamour/smitten'),
        (u'Save the Date',
         u'http://feeds.feedburner.com/glamour/save-the-date'),
        (u'Vitamin G blog',
         u'http://feeds.glamour.com/glamour/vitamin-g'),
    ]

View File

@ -1,100 +0,0 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    """Create a new tag, compatible with both old and new BeautifulSoup.

    Newer soups expose a ``new_tag`` factory taking a dict of attributes;
    older ones require constructing ``Tag`` directly with a list of pairs.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class GlennBeckRecipe(BasicNewsRecipe):
    """Calibre recipe for the Glenn Beck article feed.

    The source HTML is badly broken (illegally nested tags), so instead
    of keep/remove rules, preprocess_html manually rebuilds each article
    into a fresh, minimal document.
    """
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Glenn Beck'
    publisher = u'Premiere Radio Networks'
    category = u'News, Opinion'
    description = u'The fusion of entertainment and enlightenment'

    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    feeds = [(u'Glenn Beck', u'http://feeds.feedburner.com/GlennBeckArticles')]

    def preprocess_html(self, soup):
        """Rebuild the broken article HTML into a fresh document."""
        # Their html is horribly broken; if we search for the div that has the content BeatifulSoup returns the div with only the headline and no content.
        # This is due to illegal nesting of tags. So we do it the hard way.

        # We can find this one, and we don't want it.
        div = soup.find('div', attrs={'id': 'extraInfo'})
        if div:
            div.extract()

        # Don't want these either.
        iframes = soup.findAll('iframe')
        [iframe.extract() for iframe in iframes]

        # Get empty document.
        freshSoup = self.getFreshSoup()

        # This is the broken div; but we can find the headline.
        newsDiv = soup.find('div', attrs={'class': 'news-detail'})
        if newsDiv:
            if newsDiv.h1:
                freshSoup.body.append(newsDiv.h1)

        # The content is wrapped in <p></p> tags, most of the time anyway.
        counter = 0
        for p in soup.findAll('p'):
            if p.get('class') == 'smalltextwhite':
                # But we don't want this one.
                continue
            freshSoup.body.append(p)
            counter += 1

        # Debugging block

        # In some articles the content is not wrapped in <p></p> tags. In that case the counter is low.
        # 2 is the magic number that seems to work.
        if counter <= 2:
            # So they are playing hard-to-get: first throw out all comments.
            comments = soup.findAll(
                text=lambda text: isinstance(text, Comment))
            [comment.extract() for comment in comments]

            # Find all unwrapped strings.
            for txt in soup.findAll(text=True):
                raw = txt.strip()
                # Debugging line
                if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == '&nbsp;'):
                    # This is our content; ignore the rest.
                    para = new_tag(freshSoup, 'p')
                    para.append(raw)
                    freshSoup.body.append(para)
                    counter += 1

            # Now if the counter is still 0 or 1 they did something completely
            # different and we still have an empty article. In a last attempt,
            # add the whole content div, just in case.
            if counter < 2:
                freshSoup.body.append(newsDiv)

        # Debugging block

        return freshSoup

    def getFreshSoup(self, title=None):
        """Return an empty HTML document, optionally with a <title>."""
        if title:
            return BeautifulSoup('<html><head><title>' + str(title) + '</title></head><body></body></html>')
        else:
            return BeautifulSoup('<html><head><title></title></head><body></body></html>')

View File

@ -1,45 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GlosWielkopolski(BasicNewsRecipe):
    """Calibre recipe for Głos Wielkopolski, a Poznań regional daily."""
    title = u'G\u0142os Wielkopolski'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Głos Wielkopolski. Najnowsze Wiadomości Poznań. Czytaj Informacje Poznań!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gloswielkopolski.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    # BUGFIX: the newsletter-link matcher used a set literal
    # ({'href', 'http://...'}) where BeautifulSoup expects an attrs
    # dict; it could never match. It is now a proper mapping.
    remove_tags = [dict(id='mat-podobne'),
                   dict(name='a', attrs={'class': 'czytajDalej'}),
                   dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}),
                   dict(name='a', attrs={'href': 'http://www.gloswielkopolski.pl/newsletter/'})]

    feeds = [
        (u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'),
        (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'),
        (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'),
        (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'),
        (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'),
        (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'),
        (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'),
        (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'),
        (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]

    def print_version(self, url):
        # Print pages live under /drukuj/ instead of /artykul/.
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        # Interstitial ad pages contain only a link to the real article;
        # follow it and re-parse.
        if 'Advertisement' in soup.title:
            nexturl = soup.find('a')['href']
            return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        # Cover image is scraped from the prasa24 kiosk page.
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/glos-wielkopolski/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return getattr(self, 'cover_url', self.cover_url)

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
go4it.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Go4ITro(BasicNewsRecipe):
    """Calibre recipe for go4it.ro, a Romanian gadgets/tech site."""
    title = u'go4it'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Gadgeturi, Lifestyle, Tehnologie'
    publisher = 'go4it'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Reviste,Ziare,IT'
    encoding = 'utf-8'
    cover_url = 'http://www.go4it.ro/images/logo.png'
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    # Keep the article header and story body only.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'subTitle clearfix'}), dict(
            name='div', attrs={'class': 'story'})
    ]

    # Drop the date stamp and comment links.
    remove_tags = [
        dict(name='span', attrs={'class': ['data']}), dict(
            name='a', attrs={'class': ['comments']})
    ]

    feeds = [
        (u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri')
    ]

    def preprocess_html(self, soup):
        # Normalize images so they render in the e-book output.
        return self.adeify_images(soup)

View File

@ -1,13 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
    """Calibre recipe for Goal.com Italy sports news (auto cleanup)."""
    title = u'Goal'
    oldest_article = 1
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True
    # Cut everything after the article body container.
    remove_tags_after = [dict(id='article_content')]

    feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
    __author__ = 'faber1971'
    description = 'Sports news from Italy'

View File

@ -1,33 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
gofin.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class gofin(BasicNewsRecipe):
    """Gofin.pl -- Polish tax, accounting and labour-law news portal."""
    title = u'Gofin'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Portal Podatkowo-Księgowy'
    INDEX = 'http://gofin.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        (u'Podatki', u'http://www.rss.gofin.pl/podatki.xml'),
        (u'Prawo Pracy', u'http://www.rss.gofin.pl/prawopracy.xml'),
        (u'Rachunkowo\u015b\u0107', u'http://www.rss.gofin.pl/rachunkowosc.xml'),
        (u'Sk\u0142adki, zasi\u0142ki, emerytury', u'http://www.rss.gofin.pl/zasilki.xml'),
        (u'Firma', u'http://www.rss.gofin.pl/firma.xml'),
        (u'Prawnik radzi', u'http://www.rss.gofin.pl/prawnikradzi.xml')]

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    """Parenting, health and food advice from goodtoknow.co.uk."""
    title = u'Good to Know (uk)'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    __author__ = 'Anonymous'
    language = 'en_GB'

    remove_tags = [
        dict(name='div', attrs={'class': ['articles_footer', 'printoptions']})]

    feeds = [
        (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
        (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
        (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
        (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
        (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
        (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
        (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
        (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
    ]

    def print_version(self, url):
        # The printer-friendly page lives one level below the article URL.
        return url + '/print/1'

    def preprocess_html(self, soup):
        # Flatten plain-text anchors into bare strings so links do not
        # clutter the rendered article.
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is not None:
                anchor.replaceWith(text)
        return soup

View File

@ -1,13 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318572445(BasicNewsRecipe):
    """Google Mobile Blog via its Atom feed."""
    title = u'Google Mobile Blog'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Grenaa
'''
class GrenaaLokalavisen_dk(BasicNewsRecipe):
    """Local Danish news, sport and culture from grenaa.lokalavisen.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Grenaa'
    description = 'Lokale og regionale nyheder, sport, kultur fra Grenå og omegn på grenaa.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Ugeposten Gribskov
'''
class GribskovLokalavisen_dk(BasicNewsRecipe):
    """Local Danish news, sport and culture from gribskov.lokalavisen.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Ugeposten Gribskov'
    description = 'Lokale og regionale nyheder, sport og kultur fra Gribskov og omegn på gribskov.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,81 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGrid(BasicNewsRecipe):
    """The Grid -- weekly Toronto city magazine at thegridto.com."""
    #: The title to use for the ebook
    title = u'The Grid'
    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                   'accessible voice for Toronto.')
    #: The author of this recipe
    __author__ = u'Yusuf W'
    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'
    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'
    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True
    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class': 'right-content pull-right'}),
        dict(name='div', attrs={'class': 'right-content'}),
        dict(name='div', attrs={'class': 'ftr-line'}),
        dict(name='div', attrs={'class': 'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]
    #: Keep only the specified tags and their children.
    # keep_only_tags = [dict(name='div', id='content')]
    cover_margins = (0, 0, '#ffffff')

    # Site root; parse_index appends the latest issue's path to it.
    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        """Return the cover image URL from the latest-issue block."""
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(
            attrs={'class': 'article-block latest-issue'}).find('img')['src']
        return cover_url

    def parse_index(self):
        """Build the (section, articles) index for the latest issue."""
        # Get the latest issue
        soup = self.index_to_soup(self.INDEX)
        # The third anchor in the footer content block links to the issue page.
        a = soup.find(
            'div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]
        # Parse the index of the latest issue
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        for section in ['city', 'life', 'culture']:
            # Each section is a listing column identified by this CSS class.
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})
            articles = []
            for a in div.findAll(attrs={'class': 'post-title'}):
                title = self.tag_to_string(a)
                url = a['href']
                articles.append({'title': title, 'url': url,
                                 'description': '', 'date': ''})
            feeds.append((section, articles))
        return feeds

View File

@ -1,32 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
grrm.livejournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NotABlog(BasicNewsRecipe):
    """George R.R. Martin's 'Not A Blog' LiveJournal feed."""
    title = 'Not A Blog - George R.R. Martin'
    __author__ = 'Darko Miletic'
    description = 'George R.R. Martin'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'

    conversion_options = {
        'comment': description, 'tags': 'sf, fantasy, game of thrones', 'publisher': 'George R.R. Martin', 'language': language
    }

    feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]

    def preprocess_html(self, soup):
        # Drop inline styles, then fix images for Adobe Digital Editions.
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

View File

@ -1,46 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
    """GS24.pl (Głos Szczeciński) -- regional news for the Szczecin area."""
    title = u'GS24.pl (Głos Szczeciński)'
    description = u'Internetowy serwis Głosu Szczecińskiego'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Media Regionalne sp. z o.o.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = "http://www.gs24.pl/images/top_logo.png"

    feeds = [
        # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
        (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
        (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
        (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
        (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
        (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
        (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
        (u'Police', u'http://www.gs24.pl/police.xml'),
        (u'Region', u'http://www.gs24.pl/region.xml'),
        (u'Sport', u'http://www.gs24.pl/sport.xml'),
    ]

    def get_article_url(self, article):
        """Decode the feed proxy's escaped link into the real gs24.pl URL.

        Feed links wrap the target URL between ``/0L0S`` and
        ``/story01.htm``, escaping characters as two-character codes
        (``0B`` -> ``.``, ``0C`` -> ``/``, ...); this undoes the encoding.
        """
        m = re.search(r'/0L0S(gs24.*)/story01.htm', article.link)
        if m is None:
            # Unexpected link shape: fall back to the raw feed link instead
            # of crashing on m.group(1).
            return article.link
        s = m.group(1)
        replacements = {"0B": ".", "0C": "/",
                        "0H": ",", "0I": "_", "0D": "?", "0F": "="}
        for a, b in replacements.items():
            # BUG FIX: string.replace(s, a, b) is a Python-2-only function
            # that was removed from the string module in Python 3; use the
            # equivalent str method.
            s = s.replace(a, b)
        # '0A' decodes the escape character itself, so it must run last.
        s = s.replace("0A", "0")
        return "http://" + s

    def print_version(self, url):
        # Append the site's printable-article template parameter.
        return url + "&Template=printpicart"

View File

@ -1,61 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
gulfnews.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class GulfNews(BasicNewsRecipe):
    """Gulf News -- UAE, Gulf and world news from gulfnews.com."""
    title = 'Gulf News'
    __author__ = 'Darko Miletic'
    description = 'News from United Arab Emirrates, persian gulf and rest of the world'
    publisher = 'Al Nisr Publishing LLC'
    category = 'news, politics, UAE, world'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://gulfnews.com/media/img/gulf_news_logo.jpg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h1{font-family: Georgia, 'Times New Roman', Times, serif}
        ol,ul{list-style: none}
        .synopsis{font-size: small}
        .details{font-size: x-small}
        .image{font-size: xx-small}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Strip metadata tags, embedded players, quick links and rating widgets.
    remove_tags = [
        dict(name=['meta', 'link', 'object', 'embed']), dict(
            attrs={'class': ['quickLinks', 'ratings']}), dict(attrs={'id': 'imageSelector'})
    ]
    remove_attributes = ['lang']
    keep_only_tags = [
        dict(name='h1'), dict(
            attrs={'class': ['synopsis', 'details', 'image', 'article']})
    ]

    feeds = [
        (u'UAE News', u'http://gulfnews.com/cmlink/1.446094'),
        (u'Business', u'http://gulfnews.com/cmlink/1.446098'),
        (u'Entertainment', u'http://gulfnews.com/cmlink/1.446095'),
        (u'Sport', u'http://gulfnews.com/cmlink/1.446096'),
        (u'Life', u'http://gulfnews.com/cmlink/1.446097')
    ]

    def preprocess_html(self, soup):
        # Remove inline styles so extra_css controls the rendering.
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@ -1,26 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1259599587(BasicNewsRecipe):
    """German tech news from the gulli.com ticker."""
    title = u'Gulli'
    description = 'News from Germany'
    language = 'de'
    __author__ = 'posativ'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [(u'gulli:news', u'http://ticker.gulli.com/rss/')]

    keep_only_tags = [dict(name='div', attrs={'id': ['_contentLeft']})]
    remove_tags = [dict(name='div', attrs={'class': ['FloatL', '_forumBox']})]
    remove_tags_after = [dict(name='div', attrs={'class': ['_bookmark']})]

    extra_css = '''
        .byline {color:#666;margin-bottom:0;font-size:12px}
        .blockquote {color:#030303;font-style:italic;padding-left:15px;}
        img {align:center;}
        .li {list-style-type: none}
    '''

View File

@ -1,38 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class H3(BasicNewsRecipe):
    """Hungarian news from hirszerzo.hu (Hírszerző)."""
    title = u'H\xedrszerz\u0151'
    oldest_article = 5
    max_articles_per_feed = 50
    language = 'hu'
    __author__ = 'Ezmegaz'

    feeds = [
        (u'Belf\xf6ld', u'http://www.hirszerzo.hu/rss.belfold.xml'),
        (u'K\xfclf\xf6ld', u'http://www.hirszerzo.hu/rss.kulfold.xml'),
        (u'Profit', u'http://www.hirszerzo.hu/rss.profit.xml'),
        (u'Shake', u'http://www.hirszerzo.hu/rss.shake.xml'),
        (u'Publicisztika', u'http://www.hirszerzo.hu/rss.publicisztika.xml'),
        (u'Elemz\xe9s', u'http://www.hirszerzo.hu/rss.elemzes.xml'),
        (u'Sorok k\xf6z\xf6tt', u'http://www.hirszerzo.hu/rss.sorok_kozott.xml'),
        (u'Gal\xe9ria', u'http://www.hirszerzo.hu/rss.galeria.xml'),
        (u'Patro', u'http://www.hirszerzo.hu/rss.patro.xml')]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Haderslev
'''
class HaderslevLokalavisen_dk(BasicNewsRecipe):
    """Local Danish news, sport and culture from haderslev.lokalavisen.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Haderslev'
    description = 'Lokale og regionale nyheder, sport, kultur fra Haderslev og omegn på haderslev.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,36 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
    """The Hankyoreh (한겨례) Korean daily via its category RSS feeds."""
    language = 'ko'
    title = u'한겨례'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 7
    max_articles_per_feed = 10
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-head']}),
        dict(name='div', attrs={'class': ['article-text']}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': ['category']}),
    ]
    # NOTE(review): id is passed a set here; presumably BeautifulSoup treats
    # it as a collection of acceptable values -- confirm it matches 'ad_box01'.
    remove_tags_after = dict(id={'ad_box01'})

    feeds = [
        (u'정치', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_politics.xml'),
        (u'사회', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_society.xml'),
        (u'문화', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_culture.xml'),
        (u'스포츠', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_sports.xml'),
        (u'사설·칼럼', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_opinion.xml'),
        (u'만화만평', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_cartoon.xml'),
    ]

View File

@ -1,23 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    """Italian heavy-metal magazine heavy-metal.it."""
    title = u'Heavy Metal'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = False
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'entry'})
    ]
    # Cut everything after the social-sharing widget.
    remove_tags_after = [
        dict(name='div', attrs={'class': 'sociable'})
    ]
    description = 'An Heavy metal Italian magazine'
    __author__ = 'faber1971'
    language = 'it'
    __version__ = 'v1.0'
    __date__ = '6, May 2012'

View File

@ -1,37 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'
'''
Fetch Heise Open.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class HeiseOpenDe(BasicNewsRecipe):
    """Open-source news from heise.de (Heise Open)."""
    title = 'Heise Open'
    description = 'Opensource news from Germany'
    __author__ = 'Anton Gillert'
    use_embedded_content = False
    language = 'de'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True

    feeds = [('Heise Open', 'http://www.heise.de/open/news/news-atom.xml')]

    # Strip navigation chrome, the site logo, date lines and footer text.
    remove_tags = [dict(id='navi_top'),
                   dict(id='navi_bottom'),
                   dict(name='div', attrs={'class': 'navi_top_logo'}),
                   dict(name='img', attrs={
                        'src': '/open/icons/open_logo_2009_weiss.gif'}),
                   dict(name='h5', attrs={'style': 'margin: 0.5em 0;'}),
                   dict(name='p', attrs={'class': 'news_datum'}),
                   dict(name='p', attrs={'class': 'size80'})]
    remove_tags_after = [dict(name='p', attrs={'class': 'size80'})]

    def print_version(self, url):
        # Request the site's print-friendly rendering of the article.
        return url + '?view=print'

    def get_cover_url(self):
        # Use the section logo as a stand-in cover image.
        return 'http://www.heise.de/open/icons/open_logo_2009_weiss.gif'

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Return a BeautifulSoup attrs-matcher dict accepting any tag whose
    class list shares a member with the space-separated *classes*."""
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # value is the tag's class attribute string, or None when absent.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': has_wanted_class}}
class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    """Helsingin Sanomat (hs.fi) Finnish news recipe."""
    title = u'Helsingin Sanomat'
    __author__ = 'oneillpt'
    language = 'fi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        classes('article-title single-article'),
    ]
    remove_tags = [
        dict(attrs={'class':['hidden print-url', 'article-paywall']}),
        # Drop zero-height spacer elements.
        dict(style=lambda x: x and 'height: 0' in x),
    ]

    feeds = [
        (u'Uutiset - HS.fi', u'https://www.hs.fi/uutiset/rss/'),
    ]

    def preprocess_html(self, soup):
        # Lazy-loaded images carry their URL in data-mfp-src; convert those
        # elements into real <img> tags so calibre downloads the pictures.
        for tag in soup.findAll(attrs={'data-mfp-src':True}):
            tag.name = 'img'
            tag['src'] = tag['data-mfp-src']
            tag['style'] = 'display:block'
        return soup

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build an attrs dict matching tags whose CSS class list intersects
    the given space-separated class names."""
    targets = frozenset(classes.split(' '))
    return dict(
        attrs={'class': lambda cand: cand and targets & frozenset(cand.split())})
class HinduHumanRights(BasicNewsRecipe):
    """Posts from hindusforhumanrights.org via its RSS feed."""
    title = 'Hindu Human Rights'
    __author__ = 'Vishvas Vasuki'
    language = 'en_IN'
    oldest_article = 30
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        ('HHR main', 'https://www.hindusforhumanrights.org/en/home?format=rss'),
    ]

View File

@ -1,79 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class HNonlineRecipe(BasicNewsRecipe):
    """News from Slovakia via the HNonline, FinWeb and HNstyle RSS feeds."""
    __license__ = 'GPL v3'
    __author__ = 'lacike'
    language = 'sk'
    version = 1

    title = u'HNonline'
    publisher = u'HNonline'
    category = u'News, Newspaper'
    description = u'News from Slovakia'
    cover_url = u'http://hnonline.sk/img/sk/_relaunch/logo2.png'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://rss.hnonline.sk, for listing see
    # http://rss.hnonline.sk/prehlad
    # A single list literal replaces the original run of feeds.append()
    # calls; the entries and their order are unchanged.
    feeds = [
        (u'HNonline|Ekonomika a firmy', u'http://rss.hnonline.sk/?p=kC1000'),
        (u'HNonline|Slovensko', u'http://rss.hnonline.sk/?p=kC2000'),
        (u'HNonline|Svet', u'http://rss.hnonline.sk/?p=kC3000'),
        (u'HNonline|\u0160port', u'http://rss.hnonline.sk/?p=kC4000'),
        (u'HNonline|Online rozhovor', u'http://rss.hnonline.sk/?p=kCR000'),
        (u'FinWeb|Spr\u00E1vy zo sveta financi\u00ED', u'http://rss.finweb.hnonline.sk/spravodajstvo'),
        (u'FinWeb|Koment\u00E1re a anal\u00FDzy', u'http://rss.finweb.hnonline.sk/?p=kPC200'),
        (u'FinWeb|Invest\u00EDcie', u'http://rss.finweb.hnonline.sk/?p=kPC300'),
        (u'FinWeb|Svet akci\u00ED', u'http://rss.finweb.hnonline.sk/?p=kPC400'),
        (u'FinWeb|Rozhovory', u'http://rss.finweb.hnonline.sk/?p=kPC500'),
        (u'FinWeb|T\u00E9ma t\u00FD\u017Ed\u0148a', u'http://rss.finweb.hnonline.sk/?p=kPC600'),
        (u'FinWeb|Rebr\u00ED\u010Dky', u'http://rss.finweb.hnonline.sk/?p=kPC700'),
        (u'HNstyle|Kult\u00FAra', u'http://style.hnonline.sk/?p=kTC100'),
        (u'HNstyle|Auto-moto', u'http://style.hnonline.sk/?p=kTC200'),
        (u'HNstyle|Digit\u00E1l', u'http://style.hnonline.sk/?p=kTC300'),
        (u'HNstyle|Veda', u'http://style.hnonline.sk/?p=kTCV00'),
        (u'HNstyle|Dizajn', u'http://style.hnonline.sk/?p=kTC400'),
        (u'HNstyle|Cestovanie', u'http://style.hnonline.sk/?p=kTCc00'),
        (u'HNstyle|V\u00EDkend', u'http://style.hnonline.sk/?p=kTC800'),
        (u'HNstyle|Gastro', u'http://style.hnonline.sk/?p=kTC600'),
        (u'HNstyle|M\u00F3da', u'http://style.hnonline.sk/?p=kTC700'),
        (u'HNstyle|Modern\u00E1 \u017Eena', u'http://style.hnonline.sk/?p=kTCA00'),
        (u'HNstyle|Pre\u010Do nie?!', u'http://style.hnonline.sk/?p=k7C000'),
    ]

    # Keep the headline, subtitle, lead paragraph and article body.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'detail-titulek'}),
        dict(name='div', attrs={'class': 'detail-podtitulek'}),
        dict(name='div', attrs={'class': 'detail-perex'}),
        dict(name='div', attrs={'class': 'detail-text'}),
    ]

    extra_css = '''
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
        body {font-family: sans1, serif1;}
    '''

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Ugebladet
'''
class HoersholmLokalavisen_dk(BasicNewsRecipe):
    """Local Danish news for Hørsholm and surroundings from hoersholm.lokalavisen.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Ugebladet'
    description = 'Lokale, regionale nyheder, sport og kultur i Hørsholm, Rungsted, Fredensborg og Humlebæk på hoersholm.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,93 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
__license__ = 'GPL v3'
__copyright__ = '30 June 2012, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Diario de actualidad, moda y belleza'
__version__ = 'v0.03'
__date__ = '28, Jul 2016'
'''
http://www.hola.com/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class hola_es(BasicNewsRecipe):
    """¡Hola! -- Spanish celebrity, royalty, fashion and lifestyle daily."""
    author = 'desUBIKado'
    description = 'Diario de actualidad, moda y belleza'
    title = u'¡Hola!'
    publisher = 'Hola S.L.'
    category = 'Spanish celebrities, Entertainment News, Royalty, Daily Variety, Hollywood'
    language = 'es'
    masthead_url = 'http://imagenes.hola.com/comunes/2008/logo-holacom.gif'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 7
    delay = 1
    encoding = 'utf-8'
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Famosos', u'http://www.hola.com/famosos/rss.xml'),
        (u'Realeza', u'http://www.hola.com/realeza/rss.xml'),
        (u'Cine', u'http://www.hola.com/cine/rss.xml'),
        (u'M\xfasica', u'http://www.hola.com/musica/rss.xml'),
        (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Ni\xf1os', u'http://www.hola.com/ninos/rss.xml')
    ]

    keep_only_tags = [
        dict(name='article', attrs={'class': ['body col-md-8 col-xs-12']})]

    remove_tags = [dict(name='div', attrs={'class': ['comments', 'news-share', 'sponsored-news']}),
                   dict(name='div', attrs={'itemprop': ['logo']}),
                   dict(name='span', attrs={'class': ['hidden']}),
                   dict(name='p', attrs={'class': ['hidden']}),
                   dict(name='section', attrs={'class': ['news-tags']})
                   ]
    remove_tags_after = dict(name='div', attrs={'class': 'comments'})

    # Target markup: <span>VER GALERÍA<i data-icon="1" class="icon"></i></span>
    # These regexps comment out unwanted spans/links by wrapping them in
    # HTML comment markers ('<!--' ... '-->').
    preprocess_regexps = [
        # Strip the 'VER GALERÍA' (view gallery) widget.
        (re.compile(r'<span>VER GALER', re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'class="icon"></i></span>',
                    re.DOTALL | re.IGNORECASE), lambda m: '-->'),
        # Strip assorted self-promotional links.
        (re.compile(r'<p><a href="http://www.hola.com',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'<p style="text-align: center;">',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'<p style="line-height: 20.8px;"><a href="http://www.hola.com',
                    re.DOTALL | re.IGNORECASE), lambda m: '<!--'),
        (re.compile(r'</strong></a></p>',
                    re.DOTALL | re.IGNORECASE), lambda m: '-->')
    ]

    # Fetch the print-edition cover (the 520px image has higher resolution):
    # http://www.hola.com/imagenes/revista/3727/portada-revista-hola-520.jpg
    def get_cover_url(self):
        index = 'http://www.hola.com/abono/ediciondigital/'
        soup = self.index_to_soup(index)
        for image in soup.findAll('img', src=True):
            if image['src'].endswith('portada-revista-hola-520.jpg'):
                return 'http://www.hola.com' + image['src']
        return None

    def get_article_url(self, article):
        # Prefer the feed item's guid as the canonical article URL.
        url = article.get('guid', None)
        return url

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
    '''

View File

@ -1,55 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
'''
www.hollywoodreporter.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class THR_En(BasicNewsRecipe):
    """The Hollywood Reporter -- entertainment-industry news feeds."""
    title = 'The Hollywood Reporter'
    __author__ = 'Darko Miletic'
    description = 'Read about the latest in Hollywood and entertainment news from The Hollywood Reporter, your source for detailed movie reviews, celebrity styles, and industry blogs.' # noqa
    publisher = 'The Hollywood Reporter'
    category = 'Entertainment news, Hollywood news, celebrity news, latest Hollywood news'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    auto_cleanup = True
    # Keep article image containers that auto-cleanup would otherwise drop.
    auto_cleanup_keep = "//div[contains(concat(' ', normalize-space(@class), ' '), ' image ')]"
    extra_css = """
        body{font-family: Georgia,Times,serif}
        h1,h2,h3{font-family: "Vonness-Bold-Compressed",Helvetica,sans-serif}
        .credit,.caption{font-family: Arial,sans-serif;}
        .credit,.caption,.submitted{font-size: small; color: gray;}
        .main_media_credit{clear: left; font-size: x-small; text-align: right; color: gray;}
        img{margin-top: 0.5em; margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [
        (u'Movies', u'http://feeds.feedburner.com/thr/film'),
        (u'TV', u'http://feeds.feedburner.com/thr/television'),
        (u'Style&Culture', u'http://feeds.feedburner.com/thr/style'),
        (u'International', u'http://feeds.feedburner.com/thr/international'),
        (u'Music', u'http://feeds.feedburner.com/thr/music'),
        (u'Tech', u'http://feeds.feedburner.com/TheHollywoodReporter-Technology'),
        (u'Awards', u'http://feeds.feedburner.com/thr/awards'),
        (u'Business', u'http://feeds.feedburner.com/thr/business'),
        (u'Asia', u'http://feeds.feedburner.com/HollywoodReporterAsia'),
        (u'Guilds and Labor', u'http://feeds.feedburner.com/thr/labor'),
        (u'Box Office', u'http://feeds.feedburner.com/thr/boxoffice'),
        (u'Real Estate', u'http://feeds.feedburner.com/thr/RealEstate'),
        (u'Politics', u'http://feeds.feedburner.com/thr/politics')
    ]

View File

@ -1,27 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Hornsherred Avis
'''
class Hornsherredavis_dk(BasicNewsRecipe):
    """Local Danish news from hornsherredavis.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Hornsherred Avis'
    description = 'Lokale nyheder fra Jægerspis, Skibby og Bramsnæs'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 30
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    # Feed are found here: http://hornsherredavis.dk/
    feeds = [
        ('Hornsherred Avis', 'http://hornsherredavis.dk/?feed=rss2'),
        ('Kommentarer til Hornsherred Avis', 'http://hornsherredavis.dk/?feed=comments-rss2'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Lokalavisen Hornsherred
'''
class HornsherredLokalavisen_dk(BasicNewsRecipe):
    """Local Danish news, sport and culture from hornsherred.lokalavisen.dk."""
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Hornsherred'
    description = 'Lokale og regionale nyheder, sport og kultur fra Hornsherred og omegn på hornsherred.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,41 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotcity.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class HotcityRo(BasicNewsRecipe):
    """Romanian women's urban-culture site hotcity.ro."""
    title = u'Hotcity'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cultura urban\u0103 feminin\u0103'
    publisher = 'Hotcity'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste'
    encoding = 'utf-8'
    cover_url = 'http://www.hotcity.ro/i/bg_header.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    # Keep only the article title and body text.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'articol_title'}), dict(
            name='div', attrs={'class': 'text'})
    ]

    feeds = [
        (u'Feeds', u'http://www.hotcity.ro/rss')
    ]

    def preprocess_html(self, soup):
        # Rewrite images so they render in Adobe Digital Editions.
        return self.adeify_images(soup)

View File

@ -1,40 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
    """Romanian news portal hotnews.ro (Romanian and English feeds)."""
    title = 'Hotnews'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Hotnews'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Stiri,Romania'
    encoding = 'utf-8'
    cover_url = 'http://www.hotnews.ro/images/new/logo.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    # Keep only the headline and the article body container.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'title'}), dict(
            name='div', attrs={'id': 'articleContent'})
    ]

    feeds = [(u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate'), (u'English', u'http://www.hotnews.ro/rss/english')
             ]

    def preprocess_html(self, soup):
        # Rewrite images so they render in Adobe Digital Editions.
        return self.adeify_images(soup)

View File

@ -1,31 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012-2015, Eddie Lau'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
    '''The House News Bloggers (Hong Kong opinion blogs) recipe.'''

    title = u'The House News Bloggers 主場博客'
    __author__ = 'Eddie Lau'
    publisher = 'The House News Bloggers'
    description = 'http://thehousenewsbloggers.net'
    category = 'Chinese, Blogs, Opinion, News, Hong Kong'
    language = 'zh'
    encoding = 'utf-8'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    masthead_url = 'http://thehousenewsbloggers.files.wordpress.com/2014/09/screen-shot-2014-09-11-at-8-55-13.png'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'  # noqa

    feeds = [(u'Latest', u'http://thehousenewsbloggers.net/feed/')]

    # Keep title, author byline, date and the article body; drop the
    # WordPress "post flair" sharing widget.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['title']}),
        dict(name='span', attrs={'class': ['author vcard']}),
        dict(name='time', attrs={'class': ['entry-date']}),
        dict(name='section', attrs={'class': ['entry']}),
    ]
    remove_tags = [dict(name='div', attrs={'id': ['jp-post-flair']})]

    def populate_article_metadata(self, article, soup, first):
        # Use the first image of the first fetched page as the TOC thumbnail.
        if not (first and hasattr(self, 'add_toc_thumbnail')):
            return
        img = soup.find('img')
        if img is not None:
            self.add_toc_thumbnail(article, img['src'])

View File

@ -1,81 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.hrt.hr
'''
import re
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
    '''Create a new tag for *soup*, working across BeautifulSoup versions.

    Newer BeautifulSoup exposes a ``new_tag`` factory on the soup object;
    older versions require constructing a ``Tag`` directly.
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # Legacy BeautifulSoup: build the Tag by hand.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class HRT(BasicNewsRecipe):
    '''News recipe for www.hrt.hr (Croatian national broadcaster).'''

    title = 'HRT: Vesti'
    __author__ = 'Darko Miletic'
    description = 'News from Croatia'
    publisher = 'HRT'
    category = 'news, politics, Croatia, HRT'
    language = 'hr'
    lang = 'hr-HR'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = False

    extra_css = '''@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                   body{font-family: serif1, serif} .article_description{font-family: serif1, serif}
                   .news-single-timedata{color:#20558A; font-size:x-small;}
                   .nsTitle{color:#20558A; font-size:large; font-weight:bold;}
                   a{color:#20558A;}
                   .external-link-new-window{color:#20558A;}
                '''

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Replace U+0110 (Dj with stroke) with U+00D0 (Eth) for font support.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class': 'bigVijest'})]
    remove_tags = [dict(name=['object', 'link', 'embed'])]
    remove_tags_after = dict(name='div', attrs={'class': 'nsAuthor'})

    feeds = [
        (u'Vijesti', u'http://www.hrt.hr/?id=316&type=100&rss=vijesti'),
        (u'Sport', u'http://www.hrt.hr/?id=316&type=100&rss=sport'),
        (u'Zabava', u'http://www.hrt.hr/?id=316&type=100&rss=zabava'),
        (u'Filmovi i serije', u'http://www.hrt.hr/?id=316&type=100&rss=filmovi'),
        (u'Dokumentarni program', u'http://www.hrt.hr/?id=316&type=100&rss=dokumentarci'),
        (u'Glazba', u'http://www.hrt.hr/?id=316&type=100&rss=glazba'),
        (u'Kultura', u'http://www.hrt.hr/?id=316&type=100&rss=kultura'),
        (u'Mladi', u'http://www.hrt.hr/?id=316&type=100&rss=mladi'),
        (u'Manjine', u'http://www.hrt.hr/?id=316&type=100&rss=manjine'),
        (u'Radio', u'http://www.hrt.hr/?id=316&type=100&rss=radio'),
    ]

    def preprocess_html(self, soup):
        # Stamp the document with its language and charset so the
        # conversion pipeline treats it correctly.
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        lang_meta = new_tag(soup, 'meta', [
            ('http-equiv', 'Content-Language'), ('content', self.lang)])
        charset_meta = new_tag(soup, 'meta', [
            ('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')])
        soup.head.insert(0, lang_meta)
        soup.head.insert(1, charset_meta)
        # Drop inline styles, then normalize images.
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)

View File

@ -1,127 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import print_function
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    '''Build a BeautifulSoup ``attrs`` matcher for CSS classes.

    *classes* is a space-separated string; the returned dict matches any
    element whose class attribute shares at least one name with it.
    '''
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': shares_a_class})
class HuffingtonPostRecipe(BasicNewsRecipe):
    '''Download recent stories from The Huffington Post vertical feeds.

    One feed per section (politics, media, business, ...) plus the
    original-reporting tag feed.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal and Archana Raman'
    language = 'en'
    version = 2

    title = u'The Huffington Post'
    publisher = u'huffingtonpost.com'
    category = u'News, Politics'
    description = u'Political Blog'

    oldest_article = 1.1
    max_articles_per_feed = 100

    encoding = 'utf-8'
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # Honour a user-supplied 'days' override of the article age cut-off.
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    keep_only_tags = [
        classes('entry__header entry__body')
    ]
    remove_tags = [
        classes('app-download-interstitial share-bar top-media--video advertisement extra-content'
                ' below-entry entry-inline-subscription-module related-articles')
    ]

    # Feeds from: http://www.huffingtonpost.com/syndication/
    feeds = [
        (u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'),
        (u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'),
        (u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'),
        (u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'),
        (u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'),
        (u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'),
        (u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'),
        (u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'),
        (u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'),
        (u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'),
        (u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'),
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
        #title_permalink{color:black;font-size:large;}
        .date{color:#858585;font-family:"Times New Roman",sans-serif;}
        .comments_datetime v05{color:#696969;}
        .teaser_permalink{font-style:italic;font-size:xx-small;}
        .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
        '''
    # a[href]{color: blue; text-decoration: none; cursor: pointer;}

    def get_article_url(self, article):
        '''Return the URL for *article*.

        Workaround for feedparser behaviour: when an item has more than one
        <link/> element, article.link is empty and article.links contains a
        list of dicts.  Fixes over the previous version:
          * it printed ``"Link:" + link`` *before* checking for None, which
            raised TypeError for items with no link at all;
          * it hard-coded links[0]/links[1] (possible IndexError); we now
            take the first entry with a non-empty href.
        '''
        link = article.get('link')
        if not link:
            for alt in article.get('links') or ():
                href = alt.get('href')
                if href:
                    link = href
                    break
        print('Link:', link)
        return link

    def postprocess_html(self, soup, first_fetch):
        # Strip the reaction widget and pull-quote blocks from the article.
        for tag in soup.findAll('div', text="What's Your Reaction?"):
            tag.extract()
        for tg in soup.findAll('blockquote'):
            tg.extract()
        return soup

View File

@ -1,20 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class Hvidovre_Avis_dk(BasicNewsRecipe):
    '''Local Danish newspaper Hvidovre Avis (lokalavisen.dk feeds).'''

    title = 'Hvidovre avis'
    language = 'da'
    oldest_article = 7
    max_articles_per_feed = 50
    # Let calibre's heuristic cleanup extract the article body.
    auto_cleanup = True

    feeds = [
        ('Lokale nyheder',
         'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Sport',
         'http://hvidovre.lokalavisen.dk/section/senestesportrss'),
        ('112',
         'http://hvidovre.lokalavisen.dk/section/seneste112rss'),
        ('Kultur',
         'http://hvidovre.lokalavisen.dk/section/senestekulturrss'),
        ('Læserbreve',
         'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
'''
Hvidovre Avis
'''
class HvidovreLokalavisen_dk(BasicNewsRecipe):
    '''Hvidovre Avis: local/regional news, sport and culture from Denmark.'''

    __author__ = 'CoderAllan.github.com'
    title = 'Hvidovre Avis'
    description = 'Lokale og regionale nyheder, sport og kultur fra Hvidovre på hvidovre.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'
    oldest_article = 7
    max_articles_per_feed = 50
    # Let calibre's heuristic cleanup extract the article body.
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Hvidovre Avis',
         'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@ -1,25 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1293122276(BasicNewsRecipe):
    '''IBM "Smarter Planet" Tumblr blog recipe.'''

    title = u'Smarter Planet | Tumblr'
    __author__ = 'Jack Mason'
    author = 'IBM Global Business Services'
    publisher = 'IBM'
    language = 'en'
    category = 'news, technology, IT, internet of things, analytics'
    oldest_article = 14
    max_articles_per_feed = 30
    no_stylesheets = True
    use_embedded_content = False
    masthead_url = 'http://www.hellercd.com/wp-content/uploads/2010/09/hero.jpg'

    remove_tags_before = dict(id='item')
    remove_tags_after = dict(id='item')
    # BUG FIX: the original class list contained `'description,' 'disqus'`
    # (adjacent string literals — a missing comma), which silently matched
    # the bogus class 'description,disqus' instead of the two intended
    # classes 'description' and 'disqus'.
    remove_tags = [
        dict(attrs={'class': ['sidebar', 'about', 'footer', 'description',
                              'disqus', 'nav', 'notes', 'disqus_thread']}),
        dict(id=['sidebar', 'footer', 'disqus', 'nav', 'notes',
                 'likes_container', 'description', 'disqus_thread', 'about']),
        dict(name=['script', 'noscript', 'style']),
    ]

    feeds = [(u'Smarter Planet Tumblr',
              u'http://smarterplanet.tumblr.com/mobile/rss')]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 242 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 783 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 230 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 801 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 238 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 753 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 640 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 739 B

Some files were not shown because too many files have changed in this diff Show More