remove dead recipes
These recipes are based on RSS feeds that no longer work.
@ -1,52 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Aksiyon(BasicNewsRecipe):
    """Recipe for the Turkish weekly news magazine Aksiyon Dergisi.

    Every section is read from the site's ``rss?sectionId=<n>`` endpoint.
    """

    # Identity and catalogue metadata.
    title = u'Aksiyon Dergisi'
    __author__ = u'thomass'
    publisher = 'Aksiyon'
    description = 'Haftalık haber dergisi '
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'utf-8'

    # Artwork.
    # NOTE(review): calibre documents ``cover_url``; confirm whether anything
    # reads ``cover_img_url``.
    cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'

    # Download limits and clean-up switches.
    oldest_article = 13
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    auto_cleanup = True
    no_stylesheets = True

    # (section title, sectionId) pairs expanded into full feed URLs.
    # NOTE(review): the last two sections share sectionId 283 - one of them is
    # probably wrong; verify against the site.
    feeds = [
        (section, u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=%d' % section_id)
        for section, section_id in (
            (u'KAPAK', 26),
            (u'ANASAYFA', 0),
            (u'EKONOMİ', 35),
            (u'EKOANALİZ', 284),
            (u'YAZARLAR', 17),
            (u'KİTAPLIK', 13),
            (u'SİNEMA', 14),
            (u'ARKA PENCERE', 27),
            (u'DÜNYA', 32),
            (u'DOSYALAR', 34),
            (u'KARAKUTU', 11),
            (u'KÜLTÜR & SANAT', 12),
            (u'SPOR', 38),
            (u'BİLİŞİM - TEKNOLOJİ', 39),
            (u'3. BOYUT', 172),
            (u'HAYAT BİLGİSİ', 283),
            (u'İŞ DÜNYASI', 283),
        )
    ]
|
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Allerød Nyt: RSS feed: Seneste nyt - alleroed.lokalavisen.dk
|
||||
'''
|
||||
|
||||
|
||||
class AlleroedLokalavisen_dk(BasicNewsRecipe):
    """Recipe for Allerød Nyt, built from the alleroed.lokalavisen.dk RSS sections."""

    title = 'Allerød Nyt - alleroed.lokalavisen.dk'
    __author__ = 'CoderAllan.github.com'
    description = (
        'RSS feed med sidste nyt fra Allerød Nyt. '
        'Der er nye historier flere gange dagligt - få de seneste nyheder '
        'fra dit lokalområde automatisk. '
        'Allerød Nyt. alleroed.lokalavisen.dk'
    )
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (feed title, section slug) pairs; each slug is appended to the common
    # ``/section/`` URL prefix.
    feeds = [
        (label, 'http://alleroed.lokalavisen.dk/section/%s' % slug)
        for label, slug in (
            ('Seneste nyt fra Allerød Nyt', 'senestenytrss'),
            ('Seneste lokale nyheder fra Allerød Nyt', 'senestelokalenyhederrss'),
            ('Seneste sport fra Allerød Nyt', 'senestesportrss'),
            ('Seneste 112 nyheder fra Allerød Nyt', 'seneste112rss'),
            ('Seneste kultur nyheder fra Allerød Nyt', 'senestekulturrss'),
            ('Seneste læserbreve fra Allerød Nyt', 'senestelaeserbreverss'),
            ('Allerød Nyt: RSS feed: Seneste nyt - alleroed.lokalavisen.dk', 'rss'),
        )
    ]
|
||||
|
@ -1,40 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Rasmus Lauritsen <rasmus at lauritsen.info>'
|
||||
'''
|
||||
aoh.dk
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class aoh_dk(BasicNewsRecipe):
    """Recipe for Alt om Herning (aoh.dk), read from the site's single RSS feed."""

    title = 'Alt om Herning'
    __author__ = 'Rasmus Lauritsen'
    publisher = 'Mediehuset Herning Folkeblad'
    description = 'Nyheder fra Herning om omegn'
    category = 'news, local, Denmark'
    language = 'da'
    encoding = 'utf8'

    oldest_article = 14
    max_articles_per_feed = 50
    delay = 1
    use_embedded_content = False
    no_stylesheets = True

    # Stylesheet text, spelled out line by line.
    extra_css = (
        ' body{font-family: Verdana,Arial,sans-serif }\n'
        'img{margin-bottom: 0.4em}\n'
        '.txtContent,.stamp{font-size: small}\n'
    )

    # Reuses the metadata defined above, so it must stay below those names.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u'All news', u'http://aoh.dk/rss.xml')]

    # Keep only the headline and the ``frontpage_body`` span ...
    keep_only_tags = [
        {'name': 'h1'},
        {'name': 'span', 'attrs': {'class': ['frontpage_body']}},
    ]

    # ... and drop any <object> / <link> elements.
    remove_tags = [
        {'name': ['object', 'link']},
    ]
|
@ -1,39 +0,0 @@
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Alternet(BasicNewsRecipe):
    """Recipe for Alternet that downloads the print version of each article."""

    title = u'Alternet'
    __author__ = 'rty'
    publisher = 'alternet.org'
    category = 'News, Magazine'
    description = 'News magazine and online community'
    language = 'en'
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [
        (u'Front Page', u'http://feeds.feedblitz.com/alternet'),
    ]

    use_embedded_content = True
    articles_are_obfuscated = True
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['width', 'align', 'cellspacing']
    conversion_options = {'linearize_tables': True}

    # Temporary files created by get_obfuscated_article are collected here.
    temp_files = []

    def get_article_url(self, article):
        """Return the feed entry's ``link`` value, or None when it has none."""
        return article.get('link')

    def get_obfuscated_article(self, url):
        """Save the print version of the article at *url* to a temporary file.

        Opens *url*, follows the first link whose URL matches
        ``/printversion/<digits>``, writes that response body to a new
        ``PersistentTemporaryFile`` and returns the file's name.
        """
        browser = self.get_browser()
        browser.open(url)
        printable = browser.follow_link(
            url_regex=r'/printversion/[0-9]+', nr=0).read()

        # Register the file before writing to it, as the original code did.
        tmp = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(tmp)
        tmp.write(printable)
        tmp.close()
        return tmp.name
|
@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Amagerbladet
|
||||
'''
|
||||
|
||||
|
||||
class Amagerbladet_dk(BasicNewsRecipe):
    """Recipe for Amagerbladet: the article feed plus the comments feed on minby.dk."""

    title = 'Amagerbladet'
    __author__ = 'CoderAllan.github.com'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # Both feeds live under the same site prefix.
    feeds = [
        (label, 'http://minby.dk/amagerbladet/%s' % path)
        for label, path in (
            ('Amagerbladet', 'feed/'),
            ('Kommentarer til Amagerbladet', 'comments/feed/'),
        )
    ]
|
||||
|
@ -1,13 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AnDrumaMor(BasicNewsRecipe):
    """Recipe for An Druma Mór, using the content embedded in its FeedBurner feed."""

    title = u'An Druma M\xf3r'
    __author__ = "David O'Callaghan"
    language = 'ga'

    use_embedded_content = True
    max_articles_per_feed = 100
    oldest_article = 7

    feeds = [
        (
            u'Nuacht Laeth\xfail',
            u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr',
        ),
    ]
|
@ -1,28 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
    """Recipe for the Anchorage Daily News (adn.com) RSS feeds."""

    title = u'Anchorage Daily News'
    __author__ = 'rty'
    # The original literal was written with four opening quotes, which left a
    # stray leading apostrophe in the description text.
    description = "Alaska's Newspaper"
    publisher = 'http://www.adn.com'
    category = 'news, Alaska, Anchorage'
    language = 'en'  # previously assigned twice with the same value
    encoding = 'utf-8'
    masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'

    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [
        (u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'),
        (u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'),
    ]

    auto_cleanup = True
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}
    extra_css = '\np{font-weight: normal;text-align: justify}\n'
|
||||
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import AutomaticNewsRecipe
|
||||
|
||||
|
||||
class BasicUserRecipe1501589429(AutomaticNewsRecipe):
    """Recipe for the Sardinian-language feed of AnthonyMuroni.it."""

    title = 'AnthonyMuroni.it (sardu)'
    __author__ = 'tzium'
    language = 'sc'

    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 30

    feeds = [('AnthonyMuroni.it (sardu)', 'http://www.anthonymuroni.it/su/feed/')]
|
@ -1,47 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
description = 'Italian daily newspaper - 14-05-2010'
|
||||
|
||||
'''
|
||||
http://www.apcom.NET/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Apcom(BasicNewsRecipe):
    """Recipe for the Italian news agency Apcom (apcom.net).

    Only the ``div`` with id ``ag_center`` of each article page is kept.
    """

    title = u'Apcom'
    __author__ = 'Marini Gabriele'
    description = 'Italian daily newspaper'
    publisher = 'TM News S.p.A.'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'
    cover_url = 'http://www.apcom.net/img/logoAP.gif'

    oldest_article = 7
    max_articles_per_feed = 50
    use_embedded_content = False
    recursion = 100

    no_stylesheets = True
    remove_javascript = True
    conversion_options = {'linearize_tables': True}

    keep_only_tags = [
        dict(name='div', attrs={'id': 'ag_center'}),
    ]

    # (section title, feed file stem) pairs. Building the URL from a stem
    # removes the stray trailing space the original "Globale" URL carried;
    # "Economia" was previously misspelled "Econimia".
    feeds = [
        (section, u'http://www.apcom.net/rss/%s.xml' % stem)
        for section, stem in (
            (u'Globale', u'globale'),
            (u'Politica', u'politica'),
            (u'Cronaca', u'cronaca'),
            (u'Economia', u'economia'),
            (u'Esteri', u'esteri'),
            (u'Cultura', u'cultura'),
            (u'Sport', u'sport'),
        )
    ]
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
appfunds.blogspot.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class app_funds(BasicNewsRecipe):
    """Recipe for the Polish investment blog APP Funds (appfunds.blogspot.com)."""

    title = u'APP Funds'
    __author__ = 'teepel <teepel44@gmail.com>'
    description = 'Blog inwestora dla inwestorów i oszczędzających'
    language = 'pl'
    INDEX = 'http://appfunds.blogspot.com'

    feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True
|
@ -1,112 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'YuLun Shih'
|
||||
|
||||
|
||||
class AppledailyTW(BasicNewsRecipe):
    """Recipe for Apple Daily (Taiwan), one feed per site section or column."""

    __license__ = 'GPL v3'
    __author__ = 'YuLun Shih'
    title = u'蘋果日報 (台灣)'
    description = '24 小時陪你看新聞,我們是最勁爆、最八卦、最貼近大家生活的台灣蘋果日報!'
    cover_url = 'http://i.imgur.com/3pkzQbw.jpg'
    masthead_url = 'http://i.imgur.com/otLZwQq.gif'
    oldest_article = 1.5
    max_articles_per_feed = 25
    auto_cleanup = True
    simultaneous_downloads = 25
    language = 'zh_TW'
    tags = 'news, chinese, Taiwan'
    category = 'news, chinese, Taiwan'
    publisher = u'蘋果日報 (台灣)'
    publication_type = 'newspaper'
    no_stylesheets = True
    remove_empty_feeds = True
    extra_css = 'img { display:block; margin: 5px auto; width: 50%;}'
    remove_tags = [
        {'name': 'a', 'attrs': {'href': 'mailto:.*?'}},
        {'name': 'a', 'attrs': {'target': '_blank'}},
        {'name': 'hr'},
    ]
    # Reuses the metadata defined above, so it must stay below those names.
    conversion_options = {
        'title': title,
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'linearize_tables': True,
    }

    # (feed title, kind, type id) triples. ``kind`` is 'sec' for a section
    # feed or 'col' for a column feed; both fill the same URL template.
    feeds = [
        (name, 'http://www.appledaily.com.tw/rss/create/kind/%s/type/%d' % (kind, type_id))
        for name, kind, type_id in (
            (u'頭條', 'sec', 1077),
            (u'要聞', 'sec', 11),
            (u'政治', 'sec', 151),
            (u'社會', 'sec', 1066),
            (u'生活', 'sec', 2724),
            (u'地方綜合', 'sec', 1076),
            (u'論壇', 'sec', 824711),
            (u'法庭', 'sec', 152),
            (u'暖流', 'sec', 9499),
            (u'投訴', 'sec', 16287),
            (u'娛樂名人', 'sec', 1600),
            (u'木瓜霞吐槽', 'sec', 26153),
            (u'直擊好萊塢', 'sec', 2890),
            (u'亞洲哈燒星', 'sec', 6270),
            (u'名人時尚', 'sec', 1403917),
            (u'強檔新片', 'col', 35151804),
            (u'勇闖星宅門', 'col', 1761666),
            (u'國際頭條', 'sec', 31488833),
            (u'國際新聞', 'sec', 1697),
            (u'中國新聞', 'sec', 13),
            (u'雙語天下', 'sec', 1945241),
            (u'體育焦點', 'sec', 2153),
            (u'大運動場', 'sec', 16289),
            (u'籃球瘋', 'sec', 16),
            (u'投打對決', 'sec', 2154),
            (u'足球新聞', 'sec', 31320925),
            (u'運彩分析', 'sec', 31320926),
            (u'財經焦點', 'sec', 14),
            (u'財經熱門話題', 'sec', 1059),
            (u'國際財經', 'sec', 1052),
            (u'投資理財', 'sec', 1028),
            (u'科技3C', 'sec', 1048),
            (u'金融保險', 'sec', 1078),
            (u'綜合產業', 'sec', 3847),
            (u'頭家生意', 'sec', 5050),
            (u'地產焦點', 'sec', 1061),
            (u'地產王', 'sec', 31425399),
            (u'中古好屋王', 'sec', 31477902),
            (u'家居王', 'sec', 31477866),
            (u'豪宅王', 'sec', 31477901),
            (u'蘋論', 'col', 974972),
            (u'司馬觀點', 'col', 1635487),
            (u'人間異語', 'col', 2262677),
            (u'論壇', 'col', 836825),
            (u'副刊頭條', 'sec', 31477907),
            (u'讓我更美', 'sec', 22),
            (u'流行尖端', 'sec', 2452),
            (u'消費高手', 'sec', 1619),
            (u'旅遊與探險', 'sec', 24),
            (u'美食天地', 'sec', 1898),
            (u'健康醫療', 'sec', 177600),
            (u'讓我自己來', 'sec', 31478018),
            (u'寵物 DIY', 'sec', 31478020),
            (u'運勢與彩券', 'sec', 6199),
            (u'3C通訊', 'sec', 23),
            (u'車市快遞', 'sec', 1540),
            (u'名采', 'sec', 25),
            (u'人間事', 'sec', 17822),
        )
    ]

    def preprocess_raw_html(self, raw_html, url):
        """Strip promotional links and the site suffix from the page title.

        Two substitutions are applied to *raw_html*:

        1. every ``<a href="...">`` element whose text up to its closing
           ``</a>`` contains ``<br><br>`` is removed;
        2. a ``<title>`` of the form ``headline | rest`` is reduced to
           ``<title>headline</title>``.

        *url* is accepted for interface compatibility and is not used.
        Returns the modified markup.
        """
        raw_html = re.sub(r'<a href=".*?<br><br>.*?</a>', '', raw_html)
        # The original call passed (pattern, replacement) as one tuple and
        # therefore raised TypeError. Its ``[\\s]`` class matched a backslash
        # or the letter "s" instead of whitespace, and its replacement emitted
        # a literal backslash before ``/title``.
        raw_html = re.sub(
            r'<title>(.*?)\s+\|.*</title>', r'<title>\1</title>', raw_html)
        return raw_html
|
@ -1,24 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Ara(BasicNewsRecipe):
    """Recipe for the Catalan-language newspaper Ara (ara.cat)."""

    title = 'Ara (diari)'
    __author__ = 'santboia'
    # Fixed the "pagew" typo in the user-visible description.
    description = ('Ara.cat is an online Catalan-language newspaper edited in Barcelona.'
                   ' It is ranked 2nd by unique monthly visitors among Catalan newspapers,'
                   ' with over 2 million monthly visitors and ca. 12 million pages visited as at March 2022.')
    oldest_article = 2  # days
    max_articles_per_feed = 30  # articles
    auto_cleanup = True
    language = 'ca'
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'url'}

    feeds = [
        ('Ara', 'https://www.ara.cat/rss/'),
    ]
|
@ -1,25 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'Ruben Pollan <meskio@sindominio.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1335656316(BasicNewsRecipe):
    """Recipe for AraInfo.org, one feed per site category."""

    title = u'AraInfo.org'
    __author__ = 'Ruben Pollan'
    description = 'Regional newspaper from Aragon'
    language = 'es'
    cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg'

    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 7

    # (feed title, category slug) pairs expanded into category feed URLs.
    feeds = [
        (label, u'http://arainfo.org/category/%s/feed/' % slug)
        for label, slug in (
            (u'Movimientos', u'movimientos'),
            (u'Econom\xeda', u'economia'),
            (u'Ecolog\xeda', u'ecologia'),
            (u'Culturas', u'culturas'),
            (u'Altavoz', u'altavoz'),
        )
    ]
|
@ -1,83 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.arabianbusiness.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Arabian_Business(BasicNewsRecipe):
    """Recipe for Arabian Business that downloads the printer view of each article."""

    title = 'Arabian Business'
    __author__ = 'Darko Miletic'
    description = (
        'Comprehensive Guide to Middle East Business & Gulf Industry News including,'
        'Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,'
        'Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,'
        'Jobs & UAE guide.Top Gulf & Dubai Business News.'
    )
    publisher = 'Arabian Business Publishing Ltd.'
    language = 'en'
    encoding = 'utf8'
    publication_type = 'newsportal'
    masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'

    oldest_article = 2
    max_articles_per_feed = 200
    remove_empty_feeds = True
    use_embedded_content = False
    no_stylesheets = True

    # Stylesheet text, spelled out line by line.
    extra_css = (
        '\n'
        'body{font-family: Georgia,serif }\n'
        'img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}\n'
        '.byline,.dateline{font-size: small; display: inline; font-weight: bold}\n'
        'ul{list-style: none outside none;}\n'
    )

    # Reuses the metadata defined above, so it must stay below those names.
    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language,
    }

    remove_tags_before = {'id': 'article-title'}
    remove_tags_before = dict(attrs=remove_tags_before)
    remove_tags = [
        {'name': ['meta', 'link', 'base', 'iframe', 'embed', 'object']},
        {'attrs': {'class': 'printfooter'}},
    ]
    remove_attributes = ['lang']

    # (feed title, site path) pairs; each path is wrapped in the common
    # ``http://www.arabianbusiness.com/<path>/?service=rss`` URL.
    feeds = [
        (label, u'http://www.arabianbusiness.com/%s/?service=rss' % path)
        for label, path in (
            (u'Africa', u'world/Africa'),
            (u'Americas', u'world/americas'),
            (u'Asia Pacific', u'world/asia-pacific'),
            (u'Europe', u'world/europe'),
            (u'Middle East', u'world/middle-east'),
            (u'South Asia', u'world/south-asia'),
            (u'Banking & Finance', u'industries/banking-finance'),
            (u'Construction', u'industries/construction'),
            (u'Education', u'industries/education'),
            (u'Energy', u'industries/energy'),
            (u'Healthcare', u'industries/healthcare'),
            (u'Media', u'industries/media'),
            (u'Real Estate', u'industries/real-estate'),
            (u'Retail', u'industries/retail'),
            (u'Technology', u'industries/technology'),
            (u'Transport', u'industries/transport'),
            (u'Travel', u'industries/travel-hospitality'),
            (u'Equities', u'markets/equities'),
            (u'Commodities', u'markets/commodities'),
            (u'Currencies', u'markets/currencies'),
            (u'Market Data', u'markets/market-data'),
            (u'Comment', u'opinion/comment'),
            (u'Think Tank', u'opinion/think-tank'),
            (u'Arts', u'lifestyle/arts'),
            (u'Cars', u'lifestyle/cars'),
            (u'Food', u'lifestyle/food'),
            (u'Sport', u'lifestyle/sport'),
        )
    ]

    def print_version(self, url):
        """Return *url* with the printer-view query string appended."""
        printer_query = '?service=printer&page='
        return url + printer_query

    def preprocess_html(self, soup):
        """Drop inline styles and flatten simple links to their text.

        Every element carrying a ``style`` attribute loses it, and every
        ``<a>`` whose ``.string`` is not None is replaced by that string.
        Returns the same *soup* object.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Leave links without a single string value untouched.
                continue
            anchor.replaceWith(label)
        return soup
|
@ -1,34 +0,0 @@
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Arcadia_BBS(BasicNewsRecipe):
    """Recipe that downloads stories from the Arcadia BBS on mai-net.net.

    The stories to fetch are chosen by putting their numeric ids into
    ``sid_list``; each story becomes one section of the output.
    """

    title = u'Arcadia'
    __author__ = 'Masahiro Hasegawa'
    language = 'ja'
    encoding = 'utf8'
    filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com', ]
    timefmt = '[%Y/%m/%d]'
    remove_tags_before = dict(name='a', attrs={'name': 'kiji'})

    sid_list = []  # ids of the stories to download

    def parse_index(self):
        """Build the ``(section title, articles)`` list for every id in ``sid_list``.

        For each story id the dump page is fetched and every link whose
        ``href`` matches ``.*?kiji`` is collected, except the last two.
        Each section is titled after its first article.

        Stories that yield no article links are skipped; indexing the first
        article of an empty list previously raised IndexError.
        """
        result = []
        for sid in self.sid_list:
            soup = self.index_to_soup(
                'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d' % sid)
            links = soup.findAll('a', attrs={'href': re.compile(r'.*?kiji')})
            articles = [
                dict(
                    title=link.string,
                    url="http://www.mai-net.net" + link['href'],
                    # Fourth <td> of the enclosing row, minus its last six
                    # characters.
                    date=link.parent.parent.parent.findAll('td')[3].string[:-6],
                    description='',
                    content='',
                )
                for link in links[:-2]
            ]
            if not articles:
                continue
            result.append((articles[0]['title'], articles))
        return result
|
@ -1,97 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, jolo'
|
||||
'''
|
||||
azrepublic.com
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
    """Recipe for The Arizona Republic, built from the azcentral.com RSS feeds."""

    title = u'AZRepublic'
    __author__ = 'Jim Olo'
    language = 'en'
    description = (
        "The Arizona Republic is Arizona's leading provider of news and information, "
        "and has published a daily newspaper in Phoenix for more than 110 years"
    )
    publisher = 'AZRepublic/AZCentral'
    masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
    cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
    category = 'news, politics, USA, AZ, Arizona'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif }'
        ' .headline {font-size: medium}'
        ' .introduction{font-weight: bold}'
        ' .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small}'
        ' .story-feature h2{text-align: center; text-transform: uppercase} '
    )

    remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style']

    # The original listed the div ids in many small specs (one id twice);
    # they are merged here into one spec per tag/attribute combination,
    # which selects the same elements.
    remove_tags = [
        dict(name='div', attrs={'id': [
            'slidingBillboard', 'top728x90', 'subindex-header', 'topSearch',
            'simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav',
            'carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop',
            'nav', 'mp', 'subnav', 'jobsDrop',
            'fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox',
            'storyTopHomes', 'openRight', 'footerwrap', 'copyright',
            'blogsHed', 'blog_comments', 'blogByline', 'blogTopics',
            'membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol',
            'ttdHeader', 'ttdTimeWeather', 'deals-header-wrap',
            'todoTopSearchBar', 'byline clearfix', 'subdex-topnav',
            'onespot_nextclick',
        ]}),
        dict(name='div', attrs={
            'class': ['articletools clearfix', 'floatRight']}),
        dict(name='h6', attrs={'class': ['section-header']}),
        dict(name='h1', attrs={'id': ['SEOtext']}),
        dict(name='table', attrs={'class': ['ap-mediabox-table']}),
        dict(name='p', attrs={'class': ['ap_para']}),
        dict(name='span', attrs={'class': ['source-org vcard', 'org fn']}),
        dict(name='a', attrs={'href': [
            '#comments',
            'http://hosted2.ap.org/APDEFAULT/privacy',
            'http://hosted2.ap.org/APDEFAULT/terms',
        ]}),
    ]

    # (feed title, path) pairs under http://rssfeeds.azcentral.com/.
    # "Surprise" was previously misspelled "Suprise".
    feeds = [
        (label, 'http://rssfeeds.azcentral.com/%s' % path)
        for label, path in (
            ('Local', 'phoenix/local'),
            ('Nation', 'phoenix/nation'),
            ('Business', 'phoenix/business'),
            ('Politics', 'phoenix/politics'),
            ('Fact check', 'phoenix/fact-check'),
            ('Opinions', 'phoenix/opinions'),
            ('Things to Do', 'phoenix/thingstodo'),
            ('Events', 'phoenix/events'),
            ('Dining', 'phoenix/dining'),
            ('Movies & TV', 'phoenix/moviesandtv'),
            ('Kids', 'phoenix/kids'),
            ('Arts', 'phoenix/arts'),
            ('Music', 'phoenix/music'),
            ('Celebrities', 'phoenix/celebrities'),
            ('Travel & Explore', 'phoenix/travelandexplore'),
            ('Arizona History', 'phoenix/az-history'),
            ('Sports', 'phoenix/sports'),
            ('Suns', 'phoenix/suns'),
            ('Diamondbacks', 'phoenix/diamondbacks'),
            ('Coyotes', 'phoenix/coyotes'),
            ('ASU', 'phoenix/asu'),
            ('UA', 'phoenix/ua'),
            ('High School Sports', 'high-school-sports'),
            ('Chandler', 'phoenix/chandler'),
            ('Gilbert', 'phoenix/gilbert'),
            ('Glendale & Peoria', 'phoenix/glendaleandpeoria'),
            ('Mesa', 'phoenix/mesa'),
            ('Phoenix', 'phoenix/phoenix'),
            ('Scottsdale', 'phoenix/scottsdale'),
            ('SW Valley', 'phoenix/swvalley'),
            ('Surprise', 'phoenix/surprise'),
            ('Tempe & Ahwatukee', 'phoenix/tempe'),
        )
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Bruce <bruce at dotdoh.com>'
|
||||
'''
|
||||
asiaone.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AsiaOne(BasicNewsRecipe):
    """Recipe for the AsiaOne (asiaone.com) RSS feeds."""

    # Descriptive metadata
    title = u'AsiaOne'
    __author__ = 'Bruce'
    description = 'News from Singapore Press Holdings Portal'
    language = 'en_SG'

    # Feed limits
    oldest_article = 2
    max_articles_per_feed = 100

    # Page clean-up settings
    no_stylesheets = False
    remove_javascript = True
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'headline'}},
        {'name': 'div', 'attrs': {'class': ['article-content', 'person-info row']}},
    ]
    remove_tags = [{'name': 'span', 'attrs': {'class': 'footer'}}]

    # (section title, feed URL) pairs
    feeds = [
        ('Singapore', 'http://asiaone.feedsportal.com/c/34151/f/618415/index.rss'),
        ('Asia', 'http://asiaone.feedsportal.com/c/34151/f/618416/index.rss'),
    ]
|
@ -1,67 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.athensnews.gr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AthensNews(BasicNewsRecipe):
    """Recipe for Athens News (www.athensnews.gr).

    Articles are fetched through the site's print view (see
    ``print_version``) and inline ``style`` attributes are stripped
    before conversion (see ``preprocess_html``).
    """

    title = 'Athens News'
    __author__ = 'Darko Miletic'
    description = 'Greece in English since 1952'
    publisher = 'NEP Publishing Company SA'
    category = 'news, politics, Greece, Athens'
    oldest_article = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_GR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .big{font-size: xx-large; font-family: Georgia,serif}
        .articlepubdate{font-size: small; color: gray; font-family: Georgia,serif}
        .lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block}
    """

    # The key is 'comments' (plural), matching the other recipes; the
    # previous 'comment' key did not correspond to any conversion option.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    remove_tags = [
        dict(name=['meta', 'link']),
    ]
    keep_only_tags = [
        dict(name='span', attrs={'class': 'big'}),
        dict(name='td', attrs={'class': ['articlepubdate', 'text']}),
    ]
    remove_attributes = ['lang']

    # (section title, feed URL) pairs
    feeds = [
        (u'News', u'http://www.athensnews.gr/category/1/feed'),
        (u'Politics', u'http://www.athensnews.gr/category/8/feed'),
        (u'Business', u'http://www.athensnews.gr/category/2/feed'),
        (u'Economy', u'http://www.athensnews.gr/category/11/feed'),
        (u'Community', u'http://www.athensnews.gr/category/5/feed'),
        (u'Arts', u'http://www.athensnews.gr/category/3/feed'),
        (u'Living in Athens', u'http://www.athensnews.gr/category/7/feed'),
        (u'Sports', u'http://www.athensnews.gr/category/4/feed'),
        (u'Travel', u'http://www.athensnews.gr/category/6/feed'),
        (u'Letters', u'http://www.athensnews.gr/category/44/feed'),
        (u'Media', u'http://www.athensnews.gr/multimedia/feed'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``?action=print`` query string appended."""
        return url + '?action=print'

    def preprocess_html(self, soup):
        """Remove the ``style`` attribute from every tag that has one.

        Returns the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
||||
__version__ = 'v1.02 Marini Gabriele '
|
||||
__date__ = '14062010'
|
||||
__description__ = 'Italian daily newspaper'
|
||||
|
||||
'''
|
||||
http://www.corrieredellosport.it/
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Auto(BasicNewsRecipe):
    """Recipe for the auto.it RSS feeds (news, road tests, new models)."""

    # Descriptive metadata
    title = u'Auto'
    __author__ = 'Gabriele Marini'
    description = 'Auto and Formula 1'
    publisher = 'CONTE Editore'
    category = 'Sport'
    language = 'it'
    cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
    timefmt = '[%a, %d %b, %Y]'

    # Feed limits and download behaviour
    oldest_article = 60
    max_articles_per_feed = 30
    use_embedded_content = False
    recursion = 10

    # Page clean-up settings
    remove_javascript = True
    no_stylesheets = True

    # Option values assembled from the metadata above
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]
    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    # Note: some class values intentionally keep their trailing space.
    keep_only_tags = [
        dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
        dict(name='h2', attrs={'class': ['tit_Article']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
        dict(name='table', attrs={'class': ['table_A']}),
        dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
        dict(name='testoscheda'),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Tutte le News', u'http://www.auto.it/rss/articoli.xml'),
        (u'Prove su Strada', u'http://www.auto.it/rss/prove+6.xml'),
        (u'Novit\xe0', u'http://www.auto.it/rss/novita+3.xml'),
    ]
|
@ -1,15 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoBlog(BasicNewsRecipe):
    """Recipe for the Auto Blog (www.autoblog.com) RSS feed."""

    title = u'Auto Blog'
    __author__ = 'Welovelucy'
    description = 'Auto industry news'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100

    # Single (section title, feed URL) pair
    feeds = [
        (u'AutoBlog', u'http://www.autoblog.com/rss.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``print/`` path segment appended."""
        print_suffix = 'print/'
        return url + print_suffix
|
@ -1,92 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
||||
__version__ = 'v1.02 Marini Gabriele '
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian daily newspaper'
|
||||
|
||||
'''
|
||||
http://www.corrieredellosport.it/
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoPR(BasicNewsRecipe):
    """Recipe for the auto.it road-test feed ("Prove su Strada").

    The index is built by hand in ``parse_index``: every ``<item>`` of the
    RSS feed becomes its own section, and ``create_links_append`` expands
    the item's link into one article per sub-page of the road test.
    """

    __author__ = 'Gabriele Marini'
    description = 'Auto and Formula 1'

    cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'

    title = u'Auto Prove'
    publisher = 'CONTE Editore'
    category = 'Sport'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 60
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 100

    remove_javascript = True
    no_stylesheets = True

    # Note: some class values intentionally keep their trailing space.
    keep_only_tags = [
        dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
        dict(name='h2', attrs={'class': ['tit_Article']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
        dict(name='table', attrs={'class': ['table_A']}),
        dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
        dict(name='testoscheda'),
    ]

    def parse_index(self):
        """Build the recipe index from the road-test RSS feed.

        Returns:
            A list of ``(section_title, articles)`` tuples, one per RSS
            ``<item>``, where ``articles`` is the list produced by
            ``create_links_append`` for that item.
        """
        feeds = []
        sources = [
            ("Prove su Strada", "http://www.auto.it/rss/prove+6.xml"),
        ]
        # The feed's own title is unused: sections are named after the items.
        for _feed_title, url in sources:
            channel = self.index_to_soup(url).find('channel')
            if channel is None:
                # Without a <channel> there is nothing to iterate over; skip
                # this source instead of failing on None.findAllNext.
                self.log.warn('No <channel> element found in', url)
                continue

            for item in channel.findAllNext('item'):
                section_title = self.tag_to_string(item.title)
                date = self.tag_to_string(item.pubDate)
                description = self.tag_to_string(item.description)
                link = self.tag_to_string(item.guid)
                articles = self.create_links_append(link, date, description)
                if articles:
                    feeds.append((section_title, articles))
        return feeds

    def create_links_append(self, link, date, description):
        """Expand one road-test link into the articles for its sub-pages.

        Args:
            link: URL taken from the RSS item; sub-page URLs are derived by
                replacing the substring ``'scheda'`` in it.
            date: Publication date string, used for the first article only.
            description: Item description, used for the first article only.

        Returns:
            A list of article dicts with the keys ``title``, ``url``,
            ``description`` and ``date``.
        """
        current_articles = [
            {'title': 'Generale', 'url': link,
             'description': description, 'date': date},
        ]
        # (article title, replacement for 'scheda' in the URL, description).
        # The 'Design' entry keeps its original description text 'scheda'.
        sub_pages = (
            ('Design', 'design', 'scheda'),
            ('Interni', 'interni', 'Interni'),
            ('Tecnica', 'tecnica', 'Tecnica'),
            ('Su Strada', 'su_strada', 'Su Strada'),
            ('Pagella', 'pagella', 'Pagella'),
            ('Rilevamenti', 'telemetria', 'Rilevamenti'),
        )
        for page_title, page_slug, page_description in sub_pages:
            current_articles.append({
                'title': page_title,
                'url': link.replace('scheda', page_slug),
                'description': page_description,
                'date': '',
            })
        return current_articles
|
@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
auto-bild.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoBild(BasicNewsRecipe):
    """Recipe for Auto Bild Romania (auto-bild.ro)."""

    # Descriptive metadata
    title = u'Auto Bild'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Auto'
    publisher = 'Auto Bild'
    category = 'Ziare,Reviste,Auto'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.auto-bild.ro/images/autobild.gif'

    # Feed limits and download behaviour
    oldest_article = 50
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page clean-up rules
    keep_only_tags = [
        dict(name='div', attrs={'class': 'box_2 articol clearfix'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['detail']}),
        dict(name='a', attrs={'id': ['zoom_link']}),
        dict(name='div', attrs={'class': ['icons clearfix']}),
        dict(name='div', attrs={'class': ['pub_articol clearfix']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': ['pub_articol clearfix']}),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'Feeds', u'http://www.auto-bild.ro/rss/toate'),
    ]

    def preprocess_html(self, soup):
        """Return the result of passing ``soup`` through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,29 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class autogids(BasicNewsRecipe):
    """Recipe for the Automatiseringgids IT news feed."""

    # Descriptive metadata
    title = u'Automatiseringgids IT'
    __author__ = 'DrMerry'
    description = 'IT-nieuws van Automatiseringgids'
    publisher = 'AutomatiseringGids'
    category = 'Nieuws, IT, Nederlandstalig'
    language = 'nl'
    publication_type = 'newspaper'
    encoding = 'utf-8'
    timefmt = ' [%a, %d %B, %Y]'
    cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'

    # Feed limits and download behaviour
    oldest_article = 7
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # Page clean-up settings
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [
        dict(name='div', attrs={'class': ['content']}),
    ]

    # Every match of this pattern is replaced with the empty string.
    preprocess_regexps = [
        (
            re.compile(
                r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda match: '',
        ),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx'),
    ]
|
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
|
||||
'''
|
||||
www.autosport.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class autosport(BasicNewsRecipe):
    """Recipe for the Autosport (www.autosport.com) news feed."""

    # Descriptive metadata
    title = u'Autosport'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'  # noqa
    language = 'en_GB'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'

    # Feed limits
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # Page clean-up settings
    remove_javascript = True
    no_stylesheets = True

    # Same four entries, in the same order, that were previously appended
    # one at a time to an initially empty list.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'news_headline'}),
        dict(name='td', attrs={'class': 'news_article_author'}),
        dict(name='td', attrs={'class': 'news_article_date'}),
        dict(name='p'),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml'),
    ]
|
@ -1,49 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
avantaje.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Avantaje(BasicNewsRecipe):
    """Recipe for Avantaje (avantaje.ro)."""

    # Descriptive metadata
    title = u'Avantaje'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Avantaje'
    category = 'Ziare,Reviste,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.avantaje.ro/images/default/logo.gif'

    # Feed limits and download behaviour
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page clean-up rules
    keep_only_tags = [
        dict(name='div', attrs={'id': 'articol'}),
        dict(name='div', attrs={'class': 'gallery clearfix'}),
        dict(name='div', attrs={'align': 'justify'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ['color_sanatate_box']}),
        dict(name='div', attrs={'class': ['nav']}),
        dict(name='div', attrs={'class': ['voteaza_art']}),
        dict(name='div', attrs={'class': ['bookmark']}),
        dict(name='div', attrs={'class': ['links clearfix']}),
        dict(name='div', attrs={'class': ['title']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': ['title']}),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/Avantaje'),
    ]

    def preprocess_html(self, soup):
        """Return the result of passing ``soup`` through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,49 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
aventurilapescuit.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AventuriLaPescuit(BasicNewsRecipe):
    """Recipe for Aventuri La Pescuit (aventurilapescuit.ro)."""

    # Descriptive metadata
    title = u'Aventuri La Pescuit'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Aventuri La Pescuit'
    publisher = 'Aventuri La Pescuit'
    category = 'Ziare,Pescuit,Hobby'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'

    # Feed limits and download behaviour
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page clean-up rules
    keep_only_tags = [
        dict(name='div', attrs={'id': 'Article'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['right option']}),
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]
    remove_tags_after = [
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1'),
    ]

    def preprocess_html(self, soup):
        """Return the result of passing ``soup`` through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,47 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
b365.realitatea.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class b365Realitatea(BasicNewsRecipe):
    """Recipe for b365 Realitatea (b365.realitatea.net)."""

    title = u'b365 Realitatea'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'b365 Realitatea'
    description = u'b365 Realitatea'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Romania,Bucuresti'
    encoding = 'utf-8'
    cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'newsArticle'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'date'}),
        # Tag name corrected from the typo 'dic', which is not an HTML
        # element and therefore could never match the toolbox container.
        dict(name='div', attrs={'class': 'addthis_toolbox addthis_default_style'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'RelevantiWidget'}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id': 'RelevantiWidget'}),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'\u0218tiri', u'http://b365.realitatea.net/rss-full/'),
    ]

    def preprocess_html(self, soup):
        """Return the result of passing ``soup`` through ``adeify_images``."""
        return self.adeify_images(soup)
|
@ -1,70 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
b92.net
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class B92(BasicNewsRecipe):
    """Recipe for the B92 news portal (b92.net).

    Besides the declarative clean-up rules, ``preprocess_html`` strips
    inline styles and replaces simple links with their text.
    """

    title = 'B92'
    __author__ = 'Darko Miletic'
    description = 'Najnovije vesti iz Srbije, regiona i sveta, aktuelne teme iz sveta politike, ekonomije, drustva, foto galerija, kolumne'
    publisher = 'B92'
    category = 'news, politics, Serbia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1250'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://b92s.net/v4/img/new-logo.png'
    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,Helvetica,sans1,sans-serif}
        .article-info2,.article-info1{text-transform: uppercase; font-size: small}
        img{display: block}
        .sms{font-weight: bold}
    """

    # The key is 'comments' (plural), matching the other recipes; the
    # previous 'comment' key did not correspond to any conversion option.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    preprocess_regexps = [
        # Replace U+0110 with U+00D0 in the raw page text.
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
        # Replace everything from <html up to <body> with a minimal header.
        (re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE),
         lambda match: '<html><head><title>something</title></head><body>'),
    ]

    keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})]
    remove_attributes = [
        'width', 'height', 'align',
        'hspace', 'vspace', 'border', 'lang', 'xmlns:fb',
    ]
    remove_tags = [
        dict(name=['embed', 'link', 'base', 'meta', 'iframe']),
        dict(attrs={'id': 'social'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml'),
        (u'Biz', u'http://www.b92.net/info/rss/biz.xml'),
        (u'Sport', u'http://www.b92.net/info/rss/sport.xml'),
        (u'Zivot', u'http://www.b92.net/info/rss/zivot.xml'),
        (u'Kultura', u'http://www.b92.net/info/rss/kultura.xml'),
        (u'Automobili', u'http://www.b92.net/info/rss/automobili.xml'),
        (u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles and flatten simple links.

        Every tag with a ``style`` attribute loses it, and every ``<a>``
        whose ``.string`` is not ``None`` is replaced by that string.
        Returns the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
@ -1,57 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
babyonline.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BabyOnline(BasicNewsRecipe):
    """Recipe for Baby Online (babyonline.ro)."""

    # Descriptive metadata
    title = u'Baby Online'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'De la p\u0103rinte la p\u0103rinte'
    publisher = u'Baby Online'
    category = 'Ziare,Reviste,Copii,Mame'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.babyonline.ro/images/default/logo.gif'

    # Feed limits and download behaviour
    oldest_article = 50
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page clean-up rules
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article_container'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': 'bar_nav'}),
        dict(name='div', attrs={'id': 'service_send'}),
        dict(name='div', attrs={'id': 'other_videos'}),
        dict(name='div', attrs={'class': 'dot_line_yellow'}),
        dict(name='a', attrs={'class': 'print'}),
        dict(name='a', attrs={'class': 'email'}),
        dict(name='a', attrs={'class': 'YM'}),
        dict(name='a', attrs={'class': 'comment'}),
        dict(name='div', attrs={'class': 'tombstone_cross'}),
        dict(name='span', attrs={'class': 'liketext'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'service_send'}),
    ]

    # Single (section title, feed URL) pair
    feeds = [
        (u'Feeds', u'http://www.babyonline.ro/rss_homepage.xml'),
    ]

    def preprocess_html(self, soup):
        """Return the result of passing ``soup`` through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,30 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BadaniaNet(BasicNewsRecipe):
    """Recipe for the badania.net category feeds."""

    title = u'badania.net'
    __author__ = 'fenuks'
    description = u'chcesz wiedzieć więcej?'
    category = 'science'
    language = 'pl'
    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
    extra_css = '.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    # Every match of this heading is replaced with the empty string.
    preprocess_regexps = [
        (re.compile(r"<h4>Tekst sponsoruje</h4>", re.IGNORECASE), lambda m: ''),
    ]
    remove_empty_feeds = True
    use_embedded_content = False
    remove_tags = []
    keep_only_tags = [dict(name='article')]

    # (section title, feed URL) pairs
    feeds = [
        (u'Psychologia', u'http://badania.net/category/psychologia/feed/'),
        (u'Technologie', u'http://badania.net/category/technologie/feed/'),
        (u'Biologia', u'http://badania.net/category/biologia/feed/'),
        (u'Chemia', u'http://badania.net/category/chemia/feed/'),
        # This entry previously lacked the trailing 'feed/' that all other
        # categories use, so it pointed at the category page, not the feed.
        (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'),
        (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/'),
    ]
|
@ -1,62 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
balkaninsight.com
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BalkanInsight(BasicNewsRecipe):
    """Recipe for Balkan Insight (balkaninsight.com), one feed per country."""

    title = 'Balkan Insight'
    __author__ = 'Darko Miletic'
    description = 'Get exclusive news and in depth information on business, politics, events and lifestyle in the Balkans. Free and exclusive premium content.'
    publisher = 'BalkanInsight.com'
    # Spelling corrected from 'Balcans'.
    category = 'news, politics, Balkans'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.balkaninsight.com/templates/balkaninsight/images/aindex_02.jpg'
    language = 'en'
    publication_type = 'newsportal'
    remove_empty_feeds = True
    extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif}
        img{margin-bottom: 0.8em}
        h1,h2,h3,h4{font-family: Times,Georgia,serif1,serif; color: #24569E}
        .article-deck {color:#777777; font-size: small;}
        .main_news_img{font-size: small} """

    # The key is 'comments' (plural), matching the other recipes; the
    # previous 'comment' key did not correspond to any conversion option.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Replace U+0110 with U+00D0 in the raw page text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags = [
        dict(name=['object', 'link', 'iframe']),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Albania', u'http://www.balkaninsight.com/?tpl=653&tpid=144'),
        (u'Bosnia', u'http://www.balkaninsight.com/?tpl=653&tpid=145'),
        (u'Bulgaria', u'http://www.balkaninsight.com/?tpl=653&tpid=146'),
        (u'Croatia', u'http://www.balkaninsight.com/?tpl=653&tpid=147'),
        (u'Kosovo', u'http://www.balkaninsight.com/?tpl=653&tpid=148'),
        (u'Macedonia', u'http://www.balkaninsight.com/?tpl=653&tpid=149'),
        (u'Montenegro', u'http://www.balkaninsight.com/?tpl=653&tpid=150'),
        (u'Romania', u'http://www.balkaninsight.com/?tpl=653&tpid=151'),
        (u'Serbia', u'http://www.balkaninsight.com/?tpl=653&tpid=152'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles, then pass the soup through ``adeify_images``.

        Returns whatever ``adeify_images`` returns for the modified soup.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
|
Before Width: | Height: | Size: 195 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 528 B |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 2.4 KiB |
Before Width: | Height: | Size: 386 B |
Before Width: | Height: | Size: 379 B |
Before Width: | Height: | Size: 544 B |
Before Width: | Height: | Size: 568 B |
Before Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 537 B |
Before Width: | Height: | Size: 1.9 KiB |
Before Width: | Height: | Size: 441 B |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 1.9 KiB |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 476 B |
Before Width: | Height: | Size: 3.5 KiB |
Before Width: | Height: | Size: 872 B |
Before Width: | Height: | Size: 701 B |
Before Width: | Height: | Size: 564 B |
Before Width: | Height: | Size: 192 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 226 B |
Before Width: | Height: | Size: 624 B |
Before Width: | Height: | Size: 306 B |