This commit is contained in:
Kovid Goyal 2020-03-15 18:58:33 +05:30
commit 1a3d3600b1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
11 changed files with 0 additions and 681 deletions

View File

@ -1,76 +0,0 @@
# coding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2011-2016, Hassan Williamson <haz at hazrpg.co.uk>'
'''
ahram.org.eg
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class AlAhram(BasicNewsRecipe):
    """Recipe for the Arabic edition of Al-Ahram, built from the category
    RSS feeds published on ahram.org.eg."""

    # --- Publication metadata -------------------------------------------
    title = u'Al-Ahram (الأهرام)'
    __author__ = 'Hassan Williamson'
    description = 'The Arabic version of the Al-Ahram newspaper.'
    publisher = 'Al-Ahram'
    category = 'News'
    publication_type = 'newsportal'
    language = 'ar'
    encoding = 'utf8'
    cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Content clean-up -----------------------------------------------
    no_stylesheets = True
    use_embedded_content = False
    # Right-to-left body text plus sizing for the site's title classes.
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; }'
        ' .bbtitle{ font-weight: bold; font-size: 2em; }'
        ' .bbsubtitle{ font-size: 1.3em; }'
        ' #WriterImage{ height: 10px; } '
    )  # noqa
    keep_only_tags = [
        dict(name='div', attrs={'class': ['bbcolright']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['bbnav', 'bbsp']}),
        dict(name='div', attrs={'id': ['AddThisButton']}),
        dict(name='a', attrs={'class': ['twitter-share-button']}),
        dict(name='div', attrs={'id': ['ReaderCount']}),
    ]
    remove_attributes = ['width', 'height', 'style']

    # --- Feeds: one (section title, RSS URL) pair per site category -----
    feeds = [
        (u'الأولى',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
        (u'الصفحة الثانية',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
        (u'مصر',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
        (u'المشهد السياسي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
        (u'المحافظات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
        (u'الوطن العربي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
        (u'العالم',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
        (u'تقارير المراسلين',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
        (u'تحقيقات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
        (u'قضايا واراء',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
        (u'اقتصاد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
        (u'رياضة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
        (u'حوادث',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
        (u'دنيا الثقافة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
        (u'المراة والطفل',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
        (u'يوم جديد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
        (u'الكتاب',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
        (u'الاعمدة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
        (u'أراء حرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
        (u'ملفات الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
        (u'بريد الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
        (u'برلمان الثورة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
        (u'الاخيرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
    ]

View File

@ -1,31 +0,0 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
'''
Albertslund Posten
'''
class AlbertslundLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Albertslund Posten, built from the
    RSS sections on albertslund.lokalavisen.dk."""

    # --- Publication metadata -------------------------------------------
    title = 'Albertslund Posten'
    __author__ = 'CoderAllan.github.com'
    description = (
        'RSS feed med sidste nyt fra Albertslund Posten. Der er nye historier flere gange dagligt'
        ' - få de seneste nyheder fra dit lokalområde automatisk. Albertslund Posten. albertslund.lokalavisen.dk'
    )
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- Fetch limits and clean-up --------------------------------------
    oldest_article = 7
    max_articles_per_feed = 25
    auto_cleanup = True

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (
            'Seneste nyt fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Albertslund Posten',
            'http://albertslund.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]

View File

@ -1,71 +0,0 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class BanatNews(BasicNewsRecipe):
    """Recipe for Banat News, fetched from the philstar.com RSS feeds.

    Articles are downloaded through the site's printer-friendly pages and
    their titles are re-read from those pages.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Banat News'
    # Title stamped with the time at which this class body is evaluated.
    custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'ceb'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    recursions = 0
    needs_subscription = False
    encoding = None

    # --- Content clean-up -----------------------------------------------
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # noqa
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        ('Balita', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'),
        ('Kalingawan', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'),
        ('Imong Kapalaran', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``."""
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the printer page.

        Obtaining the title from the printer-friendly page avoids
        add_toc_thumbnail changing the title when the article has an image.
        If the header span is missing or empty, the existing title is left
        untouched instead of raising.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()

View File

@ -1,48 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Ciekawostki_Historyczne(BasicNewsRecipe):
    """Recipe for ciekawostkihistoryczne.pl, built from the site's tag and
    category feeds; follow-up pages of multi-page articles are fetched by
    following links (see ``recursions`` and ``is_link_wanted``)."""

    # --- Publication metadata -------------------------------------------
    title = u'Ciekawostki Historyczne'
    __author__ = u'fenuks & Tomasz Długosz'
    description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
    category = 'history'
    language = 'pl'
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'

    # --- Fetch limits ---------------------------------------------------
    # The class used to assign oldest_article twice (7, then 12); only the
    # later value ever took effect, so just that one is kept.
    oldest_article = 12
    max_articles_per_feed = 100
    recursions = 5

    # --- Content clean-up -----------------------------------------------
    no_stylesheets = True
    remove_empty_feeds = True
    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    # Strip the "this article has several pages" block and the
    # "see also" list from the raw HTML before parsing.
    preprocess_regexps = [
        (re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
         lambda match: ''),
        (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL),
         lambda match: ''),
    ]
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    remove_tags = [
        dict(id=['catapult-cookie-bar', 'header', 'footer', 'rightcolumn', 'singlepostinfo']),
        dict(attrs={'class': ['ubm_banner', 'ciekawostki-slider-popular', 'books short floatRight', 'unprintable', 'booksTable', 'bawmrp']}),
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
        (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'),
        (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'),
        (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'),
        (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'),
        (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'),
        (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'),
        (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/'),
    ]

    def is_link_wanted(self, url, tag):
        """Follow only on-site links whose second-to-last character is a
        digit from 2 to 6."""
        return 'ciekawostkihistoryczne' in url and url[-2] in {'2', '3', '4', '5', '6'}

    @staticmethod
    def _drop_next_sibling(tag):
        """Remove the node that follows ``tag``; do nothing if either is absent."""
        if tag is None:
            return
        sibling = tag.nextSibling
        if sibling is not None:
            sibling.extract()

    def postprocess_html(self, soup, first_fetch):
        """Remove the node after the <h7> heading and, on pages other than
        the first fetch, every <h1> plus the node after the <h6> heading.

        Missing headings or siblings are tolerated; previously a page
        without <h6> (or with a trailing <h7>/<h6>) raised AttributeError.
        """
        self._drop_next_sibling(soup.find('h7'))
        if not first_fetch:
            for heading in soup.findAll(['h1']):
                heading.extract()
            self._drop_next_sibling(soup.find('h6'))
        return soup

View File

@ -1,123 +0,0 @@
# -*- mode: python -*-
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2018, Darko Miletic <darko.miletic at gmail.com>'
'''
www.computing.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Computing_UK(BasicNewsRecipe):
    """Recipe for computing.co.uk, built from its category RSS feeds.

    The recipe is marked as needing a subscription; when a username is
    configured, ``get_browser`` logs in through the site's login form.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Computing'
    __author__ = 'Darko Miletic'
    description = 'Computing is the leading information resource for UK technology decision makers, providing the latest market news and hard-hitting opinion.'
    publisher = 'Incisive Business Media Limited'
    category = 'it computing uk, computing events, big data summit, cloud and infrastructure, it devops, computing security, HP, intel'
    language = 'en_GB'
    publication_type = 'newsportal'
    encoding = 'utf8'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 7
    needs_subscription = True
    ignore_duplicate_articles = {'url'}
    INDEX = 'https://www.computing.co.uk/'
    LOGIN = 'https://www.computing.co.uk/userlogin'

    # --- Content clean-up -----------------------------------------------
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = True
    resolve_internal_links = True

    def get_browser(self):
        """Return a browser that has opened the site index and, when a
        username is set, submitted the login form with the credentials."""
        def is_form_login(form):
            # The login form is identified by id="userlogin".
            return form.attrs.get('id') == "userlogin"

        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username:
            br.open(self.LOGIN)
            br.select_form(predicate=is_form_login)
            br['subscriber[email_id]'] = self.username
            br['subscriber[password]'] = self.password
            br.submit()
        return br

    extra_css = """
body{font-family: sans-serif}
img{margin-top:1em; margin-bottom: 1em; display:block}
"""

    # The key was previously spelled 'comment', which is not the name of
    # the conversion option ('comments'), so the description was never
    # applied to the generated book.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Feeds: (section title, RSS URL) --------------------------------
    # The Security feed used to be listed three times; one entry is kept.
    feeds = [
        (u'Financial Solutions',
         u'https://www.computing.co.uk/feeds/rss/category/financial-solutions/'),
        (u'Big Data',
         u'https://www.computing.co.uk/feeds/rss/category/big-data-and-analytics/'),
        (u'DevOps',
         u'https://www.computing.co.uk/feeds/rss/category/devops/'),
        (u'Cloud and Infrastructure',
         u'https://www.computing.co.uk/feeds/rss/category/cloud-and-infrastructure/'),
        (u'Internet of Things',
         u'https://www.computing.co.uk/feeds/rss/category/internet-of-things/'),
        (u'Leadership',
         u'https://www.computing.co.uk/feeds/rss/category/leadership/'),
        (u'Application',
         u'https://www.computing.co.uk/feeds/rss/category/software/applications/'),
        (u'Business Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/business-software/'),
        (u'Developer',
         u'https://www.computing.co.uk/feeds/rss/category/software/developer/'),
        (u'Mobile Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/mobile-software/'),
        (u'Strategy',
         u'https://www.computing.co.uk/feeds/rss/category/strategy/'),
        (u'Corporate',
         u'https://www.computing.co.uk/feeds/rss/category/management/corporate/'),
        (u'Privacy',
         u'https://www.computing.co.uk/feeds/rss/category/security/privacy/'),
        (u'Security',
         u'https://www.computing.co.uk/feeds/rss/category/security/'),
        (u'Hardware',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/'),
        (u'Mobile Phones',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/mobile-phones/'),
        (u'Communications',
         u'https://www.computing.co.uk/feeds/rss/category/communications/'),
        (u'Public Sector',
         u'https://www.computing.co.uk/feeds/rss/category/public-sector/'),
    ]

View File

@ -1,30 +0,0 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
'''
Halsnæs Avis
'''
class HalsnaesLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Halsnæs Avis, built from the RSS
    sections on halsnaes.lokalavisen.dk."""

    # --- Publication metadata -------------------------------------------
    title = 'Halsnæs Avis'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Halsnæs og omegn på halsnaes.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- Fetch limits and clean-up --------------------------------------
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (
            'Seneste nyt fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Halsnæs Avis',
            'http://halsnaes.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]

View File

@ -1,81 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class HeritageFoundation(BasicNewsRecipe):
    """Recipe for The Heritage Foundation, built from the per-topic RSS
    files under origin.heritage.org/static/RSS/."""

    # --- Publication metadata -------------------------------------------
    title = u'The Heritage Foundation'
    # Adjacent literals are joined by the compiler into one string.
    description = (
        'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—'
        'whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, '
        'individual freedom, traditional American values, and a strong national defense.'
    )
    __author__ = '_reader'
    __date__ = '05 July 2012'
    __version__ = '1.0'
    publisher = 'The Heritage Foundation'
    category = 'commentary'
    tags = 'commentary'
    language = 'en'
    publication_type = 'blog'
    cover_url = 'http://www.heritage.org/static/images/logo.jpg'
    masthead_url = 'http://www.heritage.org/static/images/logo.jpg'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 100
    recursions = 0
    encoding = None

    # --- Content clean-up -----------------------------------------------
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    conversion_options = {
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (topic title, RSS URL) ----------------------------------
    feeds = [
        (u'Agriculture',
         u'http://origin.heritage.org/static/RSS/Agriculture.xml'),
        (u'Alliances',
         u'http://origin.heritage.org/static/RSS/Alliances.xml'),
        (u'Arms Control and Non-Proliferation',
         u'http://origin.heritage.org/static/RSS/Arms-Control-and-Non-Proliferation.xml'),
        (u'Budget and Spending',
         u'http://origin.heritage.org/static/RSS/Budget-and-Spending.xml'),
        (u'Economic Freedom',
         u'http://origin.heritage.org/static/RSS/Economic-Freedom.xml'),
        (u'Economy',
         u'http://origin.heritage.org/static/RSS/Economy.xml'),
        (u'Education',
         u'http://origin.heritage.org/static/RSS/Education.xml'),
        (u'Energy and Environment',
         u'http://origin.heritage.org/static/RSS/Energy-and-Environment.xml'),
        (u'Family and Marriage',
         u'http://origin.heritage.org/static/RSS/Family-And-Marriage.xml'),
        (u'Foreign Aid and Development',
         u'http://origin.heritage.org/static/RSS/Foreign-Aid-and-Development.xml'),
        (u'Health Care',
         u'http://origin.heritage.org/static/RSS/Health-Care.xml'),
        (u'Homeland Security',
         u'http://origin.heritage.org/static/RSS/Homeland-Security.xml'),
        (u'Housing',
         u'http://origin.heritage.org/static/RSS/Housing.xml'),
        (u'Immigration',
         u'http://origin.heritage.org/static/RSS/Immigration.xml'),
        (u'International Conflicts',
         u'http://origin.heritage.org/static/RSS/International-Conflicts.xml'),
        (u'International Law',
         u'http://origin.heritage.org/static/RSS/International-Law.xml'),
        (u'Labor',
         u'http://origin.heritage.org/static/RSS/Labor.xml'),
        (u'Legal Issues',
         u'http://origin.heritage.org/static/RSS/Legal.xml'),
        (u'Missile Defense',
         u'http://origin.heritage.org/static/RSS/Missile-Defense.xml'),
        (u'National Security and Defense',
         u'http://origin.heritage.org/static/RSS/National-Security-and-Defense.xml'),
        (u'Political Thought',
         u'http://origin.heritage.org/static/RSS/Political-Thought.xml'),
        (u'Public Diplomacy',
         u'http://origin.heritage.org/static/RSS/Public-Diplomacy.xml'),
        (u'Regulation',
         u'http://origin.heritage.org/static/RSS/Regulation.xml'),
        (u'Religion and Civil Society',
         u'http://origin.heritage.org/static/RSS/Religion-and-Civil-Society.xml'),
        (u'Retirement Security',
         u'http://origin.heritage.org/static/RSS/Retirement-Security.xml'),
        (u'Space Policy',
         u'http://origin.heritage.org/static/RSS/Space-Policy.xml'),
        (u'Taxes',
         u'http://origin.heritage.org/static/RSS/Taxes.xml'),
        (u'Terrorism',
         u'http://origin.heritage.org/static/RSS/Terrorism.xml'),
        (u'Trade',
         u'http://origin.heritage.org/static/RSS/Trade.xml'),
        (u'Transportation',
         u'http://origin.heritage.org/static/RSS/Transportation.xml'),
        (u'Welfare',
         u'http://origin.heritage.org/static/RSS/Welfare.xml'),
        (u'Worldwide Freedom and Human Rights',
         u'http://origin.heritage.org/static/RSS/Worldwide-Freedom-and-Human-Rights.xml'),
    ]

View File

@ -1,31 +0,0 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class HistoriasDelMundo(BasicNewsRecipe):
    """Recipe for the Spanish-language blog 'Historias del Mundo'
    (marcbusque.org), styled with the gutenweb stylesheet fetched at run
    time."""

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'

    # --- Publication metadata -------------------------------------------
    title = u'Historias del Mundo'
    description = u'Historias del Mundo contadas por Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'es'
    tags = 'viajes, social'
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    # --- Fetch behaviour and clean-up -----------------------------------
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True

    def get_extra_css(self):
        """Return the extra CSS, downloading gutenweb.css on first use.

        The downloaded stylesheet has its ``@charset`` rule removed and is
        cached on ``self.extra_css`` so later calls do not hit the network.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields bytes; decode before the text replace below,
            # otherwise bytes.replace() with text arguments raises
            # TypeError on Python 3.
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    feeds = [
        (u'Historias del Mundo', u'http://www.marcbusque.org/?feed=rss'),
    ]

View File

@ -1,31 +0,0 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class HistoriesDelMon(BasicNewsRecipe):
    """Recipe for the Catalan-language blog 'Històries del Món'
    (marcbusque.org), styled with the gutenweb stylesheet fetched at run
    time."""

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'

    # --- Publication metadata -------------------------------------------
    title = u'Històries del Món'
    description = u'Històries del Món explicades pel Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'ca'
    tags = 'viatges, social'
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    # --- Fetch behaviour and clean-up -----------------------------------
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True

    def get_extra_css(self):
        """Return the extra CSS, downloading gutenweb.css on first use.

        The downloaded stylesheet has its ``@charset`` rule removed and is
        cached on ``self.extra_css`` so later calls do not hit the network.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields bytes; decode before the text replace below,
            # otherwise bytes.replace() with text arguments raises
            # TypeError on Python 3.
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    feeds = [
        (u'Històries del Món', u'http://www.marcbusque.org/ca/feed/'),
    ]

View File

@ -1,74 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs, Comment
class KurierGalicyjski(BasicNewsRecipe):
    """Recipe for Kurier Galicyjski, built from the site's Atom feeds.

    Multi-page articles are stitched together in ``append_page`` before
    the usual clean-up in ``preprocess_html``.
    """

    # Inline style carried by a dashed-border box that is stripped from
    # both the first page and every appended page.
    _DASHED_BOX_STYLE = 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'  # noqa

    # --- Publication metadata -------------------------------------------
    title = u'Kurier Galicyjski'
    __author__ = 'fenuks'
    description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.'  # noqa
    category = 'news'
    language = 'pl'
    cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'

    # --- Fetch limits and clean-up --------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class': 'item-page'})]
    remove_tags = [
        dict(attrs={'class': 'pagenav'}),
        dict(attrs={'style': _DASHED_BOX_STYLE}),
    ]

    # --- Feeds: (section title, Atom URL) -------------------------------
    feeds = [
        (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'),
        # NOTE(review): this is the same URL as the niezwykle-historie
        # feed further down - looks like a copy-paste slip; confirm the
        # intended address before changing it.
        (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'),
        (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'),
        (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'),
        (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'),
        (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'),
        (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'),
        (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'),
        (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom'),
    ]

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a multi-page article to ``appendtag``.

        Every link after the first in the ``article-index`` element is
        fetched; its ``item-page`` content (minus the <h2> heading and the
        ``article-info`` block) is appended. Pagination elements and HTML
        comments are then removed from ``appendtag``.

        NOTE(review): the nesting of the clean-up steps was reconstructed;
        pagination clean-up runs whenever an article index is present and
        comments are always stripped - confirm against the upstream recipe.
        """
        if appendtag is None:
            return
        index = soup.find(id='article-index')
        if index:
            for link in index.findAll('a')[1:]:
                next_soup = self.index_to_soup(
                    'http://www.kuriergalicyjski.com' + link['href'])
                page = next_soup.find(attrs={'class': 'item-page'})
                if page is None:
                    # Follow-up page without article content: skip it
                    # rather than fail on the attribute access below.
                    continue
                if page.h2:
                    page.h2.extract()
                info = page.find(attrs={'class': 'article-info'})
                if info:
                    info.extract()
                appendtag.insert(len(appendtag.contents), page)
            pagination_chrome = (
                {'id': 'article-index'},
                {'class': 'pagenavcounter'},
                {'class': 'pagination'},
                {'class': 'pagenav'},
                {'style': self._DASHED_BOX_STYLE},
            )
            for attrs in pagination_chrome:
                for node in appendtag.findAll(attrs=attrs):
                    node.extract()
        for comment in appendtag.findAll(
                text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages, drop inline styles, pad captioned images
        with line breaks and make site-relative links absolute."""
        self.append_page(soup, soup.body)
        for styled in soup.findAll(style=True):
            del styled['style']
        for img in soup.findAll(attrs={'class': 'easy_img_caption smartresize'}):
            img.insert(len(img.contents) - 1, bs('<br />'))
            img.insert(len(img.contents), bs('<br /><br />'))
        for a in soup.findAll('a', href=True):
            if a['href'].startswith('/'):
                a['href'] = 'http://kuriergalicyjski.com' + a['href']
        return soup

View File

@ -1,85 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = 'Marcin Urban 2011'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe):
    """Recipe for National Geographic PL.

    The site is scraped directly (``feeds`` is empty): ``parse_index``
    builds two sections from listing pages, ``print_version`` switches to
    the print view and ``get_cover_url`` reads the cover from the
    current-issues page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    __modified_by__ = 'fenuks'
    description = u'Legenda wśród magazynów z historią sięgającą 120 lat'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    publication_type = 'newsportal'
    encoding = 'utf8'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Content clean-up -----------------------------------------------
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
h1{text-align: center;}
h2{font-size: medium; font-weight: bold;}
.authordate {font-size: small; color: #696969;}
p.lead {font-weight: bold; text-align: center;}
.fot{font-size: x-small; color: #666666;} '''
    # Drop HTML comments from the raw page before parsing.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }
    remove_tags = [
        dict(name='div', attrs={'class': 'add_inf'}),
        dict(name='div', attrs={'class': 'add_f'}),
    ]
    remove_attributes = ['width', 'height']
    feeds = []

    def find_articles(self, url):
        """Return article dicts scraped from the listing page at ``url``.

        Each <li> inside the element with class ``arl`` yields a dict with
        ``title``, ``url``, ``date`` (always empty) and ``description``.
        List items without a titled link are skipped, and a missing teaser
        paragraph gives an empty description, so one malformed entry no
        longer aborts the whole section.
        """
        articles = []
        soup = self.index_to_soup(url)
        listing = soup.find(attrs={'class': 'arl'})
        if not listing or listing.ul is None:
            return articles
        for item in listing.ul.findAll('li'):
            link = item.a
            if link is None or not link.get('title') or not link.get('href'):
                continue
            teaser = item.div.p if item.div is not None else None
            articles.append({
                'title': link['title'],
                'url': link['href'],
                'date': '',
                'description': teaser.string if teaser is not None else '',
            })
        return articles

    def parse_index(self):
        """Return the two scraped sections as (title, articles) pairs."""
        return [
            (u"Aktualności", self.find_articles(
                'http://www.national-geographic.pl/aktualnosci/')),
            (u"Artykuły", self.find_articles(
                'http://www.national-geographic.pl/artykuly/')),
        ]

    def print_version(self, url):
        """Map an article URL to its print view; other URLs pass through."""
        if 'artykuly' in url:
            return url.replace('artykuly/pokaz', 'drukuj-artykul')
        if 'aktualnosci' in url:
            return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
        return url

    def get_cover_url(self):
        """Return the cover image URL from the current-issues page.

        When the expected element or its image is missing, ``cover_url``
        is left as it was instead of raising AttributeError.
        """
        soup = self.index_to_soup(
            'http://www.national-geographic.pl/biezace-wydania/')
        box = soup.find(attrs={'class': 'txt jus'})
        if box is not None and box.img is not None and box.img.get('src'):
            self.cover_url = box.img['src']
        return getattr(self, 'cover_url', None)