mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge branch 'master' of https://github.com/jony0008/calibre
This commit is contained in:
commit
1a3d3600b1
@ -1,76 +0,0 @@
|
||||
# coding=utf-8
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011-2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
||||
'''
|
||||
ahram.org.eg
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlAhram(BasicNewsRecipe):
    """Recipe for the Arabic edition of Al-Ahram (ahram.org.eg).

    Content is taken from the site's per-category RSS feeds; each article
    page is reduced to its main column and stripped of sharing widgets.
    """

    # --- Publication metadata ---
    title = u'Al-Ahram (الأهرام)'
    __author__ = 'Hassan Williamson'
    description = 'The Arabic version of the Al-Ahram newspaper.'
    publisher = 'Al-Ahram'
    category = 'News'
    publication_type = 'newsportal'
    language = 'ar'
    cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg'

    # --- Download behaviour ---
    encoding = 'utf8'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # --- Styling: right-to-left body text plus sizes for the title classes ---
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; }'
        ' .bbtitle{ font-weight: bold; font-size: 2em; }'
        ' .bbsubtitle{ font-size: 1.3em; }'
        ' #WriterImage{ height: 10px; } '
    )

    # --- Page clean-up ---
    # Only the 'bbcolright' div is kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['bbcolright']}),
    ]
    # Elements dropped from the kept column.
    remove_tags = [
        dict(name='div', attrs={'class': ['bbnav', 'bbsp']}),
        dict(name='div', attrs={'id': ['AddThisButton']}),
        dict(name='a', attrs={'class': ['twitter-share-button']}),
        dict(name='div', attrs={'id': ['ReaderCount']}),
    ]
    remove_attributes = ['width', 'height', 'style']

    # --- Feeds: (section title, RSS URL), one per site category ---
    feeds = [
        (u'الأولى',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
        (u'الصفحة الثانية',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
        (u'مصر',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
        (u'المشهد السياسي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
        (u'المحافظات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
        (u'الوطن العربي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
        (u'العالم',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
        (u'تقارير المراسلين',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
        (u'تحقيقات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
        (u'قضايا واراء',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
        (u'اقتصاد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
        (u'رياضة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
        (u'حوادث',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
        (u'دنيا الثقافة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
        (u'المراة والطفل',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
        (u'يوم جديد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
        (u'الكتاب',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
        (u'الاعمدة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
        (u'أراء حرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
        (u'ملفات الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
        (u'بريد الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
        (u'برلمان الثورة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
        (u'الاخيرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
    ]
|
@ -1,31 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
'''
|
||||
Albertslund Posten
|
||||
'''
|
||||
|
||||
|
||||
class AlbertslundLokalavisen_dk(BasicNewsRecipe):
    """Recipe for Albertslund Posten, built from albertslund.lokalavisen.dk RSS feeds."""

    # --- Publication metadata ---
    title = 'Albertslund Posten'
    __author__ = 'CoderAllan.github.com'
    description = (
        'RSS feed med sidste nyt fra Albertslund Posten. Der er nye historier flere gange dagligt'
        ' - få de seneste nyheder fra dit lokalområde automatisk. Albertslund Posten. albertslund.lokalavisen.dk'
    )
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- Download behaviour ---
    oldest_article = 7
    max_articles_per_feed = 25
    auto_cleanup = True

    # --- Feeds: (section title, RSS URL) ---
    feeds = [
        ('Seneste nyt fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,71 +0,0 @@
|
||||
|
||||
'''
|
||||
www.philstar.com
|
||||
'''
|
||||
|
||||
import time
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class BanatNews(BasicNewsRecipe):
    """Recipe for Banat News, downloaded from the philstar.com RSS feeds.

    Articles are fetched in their printer-friendly form (see
    ``print_version``) and the article title is re-read from that page (see
    ``populate_article_metadata``).
    """

    # --- Publication metadata ---
    title = 'Banat News'
    # Evaluated once, when the class body runs; used as the e-book title.
    custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'ceb'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'

    # --- Download behaviour ---
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Page clean-up ---
    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # noqa
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ---
    feeds = [
        ('Balita', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'),
        ('Kalingawan', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'),
        ('Imong Kapalaran', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL that corresponds to ``url``."""
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is taken from the printer-friendly page to avoid
        add_toc_thumbnail changing it when the article has an image.

        If the header span is missing, empty, or does not start with a text
        node, the existing title is left untouched instead of raising.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is None or not header.contents:
            return
        first_node = header.contents[0]
        # Only a text node can be stripped; a nested tag in first position
        # would otherwise fail when ``.strip()`` is called on it.
        if not isinstance(first_node, type(u'')):
            return
        heading = first_node.strip()
        if heading:
            article.title = heading
|
@ -1,48 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class Ciekawostki_Historyczne(BasicNewsRecipe):
    """Recipe for ciekawostkihistoryczne.pl.

    Multi-page articles are followed through ``recursions`` together with
    ``is_link_wanted``; ``postprocess_html`` then trims elements from the
    fetched pages.
    """

    # --- Publication metadata ---
    title = u'Ciekawostki Historyczne'
    __author__ = u'fenuks & Tomasz Długosz'
    description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
    category = 'history'
    language = 'pl'
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'

    # --- Download behaviour ---
    # The class used to assign oldest_article twice (7, then 12). The later
    # assignment won, so 12 is the value that was always in effect.
    oldest_article = 12
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    recursions = 5

    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'

    # --- Page clean-up ---
    # Each pattern is replaced with the empty string before parsing.
    preprocess_regexps = [
        (re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
         lambda match: ''),
        (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL),
         lambda match: ''),
    ]
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    remove_tags = [
        dict(id=['catapult-cookie-bar', 'header', 'footer', 'rightcolumn', 'singlepostinfo']),
        dict(attrs={'class': [
            'ubm_banner', 'ciekawostki-slider-popular', 'books short floatRight',
            'unprintable', 'booksTable', 'bawmrp',
        ]}),
    ]

    # --- Feeds: (section title, RSS URL) ---
    feeds = [
        (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
        (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'),
        (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'),
        (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'),
        (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'),
        (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'),
        (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'),
        (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/'),
    ]

    def is_link_wanted(self, url, tag):
        """Return True for on-site links whose second-to-last character is 2-6.

        ``tag`` is accepted for interface compatibility and is not used.
        """
        return 'ciekawostkihistoryczne' in url and url[-2] in {'2', '3', '4', '5', '6'}

    def postprocess_html(self, soup, first_fetch):
        """Drop the node after ``<h7>`` and, on follow-up pages, more headings.

        On pages other than the first fetch, every ``<h1>`` is removed along
        with the node following the first ``<h6>``. Missing elements are
        skipped rather than raising ``AttributeError``.

        NOTE(review): the source this was recovered from had lost its
        indentation; the ``<h6>`` clean-up is assumed to belong to the
        ``not first_fetch`` branch - confirm.
        """
        marker = soup.find('h7')
        if marker is not None and marker.nextSibling is not None:
            marker.nextSibling.extract()
        if not first_fetch:
            for heading in soup.findAll(['h1']):
                heading.extract()
            sub_marker = soup.find('h6')
            if sub_marker is not None and sub_marker.nextSibling is not None:
                sub_marker.nextSibling.extract()
        return soup
|
@ -1,123 +0,0 @@
|
||||
# -*- mode: python -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2018, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.computing.co.uk
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Computing_UK(BasicNewsRecipe):
    """Recipe for Computing (www.computing.co.uk), a subscription site.

    ``get_browser`` logs in through the site's ``userlogin`` form before any
    feed is downloaded.
    """

    # --- Publication metadata ---
    title = 'Computing'
    __author__ = 'Darko Miletic'
    description = 'Computing is the leading information resource for UK technology decision makers, providing the latest market news and hard-hitting opinion.'
    publisher = 'Incisive Business Media Limited'
    category = 'it computing uk, computing events, big data summit, cloud and infrastructure, it devops, computing security, HP, intel'
    language = 'en_GB'
    publication_type = 'newsportal'

    # --- Download behaviour ---
    oldest_article = 7
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = True
    resolve_internal_links = True
    needs_subscription = True
    ignore_duplicate_articles = {'url'}

    INDEX = 'https://www.computing.co.uk/'
    LOGIN = 'https://www.computing.co.uk/userlogin'

    def get_browser(self):
        """Return a browser that has visited the index and, if possible, logged in.

        The login form is submitted only when both a username and a password
        are configured; otherwise the browser is returned after the index
        visit alone.
        """

        def is_form_login(form):
            # The login form is identified by its id attribute.
            return form.attrs.get('id') == "userlogin"

        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username and self.password:
            br.open(self.LOGIN)
            br.select_form(predicate=is_form_login)
            br['subscriber[email_id]'] = self.username
            br['subscriber[password]'] = self.password
            br.submit()
        return br

    extra_css = (
        '\n'
        'body{font-family: sans-serif}\n'
        'img{margin-top:1em; margin-bottom: 1em; display:block}\n'
    )

    conversion_options = {
        # Was 'comment'; the other recipes in this file use 'comments'.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Feeds: (section title, RSS URL) ---
    # The Security feed used to be listed three times; it now appears once.
    feeds = [
        (u'Financial Solutions',
         u'https://www.computing.co.uk/feeds/rss/category/financial-solutions/'),
        (u'Big Data',
         u'https://www.computing.co.uk/feeds/rss/category/big-data-and-analytics/'),
        (u'DevOps',
         u'https://www.computing.co.uk/feeds/rss/category/devops/'),
        (u'Cloud and Infrastructure',
         u'https://www.computing.co.uk/feeds/rss/category/cloud-and-infrastructure/'),
        (u'Internet of Things',
         u'https://www.computing.co.uk/feeds/rss/category/internet-of-things/'),
        (u'Leadership',
         u'https://www.computing.co.uk/feeds/rss/category/leadership/'),
        (u'Application',
         u'https://www.computing.co.uk/feeds/rss/category/software/applications/'),
        (u'Business Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/business-software/'),
        (u'Developer',
         u'https://www.computing.co.uk/feeds/rss/category/software/developer/'),
        (u'Mobile Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/mobile-software/'),
        (u'Strategy',
         u'https://www.computing.co.uk/feeds/rss/category/strategy/'),
        (u'Corporate',
         u'https://www.computing.co.uk/feeds/rss/category/management/corporate/'),
        (u'Privacy',
         u'https://www.computing.co.uk/feeds/rss/category/security/privacy/'),
        (u'Security',
         u'https://www.computing.co.uk/feeds/rss/category/security/'),
        (u'Hardware',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/'),
        (u'Mobile Phones',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/mobile-phones/'),
        (u'Communications',
         u'https://www.computing.co.uk/feeds/rss/category/communications/'),
        (u'Public Sector',
         u'https://www.computing.co.uk/feeds/rss/category/public-sector/'),
    ]
|
@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
'''
|
||||
Halsnæs Avis
|
||||
'''
|
||||
|
||||
|
||||
class HalsnaesLokalavisen_dk(BasicNewsRecipe):
    """Recipe for Halsnæs Avis, built from halsnaes.lokalavisen.dk RSS feeds."""

    # --- Publication metadata ---
    title = 'Halsnæs Avis'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport og kultur fra Halsnæs og omegn på halsnaes.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    language = 'da'

    # --- Download behaviour ---
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # --- Feeds: (section title, RSS URL) ---
    feeds = [
        ('Seneste nyt fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
||||
|
@ -1,81 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HeritageFoundation(BasicNewsRecipe):
    """Recipe for The Heritage Foundation, one RSS feed per policy topic."""

    # --- Publication metadata ---
    title = u'The Heritage Foundation'
    # Adjacent literals are joined by the parser into a single string.
    description = (
        'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—'
        'whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, '
        'individual freedom, traditional American values, and a strong national defense.'
    )
    __author__ = '_reader'
    __date__ = '05 July 2012'
    __version__ = '1.0'
    publisher = 'The Heritage Foundation'
    category = 'commentary'
    tags = 'commentary'
    language = 'en'
    publication_type = 'blog'
    cover_url = 'http://www.heritage.org/static/images/logo.jpg'
    masthead_url = 'http://www.heritage.org/static/images/logo.jpg'

    # --- Download behaviour ---
    oldest_article = 30
    max_articles_per_feed = 100
    encoding = None
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 0
    remove_empty_feeds = True
    auto_cleanup = True

    conversion_options = {
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (topic, RSS URL) ---
    feeds = [
        (u'Agriculture',
         u'http://origin.heritage.org/static/RSS/Agriculture.xml'),
        (u'Alliances',
         u'http://origin.heritage.org/static/RSS/Alliances.xml'),
        (u'Arms Control and Non-Proliferation',
         u'http://origin.heritage.org/static/RSS/Arms-Control-and-Non-Proliferation.xml'),
        (u'Budget and Spending',
         u'http://origin.heritage.org/static/RSS/Budget-and-Spending.xml'),
        (u'Economic Freedom',
         u'http://origin.heritage.org/static/RSS/Economic-Freedom.xml'),
        (u'Economy',
         u'http://origin.heritage.org/static/RSS/Economy.xml'),
        (u'Education',
         u'http://origin.heritage.org/static/RSS/Education.xml'),
        (u'Energy and Environment',
         u'http://origin.heritage.org/static/RSS/Energy-and-Environment.xml'),
        (u'Family and Marriage',
         u'http://origin.heritage.org/static/RSS/Family-And-Marriage.xml'),
        (u'Foreign Aid and Development',
         u'http://origin.heritage.org/static/RSS/Foreign-Aid-and-Development.xml'),
        (u'Health Care',
         u'http://origin.heritage.org/static/RSS/Health-Care.xml'),
        (u'Homeland Security',
         u'http://origin.heritage.org/static/RSS/Homeland-Security.xml'),
        (u'Housing',
         u'http://origin.heritage.org/static/RSS/Housing.xml'),
        (u'Immigration',
         u'http://origin.heritage.org/static/RSS/Immigration.xml'),
        (u'International Conflicts',
         u'http://origin.heritage.org/static/RSS/International-Conflicts.xml'),
        (u'International Law',
         u'http://origin.heritage.org/static/RSS/International-Law.xml'),
        (u'Labor',
         u'http://origin.heritage.org/static/RSS/Labor.xml'),
        (u'Legal Issues',
         u'http://origin.heritage.org/static/RSS/Legal.xml'),
        (u'Missile Defense',
         u'http://origin.heritage.org/static/RSS/Missile-Defense.xml'),
        (u'National Security and Defense',
         u'http://origin.heritage.org/static/RSS/National-Security-and-Defense.xml'),
        (u'Political Thought',
         u'http://origin.heritage.org/static/RSS/Political-Thought.xml'),
        (u'Public Diplomacy',
         u'http://origin.heritage.org/static/RSS/Public-Diplomacy.xml'),
        (u'Regulation',
         u'http://origin.heritage.org/static/RSS/Regulation.xml'),
        (u'Religion and Civil Society',
         u'http://origin.heritage.org/static/RSS/Religion-and-Civil-Society.xml'),
        (u'Retirement Security',
         u'http://origin.heritage.org/static/RSS/Retirement-Security.xml'),
        (u'Space Policy',
         u'http://origin.heritage.org/static/RSS/Space-Policy.xml'),
        (u'Taxes',
         u'http://origin.heritage.org/static/RSS/Taxes.xml'),
        (u'Terrorism',
         u'http://origin.heritage.org/static/RSS/Terrorism.xml'),
        (u'Trade',
         u'http://origin.heritage.org/static/RSS/Trade.xml'),
        (u'Transportation',
         u'http://origin.heritage.org/static/RSS/Transportation.xml'),
        (u'Welfare',
         u'http://origin.heritage.org/static/RSS/Welfare.xml'),
        (u'Worldwide Freedom and Human Rights',
         u'http://origin.heritage.org/static/RSS/Worldwide-Freedom-and-Human-Rights.xml'),
    ]
|
@ -1,31 +0,0 @@
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HistoriasDelMundo(BasicNewsRecipe):
    """Recipe for the Spanish feed of marcbusque.org ("Historias del Mundo").

    The stylesheet is not embedded in the recipe; ``get_extra_css`` downloads
    it on first use.
    """

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'

    # --- Publication metadata ---
    title = u'Historias del Mundo'
    description = u'Historias del Mundo contadas por Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'es'
    tags = 'viajes, social'
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    # --- Download behaviour ---
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True

    def get_extra_css(self):
        """Return the gutenweb stylesheet, downloading it on first call.

        The downloaded text has its ``@charset`` rule removed and is cached
        on ``self.extra_css``, so later calls do not hit the network.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields bytes on Python 3, where bytes.replace() rejects
            # str arguments; decode first so the replace works on text.
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    # --- Feeds: (title, RSS URL) ---
    feeds = [
        (u'Historias del Mundo', u'http://www.marcbusque.org/?feed=rss'),
    ]
|
@ -1,31 +0,0 @@
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HistoriesDelMon(BasicNewsRecipe):
    """Recipe for the Catalan feed of marcbusque.org ("Històries del Món").

    The stylesheet is not embedded in the recipe; ``get_extra_css`` downloads
    it on first use.
    """

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'

    # --- Publication metadata ---
    title = u'Històries del Món'
    description = u'Històries del Món explicades pel Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'ca'
    tags = 'viatges, social'
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    # --- Download behaviour ---
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True

    def get_extra_css(self):
        """Return the gutenweb stylesheet, downloading it on first call.

        The downloaded text has its ``@charset`` rule removed and is cached
        on ``self.extra_css``, so later calls do not hit the network.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields bytes on Python 3, where bytes.replace() rejects
            # str arguments; decode first so the replace works on text.
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    # --- Feeds: (title, RSS URL) ---
    feeds = [
        (u'Històries del Món', u'http://www.marcbusque.org/ca/feed/'),
    ]
|
@ -1,74 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs, Comment
|
||||
|
||||
|
||||
class KurierGalicyjski(BasicNewsRecipe):
    """Recipe for Kurier Galicyjski (kuriergalicyjski.com).

    Multi-page articles are stitched together by ``append_page``, which
    ``preprocess_html`` calls for every downloaded article.
    """

    # --- Publication metadata ---
    title = u'Kurier Galicyjski'
    __author__ = 'fenuks'
    description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.'  # noqa
    category = 'news'
    language = 'pl'
    cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'

    # --- Download behaviour ---
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True

    # Inline style that identifies a block removed from every page; shared by
    # remove_tags and append_page so the literal exists in one place only.
    _SEPARATOR_STYLE = 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'  # noqa

    # --- Page clean-up ---
    keep_only_tags = [dict(attrs={'class': 'item-page'})]
    remove_tags = [
        dict(attrs={'class': 'pagenav'}),
        dict(attrs={'style': _SEPARATOR_STYLE}),
    ]

    # --- Feeds: (section title, Atom URL) ---
    # NOTE(review): 'Publicystyka' and 'Niezwykłe historie' point at the same
    # URL (niezwykle-historie); this looks like a copy-paste slip - confirm
    # the intended 'Publicystyka' address before changing it.
    feeds = [
        (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'),
        (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'),
        (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'),
        (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'),
        (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'),
        (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'),
        (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'),
        (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'),
        (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom'),
    ]

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a multi-page article to ``appendtag``.

        The links inside the ``article-index`` element, minus the first one,
        are fetched in order. From each fetched page the ``item-page``
        element is taken, its ``<h2>`` and ``article-info`` parts are
        removed, and the rest is appended to ``appendtag``. Pagination
        widgets and HTML comments are then stripped from ``appendtag``.

        A continuation page without an ``item-page`` element is skipped
        instead of raising ``AttributeError``.

        NOTE(review): the source this was recovered from had lost its
        indentation; the nesting of the clean-up steps below is a
        reconstruction - confirm against the upstream recipe.
        """
        pager = soup.find(id='article-index')
        if not pager:
            return

        page_links = pager.findAll('a')[1:]
        if page_links:
            for link in page_links:
                nexturl = 'http://www.kuriergalicyjski.com' + link['href']
                next_soup = self.index_to_soup(nexturl)
                pagetext = next_soup.find(attrs={'class': 'item-page'})
                if pagetext is None:
                    continue
                if pagetext.h2:
                    pagetext.h2.extract()
                info = pagetext.find(attrs={'class': 'article-info'})
                if info:
                    info.extract()
                appendtag.insert(len(appendtag.contents), pagetext)

            # Remove navigation left over from the stitched-in pages.
            for leftover in appendtag.findAll(id='article-index'):
                leftover.extract()
            for unwanted in (
                {'class': 'pagenavcounter'},
                {'class': 'pagination'},
                {'class': 'pagenav'},
                {'style': self._SEPARATOR_STYLE},
            ):
                for leftover in appendtag.findAll(attrs=unwanted):
                    leftover.extract()

        for comment in appendtag.findAll(
                text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        """Stitch in extra pages, drop inline styles, and fix up links.

        Returns the same ``soup`` object after modifying it in place.
        """
        # append_page inserts into the body; skip it when there is none.
        if soup.body is not None:
            self.append_page(soup, soup.body)
        for styled in soup.findAll(style=True):
            del styled['style']
        for img in soup.findAll(attrs={'class': 'easy_img_caption smartresize'}):
            # One break before the last child, two more at the very end.
            img.insert(len(img.contents) - 1, bs('<br />'))
            img.insert(len(img.contents), bs('<br /><br />'))
        for a in soup.findAll('a', href=True):
            # Turn site-relative links into absolute ones.
            if a['href'].startswith('/'):
                a['href'] = 'http://kuriergalicyjski.com' + a['href']
        return soup
|
@ -1,85 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'Marcin Urban 2011'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class recipeMagic(BasicNewsRecipe):
    """Recipe for National Geographic PL (www.national-geographic.pl).

    There are no RSS feeds: ``parse_index`` scrapes two listing pages with
    ``find_articles`` and articles are downloaded in their print form.
    """

    # --- Publication metadata ---
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    __modified_by__ = 'fenuks'
    description = u'Legenda wśród magazynów z historią sięgającą 120 lat'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    publication_type = 'newsportal'

    # --- Download behaviour ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_empty_feeds = True

    extra_css = (
        ' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}\n'
        'h1{text-align: center;}\n'
        'h2{font-size: medium; font-weight: bold;}\n'
        '.authordate {font-size: small; color: #696969;}\n'
        'p.lead {font-weight: bold; text-align: center;}\n'
        '.fot{font-size: x-small; color: #666666;} '
    )

    # Strip HTML comments before the page is parsed.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- Page clean-up ---
    remove_tags = [
        dict(name='div', attrs={'class': 'add_inf'}),
        dict(name='div', attrs={'class': 'add_f'}),
    ]
    remove_attributes = ['width', 'height']

    # Articles come from parse_index, so the feed list is empty.
    feeds = []

    def find_articles(self, url):
        """Return the article dicts scraped from the listing page at ``url``.

        Each ``<li>`` under the ``arl`` element's ``<ul>`` yields one dict
        with ``title``, ``url``, ``date`` (always empty) and
        ``description``. Items without a usable link are skipped, and an
        empty list is returned when the listing is not found.
        """
        articles = []
        soup = self.index_to_soup(url)
        listing = soup.find(attrs={'class': 'arl'})
        if not listing or listing.ul is None:
            return articles

        for item in listing.ul.findAll('li'):
            link = item.a
            if link is None or not link.get('href'):
                continue
            # Guard each hop of item.div.p so a missing teaser yields an
            # empty description instead of an AttributeError.
            teaser = item.div
            paragraph = teaser.p if teaser is not None else None
            summary = paragraph.string if paragraph is not None else None
            articles.append({
                'title': link.get('title') or self.tag_to_string(link),
                'url': link['href'],
                'date': '',
                'description': summary or '',
            })
        return articles

    def parse_index(self):
        """Return the two scraped sections as ``(title, articles)`` pairs."""
        return [
            (u"Aktualności", self.find_articles(
                'http://www.national-geographic.pl/aktualnosci/')),
            (u"Artykuły", self.find_articles(
                'http://www.national-geographic.pl/artykuly/')),
        ]

    def print_version(self, url):
        """Return the print URL for ``url``, or ``url`` itself if none applies."""
        # 'artykuly' is checked first, matching the original if/elif order.
        for section in ('artykuly', 'aktualnosci'):
            if section in url:
                return url.replace(section + '/pokaz', 'drukuj-artykul')
        return url

    def get_cover_url(self):
        """Return the current issue's cover image URL.

        The first image inside the ``txt jus`` element of the current-issues
        page is stored on ``self.cover_url``. When that markup is missing,
        the existing ``cover_url`` value is returned unchanged instead of
        raising.
        """
        soup = self.index_to_soup(
            'http://www.national-geographic.pl/biezace-wydania/')
        holder = soup.find(attrs={'class': 'txt jus'})
        image = holder.img if holder is not None else None
        if image is not None and image.get('src'):
            self.cover_url = image['src']
        return getattr(self, 'cover_url', None)
|
Loading…
x
Reference in New Issue
Block a user