mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge branch 'master' of https://github.com/jony0008/calibre
This commit is contained in:
commit
1a3d3600b1
@ -1,76 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011-2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
|
||||||
'''
|
|
||||||
ahram.org.eg
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AlAhram(BasicNewsRecipe):
    """Recipe for the Arabic edition of Al-Ahram (ahram.org.eg).

    Purely declarative: articles come from the per-category RSS endpoints
    listed in ``feeds``; each page is reduced to the ``bbcolright`` div and
    then stripped of navigation and sharing widgets.
    """

    # --- Identity / metadata -------------------------------------------
    title = u'Al-Ahram (الأهرام)'
    __author__ = 'Hassan Williamson'
    description = 'The Arabic version of the Al-Ahram newspaper.'
    publisher = 'Al-Ahram'
    category = 'News'
    publication_type = 'newsportal'
    language = 'ar'
    cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg'

    # --- Download behaviour --------------------------------------------
    encoding = 'utf8'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # --- Presentation ----------------------------------------------------
    # Right-to-left body text plus sizing for the site's title classes.
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; }'
        ' .bbtitle{ font-weight: bold; font-size: 2em; }'
        ' .bbsubtitle{ font-size: 1.3em; }'
        ' #WriterImage{ height: 10px; } '
    )

    # --- Page clean-up ---------------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['bbcolright']}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['bbnav', 'bbsp']}},
        {'name': 'div', 'attrs': {'id': ['AddThisButton']}},
        {'name': 'a', 'attrs': {'class': ['twitter-share-button']}},
        {'name': 'div', 'attrs': {'id': ['ReaderCount']}},
    ]

    remove_attributes = ['width', 'height', 'style']

    # --- Sections: (section title, RSS url) -------------------------------
    feeds = [
        (u'الأولى',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
        (u'الصفحة الثانية',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
        (u'مصر',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
        (u'المشهد السياسي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
        (u'المحافظات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
        (u'الوطن العربي',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
        (u'العالم',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
        (u'تقارير المراسلين',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
        (u'تحقيقات',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
        (u'قضايا واراء',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
        (u'اقتصاد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
        (u'رياضة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
        (u'حوادث',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
        (u'دنيا الثقافة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
        (u'المراة والطفل',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
        (u'يوم جديد',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
        (u'الكتاب',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
        (u'الاعمدة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
        (u'أراء حرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
        (u'ملفات الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
        (u'بريد الاهرام',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
        (u'برلمان الثورة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
        (u'الاخيرة',
         'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
    ]
|
|
@ -1,31 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
'''
|
|
||||||
Albertslund Posten
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class AlbertslundLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Albertslund Posten.

    Declarative only: six RSS sections from albertslund.lokalavisen.dk,
    with article extraction left to ``auto_cleanup``.
    """

    # Metadata
    title = 'Albertslund Posten'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = (
        'RSS feed med sidste nyt fra Albertslund Posten. '
        'Der er nye historier flere gange dagligt'
        ' - få de seneste nyheder fra dit lokalområde automatisk. '
        'Albertslund Posten. albertslund.lokalavisen.dk'
    )

    # Fetch limits and clean-up strategy
    oldest_article = 7
    max_articles_per_feed = 25
    auto_cleanup = True

    # (section title, RSS url)
    feeds = [
        ('Seneste nyt fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Albertslund Posten',
         'http://albertslund.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
|||||||
|
|
||||||
'''
|
|
||||||
www.philstar.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
import time
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BanatNews(BasicNewsRecipe):
    """Recipe for Banat News, downloaded from the philstar.com RSS feeds.

    Articles are fetched through their printer-friendly URL (see
    ``print_version``) and their titles are re-read from the downloaded
    page (see ``populate_article_metadata``).
    """

    title = 'Banat News'
    # Evaluated once, when the class body runs: the e-book title carries the
    # time at which the recipe was loaded.
    custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'ceb'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # noqa
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # (section title, RSS url)
    feeds = [
        ('Balita', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'),
        ('Kalingawan', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'),
        ('Imong Kapalaran', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``."""
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Take the article title from the printer-friendly page.

        This avoids add_toc_thumbnail changing the title when the article
        has an image. If the header span is missing or empty, the title
        already set on ``article`` is left untouched instead of raising.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is None or not header.contents:
            # Page layout differs from what this recipe expects: keep the
            # existing title rather than aborting the article.
            return
        article.title = header.contents[0].strip()
|
|
@ -1,48 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class Ciekawostki_Historyczne(BasicNewsRecipe):
    """Recipe for ciekawostkihistoryczne.pl (Polish popular-history site).

    Multi-page articles are followed through link recursion: see
    ``recursions`` and ``is_link_wanted``.
    """

    title = u'Ciekawostki Historyczne'
    __author__ = u'fenuks & Tomasz Długosz'
    description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
    category = 'history'
    language = 'pl'
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    max_articles_per_feed = 100
    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    # The class used to assign oldest_article twice (7, then 12); only the
    # later value ever took effect, so the dead first assignment is dropped.
    oldest_article = 12
    # Strip the "this article has several pages" banner and the
    # "see also" list from the raw HTML before parsing.
    preprocess_regexps = [
        (re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
         lambda match: ''),
        (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL),
         lambda match: ''),
    ]
    no_stylesheets = True
    remove_empty_feeds = True
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    recursions = 5
    remove_tags = [
        dict(id=['catapult-cookie-bar', 'header', 'footer', 'rightcolumn', 'singlepostinfo']),
        dict(attrs={'class': ['ubm_banner', 'ciekawostki-slider-popular', 'books short floatRight',
                              'unprintable', 'booksTable', 'bawmrp']}),
    ]

    # (section title, RSS url)
    feeds = [
        (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
        (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'),
        (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'),
        (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'),
        (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'),
        (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'),
        (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'),
        (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/'),
    ]

    def is_link_wanted(self, url, tag):
        """Return True for on-site links whose second-to-last character is 2-6.

        NOTE(review): presumably this matches continuation pages whose URL
        ends in ``/<page number>/`` - confirm against the live site.
        """
        return 'ciekawostkihistoryczne' in url and url[-2] in {'2', '3', '4', '5', '6'}

    def postprocess_html(self, soup, first_fetch):
        """Drop the node following <h7>, and on continuation pages also the
        <h1> headings and the node following <h6>.

        Missing headings or missing siblings are skipped rather than raising
        AttributeError on ``None``.
        """
        tag = soup.find('h7')
        if tag is not None and tag.nextSibling is not None:
            tag.nextSibling.extract()
        if not first_fetch:
            for r in soup.findAll(['h1']):
                r.extract()
            h6 = soup.find('h6')
            # Not every continuation page is guaranteed to have an <h6>.
            if h6 is not None and h6.nextSibling is not None:
                h6.nextSibling.extract()
        return soup
|
|
@ -1,123 +0,0 @@
|
|||||||
# -*- mode: python -*-
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2018, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.computing.co.uk
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Computing_UK(BasicNewsRecipe):
    """Recipe for www.computing.co.uk (subscription site).

    Logs in through the site's ``userlogin`` form when a username is
    configured, then downloads the category RSS feeds listed in ``feeds``.
    """

    title = 'Computing'
    __author__ = 'Darko Miletic'
    description = 'Computing is the leading information resource for UK technology decision makers, providing the latest market news and hard-hitting opinion.'
    publisher = 'Incisive Business Media Limited'
    category = 'it computing uk, computing events, big data summit, cloud and infrastructure, it devops, computing security, HP, intel'
    oldest_article = 7
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_GB'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    auto_cleanup = True
    resolve_internal_links = True
    needs_subscription = True
    ignore_duplicate_articles = {'url'}
    INDEX = 'https://www.computing.co.uk/'
    LOGIN = 'https://www.computing.co.uk/userlogin'

    def get_browser(self):
        """Return a browser that has visited the index page and, when a
        username is configured, submitted the site's login form."""

        def is_form_login(form):
            # The login form is identified by its id attribute.
            return "id" in form.attrs and form.attrs['id'] == "userlogin"

        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username:
            br.open(self.LOGIN)
            br.select_form(predicate=is_form_login)
            br['subscriber[email_id]'] = self.username
            br['subscriber[password]'] = self.password
            br.submit()
        return br

    extra_css = """
        body{font-family: sans-serif}
        img{margin-top:1em; margin-bottom: 1em; display:block}
    """

    conversion_options = {
        # Was 'comment'; the other recipes alongside this one pass the
        # description under 'comments'.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (section title, RSS url). The Security feed used to be listed three
    # times; the two trailing repeats are removed so it is fetched once.
    feeds = [
        (u'Financial Solutions',
         u'https://www.computing.co.uk/feeds/rss/category/financial-solutions/'),
        (u'Big Data',
         u'https://www.computing.co.uk/feeds/rss/category/big-data-and-analytics/'),
        (u'DevOps',
         u'https://www.computing.co.uk/feeds/rss/category/devops/'),
        (u'Cloud and Infrastructure',
         u'https://www.computing.co.uk/feeds/rss/category/cloud-and-infrastructure/'),
        (u'Internet of Things',
         u'https://www.computing.co.uk/feeds/rss/category/internet-of-things/'),
        (u'Leadership',
         u'https://www.computing.co.uk/feeds/rss/category/leadership/'),
        (u'Application',
         u'https://www.computing.co.uk/feeds/rss/category/software/applications/'),
        (u'Business Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/business-software/'),
        (u'Developer',
         u'https://www.computing.co.uk/feeds/rss/category/software/developer/'),
        (u'Mobile Software',
         u'https://www.computing.co.uk/feeds/rss/category/software/mobile-software/'),
        (u'Strategy',
         u'https://www.computing.co.uk/feeds/rss/category/strategy/'),
        (u'Corporate',
         u'https://www.computing.co.uk/feeds/rss/category/management/corporate/'),
        (u'Privacy',
         u'https://www.computing.co.uk/feeds/rss/category/security/privacy/'),
        (u'Security',
         u'https://www.computing.co.uk/feeds/rss/category/security/'),
        (u'Hardware',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/'),
        (u'Mobile Phones',
         u'https://www.computing.co.uk/feeds/rss/category/hardware/mobile-phones/'),
        (u'Communications',
         u'https://www.computing.co.uk/feeds/rss/category/communications/'),
        (u'Public Sector',
         u'https://www.computing.co.uk/feeds/rss/category/public-sector/'),
    ]
|
|
@ -1,30 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
'''
|
|
||||||
Halsnæs Avis
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class HalsnaesLokalavisen_dk(BasicNewsRecipe):
    """Recipe for the Danish local paper Halsnæs Avis.

    Declarative only: six RSS sections from halsnaes.lokalavisen.dk,
    with article extraction left to ``auto_cleanup``.
    """

    # Metadata
    title = 'Halsnæs Avis'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = (
        'Lokale og regionale nyheder, sport og kultur '
        'fra Halsnæs og omegn på halsnaes.lokalavisen.dk'
    )

    # Fetch limits and clean-up strategy
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    # (section title, RSS url)
    feeds = [
        ('Seneste nyt fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Halsnæs Avis',
         'http://halsnaes.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class HeritageFoundation(BasicNewsRecipe):
    """Recipe for The Heritage Foundation's per-topic RSS feeds.

    Declarative only: article extraction is left to ``auto_cleanup``.
    """

    title = u'The Heritage Foundation'
    # Adjacent string literals instead of backslash continuations inside one
    # literal: with a backslash, any indentation of the continuation lines
    # silently becomes part of the description text.
    description = (
        'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—'
        'whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, '
        'individual freedom, traditional American values, and a strong national defense.'
    )
    __author__ = '_reader'
    __date__ = '05 July 2012'
    __version__ = '1.0'
    oldest_article = 30
    max_articles_per_feed = 100
    publisher = 'The Heritage Foundation'
    category = 'commentary'
    tags = 'commentary'
    language = 'en'
    publication_type = 'blog'
    cover_url = 'http://www.heritage.org/static/images/logo.jpg'
    masthead_url = 'http://www.heritage.org/static/images/logo.jpg'
    encoding = None
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 0
    remove_empty_feeds = True
    auto_cleanup = True

    conversion_options = {
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # (topic title, RSS url)
    feeds = [
        (u'Agriculture', u'http://origin.heritage.org/static/RSS/Agriculture.xml'),
        (u'Alliances', u'http://origin.heritage.org/static/RSS/Alliances.xml'),
        (u'Arms Control and Non-Proliferation',
         u'http://origin.heritage.org/static/RSS/Arms-Control-and-Non-Proliferation.xml'),
        (u'Budget and Spending',
         u'http://origin.heritage.org/static/RSS/Budget-and-Spending.xml'),
        (u'Economic Freedom', u'http://origin.heritage.org/static/RSS/Economic-Freedom.xml'),
        (u'Economy', u'http://origin.heritage.org/static/RSS/Economy.xml'),
        (u'Education', u'http://origin.heritage.org/static/RSS/Education.xml'),
        (u'Energy and Environment',
         u'http://origin.heritage.org/static/RSS/Energy-and-Environment.xml'),
        (u'Family and Marriage',
         u'http://origin.heritage.org/static/RSS/Family-And-Marriage.xml'),
        (u'Foreign Aid and Development',
         u'http://origin.heritage.org/static/RSS/Foreign-Aid-and-Development.xml'),
        (u'Health Care', u'http://origin.heritage.org/static/RSS/Health-Care.xml'),
        (u'Homeland Security', u'http://origin.heritage.org/static/RSS/Homeland-Security.xml'),
        (u'Housing', u'http://origin.heritage.org/static/RSS/Housing.xml'),
        (u'Immigration', u'http://origin.heritage.org/static/RSS/Immigration.xml'),
        (u'International Conflicts',
         u'http://origin.heritage.org/static/RSS/International-Conflicts.xml'),
        (u'International Law', u'http://origin.heritage.org/static/RSS/International-Law.xml'),
        (u'Labor', u'http://origin.heritage.org/static/RSS/Labor.xml'),
        (u'Legal Issues', u'http://origin.heritage.org/static/RSS/Legal.xml'),
        (u'Missile Defense', u'http://origin.heritage.org/static/RSS/Missile-Defense.xml'),
        (u'National Security and Defense',
         u'http://origin.heritage.org/static/RSS/National-Security-and-Defense.xml'),
        (u'Political Thought', u'http://origin.heritage.org/static/RSS/Political-Thought.xml'),
        (u'Public Diplomacy', u'http://origin.heritage.org/static/RSS/Public-Diplomacy.xml'),
        (u'Regulation', u'http://origin.heritage.org/static/RSS/Regulation.xml'),
        (u'Religion and Civil Society',
         u'http://origin.heritage.org/static/RSS/Religion-and-Civil-Society.xml'),
        (u'Retirement Security',
         u'http://origin.heritage.org/static/RSS/Retirement-Security.xml'),
        (u'Space Policy', u'http://origin.heritage.org/static/RSS/Space-Policy.xml'),
        (u'Taxes', u'http://origin.heritage.org/static/RSS/Taxes.xml'),
        (u'Terrorism', u'http://origin.heritage.org/static/RSS/Terrorism.xml'),
        (u'Trade', u'http://origin.heritage.org/static/RSS/Trade.xml'),
        (u'Transportation', u'http://origin.heritage.org/static/RSS/Transportation.xml'),
        (u'Welfare', u'http://origin.heritage.org/static/RSS/Welfare.xml'),
        (u'Worldwide Freedom and Human Rights',
         u'http://origin.heritage.org/static/RSS/Worldwide-Freedom-and-Human-Rights.xml'),
    ]
|
|
@ -1,31 +0,0 @@
|
|||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class HistoriasDelMundo(BasicNewsRecipe):
    """Recipe for the Spanish-language blog "Historias del Mundo".

    Styling is not embedded in the recipe: ``get_extra_css`` downloads the
    gutenweb stylesheet the first time it is needed.
    """

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'Historias del Mundo'
    description = u'Historias del Mundo contadas por Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'es'
    tags = 'viajes, social'
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    def get_extra_css(self):
        """Return the extra CSS, fetching it on first use.

        The stylesheet is downloaded once, its ``@charset`` rule is removed,
        and the result is stored in ``self.extra_css`` for later calls.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields raw bytes; decode before the text replace so the
            # call does not mix bytes and text (TypeError on Python 3,
            # implicit ASCII decoding on Python 2).
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    feeds = [
        (u'Historias del Mundo', u'http://www.marcbusque.org/?feed=rss'),
    ]
|
|
@ -1,31 +0,0 @@
|
|||||||
# vim:fileencoding=utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class HistoriesDelMon(BasicNewsRecipe):
    """Recipe for the Catalan-language blog "Històries del Món".

    Styling is not embedded in the recipe: ``get_extra_css`` downloads the
    gutenweb stylesheet the first time it is needed.
    """

    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0.1'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'Històries del Món'
    description = u'Històries del Món explicades pel Marc Busqué'
    url = 'http://www.marcbusque.org'
    language = 'ca'
    tags = 'viatges, social'
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True
    cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png'

    def get_extra_css(self):
        """Return the extra CSS, fetching it on first use.

        The stylesheet is downloaded once, its ``@charset`` rule is removed,
        and the result is stored in ``self.extra_css`` for later calls.
        """
        if not self.extra_css:
            br = self.get_browser()
            raw = br.open_novisit(
                'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read()
            # read() yields raw bytes; decode before the text replace so the
            # call does not mix bytes and text (TypeError on Python 3,
            # implicit ASCII decoding on Python 2).
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            self.extra_css = raw.replace('@charset "UTF-8";', '')
        return self.extra_css

    feeds = [
        (u'Històries del Món', u'http://www.marcbusque.org/ca/feed/'),
    ]
|
|
@ -1,74 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs, Comment
|
|
||||||
|
|
||||||
|
|
||||||
class KurierGalicyjski(BasicNewsRecipe):
    """Recipe for Kurier Galicyjski (kuriergalicyjski.com).

    Multi-page articles are stitched together in ``append_page``, which is
    called from ``preprocess_html`` for every downloaded page.
    """

    title = u'Kurier Galicyjski'
    __author__ = 'fenuks'
    description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.'  # noqa
    category = 'news'
    language = 'pl'
    cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True

    # Inline style that identifies the dashed separator bar on article
    # pages; used both by remove_tags and by append_page's clean-up.
    _SEPARATOR_STYLE = 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'  # noqa

    keep_only_tags = [dict(attrs={'class': 'item-page'})]
    remove_tags = [
        dict(attrs={'class': 'pagenav'}),
        dict(attrs={'style': _SEPARATOR_STYLE}),
    ]

    # (section title, Atom feed url)
    # NOTE(review): 'Publicystyka' and 'Niezwykłe historie' point at the same
    # niezwykle-historie feed - looks like a copy/paste slip; confirm the
    # intended URL before changing it.
    feeds = [
        (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'),
        (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'),
        (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'),
        (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'),
        (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'),
        (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'),
        (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'),
        (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'),
        (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom'),
    ]

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a multi-page article to ``appendtag``.

        Pages are discovered through the links of the ``article-index``
        element, skipping its first link. Each page's ``item-page`` container
        is appended after its <h2> and ``article-info`` block are removed;
        pager widgets, the separator bar and HTML comments are then stripped
        from ``appendtag``. Does nothing when there is no ``article-index``.
        """
        pager = soup.find(id='article-index')
        if not pager:
            return

        for a in pager.findAll('a')[1:]:
            href = a.get('href')
            if not href:
                # Anchor without a target: nothing to download.
                continue
            soup2 = self.index_to_soup('http://www.kuriergalicyjski.com' + href)
            pagetext = soup2.find(attrs={'class': 'item-page'})
            if pagetext is None:
                # Unexpected page layout: skip this page instead of crashing.
                continue
            if pagetext.h2:
                pagetext.h2.extract()
            info = pagetext.find(attrs={'class': 'article-info'})
            if info:
                info.extract()
            appendtag.insert(len(appendtag.contents), pagetext)

        # Remove navigation leftovers from the first page and from every
        # page that was just appended.
        leftovers = (
            {'id': 'article-index'},
            {'attrs': {'class': 'pagenavcounter'}},
            {'attrs': {'class': 'pagination'}},
            {'attrs': {'class': 'pagenav'}},
            {'attrs': {'style': self._SEPARATOR_STYLE}},
        )
        for selector in leftovers:
            for r in appendtag.findAll(**selector):
                r.extract()
        for comment in appendtag.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages, drop inline styles, pad captioned images
        with line breaks and make root-relative links absolute."""
        self.append_page(soup, soup.body)
        for r in soup.findAll(style=True):
            del r['style']
        for img in soup.findAll(attrs={'class': 'easy_img_caption smartresize'}):
            # One break before the last child, two after it.
            img.insert(len(img.contents) - 1, bs('<br />'))
            img.insert(len(img.contents), bs('<br /><br />'))
        for a in soup.findAll('a', href=True):
            if a['href'].startswith('/'):
                a['href'] = 'http://kuriergalicyjski.com' + a['href']
        return soup
|
|
@ -1,85 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = 'Marcin Urban 2011'
|
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class recipeMagic(BasicNewsRecipe):
    """Recipe for National Geographic Polska (www.national-geographic.pl).

    There are no RSS feeds: ``parse_index`` scrapes the news and article
    listing pages through ``find_articles``.
    """

    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    __modified_by__ = 'fenuks'
    description = u'Legenda wśród magazynów z historią sięgającą 120 lat'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
                    h1{text-align: center;}
                    h2{font-size: medium; font-weight: bold;}
                    .authordate {font-size: small; color: #696969;}
                    p.lead {font-weight: bold; text-align: center;}
                    .fot{font-size: x-small; color: #666666;} '''
    # Strip HTML comments from the raw page before parsing.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_tags = [
        dict(name='div', attrs={'class': 'add_inf'}),
        dict(name='div', attrs={'class': 'add_f'}),
    ]

    remove_attributes = ['width', 'height']
    feeds = []

    def find_articles(self, url):
        """Scrape the listing page at ``url`` and return its articles.

        Returns a list of dicts with the keys ``title``, ``url``, ``date``
        (always empty) and ``description``. The list is empty when the page
        has no ``arl`` container or that container has no <ul>; list items
        without a titled link are skipped.
        """
        articles = []
        soup = self.index_to_soup(url)
        tag = soup.find(attrs={'class': 'arl'})
        if not tag or tag.ul is None:
            return articles
        for item in tag.ul.findAll('li'):
            link = item.a
            if link is None or not link.get('href') or not link.get('title'):
                # Not an article entry (or markup changed): skip it rather
                # than abort the whole section.
                continue
            teaser = item.div.p if item.div is not None else None
            desc = teaser.string if teaser is not None else None
            articles.append({
                'title': link['title'],
                'url': link['href'],
                'date': '',
                'description': desc or '',
            })
        return articles

    def parse_index(self):
        """Return the two sections (news, articles) as (title, articles) pairs."""
        feeds = []
        feeds.append((u"Aktualności", self.find_articles(
            'http://www.national-geographic.pl/aktualnosci/')))
        feeds.append((u"Artykuły", self.find_articles(
            'http://www.national-geographic.pl/artykuly/')))
        return feeds

    def print_version(self, url):
        """Map an article or news URL to its print view; other URLs pass through."""
        if 'artykuly' in url:
            return url.replace('artykuly/pokaz', 'drukuj-artykul')
        elif 'aktualnosci' in url:
            return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
        else:
            return url

    def get_cover_url(self):
        """Return the cover image URL scraped from the current-issues page.

        When the expected container or image is missing, ``cover_url`` is
        left as it was and that value (``None`` if absent) is returned
        instead of raising.
        """
        soup = self.index_to_soup(
            'http://www.national-geographic.pl/biezace-wydania/')
        tag = soup.find(attrs={'class': 'txt jus'})
        img = tag.img if tag is not None else None
        if img is not None and img.get('src'):
            self.cover_url = img['src']
        return getattr(self, 'cover_url', None)
|
|
Loading…
x
Reference in New Issue
Block a user