Various Russian News sources by Darko Miletic

This commit is contained in:
Kovid Goyal 2010-05-10 07:51:30 -06:00
parent f8107ebffd
commit 2288b24137
10 changed files with 190 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1003 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 475 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 353 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 534 B

View File

@ -0,0 +1,31 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.aif.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AIF_ru(BasicNewsRecipe):
    """Calibre news recipe for www.aif.ru ("Arguments & Facts", Russian)."""

    # Descriptive metadata for the generated publication.
    title = 'Arguments & Facts - Russian'
    __author__ = 'Darko Miletic'
    description = 'News from Russia'
    publisher = 'AIF'
    category = 'news, politics, Russia'
    language = 'ru'
    publication_type = 'magazine'

    # Fetch settings; their exact meaning is defined by BasicNewsRecipe
    # (see the calibre recipe API documentation).
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'cp1251'

    # Styling: the site's own stylesheets are disabled and replaced by a
    # small CSS snippet that registers a "sans1" font-face and sets the
    # body font stack.
    no_stylesheets = True
    extra_css = (
        ' @font-face {font-family: "sans1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} '
    )

    # Content filters: keep the <div id="inner"> container, then drop the
    # listed embedded/auxiliary tags plus the photo and font-resize blocks.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'inner'}},
    ]
    remove_tags = [
        {'name': ['iframe', 'object', 'link', 'base', 'input', 'img']},
        {'name': 'div', 'attrs': {'class': 'photo'}},
        {'name': 'p', 'attrs': {'class': 'resizefont'}},
    ]

    # (label, RSS URL) pairs.
    feeds = [
        (u'News', u'http://www.aif.ru/rss/all.php'),
    ]

View File

@ -0,0 +1,28 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
izvestia.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Izvestia(BasicNewsRecipe):
    """Calibre news recipe for izvestia.ru."""

    # Descriptive metadata for the generated publication.
    title = 'Izvestia'
    __author__ = 'Darko Miletic'
    description = 'News from Russia'
    publisher = 'Izvestia'
    category = 'news, politics, Russia'
    language = 'ru'
    publication_type = 'newspaper'
    masthead_url = 'http://images.izvestia.ru/izv/sys/logo.gif'

    # Fetch settings; their exact meaning is defined by BasicNewsRecipe
    # (see the calibre recipe API documentation).
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'cp1251'

    # Styling: the site's own stylesheets are disabled and replaced by a
    # small CSS snippet that registers a "sans1" font-face and sets the
    # body font stack.
    no_stylesheets = True
    extra_css = (
        ' @font-face {font-family: "sans1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} '
    )

    # Content filters: keep the <div class="newsFull"> container, drop the
    # listed embedded/auxiliary tags, and trim what precedes the
    # <h1 class="statya"> heading.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'newsFull'}},
    ]
    remove_tags = [
        {'name': ['iframe', 'object', 'img', 'link', 'base']},
    ]
    remove_tags_before = {'name': 'h1', 'attrs': {'class': 'statya'}}

    # (label, RSS URL) pairs.
    feeds = [
        (u'Daily edition',
         u'http://rss.feedsportal.com/c/32171/f/424076/index.rss'),
    ]

View File

@ -0,0 +1,42 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.kommersant.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kommersant_ru(BasicNewsRecipe):
    """Calibre news recipe for www.kommersant.ru."""

    # Descriptive metadata for the generated publication.
    title = 'Kommersant'
    __author__ = 'Darko Miletic'
    description = 'News from Russia'
    publisher = 'Kommersant'
    category = 'news, politics, Russia'
    language = 'ru'
    publication_type = 'newspaper'
    masthead_url = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif'

    # Fetch settings; their exact meaning is defined by BasicNewsRecipe
    # (see the calibre recipe API documentation).
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'cp1251'

    # Styling: the site's own stylesheets are disabled; the replacement CSS
    # sets the body font and adds spacing/emphasis for the subtitle span and
    # the author, paragraph and "vvodka" classes.
    no_stylesheets = True
    extra_css = (
        ' @font-face {font-family: "sans1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: Arial, sans1, sans-serif}'
        ' span#ctl00_ContentPlaceHolderStyle_LabelSubTitle'
        '{margin-bottom: 1em; display: block}'
        ' .author{margin-bottom: 1em; display: block}'
        ' .paragraph{margin-bottom: 1em; display: block}'
        ' .vvodka{font-weight: bold; margin-bottom: 1em} '
    )

    # Mirrors the metadata attributes above into the conversion options.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Content filters: keep the header panel (matched by id) and the
    # lead/paragraph/author elements (matched by class); drop the listed
    # embedded/auxiliary tags.
    keep_only_tags = [
        {'attrs': {'id': 'ctl00_ContentPlaceHolderStyle_PanelHeader'}},
        {'attrs': {'class': ['vvodka', 'paragraph', 'author']}},
    ]
    remove_tags = [
        {'name': ['iframe', 'object', 'link', 'img', 'base']},
    ]

    # (label, RSS URL) pairs.
    feeds = [
        (u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml'),
    ]

    def print_version(self, url):
        """Map a feed article URL to its print URL.

        Every 'doc-rss.aspx' in ``url`` is replaced with 'doc.aspx', and
        '&print=true' is appended to the result.
        """
        article_url = url.replace('doc-rss.aspx', 'doc.aspx')
        return article_url + '&print=true'

View File

@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.rian.ru
'''
from calibre.web.feeds.news import BasicNewsRecipe
class RIANovosti(BasicNewsRecipe):
    """Calibre news recipe for www.rian.ru (RIA Novosti, Russian)."""

    # Descriptive metadata for the generated publication.
    title = 'RIA Novosti - Russian'
    __author__ = 'Darko Miletic'
    description = 'News from Russia'
    publisher = 'RIA'
    category = 'news, politics, Russia'
    language = 'ru'
    publication_type = 'newsportal'
    masthead_url = 'http://img.beta.rian.ru/images/22868/43/228684314.jpg'

    # Fetch settings; their exact meaning is defined by BasicNewsRecipe
    # (see the calibre recipe API documentation).
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf8'

    # Styling: the site's own stylesheets are disabled and replaced by a
    # small CSS snippet that registers a "sans1" font-face and sets the
    # body font stack.
    no_stylesheets = True
    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial,Helvetica,sans1,sans-serif} '

    # Content filters: trim the page to the span between the first <h1> and
    # the <div class="text"> element, and drop the listed
    # embedded/auxiliary tags.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'text'})
    remove_tags = [dict(name=['iframe', 'object', 'link', 'img', 'base'])]

    # (label, RSS URL) pairs, one per site section.
    feeds = [
        (u'Frontpage', u'http://www.rian.ru/export/rss2/lenta/index.xml'),
        (u'Politics', u'http://www.rian.ru/export/rss2/politics/index.xml'),
        (u'World', u'http://www.rian.ru/export/rss2/world/index.xml'),
        (u'Economy', u'http://www.rian.ru/export/rss2/economy/index.xml'),
        (u'Society', u'http://www.rian.ru/export/rss2/society/index.xml'),
        (u'Moscow', u'http://www.rian.ru/export/rss2/moscow/index.xml'),
        (u'Defense', u'http://www.rian.ru/export/rss2/defense_safety/index.xml'),
        (u'Science', u'http://www.rian.ru/export/rss2/science/index.xml'),
        # Label was misspelled 'Turism'; the URL slug was already correct.
        (u'Tourism', u'http://www.rian.ru/export/rss2/tourism/index.xml'),
        (u'Culture', u'http://www.rian.ru/export/rss2/culture/index.xml'),
    ]

    def print_version(self, url):
        """Map an article URL to its print URL.

        Every '.html' in ``url`` is replaced with '-print.html'.
        """
        return url.replace('.html', '-print.html')

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
''' Design documentation {{{
Storage paradigm {{{
* Agnostic to storage paradigm (i.e. no book per folder assumptions)
* Two separate concepts: A store and collection
A store is a backend, like a sqlite database associated with a path on
the local filesystem, or a cloud based storage solution.
A collection is a user defined group of stores. Most of the logic for
data manipulation sorting/searching/restrictions should be in the collection
class. The collection class should transparently handle the
conversion from store name + id to row number in the collection.
* Not sure how feasible it is to allow many-many maps between stores
and collections.
}}}
Event system {{{
* Comprehensive event system that other components can subscribe to
* Subscribers should be able to temporarily block receiving events
* Should event dispatch be asynchronous?
* Track last modified time for metadata and each format
}}}
}}}'''
# Imports {{{
# }}}