CBC Canada by rty and Haaretz by DM

This commit is contained in:
Kovid Goyal 2010-06-06 22:20:07 -06:00
commit 744032dd21
5 changed files with 96 additions and 6 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 712 B

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1275798572(BasicNewsRecipe):
    """Calibre news recipe that fetches CBC (Canada) news from its RSS feeds."""

    # --- Metadata -----------------------------------------------------
    title = u'CBC Canada'
    publisher = 'www.cbc.ca'
    __author__ = 'rty'
    category = 'news'
    # Declared exactly once. A second, later ``language = 'en'`` assignment
    # used to override this value, discarding the Canadian locale.
    language = 'en_CA'

    # --- Download behaviour -------------------------------------------
    oldest_article = 4
    max_articles_per_feed = 100
    use_embedded_content = False

    # --- Clean-up of the downloaded pages -----------------------------
    remove_javascript = True
    no_stylesheets = True
    # Only the story headline and story body containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['storyhead', 'storybody']}),
    ]
    remove_tags_after = dict(id=['socialtools'])

    # --- Artwork ------------------------------------------------------
    masthead_url = 'http://www.cbc.ca/includes/gfx/cbcnews_logo_09.gif'
    cover_url = 'http://img692.imageshack.us/img692/2814/cbc.png'

    # --- Feeds: (section title, RSS URL) pairs ------------------------
    feeds = [
        (u'Top Stories', u'http://rss.cbc.ca/lineup/topstories.xml'),
        (u'World', u'http://rss.cbc.ca/lineup/world.xml'),
        (u'National', u'http://rss.cbc.ca/lineup/canada.xml'),
        (u'Manitoba', u'http://rss.cbc.ca/lineup/canada-manitoba.xml'),
        (u'Politics', u'http://rss.cbc.ca/lineup/politics.xml'),
        (u'Tech & Science', u'http://rss.cbc.ca/lineup/technology.xml'),
        (u'Books', u'http://rss.cbc.ca/lineup/arts-books.xml'),
    ]

View File

@ -0,0 +1,57 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
haaretz.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Haaretz_en(BasicNewsRecipe):
    """Calibre news recipe for the English edition of haaretz.com."""

    # --- Metadata (also reused below in conversion_options) -----------
    title = 'Haaretz in English'
    __author__ = 'Darko Miletic'
    description = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. '
    publisher = 'haaretz.com'
    category = 'news, politics, Israel'
    language = 'en_IL'
    publication_type = 'newspaper'
    masthead_url = 'http://www.haaretz.com/images/logos/logoGrey.gif'

    # --- Download behaviour -------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Styling ------------------------------------------------------
    no_stylesheets = True
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '

    # Metadata handed to the conversion step; values mirror the class
    # attributes defined above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Page clean-up rules ------------------------------------------
    keep_only_tags = [dict(attrs={'id': 'content'})]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(attrs={'id': 'innerArticle'})
    remove_tags = [
        dict(name='div', attrs={'class': ['rightcol']}),
        dict(name='table'),
    ]

    # --- Feeds: (section title, RSS URL) pairs ------------------------
    feeds = [
        (u'Opinion', u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false'),
        (u'Defense and diplomacy', u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false'),
        (u'National', u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false'),
        (u'International', u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false'),
        (u'Jewish World', u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false'),
        (u'Business', u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false'),
        (u'Real Estate', u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false'),
        (u'Features', u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false'),
        (u'Arts and leisure', u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false'),
        (u'Books', u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false'),
        (u'Food and Wine', u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false'),
        (u'Sports', u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false'),
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        :param soup: parsed article document exposing ``findAll``.
        :return: the same ``soup`` object, modified in place.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup

View File

@ -1089,7 +1089,6 @@ class DeviceBooksModel(BooksModel): # {{{
idx = self.map[row]
if cname == 'title' :
self.db[idx].title = val
self.db[idx].title_sorter = val
elif cname == 'authors':
self.db[idx].authors = string_to_authors(val)
elif cname == 'collections':

View File

@ -8,16 +8,25 @@ Customizing |app|
==================================
|app| has a highly modular design. Various parts of it can be customized. You can learn how to create
*recipes* to add new sources of online content to |app| in the Section :ref:`news`. Here, you will learn how to
use *plugins* to customize and control various aspects of |app|'s behavior.
There are different kinds of plugins, corresponding to different aspects of |app|. As more and more aspects of |app|
are modularized, new plugin types will be added.
*recipes* to add new sources of online content to |app| in the Section :ref:`news`. Here, you will learn,
first, how to use environment variables and *tweaks* to customize |app|'s behavior and then how to
use *plugins* to add functionality to |app|.
.. contents::
:depth: 2
:local:
Environment variables
-----------------------
* ``CALIBRE_CONFIG_DIRECTORY``
* ``CALIBRE_OVERRIDE_DATABASE_PATH``
* ``CALIBRE_DEVELOP_FROM``
* ``CALIBRE_OVERRIDE_LANG``
* ``SYSFS_PATH``
* ``http_proxy``
A Hello World plugin
------------------------