Merge from trunk

This commit is contained in:
Charles Haley 2010-06-22 09:00:49 +01:00
commit fa1588478e
4 changed files with 150 additions and 2 deletions

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class LondonFreePress(BasicNewsRecipe):
    """Fetch the London Free Press (lfpress.com), an Ontario, Canada newspaper."""
    title                 = u'London Free Press'
    __author__            = 'rty'
    oldest_article        = 4
    max_articles_per_feed = 100
    # Fixed: attribute was misspelled 'pubisher', which calibre silently ignores.
    publisher             = 'lfpress.com'
    description           = 'Ontario Canada Newspaper'
    category              = 'News, Ontario, Canada'
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'en_CA'
    encoding              = 'utf-8'
    conversion_options    = {'linearize_tables': True}

    # Fixed: several URLs had trailing spaces inside the literals, which can
    # break feed fetching.
    feeds = [
        (u'News', u'http://www.lfpress.com/news/rss.xml'),
        (u'Comment', u'http://www.lfpress.com/comment/rss.xml'),
        (u'Entertainment', u'http://www.lfpress.com/entertainment/rss.xml'),
        (u'Money', u'http://www.lfpress.com/money/rss.xml'),
        (u'Life', u'http://www.lfpress.com/life/rss.xml'),
        (u'Sports', u'http://www.lfpress.com/sports/rss.xml'),
    ]

    # Keep only the article body container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]
    # Strip comment section, sidebars, and tab navigation.
    remove_tags = [
        dict(name='div', attrs={'id': 'commentsBottom'}),
        dict(name='div', attrs={'class': ['leftBox', 'bottomBox clear']}),
        dict(name='ul', attrs={'class': 'tabs dl contentSwap'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'bottomBox clear'}),
    ]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.vatican.va/news_services/or/or_quo
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LOsservatoreRomano_it(BasicNewsRecipe):
    """Fetch L'Osservatore Romano, the Vatican's daily newspaper (Italian edition).

    The paper publishes its daily text on a single fixed page, so
    parse_index returns exactly one synthetic article pointing at it.
    """
    title       = "L'Osservatore Romano"
    __author__  = 'Darko Miletic'
    # Fixed typo: 'Quiornale' -> 'Giornale' (Italian for newspaper).
    description = 'Giornale quotidiano, politico, religioso del Vaticano'
    publisher   = 'La Santa Sede'
    category    = 'news, politics, religion, Vatican'
    no_stylesheets = True
    INDEX       = 'http://www.vatican.va'
    FEEDPAGE    = INDEX + '/news_services/or/or_quo/index.html'
    CONTENTPAGE = INDEX + '/news_services/or/or_quo/text.html'
    use_embedded_content = False
    encoding    = 'cp1252'
    language    = 'it'
    publication_type = 'newspaper'

    conversion_options = {
        'comment':          description,
        'tags':             category,
        'publisher':        publisher,
        'language':         language,
        'linearize_tables': True,
    }

    def parse_index(self):
        """Return a single-section index with one article: today's full text page."""
        articles = [{
            'title':       self.title,
            'date':        '',
            'url':         self.CONTENTPAGE,
            'description': '',
        }]
        return [(self.title, articles)]

    def preprocess_html(self, soup):
        """Strip inline styles and normalize images before conversion."""
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

View File

@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277129332(BasicNewsRecipe):
    """Fetch People Daily (people.com.cn), a Chinese-language newspaper,
    following 'next page' links to stitch multi-page articles together."""
    title                 = u'People Daily - China'
    oldest_article        = 2
    max_articles_per_feed = 100
    __author__            = 'rty'
    # Fixed: attribute was misspelled 'pubisher', which calibre silently ignores.
    publisher             = 'people.com.cn'
    description           = 'People Daily Newspaper'
    language              = 'zh'
    category              = 'News, China'
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'GB2312'
    conversion_options    = {'linearize_tables': True}
    # Fixed: append_page() references self.INDEX but it was never defined,
    # so following a 'next page' link raised AttributeError.
    # NOTE(review): assumes pager hrefs are site-relative — confirm against live pages.
    INDEX                 = 'http://www.people.com.cn'

    feeds = [
        (u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
        (u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
        (u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
        (u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
        (u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'left_content'}),
    ]
    remove_tags = [
        dict(name='table', attrs={'class': 'title'}),
    ]
    remove_tags_after = [
        dict(name='table', attrs={'class': 'bianji'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Recursively follow the 'next page' image link and splice each
        continuation's article body into appendtag at position."""
        pager = soup.find('img', attrs={'src': '/img/next_b.gif'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'left_content'})
            # Recurse first so deeper pages are appended inside this page's body.
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Inject charset/language metadata, strip inline styles, and pull in
        the remaining pages of multi-page articles."""
        # Fixed: content-language was 'utf-8' (an encoding, not a language code).
        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="zh" />'
        soup.head.insert(0, mtag)
        # Fixed: loop selects tags that HAVE a style attribute but deleted
        # 'form', which raises KeyError and never removes the styles.
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        return soup

View File

@ -190,11 +190,16 @@ The most likely cause of this is your antivirus program. Try temporarily disabli
Why is my device not detected in linux?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|app| uses something called SYSFS to detect devices in linux. The linux kernel can export two versions of SYSFS, one of which is deprecated. Some linux distributions still ship with kernels that support the deprecated version of SYSFS, even though it was deprecated a long time ago. In this case, device detection in |app| will not work. You can check which version of SYSFS is exported by your kernel with the following command::
|app| needs your linux kernel to have been setup correctly to detect devices. If your devices are not detected, perform the following tests::
grep SYSFS_DEPRECATED /boot/config-`uname -r`
You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``. If you don't you have to either recompile your kernel with the correct setting, or upgrade your linux distro to a more modern version, where this will not be set.
You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``.
Also, ::
grep CONFIG_SCSI_MULTI_LUN /boot/config-`uname -r`
must return ``CONFIG_SCSI_MULTI_LUN=y``. If you don't see either, you have to recompile your kernel with the correct settings.
Library Management
------------------