Merge from trunk

This commit is contained in:
Charles Haley 2010-06-22 09:00:49 +01:00
commit fa1588478e
4 changed files with 150 additions and 2 deletions

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class LondonFreePress(BasicNewsRecipe):
    """Fetch the London Free Press (lfpress.com), an Ontario, Canada newspaper."""
    title                 = u'London Free Press'
    __author__            = 'rty'
    oldest_article        = 4
    max_articles_per_feed = 100
    # Fixed: attribute was misspelled 'pubisher', which calibre silently ignores.
    publisher             = 'lfpress.com'
    description           = 'Ontario Canada Newspaper'
    category              = 'News, Ontario, Canada'
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'en_CA'
    encoding              = 'utf-8'
    conversion_options    = {'linearize_tables': True}

    # Fixed: several URLs had trailing spaces inside the literals, which can
    # break feed fetching.
    feeds = [
        (u'News', u'http://www.lfpress.com/news/rss.xml'),
        (u'Comment', u'http://www.lfpress.com/comment/rss.xml'),
        (u'Entertainment', u'http://www.lfpress.com/entertainment/rss.xml'),
        (u'Money', u'http://www.lfpress.com/money/rss.xml'),
        (u'Life', u'http://www.lfpress.com/life/rss.xml'),
        (u'Sports', u'http://www.lfpress.com/sports/rss.xml'),
    ]

    # Keep only the article body container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]
    # Strip comment section, sidebars, and tab navigation.
    remove_tags = [
        dict(name='div', attrs={'id': 'commentsBottom'}),
        dict(name='div', attrs={'class': ['leftBox', 'bottomBox clear']}),
        dict(name='ul', attrs={'class': 'tabs dl contentSwap'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'bottomBox clear'}),
    ]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.vatican.va/news_services/or/or_quo
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LOsservatoreRomano_it(BasicNewsRecipe):
    """Fetch L'Osservatore Romano, the Vatican's daily newspaper (Italian edition).

    The paper publishes its daily text on a single fixed page, so
    parse_index returns exactly one synthetic article pointing at it.
    """
    title       = "L'Osservatore Romano"
    __author__  = 'Darko Miletic'
    # Fixed typo: 'Quiornale' -> 'Giornale' (Italian for newspaper).
    description = 'Giornale quotidiano, politico, religioso del Vaticano'
    publisher   = 'La Santa Sede'
    category    = 'news, politics, religion, Vatican'
    no_stylesheets = True
    INDEX       = 'http://www.vatican.va'
    FEEDPAGE    = INDEX + '/news_services/or/or_quo/index.html'
    CONTENTPAGE = INDEX + '/news_services/or/or_quo/text.html'
    use_embedded_content = False
    encoding    = 'cp1252'
    language    = 'it'
    publication_type = 'newspaper'

    conversion_options = {
        'comment':          description,
        'tags':             category,
        'publisher':        publisher,
        'language':         language,
        'linearize_tables': True,
    }

    def parse_index(self):
        """Return a single-section index with one article: today's full text page."""
        articles = [{
            'title':       self.title,
            'date':        '',
            'url':         self.CONTENTPAGE,
            'description': '',
        }]
        return [(self.title, articles)]

    def preprocess_html(self, soup):
        """Strip inline styles and normalize images before conversion."""
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

View File

@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277129332(BasicNewsRecipe):
    """Fetch People Daily (people.com.cn), a Chinese-language newspaper,
    following 'next page' links to stitch multi-page articles together."""
    title                 = u'People Daily - China'
    oldest_article        = 2
    max_articles_per_feed = 100
    __author__            = 'rty'
    # Fixed: attribute was misspelled 'pubisher', which calibre silently ignores.
    publisher             = 'people.com.cn'
    description           = 'People Daily Newspaper'
    language              = 'zh'
    category              = 'News, China'
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'GB2312'
    conversion_options    = {'linearize_tables': True}
    # Fixed: append_page() references self.INDEX but it was never defined,
    # so following a 'next page' link raised AttributeError.
    # NOTE(review): assumes pager hrefs are site-relative — confirm against live pages.
    INDEX                 = 'http://www.people.com.cn'

    feeds = [
        (u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
        (u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
        (u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
        (u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
        (u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'left_content'}),
    ]
    remove_tags = [
        dict(name='table', attrs={'class': 'title'}),
    ]
    remove_tags_after = [
        dict(name='table', attrs={'class': 'bianji'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Recursively follow the 'next page' image link and splice each
        continuation's article body into appendtag at position."""
        pager = soup.find('img', attrs={'src': '/img/next_b.gif'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'left_content'})
            # Recurse first so deeper pages are appended inside this page's body.
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Inject charset/language metadata, strip inline styles, and pull in
        the remaining pages of multi-page articles."""
        # Fixed: content-language was 'utf-8' (an encoding, not a language code).
        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="zh" />'
        soup.head.insert(0, mtag)
        # Fixed: loop selects tags that HAVE a style attribute but deleted
        # 'form', which raises KeyError and never removes the styles.
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        return soup

View File

@ -190,11 +190,16 @@ The most likely cause of this is your antivirus program. Try temporarily disabli
Why is my device not detected in linux?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|app| uses something called SYSFS to detect devices in linux. The linux kernel can export two versions of SYSFS, one of which is deprecated. Some linux distributions still ship with kernels that support the deprecated version of SYSFS, even though it was deprecated a long time ago. In this case, device detection in |app| will not work. You can check which version of SYSFS is exported by your kernel with the following command::
|app| needs your linux kernel to have been setup correctly to detect devices. If your devices are not detected, perform the following tests::
grep SYSFS_DEPRECATED /boot/config-`uname -r`
You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``. If you don't you have to either recompile your kernel with the correct setting, or upgrade your linux distro to a more modern version, where this will not be set.
You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``.
Also, ::
grep CONFIG_SCSI_MULTI_LUN /boot/config-`uname -r`
must return ``CONFIG_SCSI_MULTI_LUN=y``. If you don't see either, you have to recompile your kernel with the correct settings.
Library Management
------------------