Pull from trunk

2025-09-14 16:18:05 -04:00 · 2010-09-20 08:30:00 -06:00 · 2010-09-20 08:30:00 -06:00 · 937d6432a1
commit 937d6432a1
parent 97c3127cb3 656c88792d
4 changed files with 100 additions and 3 deletions
--- a/resources/recipes/tagesan.recipe
+++ b/resources/recipes/tagesan.recipe
@ -0,0 +1,45 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1284927619(BasicNewsRecipe):
+    # Recipe for the Tagesanzeiger RSS feeds published on tagesanzeiger.ch.
+    title = u'Tagesanzeiger'
+    __author__ = 'noxxx'
+    publisher = u'Tamedia AG'
+    description = 'tagesanzeiger.ch: Nichts verpassen'
+    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    language = 'de'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    conversion_options = {'comments': description, 'tags': category,
+                          'language': language, 'publisher': publisher}
+
+    # Elements to strip, each selected by tag name and (optionally) attributes.
+    remove_tags = [
+        {'name': 'img'},
+        {'name': 'div', 'attrs': {'class': ['swissquote ad', 'boxNews', 'centerAD', 'contentTabs2', 'sbsLabel']}},
+        {'name': 'div', 'attrs': {'id': ['colRightAd', 'singleRight', 'singleSmallRight', 'MailInfo', 'metaLine', 'sidebarSky', 'contentFooter', 'commentInfo', 'commentInfo2', 'commentInfo3', 'footerBottom', 'clear', 'boxExclusiv', 'singleLogo', 'navSearch', 'headerLogin', 'headerBottomRight', 'horizontalNavigation', 'subnavigation', 'googleAdSense', 'footerAd', 'contentbox', 'articleGalleryNav']}},
+        {'name': 'form', 'attrs': {'id': ['articleMailForm', 'commentform']}},
+        {'name': 'div', 'attrs': {'style': ['position:absolute']}},
+        {'name': 'script', 'attrs': {'type': ['text/javascript']}},
+        {'name': 'p', 'attrs': {'class': ['schreiben', 'smallPrint', 'charCounter', 'caption']}},
+    ]
+
+    # (section title, RSS URL) pairs, one per site section.
+    feeds = [
+        (u'Front', u'http://www.tagesanzeiger.ch/rss.html'),
+        (u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'),
+        (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'),
+        (u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html'),
+        (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html'),
+        (u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html'),
+        (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'),
+        (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'),
+        (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'),
+        (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'),
+        (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'),
+        (u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html'),
+    ]
+
+    def print_version(self, url):
+        return url + '/print.html'  # the article URL with '/print.html' appended
+
--- a/resources/recipes/the_marker.recipe
+++ b/resources/recipes/the_marker.recipe
@ -0,0 +1,52 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1283848012(BasicNewsRecipe):
+    # Recipe for TheMarker (Hebrew financial news); extra_css forces right-to-left text.
+    title = u'TheMarker'
+    __author__ = 'TonyTheBookworm, Marbs'
+    description = 'TheMarker Financial News in Hebrew'
+    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
+    language = 'he'
+    timefmt = '[%a, %d %b, %Y]'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    simultaneous_downloads = 5
+    remove_javascript = True
+    remove_tags = [dict(name='tr', attrs={'bgcolor': ['#738A94']})]
+    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
+
+    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
+             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
+             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
+             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
+             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
+             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
+             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
+             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
+             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
+             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
+             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
+
+    def print_version(self, url):
+        '''
+        Return the printer-friendly URL for the article at url.
+
+        URLs containing it.themarker.com map to the PrintArticle path plus the
+        segment that follows the first "article/". All other URLs map to
+        printFriendly.jhtml, using the segment that follows the first "=" as
+        the story id. An empty or None url, or one lacking the expected
+        marker, is returned unchanged instead of raising.
+        '''
+        if not url:
+            return url
+        # Case-insensitive host test, done once rather than once per character.
+        if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
+            pieces = url.split("article/")
+            if len(pieces) > 1:
+                return 'http://it.themarker.com/tmit/PrintArticle/' + pieces[1]
+            return url
+        pieces = url.split("=")
+        if len(pieces) > 1:
+            return 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + pieces[1] + '.xml'
+        return url
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -443,9 +443,9 @@ class KOBO(USBMS):

        # Reset Im_Reading list in the database
        if oncard == 'carda':
-            query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
+            query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
        elif oncard != 'carda' and oncard != 'cardb':
-            query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
+            query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
                    
        try:
            cursor.execute (query)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter',

 OptionRecommendation(name='chapter',
        recommended_value="//*[((name()='h1' or name()='h2') and "
-              r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
+              r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
              "= 'chapter']", level=OptionRecommendation.LOW,
            help=_('An XPath expression to detect chapter titles. The default '
                'is to consider <h1> or <h2> tags that contain the words '