diff --git a/resources/recipes/tagesan.recipe b/resources/recipes/tagesan.recipe new file mode 100644 index 0000000000..8514162598 --- /dev/null +++ b/resources/recipes/tagesan.recipe @@ -0,0 +1,45 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1284927619(BasicNewsRecipe): + title = u'Tagesanzeiger' + publisher = u'Tamedia AG' + oldest_article = 2 + __author__ = 'noxxx' + max_articles_per_feed = 100 + description = 'tagesanzeiger.ch: Nichts verpassen' + category = 'News, Politik, Nachrichten, Schweiz, Zürich' + language = 'de' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + remove_tags = [ + dict(name='img') + ,dict(name='div',attrs={'class':['swissquote ad','boxNews','centerAD','contentTabs2','sbsLabel']}) + ,dict(name='div',attrs={'id':['colRightAd','singleRight','singleSmallRight','MailInfo','metaLine','sidebarSky','contentFooter','commentInfo','commentInfo2','commentInfo3','footerBottom','clear','boxExclusiv','singleLogo','navSearch','headerLogin','headerBottomRight','horizontalNavigation','subnavigation','googleAdSense','footerAd','contentbox','articleGalleryNav']}) + ,dict(name='form',attrs={'id':['articleMailForm','commentform']}) + ,dict(name='div',attrs={'style':['position:absolute']}) + ,dict(name='script',attrs={'type':['text/javascript']}) + ,dict(name='p',attrs={'class':['schreiben','smallPrint','charCounter','caption']}) + ] + feeds = [ + (u'Front', u'http://www.tagesanzeiger.ch/rss.html') + ,(u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html') + ,(u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html') + ,(u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html') + ,(u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html') + ,(u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html') + ,(u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html') + ,(u'Wirtschaft', 
def print_version(self, url):
    """Return the address of the print view of a Tagesanzeiger article.

    The print view is addressed by appending ``/print.html`` to the
    article URL; *url* is not otherwise inspected or normalised.
    """
    print_suffix = '/print.html'
    printable = url + print_suffix
    return printable
def print_version(self, url):
    """Return the printer-friendly URL for a TheMarker article.

    Articles whose URL contains ``it.themarker.com`` (matched
    case-insensitively) are mapped to that site's ``PrintArticle`` page,
    using the text that follows the first ``article/`` in *url*.  Every
    other article is mapped to the ``printFriendly.jhtml`` page on
    www.themarker.com, using the text that follows the first ``=`` in
    *url*.

    If *url* is empty, or lacks the ``article/`` / ``=`` marker needed to
    build the print address, *url* is returned unchanged rather than
    raising ``UnboundLocalError`` or ``IndexError``.
    """
    if not url:
        return url

    # One search is enough: the outcome depends only on the URL as a whole,
    # so there is no need to repeat the check for each character of it.
    if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
        pieces = url.split("article/")
        if len(pieces) < 2:
            # No article path to graft onto the print page.
            return url
        return 'http://it.themarker.com/tmit/PrintArticle/' + pieces[1]

    pieces = url.split("=")
    if len(pieces) < 2:
        # No query value to use as the story identifier.
        return url
    return ('http://www.themarker.com/ibo/misc/printFriendly.jhtml'
            '?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F'
            + pieces[1] + '.xml')
\'file:///mnt/sd/%\'' try: cursor.execute (query) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8f2550733a..25b6d1aaae 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter', OptionRecommendation(name='chapter', recommended_value="//*[((name()='h1' or name()='h2') and " - r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class " + r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class " "= 'chapter']", level=OptionRecommendation.LOW, help=_('An XPath expression to detect chapter titles. The default ' 'is to consider

or

tags that contain the words '