mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
937d6432a1
45
resources/recipes/tagesan.recipe
Normal file
45
resources/recipes/tagesan.recipe
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    """Recipe describing the RSS feeds of tagesanzeiger.ch.

    The class only declares settings consumed by ``BasicNewsRecipe`` and
    overrides ``print_version`` so that each article URL is rewritten to
    its ``/print.html`` variant.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Tagesanzeiger'
    __author__ = 'noxxx'
    publisher = u'Tamedia AG'
    description = 'tagesanzeiger.ch: Nichts verpassen'
    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
    language = 'de'

    # --- Feed limits (interpreted by the base class) --------------------
    oldest_article = 2
    max_articles_per_feed = 100

    # The metadata above is forwarded to the conversion step as well.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specifications handed to the base class: images, ad/navigation
    # containers, mail/comment forms, scripts and small-print paragraphs.
    remove_tags = [
        dict(name='img'),
        dict(
            name='div',
            attrs={'class': [
                'swissquote ad',
                'boxNews',
                'centerAD',
                'contentTabs2',
                'sbsLabel',
            ]},
        ),
        dict(
            name='div',
            attrs={'id': [
                'colRightAd',
                'singleRight',
                'singleSmallRight',
                'MailInfo',
                'metaLine',
                'sidebarSky',
                'contentFooter',
                'commentInfo',
                'commentInfo2',
                'commentInfo3',
                'footerBottom',
                'clear',
                'boxExclusiv',
                'singleLogo',
                'navSearch',
                'headerLogin',
                'headerBottomRight',
                'horizontalNavigation',
                'subnavigation',
                'googleAdSense',
                'footerAd',
                'contentbox',
                'articleGalleryNav',
            ]},
        ),
        dict(name='form', attrs={'id': ['articleMailForm', 'commentform']}),
        dict(name='div', attrs={'style': ['position:absolute']}),
        dict(name='script', attrs={'type': ['text/javascript']}),
        dict(
            name='p',
            attrs={'class': [
                'schreiben',
                'smallPrint',
                'charCounter',
                'caption',
            ]},
        ),
    ]

    # (section title, RSS URL) pairs, one per site section.
    feeds = [
        (u'Front', u'http://www.tagesanzeiger.ch/rss.html'),
        (u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'),
        (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'),
        (u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html'),
        (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html'),
        (u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html'),
        (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'),
        (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'),
        (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'),
        (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'),
        (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'),
        (u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``/print.html`` suffix appended."""
        print_suffix = '/print.html'
        return url + print_suffix
|
||||||
|
|
52
resources/recipes/the_marker.recipe
Normal file
52
resources/recipes/the_marker.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    """Recipe describing the RSS feeds of TheMarker (Hebrew financial news).

    The class declares settings consumed by ``BasicNewsRecipe`` and
    overrides ``print_version`` to map article URLs to their
    print-friendly counterparts.
    """

    # --- Publication metadata -------------------------------------------
    title = u'TheMarker'
    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    language = 'he'

    # --- Download / feed settings (interpreted by the base class) -------
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 10
    remove_tags = [dict(name='tr', attrs={'bgcolor': ['#738A94']})]

    # Hebrew is written right-to-left, so force RTL on the generated pages.
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'

    # (section title, RSS URL) pairs, one per site section.
    feeds = [
        (u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
        (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
        (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
        (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
        (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
        (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
        (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
        (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
        (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
        (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
        (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for the article at ``url``.

        Two URL families are handled:

        * URLs containing ``it.themarker.com`` (matched case-insensitively)
          are mapped to ``http://it.themarker.com/tmit/PrintArticle/<rest>``
          where ``<rest>`` is the text following the first ``article/``.
        * All other URLs are mapped to the ``printFriendly.jhtml`` page,
          using the text between the first and second ``=`` as story id.

        If ``url`` is empty, or lacks the separator needed for its family,
        it is returned unchanged instead of raising (the previous code
        raised ``IndexError``/``UnboundLocalError`` in those cases).
        NOTE(review): this assumes the caller can cope with the regular
        article page when no print URL can be derived -- confirm.
        """
        if not url:
            return url

        # The earlier implementation looped over every character of the
        # URL and recompiled/re-ran this same match each time; one search
        # gives the identical answer.
        if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
            pieces = url.split("article/")
            if len(pieces) > 1:
                return 'http://it.themarker.com/tmit/PrintArticle/' + pieces[1]
            return url

        pieces = url.split("=")
        if len(pieces) > 1:
            return 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + pieces[1] + '.xml'
        return url
|
@ -443,9 +443,9 @@ class KOBO(USBMS):
|
|||||||
|
|
||||||
# Reset Im_Reading list in the database
|
# Reset Im_Reading list in the database
|
||||||
if oncard == 'carda':
|
if oncard == 'carda':
|
||||||
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
|
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
|
||||||
elif oncard != 'carda' and oncard != 'cardb':
|
elif oncard != 'carda' and oncard != 'cardb':
|
||||||
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
|
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cursor.execute (query)
|
cursor.execute (query)
|
||||||
|
@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter',
|
|||||||
|
|
||||||
OptionRecommendation(name='chapter',
|
OptionRecommendation(name='chapter',
|
||||||
recommended_value="//*[((name()='h1' or name()='h2') and "
|
recommended_value="//*[((name()='h1' or name()='h2') and "
|
||||||
r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
|
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
|
||||||
"= 'chapter']", level=OptionRecommendation.LOW,
|
"= 'chapter']", level=OptionRecommendation.LOW,
|
||||||
help=_('An XPath expression to detect chapter titles. The default '
|
help=_('An XPath expression to detect chapter titles. The default '
|
||||||
'is to consider <h1> or <h2> tags that contain the words '
|
'is to consider <h1> or <h2> tags that contain the words '
|
||||||
|
Loading…
x
Reference in New Issue
Block a user