mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve The Marker
This commit is contained in:
parent
92de7e1807
commit
631afb7eac
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||||
description = 'TheMarker Financial News in Hebrew'
|
description = 'TheMarker Financial News in Hebrew'
|
||||||
__author__ = 'TonyTheBookworm, Marbs'
|
__author__ = 'Marbs'
|
||||||
cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
|
cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
|
||||||
title = u'TheMarker'
|
title = u'TheMarker'
|
||||||
language = 'he'
|
language = 'he'
|
||||||
@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ]
|
keep_only_tags =dict(name='div', attrs={'id':'content'})
|
||||||
max_articles_per_feed = 10
|
remove_attributes = ['width','float','margin-left']
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}) ,
|
||||||
|
dict(name='a', attrs={'href':['/misc/mobile']}) ,
|
||||||
|
dict(name='span', attrs={'class':['post-summ']}) ]
|
||||||
|
max_articles_per_feed = 100
|
||||||
extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
|
extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
|
||||||
feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
|
feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
|
||||||
(u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
|
(u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
|
||||||
(u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
|
(u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
|
||||||
(u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
|
(u'Global', u'http://www.themarker.com/cmlink/1.605658'),
|
||||||
(u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
|
(u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
|
||||||
(u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
|
(u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
|
||||||
(u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
|
(u'Law', u'http://www.themarker.com/cmlink/1.605664'),
|
||||||
(u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
|
(u'Media', u'http://www.themarker.com/cmlink/1.605660'),
|
||||||
(u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
|
(u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
|
||||||
(u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
|
(u'Career', u'http://www.themarker.com/cmlink/1.605665'),
|
||||||
(u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
|
(u'Car', u'http://www.themarker.com/cmlink/1.605663'),
|
||||||
|
(u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
|
||||||
|
(u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
split1 = url.split("=")
|
#split1 = url.split("/")
|
||||||
weblinks = url
|
#print_url='http://www.themarker.com/misc/article-print-page/'+split1[-1]
|
||||||
|
txt=url
|
||||||
|
|
||||||
if weblinks is not None:
|
re1='.*?' # Non-greedy match on filler
|
||||||
for link in weblinks:
|
re2='(tv)' # Word 1
|
||||||
#---------------------------------------------------------
|
|
||||||
#here we need some help with some regexpressions
|
|
||||||
#we are trying to find it.themarker.com in a url
|
|
||||||
#-----------------------------------------------------------
|
|
||||||
re1='.*?' # Non-greedy match on filler
|
|
||||||
re2='(it\\.themarker\\.com)' # Fully Qualified Domain Name 1
|
|
||||||
rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
|
|
||||||
m = rg.search(url)
|
|
||||||
|
|
||||||
|
rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
|
||||||
if m:
|
m = rg.search(txt)
|
||||||
split2 = url.split("article/")
|
if m:
|
||||||
print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
|
#print 'bad link'
|
||||||
|
return 1
|
||||||
else:
|
|
||||||
print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml'
|
|
||||||
|
|
||||||
return print_url
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user