mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix volkskrant.nl
This commit is contained in:
parent
cd4950b98e
commit
82305c2b1b
@ -10,14 +10,14 @@
|
|||||||
<meta http-equiv="X-UA-Compatible" content="IE=100" />
|
<meta http-equiv="X-UA-Compatible" content="IE=100" />
|
||||||
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
|
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
|
||||||
|
|
||||||
<link rel="stylesheet" type="text/css" href="browse.css" />
|
<link rel="stylesheet" type="text/css" href="/static/browse/browse.css" />
|
||||||
<link type="text/css" href="/static/jquery_ui/css/pepper-grinder/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
|
<link type="text/css" href="/static/jquery_ui/css/pepper-grinder/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
|
||||||
|
|
||||||
<script type="text/javascript" src="/static/jquery.js"></script>
|
<script type="text/javascript" src="/static/jquery.js"></script>
|
||||||
<script type="text/javascript" src="/static/jquery.corner.js"></script>
|
<script type="text/javascript" src="/static/jquery.corner.js"></script>
|
||||||
<script type="text/javascript" src="/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
|
<script type="text/javascript" src="/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
|
||||||
|
|
||||||
<script type="text/javascript" src="browse.js"></script>
|
<script type="text/javascript" src="/static/browse/browse.js"></script>
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
$(document).ready(function() {{
|
$(document).ready(function() {{
|
||||||
|
@ -6,7 +6,18 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Modified by Tony Stegall
|
||||||
|
on 10/10/10 to include function to grab print version of articles
|
||||||
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
'''
|
||||||
|
added by Tony Stegall
|
||||||
|
'''
|
||||||
|
#######################################################
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
#######################################################
|
||||||
|
|
||||||
class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
||||||
title = u'De Volkskrant'
|
title = u'De Volkskrant'
|
||||||
@ -16,20 +27,54 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'leftColumnArticle'}) ]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div',attrs={'class':'article_tools'}),
|
|
||||||
dict(name='div',attrs={'id':'article_tools'}),
|
|
||||||
dict(name='div',attrs={'class':'articletools'}),
|
|
||||||
dict(name='div',attrs={'id':'articletools'}),
|
|
||||||
dict(name='div',attrs={'id':'myOverlay'}),
|
|
||||||
dict(name='div',attrs={'id':'trackback'}),
|
|
||||||
dict(name='div',attrs={'id':'googleBanner'}),
|
|
||||||
dict(name='div',attrs={'id':'article_headlines'}),
|
|
||||||
]
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body{font-family:Arial,Helvetica,sans-serif; font-size:small;}
|
body{font-family:Arial,Helvetica,sans-serif; font-size:small;}
|
||||||
h1{font-size:large;}
|
h1{font-size:large;}
|
||||||
'''
|
'''
|
||||||
|
'''
|
||||||
|
Change Log:
|
||||||
|
Date: 10/10/10 - Modified code to use get_obfuscated_article to fetch the print version
|
||||||
|
Author: Tony Stegall
|
||||||
|
'''
|
||||||
|
#######################################################################################################
|
||||||
|
temp_files = []
|
||||||
|
articles_are_obfuscated = True
|
||||||
|
|
||||||
feeds = [(u'Laatste Nieuws', u'http://volkskrant.nl/rss/laatstenieuws.rss'), (u'Binnenlands nieuws', u'http://volkskrant.nl/rss/nederland.rss'), (u'Buitenlands nieuws', u'http://volkskrant.nl/rss/internationaal.rss'), (u'Economisch nieuws', u'http://volkskrant.nl/rss/economie.rss'), (u'Sportnieuws', u'http://volkskrant.nl/rss/sport.rss'), (u'Kunstnieuws', u'http://volkskrant.nl/rss/kunst.rss'), (u'Wetenschapsnieuws', u'http://feeds.feedburner.com/DeVolkskrantWetenschap'), (u'Technologienieuws', u'http://feeds.feedburner.com/vkmedia')]
|
def get_obfuscated_article(self, url):
|
||||||
|
br = self.get_browser()
|
||||||
|
br.open(url)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = br.follow_link(url_regex='.*?(2010)(\\/)(article)(\\/)(print)(\\/)', nr = 0)
|
||||||
|
html = response.read()
|
||||||
|
except:
|
||||||
|
response = br.open(url)
|
||||||
|
html = response.read()
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write(html)
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
return self.temp_files[-1].name
|
||||||
|
|
||||||
|
###############################################################################################################
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Laatste Nieuws', u'http://volkskrant.nl/rss/laatstenieuws.rss'),
|
||||||
|
(u'Binnenlands nieuws', u'http://volkskrant.nl/rss/nederland.rss'),
|
||||||
|
(u'Buitenlands nieuws', u'http://volkskrant.nl/rss/internationaal.rss'),
|
||||||
|
(u'Economisch nieuws', u'http://volkskrant.nl/rss/economie.rss'),
|
||||||
|
(u'Sportnieuws', u'http://volkskrant.nl/rss/sport.rss'),
|
||||||
|
(u'Kunstnieuws', u'http://volkskrant.nl/rss/kunst.rss'),
|
||||||
|
|
||||||
|
# Both of these RSS feeds link back to the main volkskrant.nl URL, i.e. they are broken.
|
||||||
|
#If someone happens to know the correct paths then they can put them in here
|
||||||
|
#(u'Wetenschapsnieuws', u'http://feeds.feedburner.com/DeVolkskrantWetenschap'),
|
||||||
|
#(u'Technologienieuws', u'http://feeds.feedburner.com/vkmedia')
|
||||||
|
]
|
||||||
|
|
||||||
|
'''
|
||||||
|
Example of URL formatting
|
||||||
|
'''
|
||||||
|
# original url: http://www.volkskrant.nl/vk/nl/2668/Buitenland/article/detail/1031493/2010/10/10/Noord-Korea-ziet-nieuwe-leider.dhtml
|
||||||
|
# print url : http://www.volkskrant.nl/vk/nl/2668/2010/article/print/detail/1031493/Noord-Korea-ziet-nieuwe-leider.dhtml
|
||||||
|
Loading…
x
Reference in New Issue
Block a user