# calibre recipe for the "Marketing sensoriale" blog (FeedBurner feed).
# Article pages are fetched through calibre's JavaScript-capable browser,
# running in a forked worker process, and handed to calibre as local files.
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.ipc.simple_worker import fork_job
from calibre.ptempfile import PersistentTemporaryFile

# Source code of the module executed inside the forked worker. It is passed
# to fork_job() as text (module_is_source_code=True), so it must remain a
# self-contained, importable module exposing ``grab(url)``.
js_fetcher = '''
import calibre.web.jsbrowser.browser as jsbrowser

def grab(url):
    browser = jsbrowser.Browser()
    #10 second timeout
    browser.visit(url, 10)
    browser.run_for_a_time(10)
    html = browser.html
    browser.close()
    return html
'''


class MarketingSensoriale(BasicNewsRecipe):
    """Recipe that downloads the 'Marketing sensoriale' blog.

    Every article is routed through ``get_obfuscated_article`` (enabled by
    ``articles_are_obfuscated``), which loads the page in a worker process
    using the ``grab`` function defined in ``js_fetcher``.
    """

    title = u'Marketing sensoriale'
    __author__ = 'NotTaken'
    description = 'Marketing Sensoriale, il Blog'
    category = 'Blog'
    language = 'it'
    encoding = 'utf8'
    requires_version = (0, 8, 58)

    feeds = [
        (u'Marketing sensoriale',
         u'http://feeds.feedburner.com/MarketingSensoriale?format=xml'),
    ]

    oldest_article = 7
    max_articles_per_feed = 200
    remove_empty_feeds = True
    use_embedded_content = False
    recursions = 0

    no_stylesheets = True
    auto_cleanup = False
    # Drop everything that follows the article footer.
    remove_tags_after = [
        dict(name='div', attrs={'class': ['article-footer']}),
    ]

    # One article at a time: each download forks a separate worker job.
    simultaneous_downloads = 1
    # Makes calibre call get_obfuscated_article() for every article URL.
    articles_are_obfuscated = True

    def get_article_url(self, article):
        """Return the URL of the page holding the article's content.

        The original (pre-FeedBurner) link is preferred. When the feed entry
        does not carry one, the entry's regular ``link`` is used instead of
        returning ``None``, so the article is not silently dropped.

        :param article: parsed feed entry (dict-like).
        :return: the article URL, or ``None`` if the entry has no link at all.
        """
        url = article.get('feedburner_origlink', None)
        if not url:
            url = article.get('link', None)
        return url

    def get_obfuscated_article(self, url):
        """Fetch ``url`` in a forked worker and store the HTML on disk.

        :param url: address of the article page.
        :return: path of a persistent temporary ``.html`` file containing
            the page markup, encoded as UTF-8 when the worker returned text.
        """
        result = fork_job(
            js_fetcher, 'grab', (url,), module_is_source_code=True)
        html = result['result']
        # The file is written as bytes; encode text results first.
        if isinstance(html, type(u'')):
            html = html.encode('utf-8')

        pt = PersistentTemporaryFile('.html')
        try:
            pt.write(html)
        finally:
            # Release the file handle even if the write fails.
            pt.close()
        return pt.name