import re

__license__ = 'GPL v3'
__copyright__ = '2011, Rasmus Lauritsen '
''' version2.dk '''

from calibre.web.feeds.news import BasicNewsRecipe


class version2(BasicNewsRecipe):
    """Calibre news recipe for version2.dk.

    The class is purely declarative: it only sets class attributes that the
    BasicNewsRecipe base class reads (metadata, feed URLs, and the rules
    for which parts of each article page to keep or strip). It defines no
    methods of its own.
    """

    # --- Metadata ---------------------------------------------------------
    title = 'Version2.dk'
    __author__ = 'Rasmus Lauritsen'
    description = 'IT News'
    publisher = 'version2.dk'
    category = 'news, IT, hardware, software, Denmark'
    language = 'da'

    # --- Fetch options ----------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 25
    no_stylesheets = False
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Extra CSS applied on top of the downloaded pages.
    extra_css = '.article { font-weight: normal; }'

    # (compiled pattern, replacement callable) pairs; every match is
    # replaced with the empty string.
    #
    # NOTE(review): both patterns start with a bare ']' and look truncated,
    # presumably a leading '<tag[^>' (and, for the second, a closing
    # '</tag>') was lost somewhere upstream. As written, the first pattern
    # matches zero or more ']' followed by '>', i.e. every lone '>' in the
    # page, and the second matches the literal text 'article-link-id'
    # (the trailing lazy '.*?' matches nothing). Confirm against the
    # original recipe and restore the intended patterns before relying on
    # this. They are kept verbatim here because the original tag names
    # cannot be determined from this file.
    preprocess_regexps = [
        (re.compile(r']*>'), lambda match: ''),
        (re.compile(r']*article-link-id.*?'), lambda match: ''),
    ]

    # Only these containers are kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article-page'}),
        dict(name='section', attrs={'class': 'byline'}),
        dict(name='section', attrs={'class': 'body'}),
    ]

    # Elements stripped from the page, matched by tag name plus id or class.
    remove_tags = [
        dict(name='p', attrs={'class': 'meta links'}),
        dict(name='div', attrs={'class': 'float-right'}),
        dict(name='span', attrs={'class': 'article-link-id'}),
        dict(name='section', attrs={'class': 'social-tools-pane'}),
        dict(name='section', attrs={'class': 'article-timeline'}),
        dict(name='div', attrs={'id': 'mini-panel-comments_and_form'}),
        dict(name='div', attrs={'class': 'related-articles top-three'}),
        dict(name='div', attrs={'id': 'mini-panel-jobfinder_1'}),
        dict(name='section', attrs={'id': 'mini-panel-frontpage_debat_zone'}),
        dict(name='div', attrs={'class': 'panel-panel panel-col-center grid w4'}),
        dict(name='section', attrs={'class': 'short-list sidebar most-rate-up'}),
        dict(name='section', attrs={'class': 'short-list sidebar debat-pane most-comment-lists'}),
        dict(name='section', attrs={'class': 'short-list sidebar most-controversial'}),
        dict(name='div', attrs={'id': 'mini-panel-related_branch_news'}),
        dict(name='div', attrs={'class': 'items-pane pr-pane'}),
        dict(name='section', attrs={'id': 'mini-panel-jobfinder_front'}),
        dict(name='section', attrs={'class': 'item-news'}),
        dict(name='section', attrs={'class': 'jobs-list'}),
        dict(name='footer', attrs={'id': 'footer'}),
        dict(name='section', attrs={'class': 'banner'}),
        dict(name='div', attrs={'class': 'fast-track-frontpage'}),
        dict(name='a', attrs={'class': 'byline-comments'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Seneste nyheder', u'https://www.version2.dk/it-nyheder/rss'),
        (u'Seneste blogs', u'https://www.version2.dk/blogs/rss'),
        (u'Seneste kommentarer', u'https://www.version2.dk/debat/rss'),
    ]