# calibre/recipes/kp.recipe


from calibre.web.feeds.news import BasicNewsRecipe


class KrytykaPolitycznaRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Polish magazine Krytyka Polityczna.

    Articles are listed by the site's RSS feed. For every article the
    print-friendly page is located by ``print_version`` and links that
    contain only text are flattened by ``preprocess_html``.
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Krytyka Polityczna'
    category = u'News'
    description = u' Lewicowe pismo zaangażowane w bieg spraw publicznych w Polsce.'
    cover_url = ''

    # --- Feed / download options (interpreted by BasicNewsRecipe) --------
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    simultaneous_downloads = 3

    # --- Clean-up options -------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    # Selectors for the title and body elements of the print view.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'print-title'}),
        dict(name='div', attrs={'class': 'print-content'}),
    ]

    # Selectors for auxiliary fields (story switches, temp files, author).
    remove_tags = [
        dict(attrs={'class': [
            'field field-type-text field-field-story-switch',
            'field field-type-filefield field-field-story-temp',
            'field field-type-text field-field-story-author',
            'field field-type-text field-field-story-lead-switch',
        ]}),
    ]

    extra_css = '''
                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
                    td.contentheading{font-size: large; font-weight: bold;}
                    '''

    feeds = [
        ('Wszystkie', 'http://www.krytykapolityczna.pl/rss.xml')
    ]

    def print_version(self, url):
        """Return the URL of the print-friendly version of an article.

        The page at ``url`` is fetched and searched for the first element
        whose class is ``print-page``; that element's ``href`` is resolved
        against the site root.

        :param url: address of the regular article page.
        :return: absolute URL of the print view, or ``None`` when the page
            carries no usable print link.
        """
        # Function-scope import so the recipe loads on Python 3 and Python 2.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.index_to_soup(url)
        print_link = soup.find(attrs={'class': 'print-page'})
        print_uri = print_link.get('href') if print_link is not None else None
        if not print_uri:
            # This case used to fail with an opaque TypeError/KeyError.
            # NOTE(review): returning None is expected to make calibre skip
            # the article - confirm against BasicNewsRecipe.
            self.log('No print version link found for', url)
            return None

        self.log('PRINT', print_uri)
        # urljoin handles relative, root-relative ("/print/1") and already
        # absolute hrefs; plain concatenation only handled the first form.
        return urljoin('http://www.krytykapolityczna.pl/', print_uri)

    def preprocess_html(self, soup):
        """Replace text-only links with their plain text.

        Each ``<a>`` element whose ``.string`` is not ``None`` is replaced
        by that string. Anchors whose ``.string`` is ``None`` are left
        untouched.

        :param soup: parsed article document; it is modified in place.
        :return: the same ``soup`` object.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is not None:
                anchor.replaceWith(text)
        return soup