diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index 452dca9068..06b86d83bb 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -1,39 +1,87 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = u'2010, Tomasz Dlugosz ' +__copyright__ = u'2010-2012, Tomasz Dlugosz ' ''' fronda.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re +from datetime import timedelta, date class Fronda(BasicNewsRecipe): title = u'Fronda.pl' publisher = u'Fronda.pl' - description = u'Portal po\u015bwi\u0119cony - Infformacje' + description = u'Portal po\u015bwi\u0119cony - Informacje' language = 'pl' __author__ = u'Tomasz D\u0142ugosz' oldest_article = 7 max_articles_per_feed = 100 use_embedded_content = False + no_stylesheets = True - feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')] + extra_css = ''' + h1 {font-size:150%} + .body {text-align:left;} + ''' - keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}), - dict(name='div', attrs={'class':'naglowek_tresc'}), - dict(name='div', attrs={'id':'czytaj'}) ] + earliest_date = date.today() - timedelta(days=oldest_article) - remove_tags = [dict(name='a', attrs={'class':'print'})] + def date_cut(self,datestr): + # eg. 5.11.2012, 12:07 + timestamp = datestr.split(',')[0] + parts = timestamp.split('.') + art_date = date(int(parts[2]),int(parts[1]),int(parts[0])) + return True if art_date < self.earliest_date else False - preprocess_regexps = [ - (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ (r'

.*

', lambda match: ''), - (r'

.*

', lambda match: ''), - (r'

W.* lektury.*

', lambda match: ''), - (r'

Zobacz t.*?', lambda match: ''), - (r']*> 

', lambda match: ''), - (r'


', lambda match: ''), - (r'