remove fronda, as its new pages are impossible to parse

The first problem (solvable, but resulting in monstrous overhead) is that articles don't have dates in the feed or category pages.
The second (not solvable for me) is that multipage articles link to the next page using relative links.
This commit is contained in:
Tomasz Długosz 2014-05-04 19:32:16 +02:00
parent 8b567ce66e
commit d390b4d361

View File

@ -1,95 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010-2014, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import timedelta, date
class Fronda(BasicNewsRecipe):
    """Calibre news recipe for fronda.pl.

    Scrapes category listing pages (there is no usable RSS date info) and
    filters out articles older than ``oldest_article`` days.
    """

    title = u'Fronda.pl'
    publisher = u'Fronda.pl'
    description = u'Portal po\u015bwi\u0119cony - Informacje'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    extra_css = '''
        h1 {font-size:150%}
        .body {text-align:left;}
        div#featured-image {font-style:italic; font-size:70%}
    '''

    # Oldest acceptable publication date, fixed at class-definition time.
    earliest_date = date.today() - timedelta(days=oldest_article)

    def date_cut(self, datestr):
        """Return True if *datestr* is older than the allowed window.

        *datestr* is the site's timestamp format, e.g. ``5.11.2012, 12:07``
        (day.month.year, time).  Raises ValueError/IndexError on malformed
        input, like the original implementation.
        """
        timestamp = datestr.split(',')[0]
        parts = timestamp.split('.')
        art_date = date(int(parts[2]), int(parts[1]), int(parts[0]))
        # Return the comparison directly instead of 'True if ... else False'.
        return art_date < self.earliest_date

    def parse_index(self):
        """Build the feed list from the site's category pages.

        Returns a list of ``(feed_name, [article_dict, ...])`` tuples as
        calibre expects from parse_index().
        """
        genres = [
            ('ekonomia,4.html', 'Ekonomia'),
            ('filozofia,15.html', 'Filozofia'),
            ('historia,6.html', 'Historia'),
            ('kosciol,8.html', 'Kościół'),
            ('kultura,5.html', 'Kultura'),
            ('media,10.html', 'Media'),
            ('nauka,9.html', 'Nauka'),
            ('polityka,11.html', 'Polityka'),
            ('polska,12.html', 'Polska'),
            ('prolife,3.html', 'Prolife'),
            ('religia,7.html', 'Religia'),
            ('rodzina,13.html', 'Rodzina'),
            ('swiat,14.html', 'Świat'),
            ('wydarzenie,16.html', 'Wydarzenie')
        ]
        feeds = []
        articles = {}
        for url, genName in genres:
            try:
                soup = self.index_to_soup('http://www.fronda.pl/c/' + url)
            except Exception:
                # Best-effort: a category page that fails to download is
                # skipped rather than aborting the whole fetch.  (Was a bare
                # 'except:', which also swallowed KeyboardInterrupt.)
                continue
            articles[genName] = []
            for item in soup.findAll('article', attrs={'class': 'article article-wide'}):
                article_a = item.find('a')
                article_url = 'http://www.fronda.pl' + article_a['href']
                article_title = self.tag_to_string(article_a)
                articles[genName].append({'title': article_title, 'url': article_url})
            if articles[genName]:
                feeds.append((genName, articles[genName]))
        return feeds

    def preprocess_html(self, soup):
        """Drop articles older than the allowed window.

        The article date is only available on the article page itself (see
        the category pages' lack of dates), inside the first <small> tag.
        """
        r = soup.find('small')
        # str(r.contents) looks like "[u' 5.11.2012, 12:07']"; [3:] strips
        # the leading "[u'" so the date can be parsed out.
        timestamp = str(r.contents)[3:].split(',')[0]
        parts = timestamp.split('.')
        art_date = date(int(parts[2]), int(parts[1]), int(parts[0]))
        if self.earliest_date < art_date:
            return soup
        # NOTE: strict comparison — an article dated exactly earliest_date is
        # dropped here but kept by date_cut(); preserved as-is.  Falls through
        # to an implicit None for stale articles, presumably so calibre
        # discards them — TODO confirm against calibre's recipe API.

    keep_only_tags = [
        dict(name='div', attrs={'class': 'content content-70 phone-100'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['clearfix', 'last-articles clearfix', 'comments clearfix', 'related-articles', 'social-buttons clearfix']}),
        dict(name='span', attrs={'class': 'small-info'}),
        dict(name='ul', attrs={'class': 'nav nav-tags clearfix'}),
        dict(name='h3', attrs={'class': 'section-header'}),
        dict(name='article', attrs={'class': ['slided-article hidden-phone', 'article article-wide hidden-phone']})
    ]

    # Strip the "komentarzy: ..." (comment-count) tail from headers.
    preprocess_regexps = [
        (re.compile(r'komentarzy: .*?</h6>', re.IGNORECASE | re.DOTALL | re.M), lambda match: '</h6>')]