remove recipes for no content or missing webpage

2025-06-23 15:30:45 -04:00 · 2018-10-09 01:21:09 +02:00 · 2018-10-09 01:21:09 +02:00 · 04d5823308
commit 04d5823308
parent c4b92cebda
6 changed files with 0 additions and 200 deletions
--- a/recipes/dzial_zagraniczny.recipe
+++ b/recipes/dzial_zagraniczny.recipe
@ -1,28 +0,0 @@
-#!/usr/bin/env  python2
-
-__license__ = 'GPL v3'
-__author__ = 'teepel <teepel44@gmail.com>'
-
-'''
-dzialzagraniczny.pl
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class dzial_zagraniczny(BasicNewsRecipe):
-    title = u'Dział Zagraniczny'
-    __author__ = 'teepel <teepel44@gmail.com>'
-    language = 'pl'
-    description = u'Polskiego czytelnika to nie interesuje'
-    INDEX = 'http://dzialzagraniczny.pl'
-    extra_css = 'img {display: block;}'
-    oldest_article = 7
-    cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg'
-    max_articles_per_feed = 100
-    remove_empty_feeds = True
-    remove_javascript = True
-    no_stylesheets = True
-    use_embedded_content = True
-
-    feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')]
--- a/recipes/homopedia_pl.recipe
+++ b/recipes/homopedia_pl.recipe
@ -1,32 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class AdvancedUserRecipe1325420346(BasicNewsRecipe):
-    title = u'Homopedia'
-    __author__ = 'rainbowwarrior'
-    language = 'pl'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    publication_type = 'newspaper'
-    masthead_url = 'http://a5.sphotos.ak.fbcdn.net/hphotos-ak-snc6/67335_168352243178437_166186720061656_594975_5800720_n.jpg'
-    encoding = 'utf-8'
-
-    def get_cover_url(self):
-        return 'http://a7.sphotos.ak.fbcdn.net/hphotos-ak-snc4/65568_166186970061631_166186720061656_580324_7584264_n.jpg'
-
-    feeds = [
-    (u'Nowe has\u0142a', u'http://www.homopedia.pl/w/index.php?title=Specjalna:Nowe_strony&feed=atom&hideliu=&hidepatrolled=&hidebots=&hideredirs=1&limit=50&namespace=0'),  # noqa
-
-    (u'Blog', u'http://blog.homopedia.pl/feeds/posts/default')]
-
-    def get_article_url(self, article):
-        artl = article.get('link',  None)
-        rest, sep, article_id = artl.rpartition('/')
-        return 'http://www.homopedia.pl/w/index.php?redirect=no&printable=yes&title=' + article_id
-
-    remove_tags = [dict(name='div', attrs={'class': 'noprint'}), dict(name='ul', attrs={'class': 'noprint'}), dict(name='ul', attrs={'id': 'footer-places'}), dict(name='li', attrs={'id': 'footer-info-viewcount'}), dict(name='span', attrs={'class': 'editsection'}), dict(name='div', attrs={'id': 'jump-to-nav'})]  # noqa
-
-    remove_tags_before = dict(dict(name='h2', attrs={'class': 'post-title'}))
-    remove_tags_after = dict(dict(name='a', attrs={'class': 'timestamp-link'}))
-
-    extra_css = 'p{text-indent:1.5em!important;padding:0!important;margin;0!important}'
--- a/recipes/icons/dzial_zagraniczny.png
+++ b/recipes/icons/dzial_zagraniczny.png
--- a/recipes/icons/homopedia_pl.png
+++ b/recipes/icons/homopedia_pl.png
--- a/recipes/odkrywcy_pl.recipe
+++ b/recipes/odkrywcy_pl.recipe
@ -1,111 +0,0 @@
-__license__ = 'GPL v3'
-import re
-import datetime
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Comment
-
-
-class Odkrywcy(BasicNewsRecipe):
-    title = u'Odkrywcy.pl'
-    __author__ = 'fenuks'
-    description = u''
-    language = 'pl'
-    extra_css = 'img {display: block;}'
-    cover_url = ''
-    INDEX = 'http://odkrywcy.pl'
-    use_embedded_content = False
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    remove_empty_feeds = True
-    remove_javascript = True
-    remove_attributes = ['style', 'font']
-    ignore_duplicate_articles = {'title', 'url'}
-
-    keep_only_tags = [dict(attrs={'class': 'content'})]
-    remove_tags = [
-        dict(name='a', attrs={'href': ['#opOpinie', '#opinie']}), dict(attrs={'class': ['fr', 'clra', 'close', 'wpsocial-fbFanpageBox', 'tagi', 'test']}),
-        dict(id=['rekSrd05', 'moreTopNews']), dict(name='img', attrs={'class': 'zr'}), dict(name='img', attrs={'alt': u'Następne'})]
-    remove_tags_after = dict(id='aTxt')
-    feeds = [(u'', '')]
-
-    def find_articles(self, url):
-        articles = []
-        soup = self.index_to_soup(url)
-        for i in soup.findAll(attrs={'class': 'katZj clra'}):
-            tmp = i.find('small')
-            datestring = re.search(
-                'dodano: (\d{4}-\d{2}-\d{2})', tmp.string).group(1)
-            d = datetime.datetime.strptime(datestring, "%Y-%m-%d").date()
-            if (datetime.datetime.now().date() - d).days > self.oldest_article:
-                continue
-            tmp = i.find('a')
-            title = tmp.string
-            url = self.INDEX + tmp['href']
-            articles.append({'title': title,
-                             'url': url,
-                             'date': '',
-                             'description': ''
-                             })
-        return articles
-
-    def parse_index(self):
-        feeds = []
-        feeds.append((u'Człowiek', self.find_articles(
-            'http://odkrywcy.pl/kat,111396,name,Czlowiek,kategoria.html')))
-        feeds.append((u'Technologie', self.find_articles(
-            'http://odkrywcy.pl/kat,111398,name,Technologie,kategoria.html')))
-        feeds.append((u'Ekologia', self.find_articles(
-            'http://odkrywcy.pl/kat,111400,name,Ekologia,kategoria.html')))
-        feeds.append((u'Kosmos', self.find_articles(
-            'http://odkrywcy.pl/kat,111402,name,Kosmos,kategoria.html')))
-        feeds.append((u'Cywilizacja', self.find_articles(
-            'http://odkrywcy.pl/kat,111404,name,Cywilizacja,kategoria.html')))
-        feeds.append((u'Przyroda', self.find_articles(
-            'http://odkrywcy.pl/kat,111406,name,Przyroda,kategoria.html')))
-        feeds.append((u'Fizyka i chemia', self.find_articles(
-            'http://odkrywcy.pl/kat,111408,name,Fizyka,kategoria.html')))
-        feeds.append((u'Historia', self.find_articles(
-            'http://odkrywcy.pl/kat,122994,name,Historia,kategoria.html')))
-        feeds.append((u'Media', self.find_articles(
-            'http://odkrywcy.pl/kat,116794,name,Media,media.html')))
-
-        return feeds
-
-    def append_page(self, soup, appendtag):
-        tag = soup.find('a', attrs={'class': 'btnNext'})
-        urls = []
-        while tag is not None:
-            if tag['href'] in urls:
-                break
-            urls.append(tag['href'])
-            soup2 = self.index_to_soup(self.INDEX + tag['href'])
-            tag = soup2.find(name='a', attrs={'class': 'btnNext'})
-            pagetext = soup2.findAll(attrs={'class': 'content'})
-            for container in pagetext:
-                header = container.find(name='h1')
-                if header:
-                    header.extract()
-                for comment in container.findAll(text=lambda text: isinstance(text, Comment)):
-                    comment.extract()
-            for container in pagetext:
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, container)
-        for r in appendtag.findAll(attrs={'class': 'galStr'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'alt': 'Następne'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'alt': 'Poprzednie'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'class': 'clra'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'class': 'close'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'class': 'tagi'}):
-            r.extract()
-        for r in appendtag.findAll(attrs={'id': 'moreTopNews'}):
-            r.extract()
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
--- a/recipes/rybinski.recipe
+++ b/recipes/rybinski.recipe
@ -1,29 +0,0 @@
-#!/usr/bin/env  python2
-
-__license__ = 'GPL v3'
-__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
-'''
-rybinski.eu
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Rybinski(BasicNewsRecipe):
-    title = u'Rybinski.eu - economy of the XXI century'
-    description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
-    language = 'pl'
-    __author__ = u'Tomasz D\u0142ugosz'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-
-    feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]
-
-    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
-
-    remove_tags = [
-        dict(name='div', attrs={'class': 'post-meta-1'}),
-        dict(name='div', attrs={'class': 'post-meta-2'}),
-        dict(name='div', attrs={'class': 'post-comments'})
-    ]