Merge branch 'master' of https://github.com/t3d/calibre

2025-07-09 03:04:10 -04:00 · 2018-10-18 08:33:40 +05:30 · 2018-10-18 08:33:40 +05:30 · f32426a3d0
commit f32426a3d0
parent f6fa902490 1bec05d8bf
56 changed files with 17 additions and 446 deletions
--- a/recipes/icons/24sata_rs.png
+++ b/recipes/icons/24sata_rs.png
--- a/recipes/icons/akter.png
+++ b/recipes/icons/akter.png
--- a/recipes/icons/alo_novine.png
+++ b/recipes/icons/alo_novine.png
--- a/recipes/icons/beta.png
+++ b/recipes/icons/beta.png
--- a/recipes/icons/beta_en.png
+++ b/recipes/icons/beta_en.png
--- a/recipes/icons/consumerist.png
+++ b/recipes/icons/consumerist.png
--- a/recipes/icons/e_novine.png
+++ b/recipes/icons/e_novine.png
--- a/recipes/icons/eclicto.png
+++ b/recipes/icons/eclicto.png
--- a/recipes/icons/elcronista.png
+++ b/recipes/icons/elcronista.png
--- a/recipes/icons/emg_rs.png
+++ b/recipes/icons/emg_rs.png
--- a/recipes/icons/financial_times.png
+++ b/recipes/icons/financial_times.png
--- a/recipes/icons/financial_times_uk.png
+++ b/recipes/icons/financial_times_uk.png
--- a/recipes/icons/financial_times_us.png
+++ b/recipes/icons/financial_times_us.png
--- a/recipes/icons/gawker.png
+++ b/recipes/icons/gawker.png
--- a/recipes/icons/glas_srpske.png
+++ b/recipes/icons/glas_srpske.png
--- a/recipes/icons/glasjavnosti.png
+++ b/recipes/icons/glasjavnosti.png
--- a/recipes/icons/ieco.png
+++ b/recipes/icons/ieco.png
--- a/recipes/icons/krstarica.png
+++ b/recipes/icons/krstarica.png
--- a/recipes/icons/lacapital.png
+++ b/recipes/icons/lacapital.png
--- a/recipes/icons/libartes.png
+++ b/recipes/icons/libartes.png
--- a/recipes/icons/linux_journal.png
+++ b/recipes/icons/linux_journal.png
--- a/recipes/icons/monitor.png
+++ b/recipes/icons/monitor.png
--- a/recipes/icons/novistandard.png
+++ b/recipes/icons/novistandard.png
--- a/recipes/icons/nowy_ekran.png
+++ b/recipes/icons/nowy_ekran.png
--- a/recipes/icons/nto.png
+++ b/recipes/icons/nto.png
--- a/recipes/icons/osworld_pl.png
+++ b/recipes/icons/osworld_pl.png
--- a/recipes/icons/palmtop_pl.png
+++ b/recipes/icons/palmtop_pl.png
--- a/recipes/icons/pc_arena.png
+++ b/recipes/icons/pc_arena.png
--- a/recipes/icons/pc_centre_pl.png
+++ b/recipes/icons/pc_centre_pl.png
--- a/recipes/icons/picoboard_pl.png
+++ b/recipes/icons/picoboard_pl.png
--- a/recipes/icons/polska_times.png
+++ b/recipes/icons/polska_times.png
--- a/recipes/icons/poradnia_pwn.png
+++ b/recipes/icons/poradnia_pwn.png
--- a/recipes/icons/pravda_en.png
+++ b/recipes/icons/pravda_en.png
--- a/recipes/icons/prawica_net.png
+++ b/recipes/icons/prawica_net.png
--- a/recipes/icons/presseurop.png
+++ b/recipes/icons/presseurop.png
--- a/recipes/icons/rionegro.png
+++ b/recipes/icons/rionegro.png
--- a/recipes/icons/rstones.png
+++ b/recipes/icons/rstones.png
--- a/recipes/icons/tanjug.png
+++ b/recipes/icons/tanjug.png
--- a/recipes/icons/the_nation_thai.png
+++ b/recipes/icons/the_nation_thai.png
--- a/recipes/icons/tvp_info.png
+++ b/recipes/icons/tvp_info.png
--- a/recipes/nowy_ekran.recipe
+++ b/recipes/nowy_ekran.recipe
@@ -1,19 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class NowyEkran(BasicNewsRecipe):
-    title = u'Nowy ekran'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    __author__ = 'fenuks'
-    description = u'Niezależny serwis społeczności blogerów'
-    category = 'blog'
-    language = 'pl'
-    masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
-    cover_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
-    remove_tags_before = dict(name='div', attrs={'class': 'post_detal'})
-    remove_tags_after = dict(name='div', attrs={'class': 'post_footer'})
-    remove_tags = [dict(name='span', attrs={'class': 'ico ico_comments'}), dict(
-        name='div', attrs={'class': 'post_footer'}), dict(name='a', attrs={'class': 'getpdf'})]
-    feeds = [(u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/')]
--- a/recipes/nto.recipe
+++ b/recipes/nto.recipe
@@ -1,62 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class NTO(BasicNewsRecipe):
-    title = u'Nowa Trybuna Opolska'
-    __author__ = 'fenuks'
-    description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.'
-    category = 'newspaper'
-    language = 'pl'
-    encoding = 'iso-8859-2'
-    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
-    INDEX = 'http://www.nto.pl'
-    masthead_url = INDEX + '/images/top_logo.png'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    remove_empty_feeds = True
-    no_stylesheets = True
-    ignore_duplicate_articles = {'title', 'url'}
-    use_embedded_content = False
-
-    feeds = [
-    (u'Wszystkie', u'http://www.nto.pl/rss.xml'),
-    (u'Region', u'http://www.nto.pl/region.xml'),
-    (u'Brzeg', u'http://www.nto.pl/brzeg.xml'),
-    (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'),
-    (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'),
-    (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'),
-    (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'),
-    (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'),
-    (u'Nysa', u'http://www.nto.pl/nysa.xml'),
-    (u'Olesno', u'http://www.nto.pl/olesno.xml'),
-
-    (u'Opole', u'http://www.nto.pl/opole.xml'),
-    (u'Prudnik', u'http://www.nto.pl/prudnik.xml'),
-    (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'),
-    (u'Sport', u'http://www.nto.pl/sport.xml'),
-    (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'),
-    (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'),
-    (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'),
-    (u'Studia', u'http://www.nto.pl/akademicka.xml')]
-
-    keep_only_tags = [dict(id='article')]
-
-    def get_cover_url(self):
-        soup = self.index_to_soup(
-            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
-        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
-        soup = self.index_to_soup(nexturl)
-        self.cover_url = self.INDEX + \
-            soup.find(id='cover').find(name='img')['src']
-        return getattr(self, 'cover_url', self.cover_url)
-
-    def decode_feedportal_url(self, url):
-        link = url.rpartition('l/0L0S')[2][:-12]
-        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
-                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
-        for t in replaces:
-            link = link.replace(*t)
-        return 'http://' + link
-
-    def print_version(self, url):
-        return self.decode_feedportal_url(url) + '&Template=printpicart'
--- a/recipes/optyczne_pl.recipe
+++ b/recipes/optyczne_pl.recipe
@@ -15,19 +15,16 @@ class OptyczneRecipe(BasicNewsRecipe):
    remove_empty_feeds = True
    no_stylesheets = True
    oldest_article = 7
-    max_articles_per_feed = 100000
+    max_articles_per_feed = 100
    recursions = 0

    no_stylesheets = True
    remove_javascript = True

-    keep_only_tags = []
-    keep_only_tags.append(dict(name='div', attrs={'class': 'news'}))
+    keep_only_tags = dict(name='div', attrs={'class':'main-article-content'})

-    remove_tags = []
-    remove_tags.append(dict(name='div', attrs={'class': 'center'}))
-    remove_tags.append(dict(name='div', attrs={'class': 'news_foto'}))
-    remove_tags.append(dict(name='div', attrs={'align': 'right'}))
+    remove_tags = [dict(name='div', attrs={'class':['banner','colored','content-panel']}),
+                dict(name='a', attrs={'class':'icon-link comments-link'})]

    extra_css = '''
                    body {font-family: Arial,Helvetica,sans-serif;}
@@ -38,5 +35,5 @@ class OptyczneRecipe(BasicNewsRecipe):
                    .fot{font-size: x-small; color: #666666;}
                    '''
    feeds = [
-        ('Aktualnosci', 'http://www.optyczne.pl/rss.xml'),
+        (u'Aktualności', 'http://www.optyczne.pl/rss.xml'),
    ]
--- a/recipes/osw.recipe
+++ b/recipes/osw.recipe
@@ -27,16 +27,14 @@ class OSW_Recipe(BasicNewsRecipe):
    simultaneous_downloads = 5

    keep_only_tags = []
-    # this line should show title of the article, but it doesnt work
-    keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
-    keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
-    keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'}))
+    keep_only_tags.append(dict(name='h2', attrs={'class': 'node-title'}))
+    keep_only_tags.append(dict(name='div', attrs={'class': 'content clearfix'}))

    remove_tags = []
    remove_tags.append(dict(name='table', attrs={'id': 'attachments'}))
    remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))

-    feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')]
+    feeds = [(u'OSW', u'https://www.osw.waw.pl/pl/rss.xml')]

    def print_version(self, url):
-        return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')
+        return url.replace('https://www.osw.waw.pl/pl/', 'https://www.osw.waw.pl/pl/print/')
--- a/recipes/osworld_pl.recipe
+++ b/recipes/osworld_pl.recipe
@@ -1,36 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class OSWorld(BasicNewsRecipe):
-    title = u'OSWorld.pl'
-    __author__ = 'fenuks'
-    description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'  # noqa
-    category = 'OS, IT, open source, Linux'
-    language = 'pl'
-    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
-    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    remove_empty_feeds = True
-    use_embedded_content = False
-    keep_only_tags = [dict(id=['dzial', 'posts'])]
-    remove_tags = [dict(attrs={'class': 'post-comments'})]
-    remove_tags_after = dict(attrs={'class': 'entry clr'})
-    feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'),
-             (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]
-
-    def append_page(self, soup, appendtag):
-        tag = appendtag.find(attrs={'id': 'paginacja'})
-        if tag:
-            for nexturl in tag.findAll('a'):
-                soup2 = self.index_to_soup(nexturl['href'])
-                pagetext = soup2.find(attrs={'class': 'entry clr'})
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-            for r in appendtag.findAll(attrs={'id': 'paginacja'}):
-                r.extract()
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
--- a/recipes/palmtop_pl.recipe
+++ b/recipes/palmtop_pl.recipe
@@ -1,17 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class palmtop_pl(BasicNewsRecipe):
-    title = u'Palmtop.pl'
-    __author__ = 'fenuks'
-    description = 'wortal technologii mobilnych'
-    category = 'mobile'
-    language = 'pl'
-    cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
-    masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = True
-    # remove_tags_before=dict(name='h2')
-    feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
--- a/recipes/pc_arena.recipe
+++ b/recipes/pc_arena.recipe
@@ -1,37 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PC_Arena(BasicNewsRecipe):
-    title = u'PCArena'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    __author__ = 'fenuks'
-    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
-    category = 'IT'
-    language = 'pl'
-    index = 'http://pcarena.pl'
-    masthead_url = 'http://pcarena.pl/pcarena/img/logo.png'
-    cover_url = 'http://pcarena.pl/pcarena/img/logo.png'
-    no_stylesheets = True
-    remove_empty_feeds = True
-    feeds = [
-    (u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'),
-    (u'Testy', u'http://pcarena.pl/testy/feeds.rss'),
-    (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'),
-    (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'),
-    (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
-
-    def print_version(self, url):
-        return url.replace('show', 'print')
-
-    def image_url_processor(self, baseurl, url):
-        if 'http' not in url:
-            return 'http://pcarena.pl' + url
-        else:
-            return url
-
-    def preprocess_html(self, soup):
-        for a in soup('a'):
-            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:  # noqa
-                a['href'] = self.index + a['href']
-        return soup
--- a/recipes/pc_centre_pl.recipe
+++ b/recipes/pc_centre_pl.recipe
@@ -1,30 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PC_Centre(BasicNewsRecipe):
-    title = u'PC Centre'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    __author__ = 'fenuks'
-    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
-    category = 'IT'
-    language = 'pl'
-    masthead_url = 'http://pccentre.pl/views/images/logo.gif'
-    cover_url = 'http://pccentre.pl/views/images/logo.gif'
-    no_stylesheets = True
-    remove_empty_feeds = True
-    ignore_duplicate_articles = {'title', 'url'}
-    remove_tags = [dict(attrs={'class': 'logo_print'})]
-    feeds = [
-    (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
-    (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
-    (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
-    (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
-    (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
-    (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
-    (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
-    (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
-    (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
-
-    def print_version(self, url):
-        return url.replace('show', 'print')
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@@ -75,9 +75,7 @@ class PCLab(BasicNewsRecipe):
            href = link.get('href', None)
            if href and href.startswith('/'):
                link['href'] = 'http://pclab.pl' + href
-        # finally remove some tags
-        # for r in soup.findAll('div', attrs={'class':['tags', 'index',
-        # 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi',
-        # 'navigation']})
+        for r in soup.findAll(name='a', href=re.compile(r'^https://www.skapiec.pl/')):
+            r.extract()

        return soup
--- a/recipes/picoboard_pl.recipe
+++ b/recipes/picoboard_pl.recipe
@@ -1,36 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Pikoboard(BasicNewsRecipe):
-    title = u'Pikoboard.pl'
-    __author__ = 'fenuks'
-    description = u'Portal poświęcony takim urządzeniom jak: Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.'  # noqa
-    category = 'IT, open source, Linux, Raspberry Pi'
-    language = 'pl'
-    cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg'
-    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    remove_empty_feeds = True
-    use_embedded_content = False
-    keep_only_tags = [dict(id=['dzial', 'posts'])]
-    remove_tags = [dict(attrs={'class': 'post-comments'})]
-    remove_tags_after = dict(attrs={'class': 'entry clr'})
-    feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'),
-             (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')]
-
-    def append_page(self, soup, appendtag):
-        tag = appendtag.find(attrs={'id': 'paginacja'})
-        if tag:
-            for nexturl in tag.findAll('a'):
-                soup2 = self.index_to_soup(nexturl['href'])
-                pagetext = soup2.find(attrs={'class': 'entry clr'})
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-            for r in appendtag.findAll(attrs={'id': 'paginacja'}):
-                r.extract()
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
--- a/recipes/polska_times.recipe
+++ b/recipes/polska_times.recipe
@@ -1,42 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PolskaTimes(BasicNewsRecipe):
-    title = u'Polska Times'
-    __author__ = 'fenuks'
-    description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'  # noqa
-    category = 'newspaper'
-    language = 'pl'
-    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
-    oldest_article = 7
-    encoding = 'iso-8859-2'
-    max_articles_per_feed = 100
-    remove_empty_feeds = True
-    no_stylesheets = True
-    use_embedded_content = False
-    ignore_duplicate_articles = {'title', 'url'}
-    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
-    remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={
-        'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})]
-    feeds = [
-    (u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'),
-    (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'),
-    (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'),
-    (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'),
-    (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'),
-    (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'),
-    (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
-
-    def print_version(self, url):
-        return url.replace('artykul', 'drukuj')
-
-    def skip_ad_pages(self, soup):
-        if 'Advertisement' in soup.title:
-            nexturl = soup.find('a')['href']
-            return self.index_to_soup(nexturl, raw=True)
-
-    def get_cover_url(self):
-        soup = self.index_to_soup(
-            'http://www.prasa24.pl/gazeta/metropolia-warszawska/')
-        self.cover_url = soup.find(id='pojemnik').img['src']
-        return getattr(self, 'cover_url', self.cover_url)
--- a/recipes/polter_pl.recipe
+++ b/recipes/polter_pl.recipe
@@ -21,7 +21,8 @@ class Polter(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs={'class': 'boxcontent'})]
-    remove_tags = [dict(id='komentarze')]
+    remove_tags = [dict(id='komentarze'),
+        dict(name='div',attrs={'class':'ostatnieArtykuly'})]
    remove_tags_after = dict(id='komentarze')

    feeds = [
@@ -36,8 +37,7 @@ class Polter(BasicNewsRecipe):
    (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'),
    (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'),
    (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'),
-    (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'),
-    (u'Blogi', 'http://polter.pl/blogi,rss.html')]
+    (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]

    def preprocess_html(self, soup):
        for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
@@ -65,3 +65,6 @@ class Polter(BasicNewsRecipe):
        for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')):
            r.extract()
        return soup
+
+    def preprocess_raw_html(self, raw_html, url):
+        return raw_html.replace('<br /><br /><h3>Czytaj również</h3>', '')
--- a/recipes/poradnia_pwn.recipe
+++ b/recipes/poradnia_pwn.recipe
@@ -1,63 +0,0 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PoradniaPWN(BasicNewsRecipe):
-    title = u'Poradnia Językowa PWN'
-    __author__ = 'fenuks'
-    description = u'Internetowa poradnia językowa Wydawnictwa Naukowego PWN. Poradnię prowadzi Redaktor Naczelny Słowników Języka Polskiego, prof. Mirosław Bańko. Pomagają mu eksperci - znani polscy językoznawcy. Współpracuje z nami m.in. prof. Jerzy Bralczyk oraz dr Jan Grzenia.'  # noqa
-    category = 'language'
-    language = 'pl'
-    oldest_article = 14
-    max_articles_per_feed = 100000
-    INDEX = "http://poradnia.pwn.pl/"
-    no_stylesheets = True
-    remove_attributes = ['style']
-    remove_javascript = True
-    use_embedded_content = False
-    keep_only_tags = [dict(name="div", attrs={"class": "searchhi"})]
-    feeds = [(u'Poradnia', u'http://rss.pwn.pl/poradnia.rss')]
-
-    '''def find_articles(self, url):
-        articles = []
-        soup=self.index_to_soup(url)
-        counter = int(soup.find(name='p', attrs={'class':'count'}).findAll('b')[-1].string)
-        counter = 500
-        pos = 0
-        next = url
-        while next:
-            soup=self.index_to_soup(next)
-            tag=soup.find(id="listapytan")
-            art=tag.findAll(name='li')
-            for i in art:
-                if i.h4:
-                    title=i.h4.a.string
-                    url=self.INDEX+i.h4.a['href']
-                    #date=soup.find(id='footer').ul.li.string[41:-1]
-                    articles.append({'title' : title,
-                       'url'   : url,
-                       'date'  : '',
-                       'description' : ''
-                        })
-            pos += 10
-            if not pos >=counter:
-                next = 'http://poradnia.pwn.pl/lista.php?kat=18&od=' + str(pos)
-                print u'Tworzenie listy artykułów dla', next
-            else:
-                next = None
-        print articles
-        return articles
-
-    def parse_index(self):
-         feeds = []
-         feeds.append((u"Poradnia", self.find_articles('http://poradnia.pwn.pl/lista.php')))
-
-         return feeds'''
-
-    def preprocess_html(self, soup):
-        for i in soup.findAll(name=['ul', 'li']):
-            i.name = "div"
-        for z in soup.findAll(name='a'):
-            if not z['href'].startswith('http'):
-                z['href'] = 'http://poradnia.pwn.pl/' + z['href']
-        return soup
--- a/recipes/ppe_pl.recipe
+++ b/recipes/ppe_pl.recipe
@@ -29,9 +29,3 @@ class ppeRecipe(BasicNewsRecipe):
        ('Recenzje', 'http://ppe.pl/rss-recenzje.html'),
        ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'),
    ]
-
-    def get_cover_url(self):
-        soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html')
-        part = soup.find(attrs={'class': 'archiwum-foto'})['style']
-        part = re.search("'(.+)'", part).group(1).replace('_min', '')
-        return 'http://www.ppe.pl' + part
--- a/recipes/prawica_net.recipe
+++ b/recipes/prawica_net.recipe
@@ -1,43 +0,0 @@
-#!/usr/bin/env  python2
-
-__license__ = 'GPL v3'
-__author__ = 'teepel <teepel44@gmail.com>'
-
-'''
-http://prawica.net
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class prawica_recipe(BasicNewsRecipe):
-    title = u'prawica.net'
-    __author__ = 'teepel <teepel44@gmail.com>'
-    language = 'pl'
-    description = 'Wiadomości ze strony prawica.net'
-    INDEX = 'http://prawica.net/'
-    remove_empty_feeds = True
-    oldest_article = 1
-    max_articles_per_feed = 100
-    remove_javascript = True
-    no_stylesheets = True
-
-    feeds = [(u'all', u'http://prawica.net/all/feed')]
-
-    keep_only_tags = []
-    # this line should show title of the article, but it doesnt work
-    keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
-    keep_only_tags.append(dict(name='div', attrs={'class': 'content'}))
-
-    remove_tags = []
-    remove_tags.append(dict(name='div', attrs={
-                       'class': 'field field-type-viewfield field-field-autor2'}))
-    remove_tags.append(dict(name='div', attrs={
-                       'class': 'field field-type-viewfield field-field-publikacje-autora'}))
-    remove_tags.append(dict(name='div', attrs={
-                       'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}))
-    remove_tags_after = [
-        (dict(name='div', attrs={'class': 'field-label-inline-first'}))]
-
-    def print_version(self, url):
-        return url.replace('http://prawica.net/', 'http://prawica.net/print/')
--- a/recipes/presseurop.recipe
+++ b/recipes/presseurop.recipe
@@ -1,34 +0,0 @@
-#!/usr/bin/env  python2
-
-'''
-www.presseurop.eu/pl
-'''
-
-__license__ = 'GPL v3'
-__author__ = 'teepel <teepel44@gmail.com>'
-
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-
-class presseurop(BasicNewsRecipe):
-    title = u'Presseurop'
-    description = u'Najlepsze artykuły z prasy europejskiej'
-    language = 'pl'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    auto_cleanup = True
-    remove_empty_feeds = True
-
-    feeds = [
-        (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
-        (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'),
-        (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'),
-        (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'),
-        (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed')
-    ]
-
-    preprocess_regexps = [
-        (re.compile(r'\|.*</title>', re.DOTALL | re.IGNORECASE),
-         lambda match: '</title>'),
-    ]