diff --git a/recipes/icons/24sata_rs.png b/recipes/icons/24sata_rs.png deleted file mode 100644 index 4ce933ae14..0000000000 Binary files a/recipes/icons/24sata_rs.png and /dev/null differ diff --git a/recipes/icons/akter.png b/recipes/icons/akter.png deleted file mode 100644 index efc4fceb29..0000000000 Binary files a/recipes/icons/akter.png and /dev/null differ diff --git a/recipes/icons/alo_novine.png b/recipes/icons/alo_novine.png deleted file mode 100644 index e88c8675d5..0000000000 Binary files a/recipes/icons/alo_novine.png and /dev/null differ diff --git a/recipes/icons/beta.png b/recipes/icons/beta.png deleted file mode 100644 index adf8dd4777..0000000000 Binary files a/recipes/icons/beta.png and /dev/null differ diff --git a/recipes/icons/beta_en.png b/recipes/icons/beta_en.png deleted file mode 100644 index adf8dd4777..0000000000 Binary files a/recipes/icons/beta_en.png and /dev/null differ diff --git a/recipes/icons/consumerist.png b/recipes/icons/consumerist.png deleted file mode 100644 index fed155af42..0000000000 Binary files a/recipes/icons/consumerist.png and /dev/null differ diff --git a/recipes/icons/e_novine.png b/recipes/icons/e_novine.png deleted file mode 100644 index f7f7254ae9..0000000000 Binary files a/recipes/icons/e_novine.png and /dev/null differ diff --git a/recipes/icons/eclicto.png b/recipes/icons/eclicto.png deleted file mode 100644 index 34636f3b0e..0000000000 Binary files a/recipes/icons/eclicto.png and /dev/null differ diff --git a/recipes/icons/elcronista.png b/recipes/icons/elcronista.png deleted file mode 100644 index 651290b97c..0000000000 Binary files a/recipes/icons/elcronista.png and /dev/null differ diff --git a/recipes/icons/emg_rs.png b/recipes/icons/emg_rs.png deleted file mode 100644 index b6d3872895..0000000000 Binary files a/recipes/icons/emg_rs.png and /dev/null differ diff --git a/recipes/icons/financial_times.png b/recipes/icons/financial_times.png deleted file mode 100644 index 687c1551bb..0000000000 Binary 
files a/recipes/icons/financial_times.png and /dev/null differ diff --git a/recipes/icons/financial_times_uk.png b/recipes/icons/financial_times_uk.png deleted file mode 100644 index 9180c1c0b3..0000000000 Binary files a/recipes/icons/financial_times_uk.png and /dev/null differ diff --git a/recipes/icons/financial_times_us.png b/recipes/icons/financial_times_us.png deleted file mode 100644 index 9180c1c0b3..0000000000 Binary files a/recipes/icons/financial_times_us.png and /dev/null differ diff --git a/recipes/icons/gawker.png b/recipes/icons/gawker.png deleted file mode 100644 index 5ecff566d3..0000000000 Binary files a/recipes/icons/gawker.png and /dev/null differ diff --git a/recipes/icons/glas_srpske.png b/recipes/icons/glas_srpske.png deleted file mode 100644 index 3f57630833..0000000000 Binary files a/recipes/icons/glas_srpske.png and /dev/null differ diff --git a/recipes/icons/glasjavnosti.png b/recipes/icons/glasjavnosti.png deleted file mode 100644 index ea4cf0d97a..0000000000 Binary files a/recipes/icons/glasjavnosti.png and /dev/null differ diff --git a/recipes/icons/ieco.png b/recipes/icons/ieco.png deleted file mode 100644 index 7b1ba32786..0000000000 Binary files a/recipes/icons/ieco.png and /dev/null differ diff --git a/recipes/icons/krstarica.png b/recipes/icons/krstarica.png deleted file mode 100644 index 2ece457161..0000000000 Binary files a/recipes/icons/krstarica.png and /dev/null differ diff --git a/recipes/icons/lacapital.png b/recipes/icons/lacapital.png deleted file mode 100644 index fff6e12ee7..0000000000 Binary files a/recipes/icons/lacapital.png and /dev/null differ diff --git a/recipes/icons/libartes.png b/recipes/icons/libartes.png deleted file mode 100644 index b479bf7732..0000000000 Binary files a/recipes/icons/libartes.png and /dev/null differ diff --git a/recipes/icons/linux_journal.png b/recipes/icons/linux_journal.png deleted file mode 100644 index e7f30f8900..0000000000 Binary files a/recipes/icons/linux_journal.png and /dev/null 
differ diff --git a/recipes/icons/monitor.png b/recipes/icons/monitor.png deleted file mode 100644 index 76b3255038..0000000000 Binary files a/recipes/icons/monitor.png and /dev/null differ diff --git a/recipes/icons/novistandard.png b/recipes/icons/novistandard.png deleted file mode 100644 index 84c184ca2e..0000000000 Binary files a/recipes/icons/novistandard.png and /dev/null differ diff --git a/recipes/icons/nowy_ekran.png b/recipes/icons/nowy_ekran.png deleted file mode 100644 index f772abc67a..0000000000 Binary files a/recipes/icons/nowy_ekran.png and /dev/null differ diff --git a/recipes/icons/nto.png b/recipes/icons/nto.png deleted file mode 100644 index eb725918ae..0000000000 Binary files a/recipes/icons/nto.png and /dev/null differ diff --git a/recipes/icons/osworld_pl.png b/recipes/icons/osworld_pl.png deleted file mode 100644 index 5ec7966931..0000000000 Binary files a/recipes/icons/osworld_pl.png and /dev/null differ diff --git a/recipes/icons/palmtop_pl.png b/recipes/icons/palmtop_pl.png deleted file mode 100644 index 344304dd41..0000000000 Binary files a/recipes/icons/palmtop_pl.png and /dev/null differ diff --git a/recipes/icons/pc_arena.png b/recipes/icons/pc_arena.png deleted file mode 100644 index 87d9b5e0c3..0000000000 Binary files a/recipes/icons/pc_arena.png and /dev/null differ diff --git a/recipes/icons/pc_centre_pl.png b/recipes/icons/pc_centre_pl.png deleted file mode 100644 index 030cef3968..0000000000 Binary files a/recipes/icons/pc_centre_pl.png and /dev/null differ diff --git a/recipes/icons/picoboard_pl.png b/recipes/icons/picoboard_pl.png deleted file mode 100644 index 003631e908..0000000000 Binary files a/recipes/icons/picoboard_pl.png and /dev/null differ diff --git a/recipes/icons/polska_times.png b/recipes/icons/polska_times.png deleted file mode 100644 index 91c0bec1ad..0000000000 Binary files a/recipes/icons/polska_times.png and /dev/null differ diff --git a/recipes/icons/poradnia_pwn.png b/recipes/icons/poradnia_pwn.png deleted 
file mode 100644 index 22ed7364b8..0000000000 Binary files a/recipes/icons/poradnia_pwn.png and /dev/null differ diff --git a/recipes/icons/pravda_en.png b/recipes/icons/pravda_en.png deleted file mode 100644 index cc7aa958cf..0000000000 Binary files a/recipes/icons/pravda_en.png and /dev/null differ diff --git a/recipes/icons/prawica_net.png b/recipes/icons/prawica_net.png deleted file mode 100644 index f6bc81d98e..0000000000 Binary files a/recipes/icons/prawica_net.png and /dev/null differ diff --git a/recipes/icons/presseurop.png b/recipes/icons/presseurop.png deleted file mode 100644 index 3e7d961878..0000000000 Binary files a/recipes/icons/presseurop.png and /dev/null differ diff --git a/recipes/icons/rionegro.png b/recipes/icons/rionegro.png deleted file mode 100644 index 990b9643c7..0000000000 Binary files a/recipes/icons/rionegro.png and /dev/null differ diff --git a/recipes/icons/rstones.png b/recipes/icons/rstones.png deleted file mode 100644 index f7ec38c97b..0000000000 Binary files a/recipes/icons/rstones.png and /dev/null differ diff --git a/recipes/icons/tanjug.png b/recipes/icons/tanjug.png deleted file mode 100644 index ff12c216cb..0000000000 Binary files a/recipes/icons/tanjug.png and /dev/null differ diff --git a/recipes/icons/the_nation_thai.png b/recipes/icons/the_nation_thai.png deleted file mode 100644 index b69b270c2c..0000000000 Binary files a/recipes/icons/the_nation_thai.png and /dev/null differ diff --git a/recipes/icons/tvp_info.png b/recipes/icons/tvp_info.png deleted file mode 100644 index 5cbf0322b7..0000000000 Binary files a/recipes/icons/tvp_info.png and /dev/null differ diff --git a/recipes/nowy_ekran.recipe b/recipes/nowy_ekran.recipe deleted file mode 100644 index 59b7b80f67..0000000000 --- a/recipes/nowy_ekran.recipe +++ /dev/null @@ -1,19 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class NowyEkran(BasicNewsRecipe): - title = u'Nowy ekran' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = 
True - __author__ = 'fenuks' - description = u'Niezależny serwis społeczności blogerów' - category = 'blog' - language = 'pl' - masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif' - cover_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif' - remove_tags_before = dict(name='div', attrs={'class': 'post_detal'}) - remove_tags_after = dict(name='div', attrs={'class': 'post_footer'}) - remove_tags = [dict(name='span', attrs={'class': 'ico ico_comments'}), dict( - name='div', attrs={'class': 'post_footer'}), dict(name='a', attrs={'class': 'getpdf'})] - feeds = [(u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/')] diff --git a/recipes/nto.recipe b/recipes/nto.recipe deleted file mode 100644 index df7f70b459..0000000000 --- a/recipes/nto.recipe +++ /dev/null @@ -1,62 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class NTO(BasicNewsRecipe): - title = u'Nowa Trybuna Opolska' - __author__ = 'fenuks' - description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.' 
- category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - INDEX = 'http://www.nto.pl' - masthead_url = INDEX + '/images/top_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - use_embedded_content = False - - feeds = [ - (u'Wszystkie', u'http://www.nto.pl/rss.xml'), - (u'Region', u'http://www.nto.pl/region.xml'), - (u'Brzeg', u'http://www.nto.pl/brzeg.xml'), - (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'), - (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'), - (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'), - (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'), - (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'), - (u'Nysa', u'http://www.nto.pl/nysa.xml'), - (u'Olesno', u'http://www.nto.pl/olesno.xml'), - - (u'Opole', u'http://www.nto.pl/opole.xml'), - (u'Prudnik', u'http://www.nto.pl/prudnik.xml'), - (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'), - (u'Sport', u'http://www.nto.pl/sport.xml'), - (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'), - (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'), - (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'), - (u'Studia', u'http://www.nto.pl/akademicka.xml')] - - keep_only_tags = [dict(id='article')] - - def get_cover_url(self): - soup = self.index_to_soup( - self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') - nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] - soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] - return getattr(self, 'cover_url', self.cover_url) - - def decode_feedportal_url(self, url): - link = url.rpartition('l/0L0S')[2][:-12] - replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), - ('0D', 
'?'), ('0F', '='), ('0A', '0'), ('0I', '_')) - for t in replaces: - link = link.replace(*t) - return 'http://' + link - - def print_version(self, url): - return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/optyczne_pl.recipe b/recipes/optyczne_pl.recipe index 2ebdd54652..c1c690e703 100644 --- a/recipes/optyczne_pl.recipe +++ b/recipes/optyczne_pl.recipe @@ -15,19 +15,16 @@ class OptyczneRecipe(BasicNewsRecipe): remove_empty_feeds = True no_stylesheets = True oldest_article = 7 - max_articles_per_feed = 100000 + max_articles_per_feed = 100 recursions = 0 no_stylesheets = True remove_javascript = True - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'class': 'news'})) + keep_only_tags = dict(name='div', attrs={'class':'main-article-content'}) - remove_tags = [] - remove_tags.append(dict(name='div', attrs={'class': 'center'})) - remove_tags.append(dict(name='div', attrs={'class': 'news_foto'})) - remove_tags.append(dict(name='div', attrs={'align': 'right'})) + remove_tags = [dict(name='div', attrs={'class':['banner','colored','content-panel']}), + dict(name='a', attrs={'class':'icon-link comments-link'})] extra_css = ''' body {font-family: Arial,Helvetica,sans-serif;} @@ -38,5 +35,5 @@ class OptyczneRecipe(BasicNewsRecipe): .fot{font-size: x-small; color: #666666;} ''' feeds = [ - ('Aktualnosci', 'http://www.optyczne.pl/rss.xml'), + (u'Aktualności', 'http://www.optyczne.pl/rss.xml'), ] diff --git a/recipes/osw.recipe b/recipes/osw.recipe index 356aa09158..9e98da4aef 100644 --- a/recipes/osw.recipe +++ b/recipes/osw.recipe @@ -27,16 +27,14 @@ class OSW_Recipe(BasicNewsRecipe): simultaneous_downloads = 5 keep_only_tags = [] - # this line should show title of the article, but it doesnt work - keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'})) + 
keep_only_tags.append(dict(name='h2', attrs={'class': 'node-title'})) + keep_only_tags.append(dict(name='div', attrs={'class': 'content clearfix'})) remove_tags = [] remove_tags.append(dict(name='table', attrs={'id': 'attachments'})) remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'})) - feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')] + feeds = [(u'OSW', u'https://www.osw.waw.pl/pl/rss.xml')] def print_version(self, url): - return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/') + return url.replace('https://www.osw.waw.pl/pl/', 'https://www.osw.waw.pl/pl/print/') diff --git a/recipes/osworld_pl.recipe b/recipes/osworld_pl.recipe deleted file mode 100644 index 011a429f27..0000000000 --- a/recipes/osworld_pl.recipe +++ /dev/null @@ -1,36 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class OSWorld(BasicNewsRecipe): - title = u'OSWorld.pl' - __author__ = 'fenuks' - description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!' 
# noqa - category = 'OS, IT, open source, Linux' - language = 'pl' - cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png' - extra_css = 'img.alignleft {float: left; margin-right: 5px;}' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - use_embedded_content = False - keep_only_tags = [dict(id=['dzial', 'posts'])] - remove_tags = [dict(attrs={'class': 'post-comments'})] - remove_tags_after = dict(attrs={'class': 'entry clr'}) - feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'), - (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')] - - def append_page(self, soup, appendtag): - tag = appendtag.find(attrs={'id': 'paginacja'}) - if tag: - for nexturl in tag.findAll('a'): - soup2 = self.index_to_soup(nexturl['href']) - pagetext = soup2.find(attrs={'class': 'entry clr'}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'id': 'paginacja'}): - r.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup diff --git a/recipes/palmtop_pl.recipe b/recipes/palmtop_pl.recipe deleted file mode 100644 index 14482b38a0..0000000000 --- a/recipes/palmtop_pl.recipe +++ /dev/null @@ -1,17 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class palmtop_pl(BasicNewsRecipe): - title = u'Palmtop.pl' - __author__ = 'fenuks' - description = 'wortal technologii mobilnych' - category = 'mobile' - language = 'pl' - cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png' - masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = True - # remove_tags_before=dict(name='h2') - feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')] diff --git a/recipes/pc_arena.recipe b/recipes/pc_arena.recipe 
deleted file mode 100644 index acf1743820..0000000000 --- a/recipes/pc_arena.recipe +++ /dev/null @@ -1,37 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class PC_Arena(BasicNewsRecipe): - title = u'PCArena' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'fenuks' - description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.' - category = 'IT' - language = 'pl' - index = 'http://pcarena.pl' - masthead_url = 'http://pcarena.pl/pcarena/img/logo.png' - cover_url = 'http://pcarena.pl/pcarena/img/logo.png' - no_stylesheets = True - remove_empty_feeds = True - feeds = [ - (u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'), - (u'Testy', u'http://pcarena.pl/testy/feeds.rss'), - (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'), - (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'), - (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')] - - def print_version(self, url): - return url.replace('show', 'print') - - def image_url_processor(self, baseurl, url): - if 'http' not in url: - return 'http://pcarena.pl' + url - else: - return url - - def preprocess_html(self, soup): - for a in soup('a'): - if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa - a['href'] = self.index + a['href'] - return soup diff --git a/recipes/pc_centre_pl.recipe b/recipes/pc_centre_pl.recipe deleted file mode 100644 index dbffd5c686..0000000000 --- a/recipes/pc_centre_pl.recipe +++ /dev/null @@ -1,30 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class PC_Centre(BasicNewsRecipe): - title = u'PC Centre' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'fenuks' - description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.' 
- category = 'IT' - language = 'pl' - masthead_url = 'http://pccentre.pl/views/images/logo.gif' - cover_url = 'http://pccentre.pl/views/images/logo.gif' - no_stylesheets = True - remove_empty_feeds = True - ignore_duplicate_articles = {'title', 'url'} - remove_tags = [dict(attrs={'class': 'logo_print'})] - feeds = [ - (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), - (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), - (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), - (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), - (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), - (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), - (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), - (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), - (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')] - - def print_version(self, url): - return url.replace('show', 'print') diff --git a/recipes/pc_lab.recipe b/recipes/pc_lab.recipe index 99ac822e73..0df923cd25 100644 --- a/recipes/pc_lab.recipe +++ b/recipes/pc_lab.recipe @@ -75,9 +75,7 @@ class PCLab(BasicNewsRecipe): href = link.get('href', None) if href and href.startswith('/'): link['href'] = 'http://pclab.pl' + href - # finally remove some tags - # for r in soup.findAll('div', attrs={'class':['tags', 'index', - # 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', - # 'navigation']}) + for r in soup.findAll(name='a', href=re.compile(r'^https://www.skapiec.pl/')): + r.extract() return soup diff --git a/recipes/picoboard_pl.recipe b/recipes/picoboard_pl.recipe deleted file mode 100644 index f61c029aab..0000000000 --- a/recipes/picoboard_pl.recipe +++ /dev/null @@ -1,36 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Pikoboard(BasicNewsRecipe): - title = u'Pikoboard.pl' - __author__ = 'fenuks' - description = u'Portal poświęcony takim urządzeniom jak: 
Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.' # noqa - category = 'IT, open source, Linux, Raspberry Pi' - language = 'pl' - cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg' - extra_css = 'img.alignleft {float: left; margin-right: 5px;}' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - use_embedded_content = False - keep_only_tags = [dict(id=['dzial', 'posts'])] - remove_tags = [dict(attrs={'class': 'post-comments'})] - remove_tags_after = dict(attrs={'class': 'entry clr'}) - feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'), - (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')] - - def append_page(self, soup, appendtag): - tag = appendtag.find(attrs={'id': 'paginacja'}) - if tag: - for nexturl in tag.findAll('a'): - soup2 = self.index_to_soup(nexturl['href']) - pagetext = soup2.find(attrs={'class': 'entry clr'}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'id': 'paginacja'}): - r.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup diff --git a/recipes/polska_times.recipe b/recipes/polska_times.recipe deleted file mode 100644 index 7e0be41d14..0000000000 --- a/recipes/polska_times.recipe +++ /dev/null @@ -1,42 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class PolskaTimes(BasicNewsRecipe): - title = u'Polska Times' - __author__ = 'fenuks' - description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.' 
# noqa - category = 'newspaper' - language = 'pl' - masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17' - oldest_article = 7 - encoding = 'iso-8859-2' - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}) - remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={ - 'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})] - feeds = [ - (u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), - (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), - (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), - (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), - (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), - (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), - (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')] - - def print_version(self, url): - return url.replace('artykul', 'drukuj') - - def skip_ad_pages(self, soup): - if 'Advertisement' in soup.title: - nexturl = soup.find('a')['href'] - return self.index_to_soup(nexturl, raw=True) - - def get_cover_url(self): - soup = self.index_to_soup( - 'http://www.prasa24.pl/gazeta/metropolia-warszawska/') - self.cover_url = soup.find(id='pojemnik').img['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index 462effecfa..740a0f7ca5 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -21,7 +21,8 @@ class Polter(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} keep_only_tags = [dict(attrs={'class': 'boxcontent'})] - remove_tags = [dict(id='komentarze')] + remove_tags = [dict(id='komentarze'), + dict(name='div',attrs={'class':'ostatnieArtykuly'})] remove_tags_after 
= dict(id='komentarze') feeds = [ @@ -36,8 +37,7 @@ class Polter(BasicNewsRecipe): (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'), (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'), (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'), - (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'), - (u'Blogi', 'http://polter.pl/blogi,rss.html')] + (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')] def preprocess_html(self, soup): for s in soup.findAll(attrs={'style': re.compile('float: ?left')}): @@ -65,3 +65,6 @@ class Polter(BasicNewsRecipe): for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')): r.extract() return soup + + def preprocess_raw_html(self, raw_html, url): + return raw_html.replace('

Czytaj również

', '') diff --git a/recipes/poradnia_pwn.recipe b/recipes/poradnia_pwn.recipe deleted file mode 100644 index ce89cb73f0..0000000000 --- a/recipes/poradnia_pwn.recipe +++ /dev/null @@ -1,63 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai -from calibre.web.feeds.news import BasicNewsRecipe - - -class PoradniaPWN(BasicNewsRecipe): - title = u'Poradnia Językowa PWN' - __author__ = 'fenuks' - description = u'Internetowa poradnia językowa Wydawnictwa Naukowego PWN. Poradnię prowadzi Redaktor Naczelny Słowników Języka Polskiego, prof. Mirosław Bańko. Pomagają mu eksperci - znani polscy językoznawcy. Współpracuje z nami m.in. prof. Jerzy Bralczyk oraz dr Jan Grzenia.' # noqa - category = 'language' - language = 'pl' - oldest_article = 14 - max_articles_per_feed = 100000 - INDEX = "http://poradnia.pwn.pl/" - no_stylesheets = True - remove_attributes = ['style'] - remove_javascript = True - use_embedded_content = False - keep_only_tags = [dict(name="div", attrs={"class": "searchhi"})] - feeds = [(u'Poradnia', u'http://rss.pwn.pl/poradnia.rss')] - - '''def find_articles(self, url): - articles = [] - soup=self.index_to_soup(url) - counter = int(soup.find(name='p', attrs={'class':'count'}).findAll('b')[-1].string) - counter = 500 - pos = 0 - next = url - while next: - soup=self.index_to_soup(next) - tag=soup.find(id="listapytan") - art=tag.findAll(name='li') - for i in art: - if i.h4: - title=i.h4.a.string - url=self.INDEX+i.h4.a['href'] - #date=soup.find(id='footer').ul.li.string[41:-1] - articles.append({'title' : title, - 'url' : url, - 'date' : '', - 'description' : '' - }) - pos += 10 - if not pos >=counter: - next = 'http://poradnia.pwn.pl/lista.php?kat=18&od=' + str(pos) - print u'Tworzenie listy artykułów dla', next - else: - next = None - print articles - return articles - - def parse_index(self): - feeds = [] - feeds.append((u"Poradnia", self.find_articles('http://poradnia.pwn.pl/lista.php'))) - - return feeds''' - - def preprocess_html(self, 
soup): - for i in soup.findAll(name=['ul', 'li']): - i.name = "div" - for z in soup.findAll(name='a'): - if not z['href'].startswith('http'): - z['href'] = 'http://poradnia.pwn.pl/' + z['href'] - return soup diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe index 224922c464..4f9bcd30d0 100644 --- a/recipes/ppe_pl.recipe +++ b/recipes/ppe_pl.recipe @@ -29,9 +29,3 @@ class ppeRecipe(BasicNewsRecipe): ('Recenzje', 'http://ppe.pl/rss-recenzje.html'), ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'), ] - - def get_cover_url(self): - soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html') - part = soup.find(attrs={'class': 'archiwum-foto'})['style'] - part = re.search("'(.+)'", part).group(1).replace('_min', '') - return 'http://www.ppe.pl' + part diff --git a/recipes/prawica_net.recipe b/recipes/prawica_net.recipe deleted file mode 100644 index 744c8d3227..0000000000 --- a/recipes/prawica_net.recipe +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -''' -http://prawica.net -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class prawica_recipe(BasicNewsRecipe): - title = u'prawica.net' - __author__ = 'teepel ' - language = 'pl' - description = 'Wiadomości ze strony prawica.net' - INDEX = 'http://prawica.net/' - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - - feeds = [(u'all', u'http://prawica.net/all/feed')] - - keep_only_tags = [] - # this line should show title of the article, but it doesnt work - keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'content'})) - - remove_tags = [] - remove_tags.append(dict(name='div', attrs={ - 'class': 'field field-type-viewfield field-field-autor2'})) - remove_tags.append(dict(name='div', attrs={ - 'class': 'field field-type-viewfield field-field-publikacje-autora'})) - 
remove_tags.append(dict(name='div', attrs={ - 'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'})) - remove_tags_after = [ - (dict(name='div', attrs={'class': 'field-label-inline-first'}))] - - def print_version(self, url): - return url.replace('http://prawica.net/', 'http://prawica.net/print/') diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe deleted file mode 100644 index 75bde97ee9..0000000000 --- a/recipes/presseurop.recipe +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python2 - -''' -www.presseurop.eu/pl -''' - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class presseurop(BasicNewsRecipe): - title = u'Presseurop' - description = u'Najlepsze artykuły z prasy europejskiej' - language = 'pl' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - remove_empty_feeds = True - - feeds = [ - (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'), - (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'), - (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'), - (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'), - (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed') - ] - - preprocess_regexps = [ - (re.compile(r'\|.*', re.DOTALL | re.IGNORECASE), - lambda match: ''), - ]