diff --git a/recipes/icons/24sata_rs.png b/recipes/icons/24sata_rs.png
deleted file mode 100644
index 4ce933ae14..0000000000
Binary files a/recipes/icons/24sata_rs.png and /dev/null differ
diff --git a/recipes/icons/akter.png b/recipes/icons/akter.png
deleted file mode 100644
index efc4fceb29..0000000000
Binary files a/recipes/icons/akter.png and /dev/null differ
diff --git a/recipes/icons/alo_novine.png b/recipes/icons/alo_novine.png
deleted file mode 100644
index e88c8675d5..0000000000
Binary files a/recipes/icons/alo_novine.png and /dev/null differ
diff --git a/recipes/icons/beta.png b/recipes/icons/beta.png
deleted file mode 100644
index adf8dd4777..0000000000
Binary files a/recipes/icons/beta.png and /dev/null differ
diff --git a/recipes/icons/beta_en.png b/recipes/icons/beta_en.png
deleted file mode 100644
index adf8dd4777..0000000000
Binary files a/recipes/icons/beta_en.png and /dev/null differ
diff --git a/recipes/icons/consumerist.png b/recipes/icons/consumerist.png
deleted file mode 100644
index fed155af42..0000000000
Binary files a/recipes/icons/consumerist.png and /dev/null differ
diff --git a/recipes/icons/e_novine.png b/recipes/icons/e_novine.png
deleted file mode 100644
index f7f7254ae9..0000000000
Binary files a/recipes/icons/e_novine.png and /dev/null differ
diff --git a/recipes/icons/eclicto.png b/recipes/icons/eclicto.png
deleted file mode 100644
index 34636f3b0e..0000000000
Binary files a/recipes/icons/eclicto.png and /dev/null differ
diff --git a/recipes/icons/elcronista.png b/recipes/icons/elcronista.png
deleted file mode 100644
index 651290b97c..0000000000
Binary files a/recipes/icons/elcronista.png and /dev/null differ
diff --git a/recipes/icons/emg_rs.png b/recipes/icons/emg_rs.png
deleted file mode 100644
index b6d3872895..0000000000
Binary files a/recipes/icons/emg_rs.png and /dev/null differ
diff --git a/recipes/icons/financial_times.png b/recipes/icons/financial_times.png
deleted file mode 100644
index 687c1551bb..0000000000
Binary files a/recipes/icons/financial_times.png and /dev/null differ
diff --git a/recipes/icons/financial_times_uk.png b/recipes/icons/financial_times_uk.png
deleted file mode 100644
index 9180c1c0b3..0000000000
Binary files a/recipes/icons/financial_times_uk.png and /dev/null differ
diff --git a/recipes/icons/financial_times_us.png b/recipes/icons/financial_times_us.png
deleted file mode 100644
index 9180c1c0b3..0000000000
Binary files a/recipes/icons/financial_times_us.png and /dev/null differ
diff --git a/recipes/icons/gawker.png b/recipes/icons/gawker.png
deleted file mode 100644
index 5ecff566d3..0000000000
Binary files a/recipes/icons/gawker.png and /dev/null differ
diff --git a/recipes/icons/glas_srpske.png b/recipes/icons/glas_srpske.png
deleted file mode 100644
index 3f57630833..0000000000
Binary files a/recipes/icons/glas_srpske.png and /dev/null differ
diff --git a/recipes/icons/glasjavnosti.png b/recipes/icons/glasjavnosti.png
deleted file mode 100644
index ea4cf0d97a..0000000000
Binary files a/recipes/icons/glasjavnosti.png and /dev/null differ
diff --git a/recipes/icons/ieco.png b/recipes/icons/ieco.png
deleted file mode 100644
index 7b1ba32786..0000000000
Binary files a/recipes/icons/ieco.png and /dev/null differ
diff --git a/recipes/icons/krstarica.png b/recipes/icons/krstarica.png
deleted file mode 100644
index 2ece457161..0000000000
Binary files a/recipes/icons/krstarica.png and /dev/null differ
diff --git a/recipes/icons/lacapital.png b/recipes/icons/lacapital.png
deleted file mode 100644
index fff6e12ee7..0000000000
Binary files a/recipes/icons/lacapital.png and /dev/null differ
diff --git a/recipes/icons/libartes.png b/recipes/icons/libartes.png
deleted file mode 100644
index b479bf7732..0000000000
Binary files a/recipes/icons/libartes.png and /dev/null differ
diff --git a/recipes/icons/linux_journal.png b/recipes/icons/linux_journal.png
deleted file mode 100644
index e7f30f8900..0000000000
Binary files a/recipes/icons/linux_journal.png and /dev/null differ
diff --git a/recipes/icons/monitor.png b/recipes/icons/monitor.png
deleted file mode 100644
index 76b3255038..0000000000
Binary files a/recipes/icons/monitor.png and /dev/null differ
diff --git a/recipes/icons/novistandard.png b/recipes/icons/novistandard.png
deleted file mode 100644
index 84c184ca2e..0000000000
Binary files a/recipes/icons/novistandard.png and /dev/null differ
diff --git a/recipes/icons/nowy_ekran.png b/recipes/icons/nowy_ekran.png
deleted file mode 100644
index f772abc67a..0000000000
Binary files a/recipes/icons/nowy_ekran.png and /dev/null differ
diff --git a/recipes/icons/nto.png b/recipes/icons/nto.png
deleted file mode 100644
index eb725918ae..0000000000
Binary files a/recipes/icons/nto.png and /dev/null differ
diff --git a/recipes/icons/osworld_pl.png b/recipes/icons/osworld_pl.png
deleted file mode 100644
index 5ec7966931..0000000000
Binary files a/recipes/icons/osworld_pl.png and /dev/null differ
diff --git a/recipes/icons/palmtop_pl.png b/recipes/icons/palmtop_pl.png
deleted file mode 100644
index 344304dd41..0000000000
Binary files a/recipes/icons/palmtop_pl.png and /dev/null differ
diff --git a/recipes/icons/pc_arena.png b/recipes/icons/pc_arena.png
deleted file mode 100644
index 87d9b5e0c3..0000000000
Binary files a/recipes/icons/pc_arena.png and /dev/null differ
diff --git a/recipes/icons/pc_centre_pl.png b/recipes/icons/pc_centre_pl.png
deleted file mode 100644
index 030cef3968..0000000000
Binary files a/recipes/icons/pc_centre_pl.png and /dev/null differ
diff --git a/recipes/icons/picoboard_pl.png b/recipes/icons/picoboard_pl.png
deleted file mode 100644
index 003631e908..0000000000
Binary files a/recipes/icons/picoboard_pl.png and /dev/null differ
diff --git a/recipes/icons/polska_times.png b/recipes/icons/polska_times.png
deleted file mode 100644
index 91c0bec1ad..0000000000
Binary files a/recipes/icons/polska_times.png and /dev/null differ
diff --git a/recipes/icons/poradnia_pwn.png b/recipes/icons/poradnia_pwn.png
deleted file mode 100644
index 22ed7364b8..0000000000
Binary files a/recipes/icons/poradnia_pwn.png and /dev/null differ
diff --git a/recipes/icons/pravda_en.png b/recipes/icons/pravda_en.png
deleted file mode 100644
index cc7aa958cf..0000000000
Binary files a/recipes/icons/pravda_en.png and /dev/null differ
diff --git a/recipes/icons/prawica_net.png b/recipes/icons/prawica_net.png
deleted file mode 100644
index f6bc81d98e..0000000000
Binary files a/recipes/icons/prawica_net.png and /dev/null differ
diff --git a/recipes/icons/presseurop.png b/recipes/icons/presseurop.png
deleted file mode 100644
index 3e7d961878..0000000000
Binary files a/recipes/icons/presseurop.png and /dev/null differ
diff --git a/recipes/icons/rionegro.png b/recipes/icons/rionegro.png
deleted file mode 100644
index 990b9643c7..0000000000
Binary files a/recipes/icons/rionegro.png and /dev/null differ
diff --git a/recipes/icons/rstones.png b/recipes/icons/rstones.png
deleted file mode 100644
index f7ec38c97b..0000000000
Binary files a/recipes/icons/rstones.png and /dev/null differ
diff --git a/recipes/icons/tanjug.png b/recipes/icons/tanjug.png
deleted file mode 100644
index ff12c216cb..0000000000
Binary files a/recipes/icons/tanjug.png and /dev/null differ
diff --git a/recipes/icons/the_nation_thai.png b/recipes/icons/the_nation_thai.png
deleted file mode 100644
index b69b270c2c..0000000000
Binary files a/recipes/icons/the_nation_thai.png and /dev/null differ
diff --git a/recipes/icons/tvp_info.png b/recipes/icons/tvp_info.png
deleted file mode 100644
index 5cbf0322b7..0000000000
Binary files a/recipes/icons/tvp_info.png and /dev/null differ
diff --git a/recipes/nowy_ekran.recipe b/recipes/nowy_ekran.recipe
deleted file mode 100644
index 59b7b80f67..0000000000
--- a/recipes/nowy_ekran.recipe
+++ /dev/null
@@ -1,19 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class NowyEkran(BasicNewsRecipe):
- title = u'Nowy ekran'
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- __author__ = 'fenuks'
- description = u'Niezależny serwis społeczności blogerów'
- category = 'blog'
- language = 'pl'
- masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
- cover_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
- remove_tags_before = dict(name='div', attrs={'class': 'post_detal'})
- remove_tags_after = dict(name='div', attrs={'class': 'post_footer'})
- remove_tags = [dict(name='span', attrs={'class': 'ico ico_comments'}), dict(
- name='div', attrs={'class': 'post_footer'}), dict(name='a', attrs={'class': 'getpdf'})]
- feeds = [(u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/')]
diff --git a/recipes/nto.recipe b/recipes/nto.recipe
deleted file mode 100644
index df7f70b459..0000000000
--- a/recipes/nto.recipe
+++ /dev/null
@@ -1,62 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class NTO(BasicNewsRecipe):
- title = u'Nowa Trybuna Opolska'
- __author__ = 'fenuks'
- description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.'
- category = 'newspaper'
- language = 'pl'
- encoding = 'iso-8859-2'
- extra_css = 'ul {list-style: none; padding:0; margin:0;}'
- INDEX = 'http://www.nto.pl'
- masthead_url = INDEX + '/images/top_logo.png'
- oldest_article = 7
- max_articles_per_feed = 100
- remove_empty_feeds = True
- no_stylesheets = True
- ignore_duplicate_articles = {'title', 'url'}
- use_embedded_content = False
-
- feeds = [
- (u'Wszystkie', u'http://www.nto.pl/rss.xml'),
- (u'Region', u'http://www.nto.pl/region.xml'),
- (u'Brzeg', u'http://www.nto.pl/brzeg.xml'),
- (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'),
- (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'),
- (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'),
- (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'),
- (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'),
- (u'Nysa', u'http://www.nto.pl/nysa.xml'),
- (u'Olesno', u'http://www.nto.pl/olesno.xml'),
-
- (u'Opole', u'http://www.nto.pl/opole.xml'),
- (u'Prudnik', u'http://www.nto.pl/prudnik.xml'),
- (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'),
- (u'Sport', u'http://www.nto.pl/sport.xml'),
- (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'),
- (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'),
- (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'),
- (u'Studia', u'http://www.nto.pl/akademicka.xml')]
-
- keep_only_tags = [dict(id='article')]
-
- def get_cover_url(self):
- soup = self.index_to_soup(
- self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
- nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
- soup = self.index_to_soup(nexturl)
- self.cover_url = self.INDEX + \
- soup.find(id='cover').find(name='img')['src']
- return getattr(self, 'cover_url', self.cover_url)
-
- def decode_feedportal_url(self, url):
- link = url.rpartition('l/0L0S')[2][:-12]
- replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
- ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
- for t in replaces:
- link = link.replace(*t)
- return 'http://' + link
-
- def print_version(self, url):
- return self.decode_feedportal_url(url) + '&Template=printpicart'
diff --git a/recipes/optyczne_pl.recipe b/recipes/optyczne_pl.recipe
index 2ebdd54652..c1c690e703 100644
--- a/recipes/optyczne_pl.recipe
+++ b/recipes/optyczne_pl.recipe
@@ -15,19 +15,16 @@ class OptyczneRecipe(BasicNewsRecipe):
remove_empty_feeds = True
no_stylesheets = True
oldest_article = 7
- max_articles_per_feed = 100000
+ max_articles_per_feed = 100
recursions = 0
no_stylesheets = True
remove_javascript = True
- keep_only_tags = []
- keep_only_tags.append(dict(name='div', attrs={'class': 'news'}))
+ keep_only_tags = [dict(name='div', attrs={'class': 'main-article-content'})]  # list of tag specs, same shape as remove_tags
- remove_tags = []
- remove_tags.append(dict(name='div', attrs={'class': 'center'}))
- remove_tags.append(dict(name='div', attrs={'class': 'news_foto'}))
- remove_tags.append(dict(name='div', attrs={'align': 'right'}))
+ remove_tags = [dict(name='div', attrs={'class':['banner','colored','content-panel']}),
+ dict(name='a', attrs={'class':'icon-link comments-link'})]
extra_css = '''
body {font-family: Arial,Helvetica,sans-serif;}
@@ -38,5 +35,5 @@ class OptyczneRecipe(BasicNewsRecipe):
.fot{font-size: x-small; color: #666666;}
'''
feeds = [
- ('Aktualnosci', 'http://www.optyczne.pl/rss.xml'),
+ (u'Aktualności', 'http://www.optyczne.pl/rss.xml'),
]
diff --git a/recipes/osw.recipe b/recipes/osw.recipe
index 356aa09158..9e98da4aef 100644
--- a/recipes/osw.recipe
+++ b/recipes/osw.recipe
@@ -27,16 +27,14 @@ class OSW_Recipe(BasicNewsRecipe):
simultaneous_downloads = 5
keep_only_tags = []
- # this line should show title of the article, but it doesnt work
- keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
- keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
- keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'}))
+ keep_only_tags.append(dict(name='h2', attrs={'class': 'node-title'}))
+ keep_only_tags.append(dict(name='div', attrs={'class': 'content clearfix'}))
remove_tags = []
remove_tags.append(dict(name='table', attrs={'id': 'attachments'}))
remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
- feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')]
+ feeds = [(u'OSW', u'https://www.osw.waw.pl/pl/rss.xml')]
def print_version(self, url):
- return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')
+ return url.replace('https://www.osw.waw.pl/pl/', 'https://www.osw.waw.pl/pl/print/')
diff --git a/recipes/osworld_pl.recipe b/recipes/osworld_pl.recipe
deleted file mode 100644
index 011a429f27..0000000000
--- a/recipes/osworld_pl.recipe
+++ /dev/null
@@ -1,36 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class OSWorld(BasicNewsRecipe):
- title = u'OSWorld.pl'
- __author__ = 'fenuks'
- description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!' # noqa
- category = 'OS, IT, open source, Linux'
- language = 'pl'
- cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
- extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- remove_empty_feeds = True
- use_embedded_content = False
- keep_only_tags = [dict(id=['dzial', 'posts'])]
- remove_tags = [dict(attrs={'class': 'post-comments'})]
- remove_tags_after = dict(attrs={'class': 'entry clr'})
- feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'),
- (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]
-
- def append_page(self, soup, appendtag):
- tag = appendtag.find(attrs={'id': 'paginacja'})
- if tag:
- for nexturl in tag.findAll('a'):
- soup2 = self.index_to_soup(nexturl['href'])
- pagetext = soup2.find(attrs={'class': 'entry clr'})
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- for r in appendtag.findAll(attrs={'id': 'paginacja'}):
- r.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
diff --git a/recipes/palmtop_pl.recipe b/recipes/palmtop_pl.recipe
deleted file mode 100644
index 14482b38a0..0000000000
--- a/recipes/palmtop_pl.recipe
+++ /dev/null
@@ -1,17 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class palmtop_pl(BasicNewsRecipe):
- title = u'Palmtop.pl'
- __author__ = 'fenuks'
- description = 'wortal technologii mobilnych'
- category = 'mobile'
- language = 'pl'
- cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
- masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = True
- # remove_tags_before=dict(name='h2')
- feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
diff --git a/recipes/pc_arena.recipe b/recipes/pc_arena.recipe
deleted file mode 100644
index acf1743820..0000000000
--- a/recipes/pc_arena.recipe
+++ /dev/null
@@ -1,37 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PC_Arena(BasicNewsRecipe):
- title = u'PCArena'
- oldest_article = 7
- max_articles_per_feed = 100
- __author__ = 'fenuks'
- description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
- category = 'IT'
- language = 'pl'
- index = 'http://pcarena.pl'
- masthead_url = 'http://pcarena.pl/pcarena/img/logo.png'
- cover_url = 'http://pcarena.pl/pcarena/img/logo.png'
- no_stylesheets = True
- remove_empty_feeds = True
- feeds = [
- (u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'),
- (u'Testy', u'http://pcarena.pl/testy/feeds.rss'),
- (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'),
- (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'),
- (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
-
- def print_version(self, url):
- return url.replace('show', 'print')
-
- def image_url_processor(self, baseurl, url):
- if 'http' not in url:
- return 'http://pcarena.pl' + url
- else:
- return url
-
- def preprocess_html(self, soup):
- for a in soup('a'):
- if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
- a['href'] = self.index + a['href']
- return soup
diff --git a/recipes/pc_centre_pl.recipe b/recipes/pc_centre_pl.recipe
deleted file mode 100644
index dbffd5c686..0000000000
--- a/recipes/pc_centre_pl.recipe
+++ /dev/null
@@ -1,30 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PC_Centre(BasicNewsRecipe):
- title = u'PC Centre'
- oldest_article = 7
- max_articles_per_feed = 100
- __author__ = 'fenuks'
- description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
- category = 'IT'
- language = 'pl'
- masthead_url = 'http://pccentre.pl/views/images/logo.gif'
- cover_url = 'http://pccentre.pl/views/images/logo.gif'
- no_stylesheets = True
- remove_empty_feeds = True
- ignore_duplicate_articles = {'title', 'url'}
- remove_tags = [dict(attrs={'class': 'logo_print'})]
- feeds = [
- (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
- (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
- (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
- (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
- (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
- (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
- (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
- (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
- (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
-
- def print_version(self, url):
- return url.replace('show', 'print')
diff --git a/recipes/pc_lab.recipe b/recipes/pc_lab.recipe
index 99ac822e73..0df923cd25 100644
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@@ -75,9 +75,7 @@ class PCLab(BasicNewsRecipe):
href = link.get('href', None)
if href and href.startswith('/'):
link['href'] = 'http://pclab.pl' + href
- # finally remove some tags
- # for r in soup.findAll('div', attrs={'class':['tags', 'index',
- # 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi',
- # 'navigation']})
+ for r in soup.findAll(name='a', href=re.compile(r'^https://www\.skapiec\.pl/')):
+ r.extract()  # drop anchors that link to skapiec.pl
return soup
diff --git a/recipes/picoboard_pl.recipe b/recipes/picoboard_pl.recipe
deleted file mode 100644
index f61c029aab..0000000000
--- a/recipes/picoboard_pl.recipe
+++ /dev/null
@@ -1,36 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Pikoboard(BasicNewsRecipe):
- title = u'Pikoboard.pl'
- __author__ = 'fenuks'
- description = u'Portal poświęcony takim urządzeniom jak: Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.' # noqa
- category = 'IT, open source, Linux, Raspberry Pi'
- language = 'pl'
- cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg'
- extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- remove_empty_feeds = True
- use_embedded_content = False
- keep_only_tags = [dict(id=['dzial', 'posts'])]
- remove_tags = [dict(attrs={'class': 'post-comments'})]
- remove_tags_after = dict(attrs={'class': 'entry clr'})
- feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'),
- (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')]
-
- def append_page(self, soup, appendtag):
- tag = appendtag.find(attrs={'id': 'paginacja'})
- if tag:
- for nexturl in tag.findAll('a'):
- soup2 = self.index_to_soup(nexturl['href'])
- pagetext = soup2.find(attrs={'class': 'entry clr'})
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- for r in appendtag.findAll(attrs={'id': 'paginacja'}):
- r.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
diff --git a/recipes/polska_times.recipe b/recipes/polska_times.recipe
deleted file mode 100644
index 7e0be41d14..0000000000
--- a/recipes/polska_times.recipe
+++ /dev/null
@@ -1,42 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class PolskaTimes(BasicNewsRecipe):
- title = u'Polska Times'
- __author__ = 'fenuks'
- description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.' # noqa
- category = 'newspaper'
- language = 'pl'
- masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
- oldest_article = 7
- encoding = 'iso-8859-2'
- max_articles_per_feed = 100
- remove_empty_feeds = True
- no_stylesheets = True
- use_embedded_content = False
- ignore_duplicate_articles = {'title', 'url'}
- remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
- remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={
- 'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})]
- feeds = [
- (u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'),
- (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'),
- (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'),
- (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'),
- (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'),
- (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'),
- (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
-
- def print_version(self, url):
- return url.replace('artykul', 'drukuj')
-
- def skip_ad_pages(self, soup):
- if 'Advertisement' in soup.title:
- nexturl = soup.find('a')['href']
- return self.index_to_soup(nexturl, raw=True)
-
- def get_cover_url(self):
- soup = self.index_to_soup(
- 'http://www.prasa24.pl/gazeta/metropolia-warszawska/')
- self.cover_url = soup.find(id='pojemnik').img['src']
- return getattr(self, 'cover_url', self.cover_url)
diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe
index 462effecfa..740a0f7ca5 100644
--- a/recipes/polter_pl.recipe
+++ b/recipes/polter_pl.recipe
@@ -21,7 +21,8 @@ class Polter(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class': 'boxcontent'})]
- remove_tags = [dict(id='komentarze')]
+ remove_tags = [dict(id='komentarze'),
+ dict(name='div',attrs={'class':'ostatnieArtykuly'})]
remove_tags_after = dict(id='komentarze')
feeds = [
@@ -36,8 +37,7 @@ class Polter(BasicNewsRecipe):
(u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'),
(u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'),
(u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'),
- (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'),
- (u'Blogi', 'http://polter.pl/blogi,rss.html')]
+ (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]
def preprocess_html(self, soup):
for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
@@ -65,3 +65,6 @@ class Polter(BasicNewsRecipe):
for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')):
r.extract()
return soup
+
+ def preprocess_raw_html(self, raw_html, url):
+ return raw_html.replace('