diff --git a/resources/images/news/kopalniawiedzy.png b/resources/images/news/kopalniawiedzy.png new file mode 100644 index 0000000000..73a4a338aa Binary files /dev/null and b/resources/images/news/kopalniawiedzy.png differ diff --git a/resources/images/news/korespondent.png b/resources/images/news/korespondent.png new file mode 100644 index 0000000000..e2724b11b4 Binary files /dev/null and b/resources/images/news/korespondent.png differ diff --git a/resources/recipes/kopalniawiedzy.recipe b/resources/recipes/kopalniawiedzy.recipe new file mode 100644 index 0000000000..79aa913498 --- /dev/null +++ b/resources/recipes/kopalniawiedzy.recipe @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +__license__ = 'GPL v3' +__copyright__ = '2011, Attis ' +__version__ = 'v. 0.1' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class KopalniaWiedzy(BasicNewsRecipe): + title = u'Kopalnia Wiedzy' + publisher = u'Kopalnia Wiedzy' + description = u'Ciekawostki ze świata nauki i techniki' + encoding = 'utf-8' + __author__ = 'Attis' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + INDEX = u'http://kopalniawiedzy.pl/' + remove_javascript = True + no_stylesheets = True + + remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }] + remove_tags_after = dict(attrs={'class':'ad-square'}) + keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] + extra_css = '.topimage {margin-top: 30px}' + + preprocess_regexps = [ + (re.compile(u''), + lambda match: '' ), + (re.compile(u'

'), + lambda match: '') + ] + + feeds = [ + (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), + (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), + (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), + (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), + (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), + (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') + ] + + def is_link_wanted(self, url, tag): + return tag['class'] == 'next' + + def remove_beyond(self, tag, next): + while tag is not None and getattr(tag, 'name', None) != 'body': + after = getattr(tag, next) + while after is not None: + ns = getattr(tag, next) + after.extract() + after = ns + tag = tag.parent + + def append_page(self, soup, appendtag, position): + pager = soup.find('a',attrs={'class':'next'}) + if pager: + nexturl = self.INDEX + pager['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'id':'articleContent'}) + + tag = texttag.find(attrs={'class':'pages'}) + self.remove_beyond(tag, 'nextSibling') + + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + + appendtag.insert(position,texttag) + + + def preprocess_html(self, soup): + self.append_page(soup, soup.body, 3) + + for item in soup.findAll('div',attrs={'class':'pages'}): + item.extract() + + for item in soup.findAll('p', attrs={'class':'wykop'}): + item.extract() + + return soup diff --git a/resources/recipes/korespondent.recipe b/resources/recipes/korespondent.recipe new file mode 100644 index 0000000000..aa9cf6e828 --- /dev/null +++ b/resources/recipes/korespondent.recipe @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2011, Attis ' +__version__ = 'v. 0.1' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class KorespondentPL(BasicNewsRecipe): + title = u'Korespondent.pl' + publisher = u'Korespondent.pl' + description = u'Centrum wolnorynkowe - serwis ludzi wolnych' + encoding = 'utf-8' + __author__ = 'Attis' + language = 'pl' + oldest_article = 15 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [dict(name='div', attrs={'class':'publicystyka'})] + remove_tags = [{'name': 'meta'}, {'name':'div', 'attrs': {'class': 'zdjecie'} }] + extra_css = '.naglowek {font-size: small}\n .tytul {font-size: x-large; padding-bottom: 10px; padding-top: 30px} \n .external {font-size: small}' + + preprocess_regexps = [ + (re.compile(u'' ), + (re.compile(u'

Więcej'), + lambda match:'Więcej' ), + (re.compile(u'target="_blank"'), + lambda match:'target="_blank" class="external"' ), + (re.compile(u'

\nPoczytaj inne teksty w Serwisie wolnorynkowym Korespondent.pl.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + feeds = [(u'Serwis informacyjny', u'http://korespondent.pl/rss.xml')] +