fix telepolis_pl and improve swiatkindle

2025-07-09 03:04:10 -04:00 · 2013-03-09 10:17:39 +01:00 · 2013-03-09 10:17:39 +01:00 · 41b9342a2a
commit 41b9342a2a
parent a98c808a47
2 changed files with 24 additions and 3 deletions
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe):
    feeds       = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')]
-    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
+    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}),
        dict(name = 'div', attrs = {'class' : 'feedflare'})]
    preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe):
    use_embedded_content = False
    feeds = [
-        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
+        (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
        #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class':'flol w510'}),
        dict(name='div', attrs={'class':'main_tresc'}),
        dict(name='div', attrs={'class':'main_tresc_news'})
    ]
    def append_page(self, soup, appendtag):
        """Append the content of an article's follow-up pages to ``appendtag``.

        Looks inside ``appendtag`` for the first element with class ``str``
        and walks its ``<a>`` links in order. Each linked page is downloaded
        with ``self.index_to_soup`` and its first ``main_tresc`` element is
        inserted at the end of ``appendtag``. Walking stops at the link whose
        rendered content equals ``'Następna &rsaquo;'``. Afterwards every
        ``str``-class element is removed from ``appendtag``.

        :param soup: parsed document; not used by this method, kept for the
            existing call signature.
        :param appendtag: tag that is searched for the ``str`` element and
            receives the extra content.
        :return: None; ``appendtag`` is modified in place.
        """
        pager = appendtag.find(attrs={'class':'str'})
        if not pager:
            # No 'str' element found: nothing to fetch and nothing to strip.
            return
        for link in pager.findAll('a'):
            # This link ends the walk; links after it are never followed.
            if link.renderContents() == 'Następna &rsaquo;':
                break
            next_soup = self.index_to_soup(link['href'])
            pagetext = next_soup.find(attrs={'class':'main_tresc'})
            if pagetext is None:
                # The fetched page has no 'main_tresc' element; skip it
                # instead of trying to insert None into the tree.
                continue
            appendtag.insert(len(appendtag.contents), pagetext)
        # Remove the 'str' elements, including any that arrived with the
        # appended content.
        for nav in appendtag.findAll(attrs={'class':'str'}):
            nav.extract()
    def preprocess_html(self, soup):
        """Merge follow-up pages into the article and rewrite image URLs.

        Calls ``self.append_page`` with the document body, then replaces
        ``'m.jpg'`` with ``'.jpg'`` in the ``src`` of every ``<img>`` whose
        ``src`` contains ``'m.jpg'``.
        NOTE(review): the ``m.jpg`` names presumably denote reduced-size
        variants of the full images -- confirm against the site.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        self.append_page(soup, soup.body)
        for image in soup.findAll('img'):
            # An <img> without a src attribute has nothing to rewrite; .get()
            # avoids the lookup error that would abort the whole article.
            src = image.get('src')
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')
        return soup