calibre/recipes/national_geographic_pl.recipe

# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = 'Marcin Urban 2011'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe


class recipeMagic(BasicNewsRecipe):
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    __modified_by__ = 'fenuks'
    description = 'legenda wśród magazynów z historią sięgającą 120 lat'
    # cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    # delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    publication_type = 'newsportal'
    extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        h1{text-align: center;}
        h2{font-size: medium; font-weight: bold;}
        .authordate {font-size: small; color: #696969;}
        p.lead {font-weight: bold; text-align: center;}
        .fot{font-size: x-small; color: #666666;} '''

    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_tags = [
        dict(name='div', attrs={'class': 'add_inf'}),
        dict(name='div', attrs={'class': 'add_f'}),
    ]

    remove_attributes = ['width', 'height']
    feeds = []

    def find_articles(self, url):
        # Scrape one section listing page and build the article list for a feed.
        articles = []
        soup = self.index_to_soup(url)
        tag = soup.find(attrs={'class': 'arl'})
        art = tag.ul.findAll('li')
        for i in art:
            title = i.a['title']
            url = i.a['href']
            # date=soup.find(id='footer').ul.li.string[41:-1]
            desc = i.div.p.string
            articles.append({'title': title,
                             'url': url,
                             'date': '',
                             'description': desc,
                             })
        return articles

    def parse_index(self):
        feeds = []
        feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
        feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
        return feeds

    def print_version(self, url):
        # Both sections expose print-friendly pages under 'drukuj-artykul'.
        if 'artykuly' in url:
            return url.replace('artykuly/pokaz', 'drukuj-artykul')
        elif 'aktualnosci' in url:
            return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
        else:
            return url

    def get_cover_url(self):
        # Take the current issue's cover image from the 'biezace-wydania' page.
        soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
        tag = soup.find(attrs={'class': 'txt jus'})
        self.cover_url = tag.img['src']
        return self.cover_url
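
# A minimal usage sketch, not part of the original recipe: calibre recipes like
# this one can be test-built from the command line with ebook-convert. The file
# name below is an assumption (whatever name the recipe is saved under works):
#
#   ebook-convert national_geographic_pl.recipe output.epub --test -vv
#
# --test limits the run to a couple of articles per feed, which makes it quick
# to iterate on the parse_index()/print_version() logic above.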