mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 02:27:01 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			103 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			103 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env  python
 | |
| 
 | |
| __license__   = 'GPL v3'
 | |
| __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | |
| 
 | |
| '''
 | |
| pobjeda.co.me
 | |
| '''
 | |
| 
 | |
| import re
 | |
| from calibre import strftime
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| from calibre.ebooks.BeautifulSoup import Tag
 | |
| 
 | |
| class Pobjeda(BasicNewsRecipe):
 | |
|     title                 = 'Pobjeda Online'
 | |
|     __author__            = 'Darko Miletic'
 | |
|     description           = 'News from Montenegro'
 | |
|     publisher             = 'Pobjeda a.d.'
 | |
|     category              = 'news, politics, Montenegro'
 | |
|     no_stylesheets        = True
 | |
|     encoding              = 'utf-8'
 | |
|     use_embedded_content  = False
 | |
|     language = 'sr'
 | |
| 
 | |
|     lang                  = 'sr-Latn-Me'
 | |
|     INDEX                 = u'http://www.pobjeda.co.me'
 | |
|     extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
 | |
| 
 | |
|     conversion_options = {
 | |
|                           'comment'          : description
 | |
|                         , 'tags'             : category
 | |
|                         , 'publisher'        : publisher
 | |
|                         , 'language'         : lang
 | |
|                         , 'pretty_print'     : True
 | |
|                         }
 | |
| 
 | |
| 
 | |
|     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 | |
| 
 | |
|     keep_only_tags = [dict(name='div', attrs={'class':'vijest'})]
 | |
| 
 | |
|     remove_tags = [dict(name=['object','link'])]
 | |
| 
 | |
|     feeds = [
 | |
|                (u'Politika'          , u'http://www.pobjeda.co.me/rubrika.php?rubrika=1'  )
 | |
|               ,(u'Ekonomija'         , u'http://www.pobjeda.co.me/rubrika.php?rubrika=2'  )
 | |
|               ,(u'Drustvo'           , u'http://www.pobjeda.co.me/rubrika.php?rubrika=3'  )
 | |
|               ,(u'Crna Hronika'      , u'http://www.pobjeda.co.me/rubrika.php?rubrika=4'  )
 | |
|               ,(u'Kultura'           , u'http://www.pobjeda.co.me/rubrika.php?rubrika=5'  )
 | |
|               ,(u'Hronika Podgorice' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=7'  )
 | |
|               ,(u'Feljton'           , u'http://www.pobjeda.co.me/rubrika.php?rubrika=8'  )
 | |
|               ,(u'Crna Gora'         , u'http://www.pobjeda.co.me/rubrika.php?rubrika=9'  )
 | |
|               ,(u'Svijet'            , u'http://www.pobjeda.co.me/rubrika.php?rubrika=202')
 | |
|               ,(u'Ekonomija i Biznis', u'http://www.pobjeda.co.me/dodatak.php?rubrika=11' )
 | |
|               ,(u'Djeciji Svijet'    , u'http://www.pobjeda.co.me/dodatak.php?rubrika=12' )
 | |
|               ,(u'Kultura i Drustvo' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=13' )
 | |
|               ,(u'Agora'             , u'http://www.pobjeda.co.me/dodatak.php?rubrika=133')
 | |
|               ,(u'Ekologija'         , u'http://www.pobjeda.co.me/dodatak.php?rubrika=252')
 | |
|             ]
 | |
| 
 | |
|     def preprocess_html(self, soup):
 | |
|         soup.html['xml:lang'] = self.lang
 | |
|         soup.html['lang']     = self.lang
 | |
|         mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
 | |
|         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
 | |
|         soup.head.insert(0,mlang)
 | |
|         soup.head.insert(1,mcharset)
 | |
|         return self.adeify_images(soup)
 | |
| 
 | |
|     def get_cover_url(self):
 | |
|         cover_url = None
 | |
|         soup = self.index_to_soup(self.INDEX)
 | |
|         cover_item = soup.find('img',attrs={'alt':'Naslovna strana'})
 | |
|         if cover_item:
 | |
|            cover_url = self.INDEX + cover_item.parent['href']
 | |
|         return cover_url
 | |
| 
 | |
|     def parse_index(self):
 | |
|         totalfeeds = []
 | |
|         lfeeds = self.get_feeds()
 | |
|         for feedobj in lfeeds:
 | |
|             feedtitle, feedurl = feedobj
 | |
|             self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
 | |
|             articles = []
 | |
|             soup = self.index_to_soup(feedurl)
 | |
|             for item in soup.findAll('div', attrs={'class':'vijest'}):
 | |
|                 description = self.tag_to_string(item.h2)
 | |
|                 atag = item.h1.find('a')
 | |
|                 if atag and atag.has_key('href'):
 | |
|                     url         = self.INDEX + '/' + atag['href']
 | |
|                     title       = self.tag_to_string(atag)
 | |
|                     date        = strftime(self.timefmt)
 | |
|                     articles.append({
 | |
|                                       'title'      :title
 | |
|                                      ,'date'       :date
 | |
|                                      ,'url'        :url
 | |
|                                      ,'description':description
 | |
|                                     })
 | |
|             totalfeeds.append((feedtitle, articles))
 | |
|         return totalfeeds
 | |
| 
 |