From 376aac607dd4325d31b2de8c5107e837f6c71718 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 22 Feb 2009 06:17:24 -0800 Subject: [PATCH] New recipe for Borba by Darko Miletic --- src/calibre/gui2/images/news/borba.png | Bin 0 -> 365 bytes src/calibre/web/feeds/recipes/__init__.py | 2 +- src/calibre/web/feeds/recipes/recipe_borba.py | 92 ++++++++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/images/news/borba.png create mode 100644 src/calibre/web/feeds/recipes/recipe_borba.py diff --git a/src/calibre/gui2/images/news/borba.png b/src/calibre/gui2/images/news/borba.png new file mode 100644 index 0000000000000000000000000000000000000000..41766d6264292c69e57d0ef533616dffc2e0f38f GIT binary patch literal 365 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPFP2=EDUH3oq+<1`SMITJ(xDKI!Q z^9&gLhXSAuAW+EXY6DV?N#5=*{1UquZUQ-+1s;*bK-vS0-A-oPfdtD69Mgd`*jk2| zV`~osIq{w@jv*Y^lP#Fpiq9D|ybQi7aBS8UV~1m%pB5i|qi{^~@TCSFfyZZ@+zv_^ zJQitYXxVtm;U`Z=;if+gN-j^%1gdI07MRRCMR5UhqR6T)X9-ruX9CAQZZZ`(HuI?i z8^er4!Qqc)1d0KzQY~?fC`m3#O)N>(O#u=NMg|6!x&}tNMn)lqCRT>VR>lU}1_o9J u2A@7{?}ll}%}>cpt3=gcXc}T*Vg*D-77z_i7MvA84Gf;HelF{r5}E)+hj@bk literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 579291a83b..8277338e18 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -31,7 +31,7 @@ recipe_modules = ['recipe_' + r for r in ( 'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices', 'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline', 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', - 'al_jazeera', 'winsupersite', + 'al_jazeera', 'winsupersite', 'borba', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_borba.py b/src/calibre/web/feeds/recipes/recipe_borba.py new file mode 100644 index 0000000000..a7d8d9f0a4 --- /dev/null +++ 
# New file added by this patch: src/calibre/web/feeds/recipes/recipe_borba.py
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '

'''
borba.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe


class Borba(BasicNewsRecipe):
    """Recipe that builds an issue of Borba Online from borba.rs.

    Articles are not taken from a syndication feed: each entry in ``feeds``
    is a section page that ``parse_index`` scrapes for article links.
    """

    title = 'Borba Online'
    __author__ = 'Darko Miletic'
    description = 'Dnevne novine Borba Online'
    publisher = 'IP Novine Borba'
    category = 'news, politics, Serbia'
    language = _('Serbian')
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf8'
    remove_javascript = True
    use_embedded_content = False
    cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    # Site root; not referenced by the methods of this class.
    INDEX = u'http://www.borba.rs/'
    extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'

    # Language tag written into every article by preprocess_html().
    LANGUAGE_TAG = 'sr-Latn-ME'
    # Markup placed at the start of <head> by preprocess_html().
    CONTENT_LANGUAGE_META = (
        '<meta http-equiv="Content-Language" content="' + LANGUAGE_TAG + '"/>'
    )

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    # Replace U+0110 (D with stroke) with U+00D0 (Eth).
    # NOTE(review): presumably a workaround for reader fonts that lack the
    # U+0110 glyph - confirm before changing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class': 'main'})]

    remove_tags_after = dict(name='div', attrs={'id': 'written_comments_title'})

    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'base', 'img']),
        dict(name='div', attrs={'id': 'written_comments_title'}),
    ]

    # (section title, section page URL) pairs consumed by parse_index().
    feeds = [
        (u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/'),
        (u'Prvi plan', u'http://www.borba.rs/content/blogsection/4/92/'),
        (u'Dogadjaji', u'http://www.borba.rs/content/blogsection/21/83/'),
        (u'Ekonomija', u'http://www.borba.rs/content/blogsection/5/35/'),
        (u'Komentari', u'http://www.borba.rs/content/blogsection/23/94/'),
        (u'Svet', u'http://www.borba.rs/content/blogsection/7/36/'),
        (u'Sport', u'http://www.borba.rs/content/blogsection/6/37/'),
        (u'Fama', u'http://www.borba.rs/content/blogsection/25/89/'),
        (u'B2 Dodatak', u'http://www.borba.rs/content/blogsection/30/116/'),
    ]

    def preprocess_html(self, soup):
        """Tag the document language and strip inline styling.

        Sets ``lang`` and ``xml:lang`` on ``<html>``, inserts a
        Content-Language ``<meta>`` as the first child of ``<head>``, and
        removes every ``style`` and ``font`` attribute.

        :param soup: parsed article document
        :return: the same ``soup`` object, modified in place
        """
        # Pages without <html>/<head> are passed through instead of raising.
        if soup.html is not None:
            soup.html['xml:lang'] = self.LANGUAGE_TAG
            soup.html['lang'] = self.LANGUAGE_TAG
        if soup.head is not None:
            soup.head.insert(0, self.CONTENT_LANGUAGE_META)
        for item in soup.findAll(style=True):
            del item['style']
        # NOTE(review): this matches elements carrying a `font` *attribute*;
        # possibly <font> tags or `face` attributes were intended - confirm.
        for item in soup.findAll(font=True):
            del item['font']
        return soup

    def parse_index(self):
        """Scrape every section page for its article links.

        For each ``(title, url)`` pair returned by ``get_feeds()`` the page
        is fetched and every ``<a class="contentpagetitle">`` becomes one
        article entry.

        :return: list of ``(feed title, articles)`` tuples, where each
                 article is a dict with the keys ``title``, ``date``,
                 ``url`` and ``description`` (``date`` and ``description``
                 are always empty strings)
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0,
                _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl),
            )
            soup = self.index_to_soup(feedurl)
            articles = []
            for item in soup.findAll('a', attrs={'class': 'contentpagetitle'}):
                url = item.get('href')
                if not url:
                    # An anchor without a target cannot be downloaded; skip
                    # it rather than abort the whole fetch.
                    continue
                articles.append({
                    'title': self.tag_to_string(item),
                    'date': '',
                    'url': url,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds