# Recipe carried by calibre commit 0379b914e7d0592f503818b76901bb005642581d
# (Kovid Goyal, Wed, 9 Feb 2011 09:06:50 -0700):
#     "Fix #8889 (New recipe for njuz.net website)"
# The commit creates two files:
#     resources/images/news/njuz_net.png   (914-byte binary icon; payload not reproduced here)
#     resources/recipes/njuz_net.recipe    (the module below)

__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic '
'''
njuz.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe


class NjuzNet(BasicNewsRecipe):
    """calibre news recipe for njuz.net, read from http://www.njuz.net/feed/.

    All class attributes below are settings picked up by ``BasicNewsRecipe``;
    their exact semantics are defined by calibre's recipe API, not here.
    """

    # --- Publication metadata ---------------------------------------------
    title = 'Njuz.net'
    __author__ = 'Darko Miletic'
    description = 'Iscasene vesti iz Srbije, regiona i sveta'
    publisher = 'njuz.net'
    category = 'news, politics, humor, Serbia'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://www.njuz.net/njuznet.jpg'

    # --- Feed and download settings ---------------------------------------
    feeds = [(u'Clanci', u'http://www.njuz.net/feed/')]
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf8'

    # --- Styling ----------------------------------------------------------
    no_stylesheets = True
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        body{font-family: serif1, serif}
        .articledescription{font-family: serif1, serif}
        .wp-caption-text{font-size: x-small}
    """

    # Conversion metadata is built from the descriptive attributes above, so
    # it must stay below their definitions.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Markup clean-up --------------------------------------------------
    # Swap U+0110 (D with stroke) for U+00D0 (Eth) in the raw page text.
    # NOTE(review): presumably a glyph workaround for the Sony font named in
    # extra_css -- confirm before changing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [
        {'attrs': {'id': 'entryMeta'}},
        {'attrs': {'class': 'post'}},
    ]

    remove_tags = [
        {'name': ['embed', 'link', 'base', 'iframe', 'object', 'meta', 'fb:like']},
        {'name': 'div', 'attrs': {'id': 'tagsandcats'}},
    ]
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'tagsandcats'}}
    remove_attributes = ['lang']

    def preprocess_html(self, soup):
        """Strip inline styles and unwrap text-only links.

        Every element that carries a ``style`` attribute has that attribute
        deleted.  Every ``<a>`` whose ``.string`` is not ``None`` is replaced
        in the tree by that string; other anchors are left untouched.

        Returns the same ``soup`` object, modified in place.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # No single string child to substitute; keep the anchor as is.
                continue
            anchor.replaceWith(label)

        return soup