#!/usr/bin/env python
'''
en.wikinews.org

calibre news recipe for the English edition of Wikinews.
'''

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '

from calibre.web.feeds.news import BasicNewsRecipe


class WikiNews(BasicNewsRecipe):
    '''Fetch recent English Wikinews articles.

    Articles are discovered through the FeedBurner feed listed in ``feeds``;
    each entry is mapped to its en.wikinews.org page and downloaded through
    the wiki's ``printable=yes`` view before being cleaned up.
    '''

    title = 'Wikinews'
    __author__ = 'Darko Miletic'
    description = 'News from wikipedia'
    category = 'news, world'
    oldest_article = 7
    max_articles_per_feed = 100
    publisher = 'Wiki'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    # `_` is not defined or imported in this module; presumably it is the
    # translation builtin installed by calibre -- confirm against the host app.
    language = _('English')

    # Metadata handed to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # Same metadata for the EPUB converter, one key="value" pair per line.
    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)

    # Only the page heading and the article body container are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'firstHeading'}),
        dict(name='div', attrs={'id': 'bodyContent'}),
    ]

    remove_tags = [
        dict(name='link'),
        dict(name='div', attrs={'id': ['printfooter', 'catlinks', 'footer']}),
        dict(name='div', attrs={'class': ['thumb left', 'thumb right']}),
    ]

    remove_tags_after = dict(name='h2')

    feeds = [(u'News', u'http://feeds.feedburner.com/WikinewsLatestNews')]

    def get_article_url(self, article):
        '''Return the en.wikinews.org page URL for a feed entry.

        The text after the last ``/`` of the entry's ``link`` is taken as the
        page title and appended to the wiki's ``/wiki/`` prefix.

        :param article: feed entry object exposing a dict-style ``get``.
        :return: the article URL, or ``None`` when the entry has no link or
                 the link carries no title segment.
        '''
        link = article.get('link', None)
        if not link:
            # Previously this fell through to None.rpartition() and raised
            # AttributeError; report "no URL" instead.
            return None
        article_id = link.rpartition('/')[2]
        if not article_id:
            # A link ending in '/' would otherwise resolve to the wiki root
            # page rather than to an article.
            return None
        return 'http://en.wikinews.org/wiki/' + article_id

    def print_version(self, url):
        '''Return the printable-view URL for an article URL.

        :param url: article URL whose last path segment is the page title.
        :return: ``index.php`` URL for that title with ``printable=yes``.
        '''
        article_id = url.rpartition('/')[2]
        return ('http://en.wikinews.org/w/index.php?title=' + article_id
                + '&printable=yes')

    def preprocess_html(self, soup):
        '''Strip layout noise from a downloaded article page.

        Removes every ``div`` and ``h2`` found inside the ``bodyContent``
        container and deletes ``style`` and ``font`` attributes from all tags
        that carry them.

        :param soup: parsed page (BeautifulSoup-style tree).
        :return: the same ``soup`` object, modified in place.
        '''
        # NOTE(review): this literal is empty in the copy being reviewed, so
        # the insert adds no visible markup. Presumably a <meta> tag string
        # was lost in transit -- confirm against the upstream recipe.
        mtag = ''
        if soup.head is not None:
            soup.head.insert(0, mtag)

        # The container may be absent (error page, changed skin); in that case
        # there is nothing to prune and only the attribute cleanup applies.
        body = soup.find('div', attrs={'id': 'bodyContent'})
        if body is not None:
            # divs first, then h2: headings nested in a removed div are
            # already gone by the time the second search runs.
            for tag_name in ('div', 'h2'):
                for item in body.findAll(tag_name):
                    item.extract()

        for attr_name in ('style', 'font'):
            for item in soup.findAll(**{attr_name: True}):
                del item[attr_name]
        return soup