From 18a208a9f4d1171e1016aedc7d44a8fe58105486 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 30 Dec 2009 10:48:46 -0700 Subject: [PATCH] New recipes for Pro Publica, Big Government, El Universal Impressa and Journal of Accountacy by kwetal --- resources/images/news/biggovernment.png | Bin 0 -> 1578 bytes resources/images/news/eluniversal.png | Bin 0 -> 878 bytes resources/images/news/propublica.png | Bin 0 -> 1406 bytes resources/recipes/biggovernment.recipe | 28 ++++++ resources/recipes/eluniversalimpresa.recipe | 82 ++++++++++++++++++ resources/recipes/journalofaccountancy.recipe | 44 ++++++++++ resources/recipes/propublica.recipe | 60 +++++++++++++ 7 files changed, 214 insertions(+) create mode 100644 resources/images/news/biggovernment.png create mode 100644 resources/images/news/eluniversal.png create mode 100644 resources/images/news/propublica.png create mode 100644 resources/recipes/biggovernment.recipe create mode 100644 resources/recipes/eluniversalimpresa.recipe create mode 100644 resources/recipes/journalofaccountancy.recipe create mode 100644 resources/recipes/propublica.recipe diff --git a/resources/images/news/biggovernment.png b/resources/images/news/biggovernment.png new file mode 100644 index 0000000000000000000000000000000000000000..d5c2442ebbcd3086bb235a7811e9b341f4fdf7a0 GIT binary patch literal 1578 zcmV+_2G#kAP)}^jtm{GfcWYzf#m5jjP?cXv@@NV>xaFL_w;ta(UnZ* z-gDMo>%Z6fugC3Jg48K>@HD&-ugA-Aa=h-~2p++2u?4%ToM)C|yR$^+Yw#hg#j~*j z%UU+(FpYoVVcd@IV}F%%@Bac&r!*8d{uthdQ*q)FF13I?*pBbvrl*R=q5$fYMsX27 zj#otZ2c68L7dAQ=Uc}x%@FRQ^+ZT(*aRBO+mf;e738xP@oy9I}#h)>S5v;;lI1?wu zvqt7n<63MT1koLEK7x1S%UIodHG_NcQ~VLz@Nkv$e4HB({paH?csrgKQR(8c`2X2R z&G8Wkvb_#pi?BDc8@FIXmGc9kcWT^vMci;V{#NDuc%9OTcq=}LH)6D9>sz?0%K6Y@ z08S3)?`<7;6yLygRnAB1l+MSSaG^rb2Eb!@2%GVfD(9&>rE~Dbh-ee(JqoFBsd8Qr zAyTI_jP>{^o*v4N<2$&v%K2!W(i!+XK7yoQhFzs6oP#%@cj?5$WR>%dOD86_;=D+W zh{iK;N4%%eZ!~1Z1m2l-_Ro6NC0hQ2nVX1=jxQ!p5cM~h?nVeNz;i>P7c76MPN^G2dQA{fozl7ZRD|_V=nu!uZ?03?5KO-U zWB3K`L4^|nzy+Z{-HpSiwQ@S6^z;XnQd%3SnvVAg7h}2Op4AXi87z}k&a-t&XU3uFFeVJ$8fO;bStmaK5(lv^Qkuj5E@eifR=Sp> z(~S=+Tct7|AmEvp2+}nr`v5)>9GDx3z*JPsg+a)b5!r*-KS0_Q4S?Bj*tE!|uZwpx zfw+;=@QyG(+d4Oj&2>t5<4*y4F3yh)5VjW)?RAxM+8YYpAj(OhxEucrXN|lJUr@Rs z#{$1eX-FG`h>r%yasBTayh57RVa7R3Uk7K9n zUi`Y*lA2u3C=FyiUL0u8#?^I7Q&rBB(NSunp^<-8>pmRI7naoh3Gc}&@kZmV+M8S~`|d^#@F$W~ll<$Qnh z;Fy)HPU%JXivCm-p}r3r;%0vh-H`y=!%UU)LacLZ@UDo|s+OHS_#|#@HmSt`3@MDi zMp-->=_&o-UfhX4C@c6v@L*J#8?REfsx|t1MdSS2xMpxeUTi5H3PYd6oAv9Tew*GG zMi;`yGNsJM0!-U(#&>ajmGcvW{+?_tA5zBcmC6`B#lQl)1UaP4jq8-%;_zZhPx;HJ zPH9Z(2Oq|(<91{Evtm$Yl*zdlKURL2Y-{KIe*zF7hLzcHhQfmKZ^a-RNg_Wun7pw8)NpvDyd001R)MObuXVRU6WV{&C-bY%cCFfchSF)}SPFjO%x zIx;vqGBYhOH##sdgFgH3dFc_rvVgLXD07*qoM6N<$f?x~rLI3~& literal 0 HcmV?d00001 diff --git a/resources/images/news/eluniversal.png b/resources/images/news/eluniversal.png new file mode 100644 index 0000000000000000000000000000000000000000..cd970ab9e50469124ce47e4c19faa6bec066842c GIT binary patch literal 878 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?`Gex;TbdoL)L1)8lfX#Buw}`)}XLn4Eda z<&;?ElCt_N|Z`$2nUOU{xZdfT|E$bkH`(>T%B_| z%N9HmPV1h!CG=rO`b~|J&TyAsvkbVhqAomgy?T`Q&YH}pUs|gsU1JxPe&>I3v)`@V zYh`YTocwrYr;md5`;`iI@lU4j5q9DzTzpGsT8e1rlF9up38zXObLJT=y(d)bT=r32 z#EIkLt=ywlZ&Knq?zenWoISbiSw7G07@PY0rE%(;+dIYfq~eP$S`0Q!HG5vlNjOc5ouiqd z^7l?MLs0mvh{_j?wttP(7SB5cYTh_KR$||IL%e^=2o+kphSlLz}ps_pPSW0*&a9_U+_b|jwwz) zv2H=)zpcRZs#@Y2QIe8al4_M)lnSI6j0_Acbq$SljSWH!EUgSJt$nC}Q!>*kku?|^Ss9yJ85uw{e0Ssd3e>>h>FVdQ&MBb@00h!~jsO4v literal 0 HcmV?d00001 diff --git a/resources/images/news/propublica.png b/resources/images/news/propublica.png new file mode 100644 index 0000000000000000000000000000000000000000..02954be4ea36161044ad0896bb01d110f5a91157 GIT binary patch literal 1406 zcmV-^1%djBP)u(fQ7>A$Pncbbebm`V^ufT3=?QV^i+D1W&sS>ZC0#(1@HxrG8G7t{mIe;TaeqnV> z3txQpDNa#j>ii^5rxW3JcR^oYKOemR9>==73Dw<8Fj$X6mWjn;l-yH#`v4S0VaK~W zIIwRomA+CMLLtg4D@pu$k?>%Ia|2;i-Ml>jd%oSnu3ev#oSLE`SWitL06-)XAv_cz zHZh6A;h?>}{htNs{`Dw3cYa8J?{C!j{XF!*{WxTq3m2y7>Fp&F`vXi(9ox6F zX3bOA22d0QRaLRsY`9#mzZH%~qkQtw$8>dd5vZ!jr+a34n*MWP&JBc_zI2&TsDX}- zw^_eoeg1n30z{)xQmHi4msRv^7A-ZKf3~!`iuz!%ApWIwFETthKtr&e`upn8v@DTG zguZZ?@%R)=+)m!uyp=6mHnViuvVwP|tA!JZBxz&>tBhFW#b%W$GSUDTYKo!Z2q%sm zr|q>@ak*S5ib6wefST$`irg;7#wYka9L}fv^-Y`Dymc$h%}ontFQ0+FzJB8IIC7~U zXR$9IV6_MU$aWjD1Q|`|d@M}w>Eo38%JBJ0si~>qo3Fm)@S#JDjEymU=`yQZ9_Ou& zcW8ZL)iv#t0LDf~2@ejjJlMn%ZwZR3qw6_mblpT01+-KK!zSXiOVouPBt4rTol4aP0Y;4BQ4>fZ3)G1c1TuF0t6K$`w-IO?h<;*DiEiXBfJ7p>cmP?JF|_&AdmLgxK*kivYT1IqyaJ+T z4Q6I$@Oq0E58w|3m>3@|nDjU#WO3e(j48~m+(1$F{CsLQ$#gPFd3pKb0h*he7>y2+ zQB^D!G4FA)Th6U=t_IR`R_VHlW|&w+iJ|_}ghHWfZZy{g0H~;_U}T^VtCZVlAlG)q zvV=_(@(_?~OP5V#HhGcmpMT=z4eJ*zo(EXFZY?9>KBgvfRygevvQ@z25Gixp@H!-l zWeJZ%!Xry$Q%Qc@_cg0pT5h`UuO9icKv5Jr_a7kC_$XEWx_rM|EFu|A#}q^aQztn! z$>ATqqokySZEtS7CGi4)E2FdPAZ}U4SG5$Mw+Pc&N@^xS{QLx?(E)mTdT3qqG;5!K z=9a}51OP}R5}Z77lCkkIGMNmSbQ-%X6AXn|vEs2?D!Lm2xRc|*_h3 zCB9Jr001R)MObuXVRU6WV{&C-bY%cCFfchSF)}SPFjO%xIx;vqGBYhOH##sd= 0: + return None + + main, sep, id = url.rpartition('/') + + return main + '/vi_' + id + + def preprocess_html(self, soup): + table = soup.find('table') + table.extract() + + for p in soup.findAll('p'): + if self.tag_to_string(p).strip() == '': + p.extract() + + tag = soup.find('font', attrs = {'color': '#0F046A'}) + if tag: + for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']: + if tag.has_key(attr): + del tag[attr] + tag.name = 'h1' + + return soup diff --git a/resources/recipes/journalofaccountancy.recipe b/resources/recipes/journalofaccountancy.recipe new file mode 100644 index 0000000000..51a6ac8d29 --- /dev/null +++ b/resources/recipes/journalofaccountancy.recipe @@ -0,0 +1,44 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class JournalOfAccountancyRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'en' + version = 1 + + title = u'Journal of Accountancy' + publisher = u'AICPA' + category = u'News, Accountancy' + description = u'Publication of the American Institute of Certified Public Accountants' + + use_embedded_content = False + remove_empty_feeds = True + oldest_article = 30 + max_articles_per_feed = 100 + + no_stylesheets = True + remove_javascript = True + + extra_css = ''' + body{font-family:verdana,arial,helvetica,geneva,sans-serif;} + div#Rubricname {font-size: small; color: #666666; margin-bottom: 1em;} + div#Headline {font-size: x-large; font-weight: bold; margin-bottom: 0.6em} + div#SubHeadline {font-size: medium; font-weight: bold; margin-bottom: 1em} + div#Authorname, div#Date {font-size: x-small; color: #696969;} + ''' + + conversion_options = {'comments': description, 'tags': category, 'language': 'en', + 'publisher': publisher} + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'Rubricname'})) + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'Headline'})) + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'SubHeadline'})) + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'Authorname'})) + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'Date'})) + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'BodyContent'})) + + remove_attributes = ['style'] + + feeds = [] + feeds.append((u'Journal of Accountancy', u'http://feeds2.feedburner.com/JournalOfAccountancy')) diff --git a/resources/recipes/propublica.recipe b/resources/recipes/propublica.recipe new file mode 100644 index 0000000000..1e1f0af7a9 --- /dev/null +++ b/resources/recipes/propublica.recipe @@ -0,0 +1,60 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class ProPublicaRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'en_US' + version = 1 + + title = u'Pro Publica' + publisher = u'ProPublica.org' + category = u'Political blog' + description = u'Independent investigative journalism in the public interest.' + + oldest_article = 14 + max_articles_per_feed = 100 + use_embedded_content = False + + remove_empty_feeds = True + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'class': 'article'})) + + remove_tags = [] + remove_tags.append(dict(name = 'div', attrs = {'id': 'rollups'})) + remove_tags.append(dict(name = 'div', attrs = {'class': 'follow_info'})) + remove_tags.append(dict(name = 'ul', attrs = {'class': 'long-tools-top'})) + remove_tags.append(dict(name = 'ul', attrs = {'id': 'share-box'})) + remove_tags.append(dict(name = 'div', attrs = {'class': 'tags'})) + remove_tags.append(dict(name = 'ul', attrs = {'class': 'long-tools'})) + remove_tags.append(dict(name = 'ul', attrs = {'id': 'share-box2'})) + remove_tags.append(dict(name = 'p', attrs = {'id': 'original-url'})) + + feeds = [] + feeds.append((u'Top Stories', u'http://feeds.propublica.org/propublica/main')) + feeds.append((u'Stimulus', u'http://feeds.propublica.org/propublica/watchdog/stimulus')) + feeds.append((u'Bailout', u'http://feeds.propublica.org/propublica/watchdog/bailout')) + feeds.append((u'Business', u'http://feeds.propublica.org/propublica/business-money')) + feeds.append((u'Justice', u'http://feeds.propublica.org/propublica/justice-law')) + feeds.append((u'Energy & Environment', u'http://feeds.propublica.org/propublica/energy-environment')) + feeds.append((u'Government & Politics', u'http://feeds.propublica.org/propublica/government-politics')) + feeds.append((u'Health & Science', u'http://feeds.propublica.org/propublica/health-science')) + feeds.append((u'Media & Technology', u'http://feeds.propublica.org/propublica/media-technology')) + feeds.append((u'National Security', u'http://feeds.propublica.org/propublica/national-security')) + #feeds.append((u'', u'')) + + conversion_options = {'comments': description, 'tags': category, 'language': 'en', + 'publisher': publisher} + + extra_css = ''' + body{font-family:verdana,arial,helvetica,geneva,sans-serif;} + img {float: left; margin-right: 0.5em;} + h1 {text-align: left;} + a, a[href] {text-decoration: none; color: blue;} + div.cat {font-size: x-small; color: #666666; margin-bottom: 0.1em;} + div.info {font-size: small; color: #696969;} + ''' + \ No newline at end of file