From e5c8638af651b88e70227b0e94e770b7e5746c02 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 16 Nov 2010 08:22:21 -0700 Subject: [PATCH] Fix #7561 (Updated recipe for The Moscow Times) --- resources/images/news/moscow_times.png | Bin 0 -> 1034 bytes resources/recipes/moscow_times.recipe | 82 ++++++++++++------------- 2 files changed, 41 insertions(+), 41 deletions(-) create mode 100644 resources/images/news/moscow_times.png diff --git a/resources/images/news/moscow_times.png b/resources/images/news/moscow_times.png new file mode 100644 index 0000000000000000000000000000000000000000..34c31179742b7538511477edcae5b07936fb13c1 GIT binary patch literal 1034 zcmV+l1oiugP)y^$D}C=6|^OCAq6Tn2q6`Lc&MNPi>j)GC?MJ_*nk8J#G*wMgoI!L2m~mVsx}Fw zg@#rYh0qY!kMhZD;%#kLSUethmx$%)L72p8xy*AxFMm0CTCj0Rabl^w3y% z_S4I$D+Nh$4F`osyn^+wYOC|_gHbPZG#!@3I)uF(_yiUlHX4}mN25Tn0Yec;m0J3_ zcy0=SJbgV~dpn+ycJJtOu15U)=|sLhJ{aH$O%VhBw+I%2t^?52Ruk&frPLG_yLN|N znVCds|9ZIv*t;~;EgX(TJmYo?ee!{b=Sw$d0Rbl<6LTU+vId2E8?4x+t?EyFH=CKL zG!?^w&ryZ}BmG_Q%EMtWi_+LoQH@j4fbcFh$WrDD+k*n^8}dPq%gQZgrQ`Kh2PYzA zXQ&H?@NP%117ZVis5CnboH%)Ht@=#9-ga(OWpio*Zr*}G7{@oiZ@fIeUK{Q6+P9$2 zrE9Q?&}fDNRWl)5>0Xz5oMV}v^O8~-@Y#Vf;WZTv0X9E;ezfnzp|Mc0-@|=>t0L=W zlRZ%nZ0`}EV}b&(O~-R@L<4-Sg*C_n7_@8u)nsM#?mpq!n`IeXoP`YfxjiP-ODjd# zz_W9I=K&#tIc>Tk$CgED5?Lwu{T#DaZvG%jGQFCUsy$t{*YV=cBM*g+{`5}~&diG@ zFpyOZ68Xkf4lFb=_ei;+t%#fQd(@Nn^qfEy&`>ln!^rDra-gnsP10T<%)UU4H0>8(; z6*8xl*&GaTZM1vw#aRFSL8nbzD>mO5@bkY?$wK3!F3$2rv91h%el`8(+4;QG(Ts#? zn+M~XMXSzS&$y7`Mm)2*cPT4Ilokhpj?cKHg@vjjXxQTEYO6M(A(U>*zHr3@%%2i zqw!`)P?mwRh-u-4<@`1;&#E6hb=M1Mp@c_^EarpR8FGPzA{r*|G((f~+k(OzPV(c# zT-J=Pbl)9rDN~hQf268BK9VAYv?-+C?X-d)CDN}QdkZk`2FKDfIY|*wa}f!zH5Hx1 zbpG<#NO$@0-taNh9w9&)LqUGpW}(;3|6*6z4gU!+0Jq}Ici}N+P5=M^07*qoM6N<$ Ef(+L0i~s-t literal 0 HcmV?d00001 diff --git a/resources/recipes/moscow_times.recipe b/resources/recipes/moscow_times.recipe index 3105aba58e..9d178e8c53 100644 --- a/resources/recipes/moscow_times.recipe +++ b/resources/recipes/moscow_times.recipe @@ -1,31 +1,33 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' -moscowtimes.ru +www.themoscowtimes.com ''' from calibre.web.feeds.news import BasicNewsRecipe class Moscowtimes(BasicNewsRecipe): - title = u'The Moscow Times' + title = 'The Moscow Times' __author__ = 'Darko Miletic and Sujata Raman' - description = 'News from Russia' - language = 'en' - lang = 'en' - oldest_article = 7 + description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).' + category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg' + publisher = 'The Moscow Times' + language = 'en' + oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - #encoding = 'utf-8' - encoding = 'cp1252' - remove_javascript = True + remove_empty_feeds = True + encoding = 'cp1251' + masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif' + publication_type = 'newspaper' conversion_options = { - 'comment' : description - , 'language' : lang - } + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } extra_css = ''' h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large} @@ -35,39 +37,37 @@ class Moscowtimes(BasicNewsRecipe): .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; } ''' feeds = [ - (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'), - (u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'), - (u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'), - (u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'), - (u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'), - (u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion') + (u'Top Stories' , u'http://www.themoscowtimes.com/rss/top' ) + ,(u'Current Issue' , u'http://www.themoscowtimes.com/rss/issue' ) + ,(u'News' , u'http://www.themoscowtimes.com/rss/news' ) + ,(u'Business' , u'http://www.themoscowtimes.com/rss/business') + ,(u'Art and Ideas' , u'http://www.themoscowtimes.com/rss/art' ) + ,(u'Opinion' , u'http://www.themoscowtimes.com/rss/opinion' ) ] - keep_only_tags = [ - dict(name='div', attrs={'class':['newstextblock']}) - ] - + keep_only_tags = [dict(name='div', attrs={'id':'content'})] remove_tags = [ - dict(name='div', attrs={'class':['photo_nav']}) - ] - + dict(name='div', attrs={'class':['photo_nav','phototext']}) + ,dict(name=['iframe','meta','base','link','embed','object']) + ] + def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mtag = '' - soup.head.insert(0,mtag) - - return self.adeify_images(soup) + for lnk in soup.findAll('a'): + if lnk.string is not None: + ind = self.tag_to_string(lnk) + lnk.replaceWith(ind) + return soup + def print_version(self, url): + return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/') def get_cover_url(self): - + cover_url = None href = 'http://www.themoscowtimes.com/pdf/' - - soup = self.index_to_soup(href) + soup = self.index_to_soup(href) div = soup.find('div',attrs={'class':'left'}) - a = div.find('a') - print a - if a : - cover_url = a.img['src'] + if div: + a = div.find('a') + if a : + cover_url = 'http://www.themoscowtimes.com' + a.img['src'] return cover_url