From 7abf29c5ba73cdbcd06cf75579139d1db669aa72 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 13 Sep 2011 09:49:13 -0600
Subject: [PATCH] Fix #848900 (Updated recipe for The Japan Times)

---
 recipes/icons/japan_times.png |  Bin 0 -> 1264 bytes
 recipes/japan_times.recipe    |   67 ++++++++++++++++++++++++++--------
 2 files changed, 51 insertions(+), 16 deletions(-)
 create mode 100644 recipes/icons/japan_times.png

diff --git a/recipes/icons/japan_times.png b/recipes/icons/japan_times.png
new file mode 100644
index 0000000000000000000000000000000000000000..1b2ac895725ec55d4328a964f6c3f70f4076f111
GIT binary patch
literal 1264
zcmeIw|5MTj7zgkNycVOlqwIE@xy97zo6-l+^v-oV^y39ra_UOg4}NKDUXxZ%@6)
zIOvRn?l_ReLr*;1j)yx5a5n+^6X4ISFhGa{sKPB83(i9|3b
f+Z2GN$@fWrWjyjzzhTI2)GdNFkyiSe%9(NvLM8QWj3s^0pEw*;vusAw1oo1cNmqQ
z!`doKrKi3c_!4~~IHWE`-N(?NutHITDSp6IBJ7a}(}=LqI&Az3Hrs&BHDJz`W?9=M
ztF+bF)jHqR>XJzx_y0aT(B)Bdc^@gg8l_LIR%^9dy7heEJ{UX0y%C&c2D5!RPI`^9}BDdflr$5pI{?<6iLjd_h0HxVRV!
g_eSNXlZ3Rgonf7f&z?~sBAAr!!LpcXex1TdU5vUP($E9?%H)Ybe
x!Rw{IkZfPGBDQhePfwp(-wP2mJ0RY#Vu{VBZKql{ZDrn8~aSvC6XtBK%U6ihmTwp
w13uod0psh+EiL#!CD|_Tu`a5Q#=(FsH39!!s((&e$mPF`!$Mh;@|JtYn120txscSyw3`B
y}hse$znyyQDN)Jw7uaa<-NW{-C!Y*9;a|!!R2`c1n)V5ymb!KAPKXE69%V3?kV;UX#7+JwZyw_#jEx
dA>HPoE~#3{XA8psnlZ@gEsJ>GKtL?E}J{^ZjeC4iR2b`)rQr#;$`vC+ZiR*{{h9F
BuS5U<

literal 0
HcmV?d00001

diff --git a/recipes/japan_times.recipe b/recipes/japan_times.recipe
index bb83b16f1e..229d5e4035 100644
--- a/recipes/japan_times.recipe
+++ b/recipes/japan_times.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 japantimes.co.jp
 '''
@@ -9,24 +7,61 @@ japantimes.co.jp
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class JapanTimes(BasicNewsRecipe):
-    title                 = u'The Japan Times'
+    title                 = 'The Japan Times'
     __author__            = 'Darko Miletic'
-    description           = 'News from Japan'
-    language              = 'en'
-
-    oldest_article        = 7
-    max_articles_per_feed = 100
+    description           = "Daily news and features on Japan from the most widely read English-language newspaper in Japan. Coverage includes national news, business news, sports news, commentary and features on living in Japan, entertainment, the arts, education and more."
+    language              = 'en_JP'
+    category              = 'news, politics, japan'
+    publisher             = 'The Japan Times'
+    oldest_article        = 5
+    max_articles_per_feed = 150
     no_stylesheets        = True
     use_embedded_content  = False
+    encoding              = 'utf8'
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://search.japantimes.co.jp/images/header_title.gif'
+    extra_css             = 'body{font-family: Geneva,Arial,Helvetica,sans-serif}'
 
-    keep_only_tags    = [ dict(name='div', attrs={'id':'searchresult'}) ]
-    remove_tags_after = [ dict(name='div', attrs={'id':'mainbody' }) ]
+    conversion_options = {
+                           'comment'          : description
+                         , 'tags'             : category
+                         , 'publisher'        : publisher
+                         , 'language'         : language
+                         , 'linearize_tables' : True
+                         }
+
+
+    keep_only_tags = [dict(name='div', attrs={'id':'printresult'})]
     remove_tags = [
-                    dict(name='div' , attrs={'id':'ads' })
-                   ,dict(name='table', attrs={'width':470})
+                    dict(name=['iframe','meta','link','embed','object','base'])
+                   ,dict(attrs={'id':'searchfooter'})
                   ]
+    feeds = [(u'The Japan Times', u'http://feeds.feedburner.com/japantimes')]
+    remove_attributes = ['border']
 
+    def get_article_url(self, article):
+        rurl = BasicNewsRecipe.get_article_url(self, article)
+        return rurl.partition('?')[0]
 
-    feeds = [
-             (u'The Japan Times', u'http://feedproxy.google.com/japantimes')
-            ]
\ No newline at end of file
+    def print_version(self, url):
+        return url.replace('/cgi-bin/','/print/')
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('photo'):
+            item.name = 'div'
+        for item in soup.head.findAll('paragraph'):
+            item.extract()
+        for item in soup.findAll('wwfilename'):
+            item.extract()
+        for item in soup.findAll('jtcategory'):
+            item.extract()
+        for item in soup.findAll('nomooter'):
+            item.extract()
+        for item in soup.body.findAll('paragraph'):
+            item.name = 'p'
+        return soup
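
Note (illustration, not part of the patch): the two new URL hooks carry most of
this update. get_article_url() strips the feed-tracker query string from the
FeedBurner links, and print_version() rewrites the article path from /cgi-bin/
to /print/ so the recipe fetches the print-friendly page that the new
'printresult' keep_only_tags selector expects. A minimal standalone sketch of
that flow; the sample URL is hypothetical, modeled on japantimes.co.jp article
links of the time:

    # Standalone sketch of the URL handling in the updated recipe.
    # The sample URL below is hypothetical, for illustration only.

    def strip_query(url):
        # What get_article_url() does after the base class resolves the
        # feed item: keep everything before the first '?'.
        return url.partition('?')[0]

    def to_print_version(url):
        # What print_version() does: swap the CGI path for the print path.
        return url.replace('/cgi-bin/', '/print/')

    url = 'http://search.japantimes.co.jp/cgi-bin/nn20110913a1.html?utm_source=feedburner'
    clean = strip_query(url)
    print(clean)
    # http://search.japantimes.co.jp/cgi-bin/nn20110913a1.html
    print(to_print_version(clean))
    # http://search.japantimes.co.jp/print/nn20110913a1.html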