From 38d1375974023ace56817bd8e53e245ecf3621a5 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sat, 27 Nov 2010 01:08:09 +0900
Subject: [PATCH 01/12] fix character encoding and minor bugs

---
 resources/recipes/jijinews.recipe        | 11 ++++++++++-
 resources/recipes/msnsankei.recipe       |  7 +++++--
 resources/recipes/nikkei_sub_main.recipe |  3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe
index f74864365d..4af242063e 100644
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@@ -17,10 +17,19 @@ class JijiDotCom(BasicNewsRecipe):
     encoding       = 'utf-8'
     oldest_article = 6
     max_articles_per_feed = 100
+    encoding       = 'EUC_JP'
     language       = 'ja'
-    cover_url       = 'http://www.jiji.com/img/top_header_logo2.gif'
     masthead_url    = 'http://jen.jiji.com/images/logo_jijipress.gif'
+	top_url			= 'http://www.jiji.com/'
 
     feeds          = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
     remove_tags_after = dict(id="ad_google")
 
+	def get_cover_url(self):
+		cover_url       = 'http://www.jiji.com/img/top_header_logo2.gif'
+		soup = self.index_to_soup(self.top_url)
+		cover_item = soup.find('div', attrs={'class':'top-pad-photos'})
+		if cover_item:
+			cover_url = self.top_url + cover_item.img['src']
+		return cover_url
+
diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe
index 4c79771945..8c78ccd9e9 100644
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env  python
 
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
@@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
     max_articles_per_feed = 100
     encoding       = 'Shift_JIS'
     language       = 'ja'
+    cover_url       = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg'
+    masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif'
 
     feeds          = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
 
     remove_tags_before = dict(id="__r_article_title__")
     remove_tags_after  = dict(id="ajax_release_news")
-    remove_tags = [{'class':"parent chromeCustom6G"}]
+    remove_tags = [{'class':"parent chromeCustom6G"},
+                              {'class':"RelatedImg"}
+                            ]
diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe
index 142edf624d..37fc8964c4 100644
--- a/resources/recipes/nikkei_sub_main.recipe
+++ b/resources/recipes/nikkei_sub_main.recipe
@@ -30,6 +30,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
                        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                        {'class':"cmn-article_keyword cmn-clearfix"},
                        {'class':"cmn-print_headline cmn-clearfix"},
+                       {'class':"cmn-article_list"},
+                       {'class':"cmn-dashedline"},
+                       {'class':"cmn-hide"},
                          ]
     remove_tags_after = {'class':"cmn-pr_list"}
 

From 176010dc03fa5bb6c4ca30e179c3a1a4d49e73a5 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sat, 27 Nov 2010 09:53:35 +0900
Subject: [PATCH 02/12] recipe: jijinews: fix indent error

---
 resources/recipes/jijinews.recipe | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe
index 4af242063e..a62b4db739 100644
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@@ -19,17 +19,16 @@ class JijiDotCom(BasicNewsRecipe):
     max_articles_per_feed = 100
     encoding       = 'EUC_JP'
     language       = 'ja'
-    masthead_url    = 'http://jen.jiji.com/images/logo_jijipress.gif'
-	top_url			= 'http://www.jiji.com/'
+    masthead_url   = 'http://jen.jiji.com/images/logo_jijipress.gif'
+    top_url        = 'http://www.jiji.com/'
 
     feeds          = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
     remove_tags_after = dict(id="ad_google")
 
-	def get_cover_url(self):
-		cover_url       = 'http://www.jiji.com/img/top_header_logo2.gif'
-		soup = self.index_to_soup(self.top_url)
-		cover_item = soup.find('div', attrs={'class':'top-pad-photos'})
-		if cover_item:
-			cover_url = self.top_url + cover_item.img['src']
-		return cover_url
-
+    def get_cover_url(self):
+        cover_url       = 'http://www.jiji.com/img/top_header_logo2.gif'
+        soup = self.index_to_soup(self.top_url)
+        cover_item = soup.find('div', attrs={'class':'top-pad-photos'})
+        if cover_item:
+            cover_url = self.top_url + cover_item.img['src']
+        return cover_url

From 3a8eae0fba90b4c3ee040e47c7f64e5f2ed8c6b2 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sat, 27 Nov 2010 10:18:43 +0900
Subject: [PATCH 03/12] recipe: fixes for Japanese recipes - jijinews: charset
 fix; msnsankei: tag removal fix

---
 resources/recipes/jijinews.recipe  | 2 +-
 resources/recipes/msnsankei.recipe | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe
index a62b4db739..98e7a180d1 100644
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@@ -17,7 +17,7 @@ class JijiDotCom(BasicNewsRecipe):
     encoding       = 'utf-8'
     oldest_article = 6
     max_articles_per_feed = 100
-    encoding       = 'EUC_JP'
+    encoding       = 'euc_jisx0213'
     language       = 'ja'
     masthead_url   = 'http://jen.jiji.com/images/logo_jijipress.gif'
     top_url        = 'http://www.jiji.com/'
diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe
index 8c78ccd9e9..ae195559d5 100644
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@@ -23,5 +23,5 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
     remove_tags_before = dict(id="__r_article_title__")
     remove_tags_after  = dict(id="ajax_release_news")
     remove_tags = [{'class':"parent chromeCustom6G"},
-                              {'class':"RelatedImg"}
+                              dict(id="RelatedImg")
                             ]

From 064bfaa7f9eb61f298895c6a30c6cd90d5b0c87f Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sat, 27 Nov 2010 12:04:00 +0900
Subject: [PATCH 04/12] recipe: japanese: fix minor bugs

  - jijinews: fix encoding
  - mainichi, nikkei: add icons
---
 resources/images/news/mainichi.png                | Bin 0 -> 953 bytes
 resources/images/news/mainichi_it_news.png        | Bin 0 -> 953 bytes
 .../{nikkei_sub_industory.png => nikkei_sub.png}  | Bin
 resources/images/news/nikkei_sub_industry.png     | Bin 0 -> 948 bytes
 resources/recipes/jijinews.recipe                 |   1 -
 5 files changed, 1 deletion(-)
 create mode 100644 resources/images/news/mainichi.png
 create mode 100644 resources/images/news/mainichi_it_news.png
 rename resources/images/news/{nikkei_sub_industory.png => nikkei_sub.png} (100%)
 create mode 100644 resources/images/news/nikkei_sub_industry.png

diff --git a/resources/images/news/mainichi.png b/resources/images/news/mainichi.png
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479
GIT binary patch
literal 953
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5?
zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz|
zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m
zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc
ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5<tjeNycAX&PIe9y}|^
zpWpd&J6H0-@TQ5I4u71fXMaAEVG6^gX%AO^biB5<;O4JOmityeX}Nb*O~d_x)ddgU
z!UxP!Q@`tEhgI(Rc_OGxZjXnu=^m?{Et|K>y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ}
zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4
z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg
z=I`xC-rNun`Z-BD|AVA_w?_M<tE-uh9ZzCoI=CxvK6AhR<Mzn2XL7psOxh?A(5G@;
zg>Qen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rX<hx|GVd#>rQzlN8+g!aq`St1N
z7C%d8|G5yQc7NrkizRa}zhC?Caj<OJ-ZHsqyLWgV=vvagsX>{QWm2yBPlh*pcDwxU
zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE
zHwMl>AR6+@c1vTkdi0rc9<NOv0_~HOms`pFHGO*KCsWS;y7l(kdcGdkySnVz#q!l9
zv!$jecsWgLVVWx5@K~{N-8K_xgMfFRm(6^5B=YpPuU#vhOO4)6()qgaOZg`y_nce7
z8$a*lw%WaF7t@PpV#x<@{hNO6)@jxcn!8!+neF)1_BT~1z6U0K)e_f;l9a@fRIB8o
zR3OD*WMF8ZYiOuzWFBH@Xk}_@Wn!RhU}<GwaDL9DXDAwS^HVa@Dls(}n?W>Ozamx%
P)WG2B>gTe~DWM4fFNU-h

literal 0
HcmV?d00001

diff --git a/resources/images/news/mainichi_it_news.png b/resources/images/news/mainichi_it_news.png
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479
GIT binary patch
literal 953
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5?
zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz|
zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m
zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc
ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5<tjeNycAX&PIe9y}|^
zpWpd&J6H0-@TQ5I4u71fXMaAEVG6^gX%AO^biB5<;O4JOmityeX}Nb*O~d_x)ddgU
z!UxP!Q@`tEhgI(Rc_OGxZjXnu=^m?{Et|K>y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ}
zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4
z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg
z=I`xC-rNun`Z-BD|AVA_w?_M<tE-uh9ZzCoI=CxvK6AhR<Mzn2XL7psOxh?A(5G@;
zg>Qen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rX<hx|GVd#>rQzlN8+g!aq`St1N
z7C%d8|G5yQc7NrkizRa}zhC?Caj<OJ-ZHsqyLWgV=vvagsX>{QWm2yBPlh*pcDwxU
zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE
zHwMl>AR6+@c1vTkdi0rc9<NOv0_~HOms`pFHGO*KCsWS;y7l(kdcGdkySnVz#q!l9
zv!$jecsWgLVVWx5@K~{N-8K_xgMfFRm(6^5B=YpPuU#vhOO4)6()qgaOZg`y_nce7
z8$a*lw%WaF7t@PpV#x<@{hNO6)@jxcn!8!+neF)1_BT~1z6U0K)e_f;l9a@fRIB8o
zR3OD*WMF8ZYiOuzWFBH@Xk}_@Wn!RhU}<GwaDL9DXDAwS^HVa@Dls(}n?W>Ozamx%
P)WG2B>gTe~DWM4fFNU-h

literal 0
HcmV?d00001

diff --git a/resources/images/news/nikkei_sub_industory.png b/resources/images/news/nikkei_sub.png
similarity index 100%
rename from resources/images/news/nikkei_sub_industory.png
rename to resources/images/news/nikkei_sub.png
diff --git a/resources/images/news/nikkei_sub_industry.png b/resources/images/news/nikkei_sub_industry.png
new file mode 100644
index 0000000000000000000000000000000000000000..308f4b3085bafd5c8aabf5fcc5196da3ed2d39bb
GIT binary patch
literal 948
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?|#Qx;TbdoX(x>oe`5NbF}_`?Nhax&jQyz
zdn%Hiv2^LytJWG*r5riioPsX7?iRe#Yor~$O2b5N%S*R{hgty_YhA8zG?@g<Zi;xR
z66R`XvDDYw=hYF(=l864mcM)aJ4WS-*^@Qr-~W*>V43}NidsXG`|@pjZ5JyqUG&hi
zCO)ju^BLFU@3)THKF|obGjns>=E}(`jY~8=g)6>S$OjvTraiZvrhRg|v|~JThV+LS
zx!o@h8LvIeT*-WXa?OWJ40*nfuB#}YFZ*=vv~5Z5<(pqSB+PhCdxNEZ+cR|io*UK^
z))+n6=2~L$4~Mpm%Xe2A-EUjR&G#lxnM2ixSHu53qsBURi=~}&EwfK#HM_+MT@h{n
zww?LIz1Mr6{EB)Pw^RTAsipn0a#aq?cPE!}EH_#B(kgwEWpmr|jH)h^kR>mz5@t$q
zcl*Ao^ZHrlA)(18x9wt!+p?uRD~#RJm-|iPn$@Vn>A1|m`u)d@{YPIUh~JMb-MMYU
zG5-B#8EZTl#Qt}A2=$9??s{SWYxAqqQgh^9K0I>a+wRx%{x51u<<bdOzMUMnPqwI_
zTzda~j>^x6y-$1*|5L#qCE)St_M6`c4cGhLCcJXuJuM((H{;>diR_W9j_mmxIMaaV
z<6X8u<3k)wCm#q)Zo9Pb`0dKCe`mird-G0Ir^UBx^Id%9AJrSSc5m5r;gG|_^-H7K
zbpFadUhi4r8&Dd&NP{WTb%m&juIf^8U(IuK|E9Fgntg|T{hDKQ{uhY`q*UwtDxdMX
zU{}XEqe~TB0?$o=F&O6@cSe40df7U^v)wayKDzY#QD}hS3bk#zH@2U4QsK^cWmV;E
zZg|cWsPx#e1D@Lz=gHfCwGDhBbd+fdn}(gOwa1JWiLfI#COYn$8L1}Z&Y)&K{UuAl
zmb0gdcSR|5+APT_di_Z{W0#A<j!(g-UYuNdv)9Ns_ov02ODujb<@Xji)|US4TXn_u
z>*IV=)1N=*K52a<KR<Wde|Eoj9&@bN&b0wky=sYTL`h0wNvc(HQ7VvPFfuSS&^0vF
zH8Kt{G_^9aure^$Hn6lZFmSVx+JK@VH$NpatrCj{6Nm=3&K(nh8W=oX{an^LB{Ts5
Dv7?~O

literal 0
HcmV?d00001

diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe
index 98e7a180d1..fe52e76aaf 100644
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@@ -14,7 +14,6 @@ class JijiDotCom(BasicNewsRecipe):
     description    = 'World News from Jiji Press'
     publisher      = 'Jiji Press Ltd.'
     category       = 'news'
-    encoding       = 'utf-8'
     oldest_article = 6
     max_articles_per_feed = 100
     encoding       = 'euc_jisx0213'

From c0c1f8225f382a152c3c1e435acbd0504d8224e0 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sat, 27 Nov 2010 14:32:01 +0900
Subject: [PATCH 05/12] recipes: add new cnet japan feed

  - remove additional tags
  - skip ad pages
---
 resources/recipes/cnetjapan.recipe         | 23 +++++++++-
 resources/recipes/cnetjapan_digital.recipe | 49 ++++++++++++++++++++++
 resources/recipes/cnetjapan_release.recipe | 48 +++++++++++++++++++++
 3 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 resources/recipes/cnetjapan_digital.recipe
 create mode 100644 resources/recipes/cnetjapan_release.recipe

diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe
index e0178c1ff2..1058b90401 100644
--- a/resources/recipes/cnetjapan.recipe
+++ b/resources/recipes/cnetjapan.recipe
@@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe):
     max_articles_per_feed = 30
     __author__  = 'Hiroshi Miura'
 
-    feeds          = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
+    feeds          = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'),
+                      (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
+                        ]
     language       = 'ja'
     encoding       = 'Shift_JIS'
     remove_javascript = True
@@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe):
         lambda match: '<!-- removed -->'),
         ]
 
-    remove_tags_before = dict(name="h2")
+    remove_tags_before = dict(id="contents_l")
     remove_tags = [
                    {'class':"social_bkm_share"},
                    {'class':"social_bkm_print"},
                    {'class':"block20 clearfix"},
                    dict(name="div",attrs={'id':'bookreview'}),
+                   {'class':"tag_left_ttl"},
+                   {'class':"tag_right"}
                     ]
     remove_tags_after = {'class':"block20"}
 
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
diff --git a/resources/recipes/cnetjapan_digital.recipe b/resources/recipes/cnetjapan_digital.recipe
new file mode 100644
index 0000000000..9028126af2
--- /dev/null
+++ b/resources/recipes/cnetjapan_digital.recipe
@@ -0,0 +1,49 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CNetJapanDigital(BasicNewsRecipe):
+    title          = u'CNET Japan Digital'
+    oldest_article = 3
+    max_articles_per_feed = 30
+    __author__  = 'Hiroshi Miura'
+
+    feeds          = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ]
+    language       = 'ja'
+    encoding       = 'Shift_JIS'
+    remove_javascript = True
+
+    preprocess_regexps = [
+       (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
+        lambda match: '</body>'),
+       (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
+        lambda match: '</body>'),
+       (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+        lambda match: '<!-- removed -->'),
+        ]
+
+    remove_tags_before = dict(id="contents_l")
+    remove_tags = [
+                   {'class':"social_bkm_share"},
+                   {'class':"social_bkm_print"},
+                   {'class':"block20 clearfix"},
+                   dict(name="div",attrs={'id':'bookreview'}),
+                   {'class':"tag_left_ttl"},
+                   {'class':"tag_right"}
+                    ]
+    remove_tags_after = {'class':"block20"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
diff --git a/resources/recipes/cnetjapan_release.recipe b/resources/recipes/cnetjapan_release.recipe
new file mode 100644
index 0000000000..e8d13ec99f
--- /dev/null
+++ b/resources/recipes/cnetjapan_release.recipe
@@ -0,0 +1,48 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CNetJapanRelease(BasicNewsRecipe):
+    title          = u'CNET Japan release'
+    oldest_article = 3
+    max_articles_per_feed = 30
+    __author__  = 'Hiroshi Miura'
+
+    feeds          = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ]
+    language       = 'ja'
+    encoding       = 'Shift_JIS'
+    remove_javascript = True
+
+    preprocess_regexps = [
+       (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
+        lambda match: '</body>'),
+       (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
+        lambda match: '</body>'),
+       (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+        lambda match: '<!-- removed -->'),
+        ]
+
+    remove_tags_before = dict(id="contents_l")
+    remove_tags = [
+                   {'class':"social_bkm_share"},
+                   {'class':"social_bkm_print"},
+                   {'class':"block20 clearfix"},
+                   dict(name="div",attrs={'id':'bookreview'}),
+                   {'class':"tag_left_ttl"}
+                    ]
+    remove_tags_after = {'class':"block20"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds

From 2145a487d45904113dc922db1ae131eeff178041 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Sun, 28 Nov 2010 11:47:36 +0000
Subject: [PATCH 06/12] Add check for valid has_cover to check_library

---
 src/calibre/gui2/dialogs/check_library.py | 89 ++++++++++++++++++-----
 src/calibre/library/check_library.py      | 69 +++++++++++++-----
 src/calibre/library/database2.py          |  8 ++
 3 files changed, 127 insertions(+), 39 deletions(-)

diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py
index 55cd91dcd3..c00ee99cc0 100644
--- a/src/calibre/gui2/dialogs/check_library.py
+++ b/src/calibre/gui2/dialogs/check_library.py
@@ -32,23 +32,30 @@ class CheckLibraryDialog(QDialog):
         self.log.itemChanged.connect(self.item_changed)
         self._layout.addWidget(self.log)
 
-        self.check = QPushButton(_('&Run the check'))
-        self.check.setDefault(False)
-        self.check.clicked.connect(self.run_the_check)
-        self.copy = QPushButton(_('Copy &to clipboard'))
-        self.copy.setDefault(False)
-        self.copy.clicked.connect(self.copy_to_clipboard)
-        self.ok = QPushButton('&Done')
-        self.ok.setDefault(True)
-        self.ok.clicked.connect(self.accept)
-        self.delete = QPushButton('Delete &marked')
-        self.delete.setDefault(False)
-        self.delete.clicked.connect(self.delete_marked)
+        self.check_button = QPushButton(_('&Run the check'))
+        self.check_button.setDefault(False)
+        self.check_button.clicked.connect(self.run_the_check)
+        self.copy_button = QPushButton(_('Copy &to clipboard'))
+        self.copy_button.setDefault(False)
+        self.copy_button.clicked.connect(self.copy_to_clipboard)
+        self.ok_button = QPushButton('&Done')
+        self.ok_button.setDefault(True)
+        self.ok_button.clicked.connect(self.accept)
+        self.delete_button = QPushButton('Delete &marked')
+        self.delete_button.setToolTip(_('Delete marked files (checked subitems)'))
+        self.delete_button.setDefault(False)
+        self.delete_button.clicked.connect(self.delete_marked)
+        self.fix_button = QPushButton('&Fix marked')
+        self.fix_button.setDefault(False)
+        self.fix_button.setEnabled(False)
+        self.fix_button.setToolTip(_('Fix marked sections (checked fixable items)'))
+        self.fix_button.clicked.connect(self.fix_items)
         self.bbox = QDialogButtonBox(self)
-        self.bbox.addButton(self.check, QDialogButtonBox.ActionRole)
-        self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole)
-        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
-        self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole)
+        self.bbox.addButton(self.check_button, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.delete_button, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.fix_button, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.copy_button, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.ok_button, QDialogButtonBox.AcceptRole)
 
         h = QHBoxLayout()
         ln = QLabel(_('Names to ignore:'))
@@ -93,12 +100,19 @@ class CheckLibraryDialog(QDialog):
         plaintext = []
 
         def builder(tree, checker, check):
-            attr, h, checkable = check
+            attr, h, checkable, fixable = check
             list = getattr(checker, attr, None)
             if list is None:
                 return
 
-            tl = Item([h])
+            tl = Item()
+            tl.setText(0, h)
+            if fixable:
+                tl.setText(1, _('(fixable)'))
+                tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable)
+                tl.setCheckState(1, False)
+            self.top_level_items[attr] = tl
+
             for problem in list:
                 it = Item()
                 if checkable:
@@ -107,6 +121,7 @@ class CheckLibraryDialog(QDialog):
                 else:
                     it.setFlags(Qt.ItemIsEnabled)
                 it.setText(0, problem[0])
+                it.setData(0, Qt.UserRole, problem[2])
                 it.setText(1, problem[1])
                 tl.addChild(it)
                 self.all_items.append(it)
@@ -118,18 +133,25 @@ class CheckLibraryDialog(QDialog):
         t.setColumnCount(2);
         t.setHeaderLabels([_('Name'), _('Path from library')])
         self.all_items = []
+        self.top_level_items = {}
         for check in CHECKS:
             builder(t, checker, check)
 
         t.setColumnWidth(0, 200)
         t.setColumnWidth(1, 400)
-        self.delete.setEnabled(False)
+        self.delete_button.setEnabled(False)
         self.text_results = '\n'.join(plaintext)
 
     def item_changed(self, item, column):
+        self.fix_button.setEnabled(False)
+        for it in self.top_level_items.values():
+            if it.checkState(1):
+                self.fix_button.setEnabled(True)
+
+        self.delete_button.setEnabled(False)
         for it in self.all_items:
             if it.checkState(1):
-                self.delete.setEnabled(True)
+                self.delete_button.setEnabled(True)
                 return
 
     def delete_marked(self):
@@ -157,6 +179,33 @@ class CheckLibraryDialog(QDialog):
                                 unicode(it.text(1))))
         self.run_the_check()
 
+    def fix_missing_covers(self):
+        tl = self.top_level_items['missing_covers']
+        child_count = tl.childCount()
+        for i in range(0, child_count):
+            item = tl.child(i);
+            id = item.data(0, Qt.UserRole).toInt()[0]
+            self.db.set_has_cover(id, False)
+
+    def fix_extra_covers(self):
+        tl = self.top_level_items['extra_covers']
+        child_count = tl.childCount()
+        for i in range(0, child_count):
+            item = tl.child(i);
+            id = item.data(0, Qt.UserRole).toInt()[0]
+            self.db.set_has_cover(id, True)
+
+    def fix_items(self):
+        for check in CHECKS:
+            attr = check[0]
+            fixable = check[3]
+            tl = self.top_level_items[attr]
+            if fixable and tl.checkState(1):
+                func = getattr(self, 'fix_' + attr, None)
+                if func is not None and callable(func):
+                    func()
+        self.run_the_check()
+
     def copy_to_clipboard(self):
         QApplication.clipboard().setText(self.text_results)
 
diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py
index b285da0006..b49330db3e 100644
--- a/src/calibre/library/check_library.py
+++ b/src/calibre/library/check_library.py
@@ -14,14 +14,25 @@ from calibre.ebooks import BOOK_EXTENSIONS
 EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
 NORMALS = frozenset(['metadata.opf', 'cover.jpg'])
 
-CHECKS = [('invalid_titles',    _('Invalid titles'), True),
-          ('extra_titles',      _('Extra titles'), True),
-          ('invalid_authors',   _('Invalid authors'), True),
-          ('extra_authors',     _('Extra authors'), True),
-          ('missing_formats',   _('Missing book formats'), False),
-          ('extra_formats',     _('Extra book formats'), True),
-          ('extra_files',       _('Unknown files in books'), True),
-          ('failed_folders',    _('Folders raising exception'), False)
+'''
+Checks fields:
+- name of array containing info
+- user-readable name of info
+- can be deleted (can be checked)
+- can be fixed. In this case, the name of the fix method is derived from the
+  array name
+'''
+
+CHECKS = [('invalid_titles',    _('Invalid titles'), True, False),
+          ('extra_titles',      _('Extra titles'), True, False),
+          ('invalid_authors',   _('Invalid authors'), True, False),
+          ('extra_authors',     _('Extra authors'), True, False),
+          ('missing_formats',   _('Missing book formats'), False, False),
+          ('extra_formats',     _('Extra book formats'), True, False),
+          ('extra_files',       _('Unknown files in books'), True, False),
+          ('missing_covers',    _('Missing covers in books'), False, True),
+          ('extra_covers',      _('Extra covers in books'), True, True),
+          ('failed_folders',    _('Folders raising exception'), False, False)
       ]
 
 
@@ -57,6 +68,10 @@ class CheckLibrary(object):
         self.extra_formats = []
         self.extra_files = []
 
+        self.missing_covers = []
+        self.extra_covers = []
+
+        self.failed_folders = []
 
     def dbpath(self, id):
         return self.db.path(id, index_is_id=True)
@@ -83,7 +98,7 @@ class CheckLibrary(object):
             auth_path = os.path.join(lib, auth_dir)
             # First check: author must be a directory
             if not os.path.isdir(auth_path):
-                self.invalid_authors.append((auth_dir, auth_dir))
+                self.invalid_authors.append((auth_dir, auth_dir, 0))
                 continue
 
             self.potential_authors[auth_dir] = {}
@@ -98,7 +113,7 @@ class CheckLibrary(object):
                 m = self.db_id_regexp.search(title_dir)
                 # Second check: title must have an ID and must be a directory
                 if m is None or not os.path.isdir(title_path):
-                    self.invalid_titles.append((auth_dir, db_path))
+                    self.invalid_titles.append((auth_dir, db_path, 0))
                     continue
 
                 id = m.group(1)
@@ -106,12 +121,12 @@ class CheckLibrary(object):
                 if self.is_case_sensitive:
                     if int(id) not in self.all_ids or \
                             db_path not in self.all_dbpaths:
-                        self.extra_titles.append((title_dir, db_path))
+                        self.extra_titles.append((title_dir, db_path, 0))
                         continue
                 else:
                     if int(id) not in self.all_ids or \
                             db_path.lower() not in self.all_lc_dbpaths:
-                        self.extra_titles.append((title_dir, db_path))
+                        self.extra_titles.append((title_dir, db_path, 0))
                         continue
 
                 # Record the book to check its formats
@@ -120,7 +135,7 @@ class CheckLibrary(object):
 
             # Fourth check: author directories that contain no titles
             if not found_titles:
-                self.extra_authors.append((auth_dir, auth_dir))
+                self.extra_authors.append((auth_dir, auth_dir, 0))
 
         for x in self.book_dirs:
             try:
@@ -152,17 +167,20 @@ class CheckLibrary(object):
             unknowns = frozenset(filenames-formats-NORMALS)
             # Check: any books that aren't formats or normally there?
             for u in unknowns:
-                self.extra_files.append((title_dir, os.path.join(db_path, u)))
+                self.extra_files.append((title_dir,
+                                         os.path.join(db_path, u), book_id))
 
             # Check: any book formats that should be there?
             missing = book_formats - formats
             for m in  missing:
-                self.missing_formats.append((title_dir, os.path.join(db_path, m)))
+                self.missing_formats.append((title_dir,
+                                             os.path.join(db_path, m), book_id))
 
             # Check: any book formats that shouldn't be there?
             extra = formats - book_formats - NORMALS
             for e in extra:
-                self.extra_formats.append((title_dir, os.path.join(db_path, e)))
+                self.extra_formats.append((title_dir,
+                                           os.path.join(db_path, e), book_id))
         else:
             def lc_map(fnames, fset):
                 m = {}
@@ -175,15 +193,28 @@ class CheckLibrary(object):
             unknowns = frozenset(filenames_lc-formats_lc-NORMALS)
             # Check: any books that aren't formats or normally there?
             for f in lc_map(filenames, unknowns):
-                self.extra_files.append((title_dir, os.path.join(db_path, f)))
+                self.extra_files.append((title_dir, os.path.join(db_path, f),
+                                         book_id))
 
             book_formats_lc = frozenset([f.lower() for f in book_formats])
             # Check: any book formats that should be there?
             missing = book_formats_lc - formats_lc
             for m in lc_map(book_formats, missing):
-                self.missing_formats.append((title_dir, os.path.join(db_path, m)))
+                self.missing_formats.append((title_dir,
+                                             os.path.join(db_path, m), book_id))
 
             # Check: any book formats that shouldn't be there?
             extra = formats_lc - book_formats_lc - NORMALS
             for e in lc_map(formats, extra):
-                self.extra_formats.append((title_dir, os.path.join(db_path, e)))
+                self.extra_formats.append((title_dir, os.path.join(db_path, e),
+                                           book_id))
+
+        # check cached has_cover
+        if self.db.has_cover(book_id):
+            if 'cover.jpg' not in filenames:
+                self.missing_covers.append((title_dir,
+                        os.path.join(db_path, title_dir, 'cover.jpg'), book_id))
+        else:
+            if 'cover.jpg' in filenames:
+                self.extra_covers.append((title_dir,
+                        os.path.join(db_path, title_dir, 'cover.jpg'), book_id))
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 47c575386b..a07e46577e 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -801,6 +801,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         if notify:
             self.notify('cover', [id])
 
+    def has_cover(self, id):
+        return self.data.get(id, self.FIELD_MAP['cover'], row_is_id=True)
+
+    def set_has_cover(self, id, val):
+        dval = 1 if val else 0
+        self.conn.execute('UPDATE books SET has_cover=? WHERE id=?', (dval, id,))
+        self.data.set(id, self.FIELD_MAP['cover'], val, row_is_id=True)
+
     def book_on_device(self, id):
         if callable(self.book_on_device_func):
             return self.book_on_device_func(id)

From 56e38290df6125f34ae8f6a18ae75b4bf7e7724e Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sun, 28 Nov 2010 23:41:46 +0900
Subject: [PATCH 07/12] recipes: fix minor bugs, add yomiuri news

  - remove wrong #!(hash-bang)
  - add yomiuri online news
---
 resources/recipes/endgadget_ja.recipe        |  2 -
 resources/recipes/jijinews.recipe            |  2 -
 resources/recipes/mainichi.recipe            |  2 -
 resources/recipes/nikkei_free.recipe         |  6 +-
 resources/recipes/nikkei_sub.recipe          |  6 +-
 resources/recipes/nikkei_sub_economy.recipe  |  2 -
 resources/recipes/nikkei_sub_industry.recipe |  1 -
 resources/recipes/nikkei_sub_life.recipe     |  2 -
 resources/recipes/nikkei_sub_main.recipe     |  2 -
 resources/recipes/nikkei_sub_sports.recipe   |  1 -
 resources/recipes/yomiuri.recipe             | 66 ++++++++++++++++++++
 11 files changed, 71 insertions(+), 21 deletions(-)
 create mode 100644 resources/recipes/yomiuri.recipe

diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe
index 443a85905d..891e6720a5 100644
--- a/resources/recipes/endgadget_ja.recipe
+++ b/resources/recipes/endgadget_ja.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe
index fe52e76aaf..4f768ce7ee 100644
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe
index 47dc7d0ebc..2a44fa0980 100644
--- a/resources/recipes/mainichi.recipe
+++ b/resources/recipes/mainichi.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe
index d84aaa279b..adc596104b 100644
--- a/resources/recipes/nikkei_free.recipe
+++ b/resources/recipes/nikkei_free.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
@@ -9,9 +7,9 @@ www.nikkei.com
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class NikkeiNet(BasicNewsRecipe):
-    title          = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
+    title          = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)'
     __author__     = 'Hiroshi Miura'
-    description    = 'News and current market affairs from Japan'
+    description    = 'News and current market affairs from Japan, no subscription and getting max feed.'
     cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
     masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
     oldest_article = 2
diff --git a/resources/recipes/nikkei_sub.recipe b/resources/recipes/nikkei_sub.recipe
index 95b0017339..18f324009a 100644
--- a/resources/recipes/nikkei_sub.recipe
+++ b/resources/recipes/nikkei_sub.recipe
@@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile
 
 
 class NikkeiNet_subscription(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
+    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)'
     __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
+    description     = 'News and current market affairs from Japan, gather MAX articles'
     needs_subscription = True
     oldest_article  = 2
-    max_articles_per_feed = 20
+    max_articles_per_feed = 10
     language        = 'ja'
     remove_javascript = False
     temp_files = []
diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe
index d762f505d1..2dd8f1add8 100644
--- a/resources/recipes/nikkei_sub_economy.recipe
+++ b/resources/recipes/nikkei_sub_economy.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe
index da04bbb5f3..81e86767d0 100644
--- a/resources/recipes/nikkei_sub_industry.recipe
+++ b/resources/recipes/nikkei_sub_industry.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env  python
 
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe
index 2da5b13834..1bfa08a55f 100644
--- a/resources/recipes/nikkei_sub_life.recipe
+++ b/resources/recipes/nikkei_sub_life.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe
index 37fc8964c4..485d2f32c0 100644
--- a/resources/recipes/nikkei_sub_main.recipe
+++ b/resources/recipes/nikkei_sub_main.recipe
@@ -1,5 +1,3 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe
index 6e5a1c6bb2..644b0aa252 100644
--- a/resources/recipes/nikkei_sub_sports.recipe
+++ b/resources/recipes/nikkei_sub_sports.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env  python
 
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe
new file mode 100644
index 0000000000..6335b99e32
--- /dev/null
+++ b/resources/recipes/yomiuri.recipe
@@ -0,0 +1,66 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.yomiuri.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class YOLNews(BasicNewsRecipe):
+    title          = u'YOMIURI ONLINE'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 1
+    max_articles_per_feed = 50
+    description    = 'Japanese traditional newspaper Yomiuri Online News'
+    publisher      = 'Yomiuri Online News'
+    category       = 'news, japan'
+    language       = 'ja'
+    encoding       = 'Shift_JIS'
+    index          = 'http://www.yomiuri.co.jp/latestnews/'
+    remove_javascript = True
+
+    remove_tags_before = {'class':"article-def"}
+    remove_tags = [{'class':"RelatedArticle"},
+                   {'class:"sbtns"}
+                    ]
+    remove_tags_after = {'class':"date-def"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
+
+    def parse_index(self):
+        feeds = []
+        soup   = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'list-def'})
+        if topstories:
+           newsarticles = []
+           for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    itd1 = itema.findNextSibling(text = True)
+                    itd2 = itd1.findNextSibling(text = True)
+                    itd3 = itd2.findNextSibling(text = True)
+                    newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'       :''.join([itd1, itd2, itd3])
+                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                                     ,'description':''
+                                    })
+           feeds.append(('News', newsarticles))
+
+        return feeds
+

From 2b2a8a1edcc789010e928ae29b3d079d79de3ec6 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sun, 28 Nov 2010 23:43:35 +0900
Subject: [PATCH 08/12] recipes: fix typo

---
 resources/recipes/yomiuri.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe
index 6335b99e32..39a085bf83 100644
--- a/resources/recipes/yomiuri.recipe
+++ b/resources/recipes/yomiuri.recipe
@@ -22,7 +22,7 @@ class YOLNews(BasicNewsRecipe):
 
     remove_tags_before = {'class':"article-def"}
     remove_tags = [{'class':"RelatedArticle"},
-                   {'class:"sbtns"}
+                   {'class':"sbtns"}
                     ]
     remove_tags_after = {'class':"date-def"}
 

From 5f5c41e495a50a30bb7cec0055decb0eb1156305 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sun, 28 Nov 2010 23:53:46 +0900
Subject: [PATCH 09/12] recipes: add icons and yomiuri online variant

---
 resources/images/news/cnetjapan_digital.png | Bin 0 -> 892 bytes
 resources/images/news/cnetjapan_release.png | Bin 0 -> 892 bytes
 resources/images/news/yomiuri.png           | Bin 0 -> 660 bytes
 resources/recipes/yomiuri.recipe            |   9 +--
 resources/recipes/yomiuri_world.recipe      |  63 ++++++++++++++++++++
 5 files changed, 66 insertions(+), 6 deletions(-)
 create mode 100644 resources/images/news/cnetjapan_digital.png
 create mode 100644 resources/images/news/cnetjapan_release.png
 create mode 100644 resources/images/news/yomiuri.png
 create mode 100644 resources/recipes/yomiuri_world.recipe

diff --git a/resources/images/news/cnetjapan_digital.png b/resources/images/news/cnetjapan_digital.png
new file mode 100644
index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732
GIT binary patch
literal 892
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS
zgUd~N@st@t=De*<J61@C=Y_Zkv|JKnSsAd9B}R1Dr6BQlSt2Y8176GzSj%|u(V`2j
z3m%JW)g4KBHh1Rt*ZaQTyL@!D;$^$p#c~$UZP@1(i~MhVGk2asvHQtCj-Ge*-tXCT
z%ieFw|HbR?=I?&A^xCT`(+huIp83vE%p$4!*<*i8p65%MmETHyZ)Rs#7kb{7-yd=6
z?<t0vcb9j5vEeQ5GG$xSe#9|nVwvNMNTateUrtKRt<3I<uRb;3=Fhs4U6a!K+)BDW
z<{H?t-afV<aY5dKGzaZ18e1}a1h#M=m;J=tz!!XZvcmIDk(@g$*6Im|*)Cj7G2~)>
zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6<iX`k4me9zKv
z;`5%<2JG|oCmDxqcHDQ+eEa>%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt
zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2
z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9
z+n3H?wZgID6OW<PbhR6$rCn>>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR
zpkU&$wn<kwYGq`n3rZ_qZHs&P(!y!gm2|Fz#F8J+Icm)&@t3Pk;b`Sra_VyUynPi$
zjV2D)xi?PNp3ePKi|K`i`SXX#Yb}-pu&}fT2%Hcs68yvv*)UIhmt$Vk!#4+v?`Eu9
zxm>7D=tq<S7ZcwGCNl@A78QX>3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH<DPVvfq2A
zRL=eIm8f=k*!BP1HmfF|O-@zb4NSeNC9V-ADTyViR>?)FK#IZ0z`#=1&`8(FG{nf*
z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT

literal 0
HcmV?d00001

diff --git a/resources/images/news/cnetjapan_release.png b/resources/images/news/cnetjapan_release.png
new file mode 100644
index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732
GIT binary patch
literal 892
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS
zgUd~N@st@t=De*<J61@C=Y_Zkv|JKnSsAd9B}R1Dr6BQlSt2Y8176GzSj%|u(V`2j
z3m%JW)g4KBHh1Rt*ZaQTyL@!D;$^$p#c~$UZP@1(i~MhVGk2asvHQtCj-Ge*-tXCT
z%ieFw|HbR?=I?&A^xCT`(+huIp83vE%p$4!*<*i8p65%MmETHyZ)Rs#7kb{7-yd=6
z?<t0vcb9j5vEeQ5GG$xSe#9|nVwvNMNTateUrtKRt<3I<uRb;3=Fhs4U6a!K+)BDW
z<{H?t-afV<aY5dKGzaZ18e1}a1h#M=m;J=tz!!XZvcmIDk(@g$*6Im|*)Cj7G2~)>
zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6<iX`k4me9zKv
z;`5%<2JG|oCmDxqcHDQ+eEa>%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt
zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2
z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9
z+n3H?wZgID6OW<PbhR6$rCn>>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR
zpkU&$wn<kwYGq`n3rZ_qZHs&P(!y!gm2|Fz#F8J+Icm)&@t3Pk;b`Sra_VyUynPi$
zjV2D)xi?PNp3ePKi|K`i`SXX#Yb}-pu&}fT2%Hcs68yvv*)UIhmt$Vk!#4+v?`Eu9
zxm>7D=tq<S7ZcwGCNl@A78QX>3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH<DPVvfq2A
zRL=eIm8f=k*!BP1HmfF|O-@zb4NSeNC9V-ADTyViR>?)FK#IZ0z`#=1&`8(FG{nf*
z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT

literal 0
HcmV?d00001

diff --git a/resources/images/news/yomiuri.png b/resources/images/news/yomiuri.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a197f888f076f9801d3fd8cfd3fb2479af6520a
GIT binary patch
literal 660
zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI
zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%<z_{1b#W5t}@Z0d+d50VX<{0NG1tyhqWc}c{
zvuyk88m>QHS-aIkQ~omtsC%VrC@<Y)&MH&BkG;-qNrKjfobzwqyqR~VLH1!^!8^}?
z0v`lE=(Fs|tU8pnbyr+M&id6owIU14KhN7XBe(hU?grVDEZe3F>l%9XX)LZ0NMiU=
z<9D8k!J*7a)N#w3*qy?^XY`4OC-F`H&(Xx$apdftAoXw6lRvN8zmI=bvrpX7Wnnkg
zJU_$`aeL9N+$Qb+nu}!=-g|v`Cu_oVua9emR+O4qXm05N7p3t0(@k&g{PFeaUi&JF
zaa-7o6IatW={O7cWOerHzF@Mvd*Gvy=UL|49t`|<G%Ul`t=?;9d1C(Nrwb-CUtLwX
z-nHSW!FApBVN6{Y4%cXRycW90(#vu}G&A&jOPSQFzsgaEI%`uR*%&Hny?P!`U8c_P
z)_mDB2g#=i0@)Q-aS2vzadi#--lsZOx%`m{biU=u&_1Ui&+@R`Y~QA@RcmvPtV?e#
z6kPd4CfDgW>$X`;rF%ly+c@T)33b~kkg$5!NxSz>aV>HZdQ3h~YCE1T-o;^?+rqdp
z^k$Cr%9IECTKibVCDIm`vR2p3xfgvv|Nr)PSK9A})dOQxwZt`|BqgyV)hf9t6-Y4{
z85kPq8XD>vS%erFTbY;uk+y-Qm4QLK$Jd1@8glbfGSe!tXfT9mSfZP>3#fs?)78&q
Iol`;+00UkKJpcdz

literal 0
HcmV?d00001

diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe
index 39a085bf83..b3df1b58aa 100644
--- a/resources/recipes/yomiuri.recipe
+++ b/resources/recipes/yomiuri.recipe
@@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 import re
 
 class YOLNews(BasicNewsRecipe):
-    title          = u'YOMIURI ONLINE'
+    title          = u'YOMIURI ONLINE(Latest)'
     __author__     = 'Hiroshi Miura'
     oldest_article = 1
     max_articles_per_feed = 50
@@ -19,6 +19,7 @@ class YOLNews(BasicNewsRecipe):
     encoding       = 'Shift_JIS'
     index          = 'http://www.yomiuri.co.jp/latestnews/'
     remove_javascript = True
+    masthead_title = u'YOMIURI ONLINE'
 
     remove_tags_before = {'class':"article-def"}
     remove_tags = [{'class':"RelatedArticle"},
@@ -27,9 +28,7 @@ class YOLNews(BasicNewsRecipe):
     remove_tags_after = {'class':"date-def"}
 
     def parse_feeds(self):
-
         feeds = BasicNewsRecipe.parse_feeds(self)
-
         for curfeed in feeds:
             delList = []
             for a,curarticle in enumerate(curfeed.articles):
@@ -39,7 +38,6 @@ class YOLNews(BasicNewsRecipe):
                 for d in delList:
                     index = curfeed.articles.index(d)
                     curfeed.articles[index:index+1] = []
-
         return feeds
 
     def parse_index(self):
@@ -60,7 +58,6 @@ class YOLNews(BasicNewsRecipe):
                                      ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
                                      ,'description':''
                                     })
-           feeds.append(('News', newsarticles))
-
+           feeds.append(('latest', newsarticles))
         return feeds
 
diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe
new file mode 100644
index 0000000000..0146ffa330
--- /dev/null
+++ b/resources/recipes/yomiuri_world.recipe
@@ -0,0 +1,63 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.yomiuri.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class YOLNews(BasicNewsRecipe):
+    title          = u'YOMIURI ONLINE(World)'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 50
+    description    = 'Japanese traditional newspaper Yomiuri Online News/world news'
+    publisher      = 'Yomiuri Online News'
+    category       = 'news, japan'
+    language       = 'ja'
+    encoding       = 'Shift_JIS'
+    index          = 'http://www.yomiuri.co.jp/world/'
+    remove_javascript = True
+    masthead_title = u"YOMIURI ONLINE"
+
+    remove_tags_before = {'class':"article-def"}
+    remove_tags = [{'class':"RelatedArticle"},
+                   {'class':"sbtns"}
+                    ]
+    remove_tags_after = {'class':"date-def"}
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+
+    def parse_index(self):
+        feeds = []
+        soup   = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'list-def'})
+        if topstories:
+           newsarticles = []
+           for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    itd1 = itema.findNextSibling(text = True)
+                    itd2 = itd1.findNextSibling(text = True)
+                    itd3 = itd2.findNextSibling(text = True)
+                    newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'       :''.join([itd1, itd2, itd3])
+                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                                     ,'description':''
+                                    })
+           feeds.append(('World', newsarticles))
+        return feeds
+

From 21969e157cf02059bcab8a35849321d720007602 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura <miurahr@linux.com>
Date: Sun, 28 Nov 2010 23:58:20 +0900
Subject: [PATCH 10/12] recipe: yomiuri_world: fix for fetching date field

---
 resources/recipes/yomiuri_world.recipe | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe
index 0146ffa330..eae5a2a40a 100644
--- a/resources/recipes/yomiuri_world.recipe
+++ b/resources/recipes/yomiuri_world.recipe
@@ -50,11 +50,9 @@ class YOLNews(BasicNewsRecipe):
                 itema = itt.find('a',href=True)
                 if itema:
                     itd1 = itema.findNextSibling(text = True)
-                    itd2 = itd1.findNextSibling(text = True)
-                    itd3 = itd2.findNextSibling(text = True)
                     newsarticles.append({
                                       'title'      :itema.string
-                                     ,'date'       :''.join([itd1, itd2, itd3])
+                                     ,'date'       :''.join([itd1])
                                      ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
                                      ,'description':''
                                     })

From af98ebf24decbf35966eb3c0f69da418f4acb824 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 28 Nov 2010 12:36:33 -0700
Subject: [PATCH 11/12] Animal Politico by leamsi

---
 resources/recipes/animal_politico.recipe | 111 +++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 resources/recipes/animal_politico.recipe

diff --git a/resources/recipes/animal_politico.recipe b/resources/recipes/animal_politico.recipe
new file mode 100644
index 0000000000..f48587ea94
--- /dev/null
+++ b/resources/recipes/animal_politico.recipe
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+# encoding: utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1290663986(BasicNewsRecipe):
+    title          = u'Animal Pol\u00EDtico'
+    publisher      = u'Animal Pol\u00EDtico'
+    category       = u'News, Mexico'
+    description    = u'Noticias Pol\u00EDticas'
+    __author__     = 'leamsi'
+    masthead_url   = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    language       = 'es'
+
+    #feeds          = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
+
+    remove_tags_before = dict(name='div', id='main')
+    remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})]
+    keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}),
+                      dict(name='div', attrs={'class':'entry-content'})]
+    remove_javascript = True
+    INDEX = 'http://www.animalpolitico.com/'
+
+    def generic_parse(self, soup):
+        articles = []
+        for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
+            article_url  = entry.a['href'] + '?print=yes'
+            article_title= entry.find('h3', 'entry-title')
+            article_title= self.tag_to_string(article_title)
+            article_date = entry.find('span', 'the-time')
+            article_date = self.tag_to_string(article_date)
+            article_desc = self.tag_to_string(entry.find('p'))
+
+            #print 'Article:',article_title, article_date,article_url
+            #print entry['class']
+
+            articles.append({'title' : article_title,
+                'date' : article_date,
+                'description' : article_desc,
+                'url'  : article_url})
+            # Avoid including the multimedia stuff.
+            if entry['class'].find('last') != -1:
+                break
+
+        return articles
+
+    def plumaje_parse(self, soup):
+        articles = []
+        blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1)
+        for entry in blogs_soup.findAll('li'):
+            article_title = entry.p
+            article_url   = article_title.a['href'] + '?print=yes'
+            article_date  = article_title.nextSibling
+            article_title = self.tag_to_string(article_title)
+            article_date  = self.tag_to_string(article_date).replace(u'Last Updated: ', '')
+            article_desc  = self.tag_to_string(entry.find('h4'))
+
+            #print 'Article:',article_title, article_date,article_url
+            articles.append({'title' : article_title,
+                'date' : article_date,
+                'description' : article_desc,
+                'url'  : article_url})
+
+        return articles
+
+    def boca_parse(self, soup):
+        articles = []
+        for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
+            article_title= entry.find('h2', 'entry-title')
+            article_url  = article_title.a['href'] + '?print=yes'
+            article_title= self.tag_to_string(article_title)
+            article_date = entry.find('span', 'entry-date')
+            article_date = self.tag_to_string(article_date)
+            article_desc = self.tag_to_string(entry.find('div', 'entry-content'))
+
+            #print 'Article:',article_title, article_date,article_url
+            #print entry['class']
+
+            articles.append({'title' : article_title,
+                'date' : article_date,
+                'description' : article_desc,
+                'url'  : article_url})
+            # Avoid including the multimedia stuff.
+            if entry['class'].find('last') != -1:
+                break
+
+        return articles
+
+
+
+
+    def parse_index(self):
+        gobierno_soup = self.index_to_soup(self.INDEX+'gobierno/')
+        congreso_soup = self.index_to_soup(self.INDEX+'congreso/')
+        seguridad_soup = self.index_to_soup(self.INDEX+'seguridad/')
+        comunidad_soup = self.index_to_soup(self.INDEX+'comunidad/')
+        plumaje_soup = self.index_to_soup(self.INDEX+'plumaje/')
+        la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/')
+
+        gobierno_articles = self.generic_parse(gobierno_soup)
+        congreso_articles = self.generic_parse(congreso_soup)
+        seguridad_articles = self.generic_parse(seguridad_soup)
+        comunidad_articles = self.generic_parse(comunidad_soup)
+        plumaje_articles = self.plumaje_parse(plumaje_soup)
+        la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)
+
+
+        return [ (u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
+             (u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]

From f6d70a1cd27bda0270b6b99e346eb0723bed999c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 28 Nov 2010 14:54:36 -0700
Subject: [PATCH 12/12] Nook Color profile: Reduce screen height to 900px

---
 src/calibre/customize/profiles.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py
index 2318c6724e..74a79689d9 100644
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@@ -683,8 +683,8 @@ class NookColorOutput(NookOutput):
     short_name = 'nook_color'
     description = _('This profile is intended for the B&N Nook Color.')
 
-    screen_size               = (600, 980)
-    comic_screen_size         = (584, 980)
+    screen_size               = (600, 900)
+    comic_screen_size         = (584, 900)
     dpi                       = 169
 
 class BambookOutput(OutputProfile):