From 38d1375974023ace56817bd8e53e245ecf3621a5 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 01:08:09 +0900 Subject: [PATCH 01/12] fix charcode and minor bugs --- resources/recipes/jijinews.recipe | 11 ++++++++++- resources/recipes/msnsankei.recipe | 7 +++++-- resources/recipes/nikkei_sub_main.recipe | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index f74864365d..4af242063e 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -17,10 +17,19 @@ class JijiDotCom(BasicNewsRecipe): encoding = 'utf-8' oldest_article = 6 max_articles_per_feed = 100 + encoding = 'EUC_JP' language = 'ja' - cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + top_url = 'http://www.jiji.com/' feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] remove_tags_after = dict(id="ad_google") + def get_cover_url(self): + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + soup = self.index_to_soup(self.top_url) + cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) + if cover_item: + cover_url = self.top_url + cover_item.img['src'] + return cover_url + diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe index 4c79771945..8c78ccd9e9 100644 --- a/resources/recipes/msnsankei.recipe +++ b/resources/recipes/msnsankei.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' @@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe): max_articles_per_feed = 100 encoding = 'Shift_JIS' language = 'ja' + cover_url = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg' + masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif' feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')] remove_tags_before = dict(id="__r_article_title__") remove_tags_after = dict(id="ajax_release_news") - remove_tags = [{'class':"parent chromeCustom6G"}] + remove_tags = [{'class':"parent chromeCustom6G"}, + {'class':"RelatedImg"} + ] diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 142edf624d..37fc8964c4 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -30,6 +30,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe): {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, {'class':"cmn-article_keyword cmn-clearfix"}, {'class':"cmn-print_headline cmn-clearfix"}, + {'class':"cmn-article_list"}, + {'class':"cmn-dashedline"}, + {'class':"cmn-hide"}, ] remove_tags_after = {'class':"cmn-pr_list"} From 176010dc03fa5bb6c4ca30e179c3a1a4d49e73a5 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 09:53:35 +0900 Subject: [PATCH 02/12] recipe: jijinews: fix indent error --- resources/recipes/jijinews.recipe | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index 4af242063e..a62b4db739 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -19,17 +19,16 @@ class JijiDotCom(BasicNewsRecipe): max_articles_per_feed = 100 encoding = 'EUC_JP' language = 'ja' - masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' - top_url = 'http://www.jiji.com/' + masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + top_url = 'http://www.jiji.com/' feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] remove_tags_after = dict(id="ad_google") - def get_cover_url(self): - cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' - soup = self.index_to_soup(self.top_url) - cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) - if cover_item: - cover_url = self.top_url + cover_item.img['src'] - return cover_url - + def get_cover_url(self): + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + soup = self.index_to_soup(self.top_url) + cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) + if cover_item: + cover_url = self.top_url + cover_item.img['src'] + return cover_url From 3a8eae0fba90b4c3ee040e47c7f64e5f2ed8c6b2 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 10:18:43 +0900 Subject: [PATCH 03/12] recipe: some fix in japanese recipe jijinews: charset fix msnsankei: removal tag fix --- resources/recipes/jijinews.recipe | 2 +- resources/recipes/msnsankei.recipe | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index a62b4db739..98e7a180d1 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -17,7 +17,7 @@ class JijiDotCom(BasicNewsRecipe): encoding = 'utf-8' oldest_article = 6 max_articles_per_feed = 100 - encoding = 'EUC_JP' + encoding = 'euc_jisx0213' language = 'ja' masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' top_url = 'http://www.jiji.com/' diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe index 8c78ccd9e9..ae195559d5 100644 --- a/resources/recipes/msnsankei.recipe +++ b/resources/recipes/msnsankei.recipe @@ -23,5 +23,5 @@ class MSNSankeiNewsProduct(BasicNewsRecipe): remove_tags_before = dict(id="__r_article_title__") remove_tags_after = dict(id="ajax_release_news") remove_tags = [{'class':"parent chromeCustom6G"}, - {'class':"RelatedImg"} + dict(id="RelatedImg") ] From 064bfaa7f9eb61f298895c6a30c6cd90d5b0c87f Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 12:04:00 +0900 Subject: [PATCH 04/12] recipe: japanese: fix minor bugs - jijinews: fix encoding - mainichi, nikkei: add icons --- resources/images/news/mainichi.png | Bin 0 -> 953 bytes resources/images/news/mainichi_it_news.png | Bin 0 -> 953 bytes .../{nikkei_sub_industory.png => nikkei_sub.png} | Bin resources/images/news/nikkei_sub_industry.png | Bin 0 -> 948 bytes resources/recipes/jijinews.recipe | 1 - 5 files changed, 1 deletion(-) create mode 100644 resources/images/news/mainichi.png create mode 100644 resources/images/news/mainichi_it_news.png rename resources/images/news/{nikkei_sub_industory.png => nikkei_sub.png} (100%) create mode 100644 resources/images/news/nikkei_sub_industry.png diff --git a/resources/images/news/mainichi.png b/resources/images/news/mainichi.png new file mode 100644 index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479 GIT binary patch literal 953 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5? zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz| zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ} zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4 z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg z=I`xC-rNun`Z-BD|AVA_w?_MQen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rXrQzlN8+g!aq`St1N z7C%d8|G5yQc7NrkizRa}zhC?Caj{QWm2yBPlh*pcDwxU zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE zHwMl>AR6+@c1vTkdi0rc9Ozamx% P)WG2B>gTe~DWM4fFNU-h literal 0 HcmV?d00001 diff --git a/resources/images/news/mainichi_it_news.png b/resources/images/news/mainichi_it_news.png new file mode 100644 index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479 GIT binary patch literal 953 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5? zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz| zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ} zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4 z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg z=I`xC-rNun`Z-BD|AVA_w?_MQen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rXrQzlN8+g!aq`St1N z7C%d8|G5yQc7NrkizRa}zhC?Caj{QWm2yBPlh*pcDwxU zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE zHwMl>AR6+@c1vTkdi0rc9Ozamx% P)WG2B>gTe~DWM4fFNU-h literal 0 HcmV?d00001 diff --git a/resources/images/news/nikkei_sub_industory.png b/resources/images/news/nikkei_sub.png similarity index 100% rename from resources/images/news/nikkei_sub_industory.png rename to resources/images/news/nikkei_sub.png diff --git a/resources/images/news/nikkei_sub_industry.png b/resources/images/news/nikkei_sub_industry.png new file mode 100644 index 0000000000000000000000000000000000000000..308f4b3085bafd5c8aabf5fcc5196da3ed2d39bb GIT binary patch literal 948 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?|#Qx;TbdoX(x>oe`5NbF}_`?Nhax&jQyz zdn%Hiv2^LytJWG*r5riioPsX7?iRe#Yor~$O2b5N%S*R{hgty_YhA8zG?@gV43}NidsXG`|@pjZ5JyqUG&hi zCO)ju^BLFU@3)THKF|obGjns>=E}(`jY~8=g)6>S$OjvTraiZvrhRg|v|~JThV+LS zx!o@h8LvIeT*-WXa?OWJ40*nfuB#}YFZ*=vv~5Z5<(pqSB+PhCdxNEZ+cR|io*UK^ z))+n6=2~L$4~Mpm%Xe2A-EUjR&G#lxnM2ixSHu53qsBURi=~}&EwfK#HM_+MT@h{n zww?LIz1Mr6{EB)Pw^RTAsipn0a#aq?cPE!}EH_#B(kgwEWpmr|jH)h^kR>mz5@t$q zcl*Ao^ZHrlA)(18x9wt!+p?uRD~#RJm-|iPn$@Vn>A1|m`u)d@{YPIUh~JMb-MMYU zG5-B#8EZTl#Qt}A2=$9??s{SWYxAqqQgh^9K0I>a+wRx%{x51u<^x6y-$1*|5L#qCE)St_M6`c4cGhLCcJXuJuM((H{;>diR_W9j_mmxIMaaV z<6X8u<3k)wCm#q)Zo9Pb`0dKCe`mird-G0Ir^UBx^Id%9AJrSSc5m5r;gG|_^-H7K zbpFadUhi4r8&Dd&NP{WTb%m&juIf^8U(IuK|E9Fgntg|T{hDKQ{uhY`q*UwtDxdMX zU{}XEqe~TB0?$o=F&O6@cSe40df7U^v)wayKDzY#QD}hS3bk#zH@2U4QsK^cWmV;E zZg|cWsPx#e1D@Lz=gHfCwGDhBbd+fdn}(gOwa1JWiLfI#COYn$8L1}Z&Y)&K{UuAl zmb0gdcSR|5+APT_di_Z{W0#A*IV=)1N=*K52a Date: Sat, 27 Nov 2010 14:32:01 +0900 Subject: [PATCH 05/12] recipes: add new cnet japan feed - some removal of tags - skip ad pages --- resources/recipes/cnetjapan.recipe | 23 +++++++++- resources/recipes/cnetjapan_digital.recipe | 49 ++++++++++++++++++++++ resources/recipes/cnetjapan_release.recipe | 48 +++++++++++++++++++++ 3 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 resources/recipes/cnetjapan_digital.recipe create mode 100644 resources/recipes/cnetjapan_release.recipe diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe index e0178c1ff2..1058b90401 100644 --- a/resources/recipes/cnetjapan.recipe +++ b/resources/recipes/cnetjapan.recipe @@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe): max_articles_per_feed = 30 __author__ = 'Hiroshi Miura' - feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')] + feeds = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'), + (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf') + ] language = 'ja' encoding = 'Shift_JIS' remove_javascript = True @@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(name="h2") + remove_tags_before = dict(id="contents_l") remove_tags = [ {'class':"social_bkm_share"}, {'class':"social_bkm_print"}, {'class':"block20 clearfix"}, dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} ] remove_tags_after = {'class':"block20"} + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_digital.recipe b/resources/recipes/cnetjapan_digital.recipe new file mode 100644 index 0000000000..9028126af2 --- /dev/null +++ b/resources/recipes/cnetjapan_digital.recipe @@ -0,0 +1,49 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanDigital(BasicNewsRecipe): + title = u'CNET Japan Digital' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_release.recipe b/resources/recipes/cnetjapan_release.recipe new file mode 100644 index 0000000000..e8d13ec99f --- /dev/null +++ b/resources/recipes/cnetjapan_release.recipe @@ -0,0 +1,48 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanRelease(BasicNewsRecipe): + title = u'CNET Japan release' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds From 2145a487d45904113dc922db1ae131eeff178041 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 28 Nov 2010 11:47:36 +0000 Subject: [PATCH 06/12] Add check for valid has_cover to check_library --- src/calibre/gui2/dialogs/check_library.py | 89 ++++++++++++++++++----- src/calibre/library/check_library.py | 69 +++++++++++++----- src/calibre/library/database2.py | 8 ++ 3 files changed, 127 insertions(+), 39 deletions(-) diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 55cd91dcd3..c00ee99cc0 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -32,23 +32,30 @@ class CheckLibraryDialog(QDialog): self.log.itemChanged.connect(self.item_changed) self._layout.addWidget(self.log) - self.check = QPushButton(_('&Run the check')) - self.check.setDefault(False) - self.check.clicked.connect(self.run_the_check) - self.copy = QPushButton(_('Copy &to clipboard')) - self.copy.setDefault(False) - self.copy.clicked.connect(self.copy_to_clipboard) - self.ok = QPushButton('&Done') - self.ok.setDefault(True) - self.ok.clicked.connect(self.accept) - self.delete = QPushButton('Delete &marked') - self.delete.setDefault(False) - self.delete.clicked.connect(self.delete_marked) + self.check_button = QPushButton(_('&Run the check')) + self.check_button.setDefault(False) + self.check_button.clicked.connect(self.run_the_check) + self.copy_button = QPushButton(_('Copy &to clipboard')) + self.copy_button.setDefault(False) + self.copy_button.clicked.connect(self.copy_to_clipboard) + self.ok_button = QPushButton('&Done') + self.ok_button.setDefault(True) + self.ok_button.clicked.connect(self.accept) + self.delete_button = QPushButton('Delete &marked') + self.delete_button.setToolTip(_('Delete marked files (checked subitems)')) + self.delete_button.setDefault(False) + self.delete_button.clicked.connect(self.delete_marked) + self.fix_button = QPushButton('&Fix marked') + self.fix_button.setDefault(False) + self.fix_button.setEnabled(False) + self.fix_button.setToolTip(_('Fix marked sections (checked fixable items)')) + self.fix_button.clicked.connect(self.fix_items) self.bbox = QDialogButtonBox(self) - self.bbox.addButton(self.check, QDialogButtonBox.ActionRole) - self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole) - self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole) - self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole) + self.bbox.addButton(self.check_button, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.delete_button, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.fix_button, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.copy_button, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.ok_button, QDialogButtonBox.AcceptRole) h = QHBoxLayout() ln = QLabel(_('Names to ignore:')) @@ -93,12 +100,19 @@ class CheckLibraryDialog(QDialog): plaintext = [] def builder(tree, checker, check): - attr, h, checkable = check + attr, h, checkable, fixable = check list = getattr(checker, attr, None) if list is None: return - tl = Item([h]) + tl = Item() + tl.setText(0, h) + if fixable: + tl.setText(1, _('(fixable)')) + tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable) + tl.setCheckState(1, False) + self.top_level_items[attr] = tl + for problem in list: it = Item() if checkable: @@ -107,6 +121,7 @@ class CheckLibraryDialog(QDialog): else: it.setFlags(Qt.ItemIsEnabled) it.setText(0, problem[0]) + it.setData(0, Qt.UserRole, problem[2]) it.setText(1, problem[1]) tl.addChild(it) self.all_items.append(it) @@ -118,18 +133,25 @@ class CheckLibraryDialog(QDialog): t.setColumnCount(2); t.setHeaderLabels([_('Name'), _('Path from library')]) self.all_items = [] + self.top_level_items = {} for check in CHECKS: builder(t, checker, check) t.setColumnWidth(0, 200) t.setColumnWidth(1, 400) - self.delete.setEnabled(False) + self.delete_button.setEnabled(False) self.text_results = '\n'.join(plaintext) def item_changed(self, item, column): + self.fix_button.setEnabled(False) + for it in self.top_level_items.values(): + if it.checkState(1): + self.fix_button.setEnabled(True) + + self.delete_button.setEnabled(False) for it in self.all_items: if it.checkState(1): - self.delete.setEnabled(True) + self.delete_button.setEnabled(True) return def delete_marked(self): @@ -157,6 +179,33 @@ class CheckLibraryDialog(QDialog): unicode(it.text(1)))) self.run_the_check() + def fix_missing_covers(self): + tl = self.top_level_items['missing_covers'] + child_count = tl.childCount() + for i in range(0, child_count): + item = tl.child(i); + id = item.data(0, Qt.UserRole).toInt()[0] + self.db.set_has_cover(id, False) + + def fix_extra_covers(self): + tl = self.top_level_items['extra_covers'] + child_count = tl.childCount() + for i in range(0, child_count): + item = tl.child(i); + id = item.data(0, Qt.UserRole).toInt()[0] + self.db.set_has_cover(id, True) + + def fix_items(self): + for check in CHECKS: + attr = check[0] + fixable = check[3] + tl = self.top_level_items[attr] + if fixable and tl.checkState(1): + func = getattr(self, 'fix_' + attr, None) + if func is not None and callable(func): + func() + self.run_the_check() + def copy_to_clipboard(self): QApplication.clipboard().setText(self.text_results) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index b285da0006..b49330db3e 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -14,14 +14,25 @@ from calibre.ebooks import BOOK_EXTENSIONS EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS) NORMALS = frozenset(['metadata.opf', 'cover.jpg']) -CHECKS = [('invalid_titles', _('Invalid titles'), True), - ('extra_titles', _('Extra titles'), True), - ('invalid_authors', _('Invalid authors'), True), - ('extra_authors', _('Extra authors'), True), - ('missing_formats', _('Missing book formats'), False), - ('extra_formats', _('Extra book formats'), True), - ('extra_files', _('Unknown files in books'), True), - ('failed_folders', _('Folders raising exception'), False) +''' +Checks fields: +- name of array containing info +- user-readable name of info +- can be deleted (can be checked) +- can be fixed. In this case, the name of the fix method is derived from the + array name +''' + +CHECKS = [('invalid_titles', _('Invalid titles'), True, False), + ('extra_titles', _('Extra titles'), True, False), + ('invalid_authors', _('Invalid authors'), True, False), + ('extra_authors', _('Extra authors'), True, False), + ('missing_formats', _('Missing book formats'), False, False), + ('extra_formats', _('Extra book formats'), True, False), + ('extra_files', _('Unknown files in books'), True, False), + ('missing_covers', _('Missing covers in books'), False, True), + ('extra_covers', _('Extra covers in books'), True, True), + ('failed_folders', _('Folders raising exception'), False, False) ] @@ -57,6 +68,10 @@ class CheckLibrary(object): self.extra_formats = [] self.extra_files = [] + self.missing_covers = [] + self.extra_covers = [] + + self.failed_folders = [] def dbpath(self, id): return self.db.path(id, index_is_id=True) @@ -83,7 +98,7 @@ class CheckLibrary(object): auth_path = os.path.join(lib, auth_dir) # First check: author must be a directory if not os.path.isdir(auth_path): - self.invalid_authors.append((auth_dir, auth_dir)) + self.invalid_authors.append((auth_dir, auth_dir, 0)) continue self.potential_authors[auth_dir] = {} @@ -98,7 +113,7 @@ class CheckLibrary(object): m = self.db_id_regexp.search(title_dir) # Second check: title must have an ID and must be a directory if m is None or not os.path.isdir(title_path): - self.invalid_titles.append((auth_dir, db_path)) + self.invalid_titles.append((auth_dir, db_path, 0)) continue id = m.group(1) @@ -106,12 +121,12 @@ class CheckLibrary(object): if self.is_case_sensitive: if int(id) not in self.all_ids or \ db_path not in self.all_dbpaths: - self.extra_titles.append((title_dir, db_path)) + self.extra_titles.append((title_dir, db_path, 0)) continue else: if int(id) not in self.all_ids or \ db_path.lower() not in self.all_lc_dbpaths: - self.extra_titles.append((title_dir, db_path)) + self.extra_titles.append((title_dir, db_path, 0)) continue # Record the book to check its formats @@ -120,7 +135,7 @@ class CheckLibrary(object): # Fourth check: author directories that contain no titles if not found_titles: - self.extra_authors.append((auth_dir, auth_dir)) + self.extra_authors.append((auth_dir, auth_dir, 0)) for x in self.book_dirs: try: @@ -152,17 +167,20 @@ class CheckLibrary(object): unknowns = frozenset(filenames-formats-NORMALS) # Check: any books that aren't formats or normally there? for u in unknowns: - self.extra_files.append((title_dir, os.path.join(db_path, u))) + self.extra_files.append((title_dir, + os.path.join(db_path, u), book_id)) # Check: any book formats that should be there? missing = book_formats - formats for m in missing: - self.missing_formats.append((title_dir, os.path.join(db_path, m))) + self.missing_formats.append((title_dir, + os.path.join(db_path, m), book_id)) # Check: any book formats that shouldn't be there? extra = formats - book_formats - NORMALS for e in extra: - self.extra_formats.append((title_dir, os.path.join(db_path, e))) + self.extra_formats.append((title_dir, + os.path.join(db_path, e), book_id)) else: def lc_map(fnames, fset): m = {} @@ -175,15 +193,28 @@ class CheckLibrary(object): unknowns = frozenset(filenames_lc-formats_lc-NORMALS) # Check: any books that aren't formats or normally there? for f in lc_map(filenames, unknowns): - self.extra_files.append((title_dir, os.path.join(db_path, f))) + self.extra_files.append((title_dir, os.path.join(db_path, f), + book_id)) book_formats_lc = frozenset([f.lower() for f in book_formats]) # Check: any book formats that should be there? missing = book_formats_lc - formats_lc for m in lc_map(book_formats, missing): - self.missing_formats.append((title_dir, os.path.join(db_path, m))) + self.missing_formats.append((title_dir, + os.path.join(db_path, m), book_id)) # Check: any book formats that shouldn't be there? extra = formats_lc - book_formats_lc - NORMALS for e in lc_map(formats, extra): - self.extra_formats.append((title_dir, os.path.join(db_path, e))) + self.extra_formats.append((title_dir, os.path.join(db_path, e), + book_id)) + + # check cached has_cover + if self.db.has_cover(book_id): + if 'cover.jpg' not in filenames: + self.missing_covers.append((title_dir, + os.path.join(db_path, title_dir, 'cover.jpg'), book_id)) + else: + if 'cover.jpg' in filenames: + self.extra_covers.append((title_dir, + os.path.join(db_path, title_dir, 'cover.jpg'), book_id)) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 47c575386b..a07e46577e 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -801,6 +801,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if notify: self.notify('cover', [id]) + def has_cover(self, id): + return self.data.get(id, self.FIELD_MAP['cover'], row_is_id=True) + + def set_has_cover(self, id, val): + dval = 1 if val else 0 + self.conn.execute('UPDATE books SET has_cover=? WHERE id=?', (dval, id,)) + self.data.set(id, self.FIELD_MAP['cover'], val, row_is_id=True) + def book_on_device(self, id): if callable(self.book_on_device_func): return self.book_on_device_func(id) From 56e38290df6125f34ae8f6a18ae75b4bf7e7724e Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:41:46 +0900 Subject: [PATCH 07/12] recipes: fix minor bugs, add yomiuri news - remove wrong #!(hash-bang) - add yomiuri online news --- resources/recipes/endgadget_ja.recipe | 2 - resources/recipes/jijinews.recipe | 2 - resources/recipes/mainichi.recipe | 2 - resources/recipes/nikkei_free.recipe | 6 +- resources/recipes/nikkei_sub.recipe | 6 +- resources/recipes/nikkei_sub_economy.recipe | 2 - resources/recipes/nikkei_sub_industry.recipe | 1 - resources/recipes/nikkei_sub_life.recipe | 2 - resources/recipes/nikkei_sub_main.recipe | 2 - resources/recipes/nikkei_sub_sports.recipe | 1 - resources/recipes/yomiuri.recipe | 66 ++++++++++++++++++++ 11 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 resources/recipes/yomiuri.recipe diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe index 443a85905d..891e6720a5 100644 --- a/resources/recipes/endgadget_ja.recipe +++ b/resources/recipes/endgadget_ja.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index fe52e76aaf..4f768ce7ee 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe index 47dc7d0ebc..2a44fa0980 100644 --- a/resources/recipes/mainichi.recipe +++ b/resources/recipes/mainichi.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe index d84aaa279b..adc596104b 100644 --- a/resources/recipes/nikkei_free.recipe +++ b/resources/recipes/nikkei_free.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -9,9 +7,9 @@ www.nikkei.com from calibre.web.feeds.news import BasicNewsRecipe class NikkeiNet(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, no subscription and getting max feed.' cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' oldest_article = 2 diff --git a/resources/recipes/nikkei_sub.recipe b/resources/recipes/nikkei_sub.recipe index 95b0017339..18f324009a 100644 --- a/resources/recipes/nikkei_sub.recipe +++ b/resources/recipes/nikkei_sub.recipe @@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile class NikkeiNet_subscription(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, gather MAX articles' needs_subscription = True oldest_article = 2 - max_articles_per_feed = 20 + max_articles_per_feed = 10 language = 'ja' remove_javascript = False temp_files = [] diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe index d762f505d1..2dd8f1add8 100644 --- a/resources/recipes/nikkei_sub_economy.recipe +++ b/resources/recipes/nikkei_sub_economy.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe index da04bbb5f3..81e86767d0 100644 --- a/resources/recipes/nikkei_sub_industry.recipe +++ b/resources/recipes/nikkei_sub_industry.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe index 2da5b13834..1bfa08a55f 100644 --- a/resources/recipes/nikkei_sub_life.recipe +++ b/resources/recipes/nikkei_sub_life.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 37fc8964c4..485d2f32c0 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe index 6e5a1c6bb2..644b0aa252 100644 --- a/resources/recipes/nikkei_sub_sports.recipe +++ b/resources/recipes/nikkei_sub_sports.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe new file mode 100644 index 0000000000..6335b99e32 --- /dev/null +++ b/resources/recipes/yomiuri.recipe @@ -0,0 +1,66 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'YOMIURI ONLINE' + __author__ = 'Hiroshi Miura' + oldest_article = 1 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/latestnews/' + remove_javascript = True + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class:"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('News', newsarticles)) + + return feeds + From 2b2a8a1edcc789010e928ae29b3d079d79de3ec6 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:43:35 +0900 Subject: [PATCH 08/12] recipes: fix typo --- resources/recipes/yomiuri.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe index 6335b99e32..39a085bf83 100644 --- a/resources/recipes/yomiuri.recipe +++ b/resources/recipes/yomiuri.recipe @@ -22,7 +22,7 @@ class YOLNews(BasicNewsRecipe): remove_tags_before = {'class':"article-def"} remove_tags = [{'class':"RelatedArticle"}, - {'class:"sbtns"} + {'class':"sbtns"} ] remove_tags_after = {'class':"date-def"} From 5f5c41e495a50a30bb7cec0055decb0eb1156305 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:53:46 +0900 Subject: [PATCH 09/12] recipes: add icons and yomiuri online variant --- resources/images/news/cnetjapan_digital.png | Bin 0 -> 892 bytes resources/images/news/cnetjapan_release.png | Bin 0 -> 892 bytes resources/images/news/yomiuri.png | Bin 0 -> 660 bytes resources/recipes/yomiuri.recipe | 9 +-- resources/recipes/yomiuri_world.recipe | 63 ++++++++++++++++++++ 5 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 resources/images/news/cnetjapan_digital.png create mode 100644 resources/images/news/cnetjapan_release.png create mode 100644 resources/images/news/yomiuri.png create mode 100644 resources/recipes/yomiuri_world.recipe diff --git a/resources/images/news/cnetjapan_digital.png b/resources/images/news/cnetjapan_digital.png new file mode 100644 index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732 GIT binary patch literal 892 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS zgUd~N@st@t=De* zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2 z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9 z+n3H?wZgID6OW>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR zpkU&$wn7D=tq3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH?)FK#IZ0z`#=1&`8(FG{nf* z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT literal 0 HcmV?d00001 diff --git a/resources/images/news/cnetjapan_release.png b/resources/images/news/cnetjapan_release.png new file mode 100644 index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732 GIT binary patch literal 892 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS zgUd~N@st@t=De* zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2 z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9 z+n3H?wZgID6OW>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR zpkU&$wn7D=tq3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH?)FK#IZ0z`#=1&`8(FG{nf* z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT literal 0 HcmV?d00001 diff --git a/resources/images/news/yomiuri.png b/resources/images/news/yomiuri.png new file mode 100644 index 0000000000000000000000000000000000000000..4a197f888f076f9801d3fd8cfd3fb2479af6520a GIT binary patch literal 660 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%QHS-aIkQ~omtsC%VrC@l%9XX)LZ0NMiU= z<9D8k!J*7a)N#w3*qy?^XY`4OC-F`H&(Xx$apdftAoXw6lRvN8zmI=bvrpX7Wnnkg zJU_$`aeL9N+$Qb+nu}!=-g|v`Cu_oVua9emR+O4qXm05N7p3t0(@k&g{PFeaUi&JF zaa-7o6IatW={O7cWOerHzF@Mvd*Gvy=UL|49t`|$X`;rF%ly+c@T)33b~kkg$5!NxSz>aV>HZdQ3h~YCE1T-o;^?+rqdp z^k$Cr%9IECTKibVCDIm`vR2p3xfgvv|Nr)PSK9A})dOQxwZt`|BqgyV)hf9t6-Y4{ z85kPq8XD>vS%erFTbY;uk+y-Qm4QLK$Jd1@8glbfGSe!tXfT9mSfZP>3#fs?)78&q Iol`;+00UkKJpcdz literal 0 HcmV?d00001 diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe index 39a085bf83..b3df1b58aa 100644 --- a/resources/recipes/yomiuri.recipe +++ b/resources/recipes/yomiuri.recipe @@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe import re class YOLNews(BasicNewsRecipe): - title = u'YOMIURI ONLINE' + title = u'YOMIURI ONLINE(Latest)' __author__ = 'Hiroshi Miura' oldest_article = 1 max_articles_per_feed = 50 @@ -19,6 +19,7 @@ class YOLNews(BasicNewsRecipe): encoding = 'Shift_JIS' index = 'http://www.yomiuri.co.jp/latestnews/' remove_javascript = True + masthead_title = u'YOMIURI ONLINE' remove_tags_before = {'class':"article-def"} remove_tags = [{'class':"RelatedArticle"}, @@ -27,9 +28,7 @@ class YOLNews(BasicNewsRecipe): remove_tags_after = {'class':"date-def"} def parse_feeds(self): - feeds = BasicNewsRecipe.parse_feeds(self) - for curfeed in feeds: delList = [] for a,curarticle in enumerate(curfeed.articles): @@ -39,7 +38,6 @@ class YOLNews(BasicNewsRecipe): for d in delList: index = curfeed.articles.index(d) curfeed.articles[index:index+1] = [] - return feeds def parse_index(self): @@ -60,7 +58,6 @@ class YOLNews(BasicNewsRecipe): ,'url' :'http://www.yomiuri.co.jp' + itema['href'] ,'description':'' }) - feeds.append(('News', newsarticles)) - + feeds.append(('latest', newsarticles)) return feeds diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe new file mode 100644 index 0000000000..0146ffa330 --- /dev/null +++ b/resources/recipes/yomiuri_world.recipe @@ -0,0 +1,63 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'YOMIURI ONLINE(World)' + __author__ = 'Hiroshi Miura' + oldest_article = 2 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News/world news' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/world/' + remove_javascript = True + masthead_title = u"YOMIURI ONLINE" + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class':"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('World', newsarticles)) + return feeds + From 21969e157cf02059bcab8a35849321d720007602 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:58:20 +0900 Subject: [PATCH 10/12] recipe: yomiuri_world: fix for fetching date field --- resources/recipes/yomiuri_world.recipe | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe index 0146ffa330..eae5a2a40a 100644 --- a/resources/recipes/yomiuri_world.recipe +++ b/resources/recipes/yomiuri_world.recipe @@ -50,11 +50,9 @@ class YOLNews(BasicNewsRecipe): itema = itt.find('a',href=True) if itema: itd1 = itema.findNextSibling(text = True) - itd2 = itd1.findNextSibling(text = True) - itd3 = itd2.findNextSibling(text = True) newsarticles.append({ 'title' :itema.string - ,'date' :''.join([itd1, itd2, itd3]) + ,'date' :''.join([itd1]) ,'url' :'http://www.yomiuri.co.jp' + itema['href'] ,'description':'' }) From af98ebf24decbf35966eb3c0f69da418f4acb824 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 28 Nov 2010 12:36:33 -0700 Subject: [PATCH 11/12] Animal Politico by leamsi --- resources/recipes/animal_politico.recipe | 111 +++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 resources/recipes/animal_politico.recipe diff --git a/resources/recipes/animal_politico.recipe b/resources/recipes/animal_politico.recipe new file mode 100644 index 0000000000..f48587ea94 --- /dev/null +++ b/resources/recipes/animal_politico.recipe @@ -0,0 +1,111 @@ +#!/usr/bin/python +# encoding: utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1290663986(BasicNewsRecipe): + title = u'Animal Pol\u00EDtico' + publisher = u'Animal Pol\u00EDtico' + category = u'News, Mexico' + description = u'Noticias Pol\u00EDticas' + __author__ = 'leamsi' + masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png' + oldest_article = 1 + max_articles_per_feed = 100 + language = 'es' + + #feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')] + + remove_tags_before = dict(name='div', id='main') + remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})] + keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}), + dict(name='div', attrs={'class':'entry-content'})] + remove_javascript = True + INDEX = 'http://www.animalpolitico.com/' + + def generic_parse(self, soup): + articles = [] + for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'): + article_url = entry.a['href'] + '?print=yes' + article_title= entry.find('h3', 'entry-title') + article_title= self.tag_to_string(article_title) + article_date = entry.find('span', 'the-time') + article_date = self.tag_to_string(article_date) + article_desc = self.tag_to_string(entry.find('p')) + + #print 'Article:',article_title, article_date,article_url + #print entry['class'] + + articles.append({'title' : article_title, + 'date' : article_date, + 'description' : article_desc, + 'url' : article_url}) + # Avoid including the multimedia stuff. + if entry['class'].find('last') != -1: + break + + return articles + + def plumaje_parse(self, soup): + articles = [] + blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) + for entry in blogs_soup.findAll('li'): + article_title = entry.p + article_url = article_title.a['href'] + '?print=yes' + article_date = article_title.nextSibling + article_title = self.tag_to_string(article_title) + article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '') + article_desc = self.tag_to_string(entry.find('h4')) + + #print 'Article:',article_title, article_date,article_url + articles.append({'title' : article_title, + 'date' : article_date, + 'description' : article_desc, + 'url' : article_url}) + + return articles + + def boca_parse(self, soup): + articles = [] + for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'): + article_title= entry.find('h2', 'entry-title') + article_url = article_title.a['href'] + '?print=yes' + article_title= self.tag_to_string(article_title) + article_date = entry.find('span', 'entry-date') + article_date = self.tag_to_string(article_date) + article_desc = self.tag_to_string(entry.find('div', 'entry-content')) + + #print 'Article:',article_title, article_date,article_url + #print entry['class'] + + articles.append({'title' : article_title, + 'date' : article_date, + 'description' : article_desc, + 'url' : article_url}) + # Avoid including the multimedia stuff. + if entry['class'].find('last') != -1: + break + + return articles + + + + + def parse_index(self): + gobierno_soup = self.index_to_soup(self.INDEX+'gobierno/') + congreso_soup = self.index_to_soup(self.INDEX+'congreso/') + seguridad_soup = self.index_to_soup(self.INDEX+'seguridad/') + comunidad_soup = self.index_to_soup(self.INDEX+'comunidad/') + plumaje_soup = self.index_to_soup(self.INDEX+'plumaje/') + la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/') + + gobierno_articles = self.generic_parse(gobierno_soup) + congreso_articles = self.generic_parse(congreso_soup) + seguridad_articles = self.generic_parse(seguridad_soup) + comunidad_articles = self.generic_parse(comunidad_soup) + plumaje_articles = self.plumaje_parse(plumaje_soup) + la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup) + + + return [ (u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles), + (u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ] From f6d70a1cd27bda0270b6b99e346eb0723bed999c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 28 Nov 2010 14:54:36 -0700 Subject: [PATCH 12/12] Nook Color profile: Reduce screen height to 900px --- src/calibre/customize/profiles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 2318c6724e..74a79689d9 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -683,8 +683,8 @@ class NookColorOutput(NookOutput): short_name = 'nook_color' description = _('This profile is intended for the B&N Nook Color.') - screen_size = (600, 980) - comic_screen_size = (584, 980) + screen_size = (600, 900) + comic_screen_size = (584, 900) dpi = 169 class BambookOutput(OutputProfile):