Merge from trunk
BIN  resources/images/news/cnetjapan_digital.png    new file   892 B
BIN  resources/images/news/cnetjapan_release.png    new file   892 B
BIN  resources/images/news/mainichi.png             new file   953 B
BIN  resources/images/news/mainichi_it_news.png     new file   953 B
BIN  (modified image, name not shown in this view)  948 B -> 948 B
BIN  resources/images/news/nikkei_sub_industry.png  new file   948 B
BIN  resources/images/news/yomiuri.png               new file   660 B
resources/recipes/animal_politico.recipe    new file, 111 lines
@@ -0,0 +1,111 @@
#!/usr/bin/python
# encoding: utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1290663986(BasicNewsRecipe):
    title = u'Animal Pol\u00EDtico'
    publisher = u'Animal Pol\u00EDtico'
    category = u'News, Mexico'
    description = u'Noticias Pol\u00EDticas'
    __author__ = 'leamsi'
    masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
    oldest_article = 1
    max_articles_per_feed = 100
    language = 'es'

    #feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]

    remove_tags_before = dict(name='div', id='main')
    remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})]
    keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}),
                      dict(name='div', attrs={'class':'entry-content'})]
    remove_javascript = True
    INDEX = 'http://www.animalpolitico.com/'

    def generic_parse(self, soup):
        articles = []
        for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
            article_url = entry.a['href'] + '?print=yes'
            article_title = entry.find('h3', 'entry-title')
            article_title = self.tag_to_string(article_title)
            article_date = entry.find('span', 'the-time')
            article_date = self.tag_to_string(article_date)
            article_desc = self.tag_to_string(entry.find('p'))

            #print 'Article:',article_title, article_date,article_url
            #print entry['class']

            articles.append({'title' : article_title,
                             'date' : article_date,
                             'description' : article_desc,
                             'url' : article_url})
            # Avoid including the multimedia stuff.
            if entry['class'].find('last') != -1:
                break

        return articles

    def plumaje_parse(self, soup):
        articles = []
        blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1)
        for entry in blogs_soup.findAll('li'):
            article_title = entry.p
            article_url = article_title.a['href'] + '?print=yes'
            article_date = article_title.nextSibling
            article_title = self.tag_to_string(article_title)
            article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '')
            article_desc = self.tag_to_string(entry.find('h4'))

            #print 'Article:',article_title, article_date,article_url
            articles.append({'title' : article_title,
                             'date' : article_date,
                             'description' : article_desc,
                             'url' : article_url})

        return articles

    def boca_parse(self, soup):
        articles = []
        for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
            article_title = entry.find('h2', 'entry-title')
            article_url = article_title.a['href'] + '?print=yes'
            article_title = self.tag_to_string(article_title)
            article_date = entry.find('span', 'entry-date')
            article_date = self.tag_to_string(article_date)
            article_desc = self.tag_to_string(entry.find('div', 'entry-content'))

            #print 'Article:',article_title, article_date,article_url
            #print entry['class']

            articles.append({'title' : article_title,
                             'date' : article_date,
                             'description' : article_desc,
                             'url' : article_url})
            # Avoid including the multimedia stuff.
            if entry['class'].find('last') != -1:
                break

        return articles

    def parse_index(self):
        gobierno_soup = self.index_to_soup(self.INDEX+'gobierno/')
        congreso_soup = self.index_to_soup(self.INDEX+'congreso/')
        seguridad_soup = self.index_to_soup(self.INDEX+'seguridad/')
        comunidad_soup = self.index_to_soup(self.INDEX+'comunidad/')
        plumaje_soup = self.index_to_soup(self.INDEX+'plumaje/')
        la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/')

        gobierno_articles = self.generic_parse(gobierno_soup)
        congreso_articles = self.generic_parse(congreso_soup)
        seguridad_articles = self.generic_parse(seguridad_soup)
        comunidad_articles = self.generic_parse(comunidad_soup)
        plumaje_articles = self.plumaje_parse(plumaje_soup)
        la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)

        return [ (u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
                 (u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]
@@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe):
    max_articles_per_feed = 30
    __author__ = 'Hiroshi Miura'

    feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
    feeds = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'),
             (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
            ]
    language = 'ja'
    encoding = 'Shift_JIS'
    remove_javascript = True
@@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe):
         lambda match: '<!-- removed -->'),
    ]

    remove_tags_before = dict(name="h2")
    remove_tags_before = dict(id="contents_l")
    remove_tags = [
        {'class':"social_bkm_share"},
        {'class':"social_bkm_print"},
        {'class':"block20 clearfix"},
        dict(name="div",attrs={'id':'bookreview'}),
        {'class':"tag_left_ttl"},
        {'class':"tag_right"}
    ]
    remove_tags_after = {'class':"block20"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

resources/recipes/cnetjapan_digital.recipe    new file, 49 lines
@@ -0,0 +1,49 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class CNetJapanDigital(BasicNewsRecipe):
    title = u'CNET Japan Digital'
    oldest_article = 3
    max_articles_per_feed = 30
    __author__ = 'Hiroshi Miura'

    feeds = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ]
    language = 'ja'
    encoding = 'Shift_JIS'
    remove_javascript = True

    preprocess_regexps = [
        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
         lambda match: '<!-- removed -->'),
    ]

    remove_tags_before = dict(id="contents_l")
    remove_tags = [
        {'class':"social_bkm_share"},
        {'class':"social_bkm_print"},
        {'class':"block20 clearfix"},
        dict(name="div",attrs={'id':'bookreview'}),
        {'class':"tag_left_ttl"},
        {'class':"tag_right"}
    ]
    remove_tags_after = {'class':"block20"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds
resources/recipes/cnetjapan_release.recipe    new file, 48 lines
@@ -0,0 +1,48 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class CNetJapanRelease(BasicNewsRecipe):
    title = u'CNET Japan release'
    oldest_article = 3
    max_articles_per_feed = 30
    __author__ = 'Hiroshi Miura'

    feeds = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ]
    language = 'ja'
    encoding = 'Shift_JIS'
    remove_javascript = True

    preprocess_regexps = [
        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
         lambda match: '<!-- removed -->'),
    ]

    remove_tags_before = dict(id="contents_l")
    remove_tags = [
        {'class':"social_bkm_share"},
        {'class':"social_bkm_print"},
        {'class':"block20 clearfix"},
        dict(name="div",attrs={'id':'bookreview'}),
        {'class':"tag_left_ttl"}
    ]
    remove_tags_after = {'class':"block20"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds
@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
@@ -14,13 +12,20 @@ class JijiDotCom(BasicNewsRecipe):
    description = 'World News from Jiji Press'
    publisher = 'Jiji Press Ltd.'
    category = 'news'
    encoding = 'utf-8'
    oldest_article = 6
    max_articles_per_feed = 100
    encoding = 'euc_jisx0213'
    language = 'ja'
    cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
    masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif'
    top_url = 'http://www.jiji.com/'

    feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
    remove_tags_after = dict(id="ad_google")

    def get_cover_url(self):
        cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
        soup = self.index_to_soup(self.top_url)
        cover_item = soup.find('div', attrs={'class':'top-pad-photos'})
        if cover_item:
            cover_url = self.top_url + cover_item.img['src']
        return cover_url

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''

@@ -1,4 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
@@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
    max_articles_per_feed = 100
    encoding = 'Shift_JIS'
    language = 'ja'
    cover_url = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg'
    masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif'

    feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]

    remove_tags_before = dict(id="__r_article_title__")
    remove_tags_after = dict(id="ajax_release_news")
    remove_tags = [{'class':"parent chromeCustom6G"}]
    remove_tags = [{'class':"parent chromeCustom6G"},
                   dict(id="RelatedImg")
                  ]

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
@@ -9,9 +7,9 @@ www.nikkei.com
from calibre.web.feeds.news import BasicNewsRecipe

class NikkeiNet(BasicNewsRecipe):
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    description = 'News and current market affairs from Japan, no subscription and getting max feed.'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    oldest_article = 2

@@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile


class NikkeiNet_subscription(BasicNewsRecipe):
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    description = 'News and current market affairs from Japan, gather MAX articles'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    max_articles_per_feed = 10
    language = 'ja'
    remove_javascript = False
    temp_files = []

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''

@@ -1,4 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''

@@ -1,5 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
@@ -30,6 +28,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
        {'class':"cmn-article_list"},
        {'class':"cmn-dashedline"},
        {'class':"cmn-hide"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

@@ -1,4 +1,3 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'

resources/recipes/yomiuri.recipe    new file, 63 lines
@@ -0,0 +1,63 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.yomiuri.co.jp
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class YOLNews(BasicNewsRecipe):
    title = u'Yomiuri Online (Latest)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 1
    max_articles_per_feed = 50
    description = 'Japanese traditional newspaper Yomiuri Online News'
    publisher = 'Yomiuri Online News'
    category = 'news, japan'
    language = 'ja'
    encoding = 'Shift_JIS'
    index = 'http://www.yomiuri.co.jp/latestnews/'
    remove_javascript = True
    masthead_title = u'YOMIURI ONLINE'

    remove_tags_before = {'class':"article-def"}
    remove_tags = [{'class':"RelatedArticle"},
                   {'class':"sbtns"}
                  ]
    remove_tags_after = {'class':"date-def"}

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []
        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'list-def'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    itd1 = itema.findNextSibling(text = True)
                    itd2 = itd1.findNextSibling(text = True)
                    itd3 = itd2.findNextSibling(text = True)
                    newsarticles.append({
                        'title'       :itema.string
                        ,'date'       :''.join([itd1, itd2, itd3])
                        ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
                        ,'description':''
                        })
            feeds.append(('latest', newsarticles))
        return feeds
resources/recipes/yomiuri_world.recipe    new file, 61 lines
@@ -0,0 +1,61 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.yomiuri.co.jp
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class YOLNews(BasicNewsRecipe):
    title = u'Yomiuri Online (World)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 50
    description = 'Japanese traditional newspaper Yomiuri Online News/world news'
    publisher = 'Yomiuri Online News'
    category = 'news, japan'
    language = 'ja'
    encoding = 'Shift_JIS'
    index = 'http://www.yomiuri.co.jp/world/'
    remove_javascript = True
    masthead_title = u"YOMIURI ONLINE"

    remove_tags_before = {'class':"article-def"}
    remove_tags = [{'class':"RelatedArticle"},
                   {'class':"sbtns"}
                  ]
    remove_tags_after = {'class':"date-def"}

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []
        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'list-def'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    itd1 = itema.findNextSibling(text = True)
                    newsarticles.append({
                        'title'       :itema.string
                        ,'date'       :''.join([itd1])
                        ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
                        ,'description':''
                        })
            feeds.append(('World', newsarticles))
        return feeds
@@ -683,8 +683,8 @@ class NookColorOutput(NookOutput):
    short_name = 'nook_color'
    description = _('This profile is intended for the B&N Nook Color.')

    screen_size = (600, 980)
    comic_screen_size = (584, 980)
    screen_size = (600, 900)
    comic_screen_size = (584, 900)
    dpi = 169

class BambookOutput(OutputProfile):

@@ -32,23 +32,30 @@ class CheckLibraryDialog(QDialog):
        self.log.itemChanged.connect(self.item_changed)
        self._layout.addWidget(self.log)

        self.check = QPushButton(_('&Run the check'))
        self.check.setDefault(False)
        self.check.clicked.connect(self.run_the_check)
        self.copy = QPushButton(_('Copy &to clipboard'))
        self.copy.setDefault(False)
        self.copy.clicked.connect(self.copy_to_clipboard)
        self.ok = QPushButton('&Done')
        self.ok.setDefault(True)
        self.ok.clicked.connect(self.accept)
        self.delete = QPushButton('Delete &marked')
        self.delete.setDefault(False)
        self.delete.clicked.connect(self.delete_marked)
        self.check_button = QPushButton(_('&Run the check'))
        self.check_button.setDefault(False)
        self.check_button.clicked.connect(self.run_the_check)
        self.copy_button = QPushButton(_('Copy &to clipboard'))
        self.copy_button.setDefault(False)
        self.copy_button.clicked.connect(self.copy_to_clipboard)
        self.ok_button = QPushButton('&Done')
        self.ok_button.setDefault(True)
        self.ok_button.clicked.connect(self.accept)
        self.delete_button = QPushButton('Delete &marked')
        self.delete_button.setToolTip(_('Delete marked files (checked subitems)'))
        self.delete_button.setDefault(False)
        self.delete_button.clicked.connect(self.delete_marked)
        self.fix_button = QPushButton('&Fix marked')
        self.fix_button.setDefault(False)
        self.fix_button.setEnabled(False)
        self.fix_button.setToolTip(_('Fix marked sections (checked fixable items)'))
        self.fix_button.clicked.connect(self.fix_items)
        self.bbox = QDialogButtonBox(self)
        self.bbox.addButton(self.check, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole)
        self.bbox.addButton(self.check_button, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.delete_button, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.fix_button, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.copy_button, QDialogButtonBox.ActionRole)
        self.bbox.addButton(self.ok_button, QDialogButtonBox.AcceptRole)

        h = QHBoxLayout()
        ln = QLabel(_('Names to ignore:'))
@@ -93,12 +100,19 @@ class CheckLibraryDialog(QDialog):
        plaintext = []

        def builder(tree, checker, check):
            attr, h, checkable = check
            attr, h, checkable, fixable = check
            list = getattr(checker, attr, None)
            if list is None:
                return

            tl = Item([h])
            tl = Item()
            tl.setText(0, h)
            if fixable:
                tl.setText(1, _('(fixable)'))
                tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable)
                tl.setCheckState(1, False)
            self.top_level_items[attr] = tl

            for problem in list:
                it = Item()
                if checkable:
@@ -107,6 +121,7 @@ class CheckLibraryDialog(QDialog):
                else:
                    it.setFlags(Qt.ItemIsEnabled)
                it.setText(0, problem[0])
                it.setData(0, Qt.UserRole, problem[2])
                it.setText(1, problem[1])
                tl.addChild(it)
                self.all_items.append(it)
@@ -118,18 +133,25 @@ class CheckLibraryDialog(QDialog):
        t.setColumnCount(2);
        t.setHeaderLabels([_('Name'), _('Path from library')])
        self.all_items = []
        self.top_level_items = {}
        for check in CHECKS:
            builder(t, checker, check)

        t.setColumnWidth(0, 200)
        t.setColumnWidth(1, 400)
        self.delete.setEnabled(False)
        self.delete_button.setEnabled(False)
        self.text_results = '\n'.join(plaintext)

    def item_changed(self, item, column):
        self.fix_button.setEnabled(False)
        for it in self.top_level_items.values():
            if it.checkState(1):
                self.fix_button.setEnabled(True)

        self.delete_button.setEnabled(False)
        for it in self.all_items:
            if it.checkState(1):
                self.delete.setEnabled(True)
                self.delete_button.setEnabled(True)
                return

    def delete_marked(self):
@@ -157,6 +179,33 @@ class CheckLibraryDialog(QDialog):
                                         unicode(it.text(1))))
        self.run_the_check()

    def fix_missing_covers(self):
        tl = self.top_level_items['missing_covers']
        child_count = tl.childCount()
        for i in range(0, child_count):
            item = tl.child(i);
            id = item.data(0, Qt.UserRole).toInt()[0]
            self.db.set_has_cover(id, False)

    def fix_extra_covers(self):
        tl = self.top_level_items['extra_covers']
        child_count = tl.childCount()
        for i in range(0, child_count):
            item = tl.child(i);
            id = item.data(0, Qt.UserRole).toInt()[0]
            self.db.set_has_cover(id, True)

    def fix_items(self):
        for check in CHECKS:
            attr = check[0]
            fixable = check[3]
            tl = self.top_level_items[attr]
            if fixable and tl.checkState(1):
                func = getattr(self, 'fix_' + attr, None)
                if func is not None and callable(func):
                    func()
        self.run_the_check()

    def copy_to_clipboard(self):
        QApplication.clipboard().setText(self.text_results)

@@ -14,14 +14,25 @@ from calibre.ebooks import BOOK_EXTENSIONS
EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
NORMALS = frozenset(['metadata.opf', 'cover.jpg'])

CHECKS = [('invalid_titles', _('Invalid titles'), True),
          ('extra_titles', _('Extra titles'), True),
          ('invalid_authors', _('Invalid authors'), True),
          ('extra_authors', _('Extra authors'), True),
          ('missing_formats', _('Missing book formats'), False),
          ('extra_formats', _('Extra book formats'), True),
          ('extra_files', _('Unknown files in books'), True),
          ('failed_folders', _('Folders raising exception'), False)
'''
Checks fields:
- name of array containing info
- user-readable name of info
- can be deleted (can be checked)
- can be fixed. In this case, the name of the fix method is derived from the
  array name
'''

CHECKS = [('invalid_titles', _('Invalid titles'), True, False),
          ('extra_titles', _('Extra titles'), True, False),
          ('invalid_authors', _('Invalid authors'), True, False),
          ('extra_authors', _('Extra authors'), True, False),
          ('missing_formats', _('Missing book formats'), False, False),
          ('extra_formats', _('Extra book formats'), True, False),
          ('extra_files', _('Unknown files in books'), True, False),
          ('missing_covers', _('Missing covers in books'), False, True),
          ('extra_covers', _('Extra covers in books'), True, True),
          ('failed_folders', _('Folders raising exception'), False, False)
         ]

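The fourth field added to each CHECKS entry is what ties a check to its repair routine: the dialog derives the method name from the array name ('fix_' + attr), so only entries marked fixable need a matching fix_* method. A minimal standalone sketch of that dispatch follows; the Fixer class, fix_marked name and plain strings are illustrative stand-ins, not part of the patch, where the same lookup lives in CheckLibraryDialog.fix_items.

    # Illustrative sketch only -- mirrors the getattr() dispatch used by fix_items().
    CHECKS = [('missing_covers', 'Missing covers in books', False, True),
              ('extra_covers',   'Extra covers in books',   True,  True),
              ('extra_files',    'Unknown files in books',  True,  False)]

    class Fixer(object):  # hypothetical stand-in for the dialog
        def fix_missing_covers(self):
            print('would clear stale has_cover flags')

        def fix_extra_covers(self):
            print('would set missing has_cover flags')

        def fix_marked(self, marked):
            # Only fixable entries the user marked are dispatched; the method
            # name is derived from the first tuple field.
            for attr, label, checkable, fixable in CHECKS:
                if fixable and attr in marked:
                    func = getattr(self, 'fix_' + attr, None)
                    if func is not None and callable(func):
                        func()

    Fixer().fix_marked(set(['missing_covers']))  # -> would clear stale has_cover flags
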
@@ -57,6 +68,10 @@ class CheckLibrary(object):
        self.extra_formats = []
        self.extra_files = []

        self.missing_covers = []
        self.extra_covers = []

        self.failed_folders = []

    def dbpath(self, id):
        return self.db.path(id, index_is_id=True)
@@ -83,7 +98,7 @@ class CheckLibrary(object):
            auth_path = os.path.join(lib, auth_dir)
            # First check: author must be a directory
            if not os.path.isdir(auth_path):
                self.invalid_authors.append((auth_dir, auth_dir))
                self.invalid_authors.append((auth_dir, auth_dir, 0))
                continue

            self.potential_authors[auth_dir] = {}
@@ -98,7 +113,7 @@ class CheckLibrary(object):
                m = self.db_id_regexp.search(title_dir)
                # Second check: title must have an ID and must be a directory
                if m is None or not os.path.isdir(title_path):
                    self.invalid_titles.append((auth_dir, db_path))
                    self.invalid_titles.append((auth_dir, db_path, 0))
                    continue

                id = m.group(1)
@@ -106,12 +121,12 @@ class CheckLibrary(object):
                if self.is_case_sensitive:
                    if int(id) not in self.all_ids or \
                            db_path not in self.all_dbpaths:
                        self.extra_titles.append((title_dir, db_path))
                        self.extra_titles.append((title_dir, db_path, 0))
                        continue
                else:
                    if int(id) not in self.all_ids or \
                            db_path.lower() not in self.all_lc_dbpaths:
                        self.extra_titles.append((title_dir, db_path))
                        self.extra_titles.append((title_dir, db_path, 0))
                        continue

                # Record the book to check its formats
@@ -120,7 +135,7 @@ class CheckLibrary(object):

            # Fourth check: author directories that contain no titles
            if not found_titles:
                self.extra_authors.append((auth_dir, auth_dir))
                self.extra_authors.append((auth_dir, auth_dir, 0))

        for x in self.book_dirs:
            try:
@@ -152,17 +167,20 @@ class CheckLibrary(object):
            unknowns = frozenset(filenames-formats-NORMALS)
            # Check: any books that aren't formats or normally there?
            for u in unknowns:
                self.extra_files.append((title_dir, os.path.join(db_path, u)))
                self.extra_files.append((title_dir,
                                         os.path.join(db_path, u), book_id))

            # Check: any book formats that should be there?
            missing = book_formats - formats
            for m in missing:
                self.missing_formats.append((title_dir, os.path.join(db_path, m)))
                self.missing_formats.append((title_dir,
                                             os.path.join(db_path, m), book_id))

            # Check: any book formats that shouldn't be there?
            extra = formats - book_formats - NORMALS
            for e in extra:
                self.extra_formats.append((title_dir, os.path.join(db_path, e)))
                self.extra_formats.append((title_dir,
                                           os.path.join(db_path, e), book_id))
        else:
            def lc_map(fnames, fset):
                m = {}
@@ -175,15 +193,28 @@ class CheckLibrary(object):
            unknowns = frozenset(filenames_lc-formats_lc-NORMALS)
            # Check: any books that aren't formats or normally there?
            for f in lc_map(filenames, unknowns):
                self.extra_files.append((title_dir, os.path.join(db_path, f)))
                self.extra_files.append((title_dir, os.path.join(db_path, f),
                                         book_id))

            book_formats_lc = frozenset([f.lower() for f in book_formats])
            # Check: any book formats that should be there?
            missing = book_formats_lc - formats_lc
            for m in lc_map(book_formats, missing):
                self.missing_formats.append((title_dir, os.path.join(db_path, m)))
                self.missing_formats.append((title_dir,
                                             os.path.join(db_path, m), book_id))

            # Check: any book formats that shouldn't be there?
            extra = formats_lc - book_formats_lc - NORMALS
            for e in lc_map(formats, extra):
                self.extra_formats.append((title_dir, os.path.join(db_path, e)))
                self.extra_formats.append((title_dir, os.path.join(db_path, e),
                                           book_id))

        # check cached has_cover
        if self.db.has_cover(book_id):
            if 'cover.jpg' not in filenames:
                self.missing_covers.append((title_dir,
                    os.path.join(db_path, title_dir, 'cover.jpg'), book_id))
        else:
            if 'cover.jpg' in filenames:
                self.extra_covers.append((title_dir,
                    os.path.join(db_path, title_dir, 'cover.jpg'), book_id))

@@ -801,6 +801,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        if notify:
            self.notify('cover', [id])

    def has_cover(self, id):
        return self.data.get(id, self.FIELD_MAP['cover'], row_is_id=True)

    def set_has_cover(self, id, val):
        dval = 1 if val else 0
        self.conn.execute('UPDATE books SET has_cover=? WHERE id=?', (dval, id,))
        self.data.set(id, self.FIELD_MAP['cover'], val, row_is_id=True)

    def book_on_device(self, id):
        if callable(self.book_on_device_func):
            return self.book_on_device_func(id)