Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit f6fee32395: sync with Kovid's branch

@@ -20,6 +20,58 @@
# new recipes:
#   - title:

- version: 0.9.26
  date: 2013-04-05

  new features:
    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."

    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
      tickets: [1163520]

    - title: "ToC Editor: Add buttons to indent/unindent the current entry"

    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

    - title: "Column icons: Allow use of wide images as column icons"

    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
      tickets: [1162293,1163115]

  bug fixes:
    - title: "PDF Output: Fix generating page numbers causing links to not work."
      tickets: [1162573]

    - title: "Wrong filename output in error message when 'Guide reference not found'"
      tickets: [1163659]

    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"

    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
      tickets: [1162054]

    - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
      tickets: [1161999]

  improved recipes:
    - Financial Times UK
    - Sing Tao Daily
    - Apple Daily
    - A List Apart
    - Business Week
    - Harpers printed edition
    - Harvard Business Review

  new recipes:
    - title: AM730
      author: Eddie Lau

    - title: Arret sur images
      author: Francois D

    - title: Diario de Noticias
      author: Jose Pinto

- version: 0.9.25
  date: 2013-03-29
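The header/footer templates mentioned in the first new feature are short HTML snippets entered under PDF Output in the conversion dialog. A minimal sketch of a footer template is shown below; the _TITLE_ and _PAGENUM_ placeholder names are assumptions about calibre's PDF template variables, not something this commit confirms, so check the option documentation before relying on them.

    <p style="text-align:center; font-size:smaller">_TITLE_ - page _PAGENUM_</p>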
recipes/am730.recipe (new file, 290 lines)
@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang


class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'

    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if not (a == None):
                image = a.find('img')
                if not (image == None):
                    if __HiResImg__:
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if not caption == None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
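A recipe file like the one above can be smoke-tested from the command line before it is added to the builtin collection: --test restricts the download to a couple of articles per feed and -vv prints the network activity. The output filename below is arbitrary.

    ebook-convert am730.recipe am730.epub --test -vv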
@@ -1,161 +1,275 @@
# -*- coding: utf-8 -*-
|
# vim:fileencoding=UTF-8
|
||||||
import re
|
from __future__ import unicode_literals
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Eddie Lau'
|
||||||
|
__Date__ = ''
|
||||||
|
|
||||||
|
from calibre import (__appname__, force_unicode, strftime)
|
||||||
|
from calibre.utils.date import now as nowf
|
||||||
|
import os, datetime, re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from contextlib import nested
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
class AppleDaily(BasicNewsRecipe):
|
class AppleDaily(BasicNewsRecipe):
|
||||||
|
title = u'蘋果日報 (香港)'
|
||||||
title = u'蘋果日報'
|
__author__ = 'Eddie Lau'
|
||||||
__author__ = u'蘋果日報'
|
publisher = '蘋果日報'
|
||||||
__publisher__ = u'蘋果日報'
|
oldest_article = 1
|
||||||
description = u'蘋果日報'
|
max_articles_per_feed = 100
|
||||||
masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
|
auto_cleanup = False
|
||||||
language = 'zh_TW'
|
language = 'zh'
|
||||||
encoding = 'UTF-8'
|
encoding = 'utf-8'
|
||||||
timefmt = ' [%a, %d %b, %Y]'
|
auto_cleanup = False
|
||||||
needs_subscription = False
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_tags_before = dict(name=['ul', 'h1'])
|
use_embedded_content = False
|
||||||
remove_tags_after = dict(name='form')
|
|
||||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
|
|
||||||
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
|
|
||||||
dict(name=['script', 'noscript', 'style', 'form'])]
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = '''
|
description = 'http://hkm.appledaily.com/'
|
||||||
@font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
|
category = 'Chinese, News, Hong Kong'
|
||||||
body {margin-right: 8pt; font-family: 'uming', serif;}
|
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
|
||||||
h1 {font-family: 'uming', serif, sans-serif}
|
|
||||||
'''
|
|
||||||
#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
|
||||||
(re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
|
keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
|
||||||
lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
|
remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
|
||||||
]
|
dict(name='p', attrs={'class':'next'})]
|
||||||
|
|
||||||
|
def get_dtlocal(self):
|
||||||
|
dt_utc = datetime.datetime.utcnow()
|
||||||
|
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||||
|
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
||||||
|
|
||||||
|
def get_fetchdate(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def get_fetchformatteddate(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
def get_fetchyear(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[0:4]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y")
|
||||||
|
|
||||||
|
def get_fetchmonth(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[4:6]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%m")
|
||||||
|
|
||||||
|
def get_fetchday(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[6:8]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%d")
|
||||||
|
|
||||||
|
# Note: does not work with custom date given by __Date__
|
||||||
|
def get_weekday(self):
|
||||||
|
return self.get_dtlocal().weekday()
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
|
cover = soup.find(attrs={'class':'top-news'}).get('src', False)
|
||||||
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
#def get_browser(self):
|
try:
|
||||||
#br = BasicNewsRecipe.get_browser(self)
|
br.open(cover)
|
||||||
#if self.username is not None and self.password is not None:
|
except:
|
||||||
# br.open('http://www.nytimes.com/auth/login')
|
cover = None
|
||||||
# br.select_form(name='login')
|
return cover
|
||||||
# br['USERID'] = self.username
|
|
||||||
# br['PASSWORD'] = self.password
|
|
||||||
# br.submit()
|
|
||||||
#return br
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
#process all the images
|
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
|
||||||
iurl = tag['src']
|
|
||||||
#print 'checking image: ' + iurl
|
|
||||||
|
|
||||||
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
|
|
||||||
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
|
|
||||||
|
|
||||||
m = p.search(iurl)
|
|
||||||
|
|
||||||
if m is not None:
|
|
||||||
iurl = 'http://' + m.group('server') + '/' + m.group('path')
|
|
||||||
#print 'working! new url: ' + iurl
|
|
||||||
tag['src'] = iurl
|
|
||||||
#else:
|
|
||||||
#print 'not good'
|
|
||||||
|
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
|
|
||||||
iurl = tag['href']
|
|
||||||
#print 'checking image: ' + iurl
|
|
||||||
|
|
||||||
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
|
|
||||||
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
|
|
||||||
|
|
||||||
m = p.search(iurl)
|
|
||||||
|
|
||||||
if m is not None:
|
|
||||||
iurl = 'http://' + m.group('server') + '/' + m.group('path')
|
|
||||||
#print 'working! new url: ' + iurl
|
|
||||||
tag['href'] = iurl
|
|
||||||
#else:
|
|
||||||
#print 'not good'
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
base = 'http://news.hotpot.hk/fruit'
|
feeds = []
|
||||||
soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
|
ul = soup.find(attrs={'class':'menu'})
|
||||||
|
sectionList = []
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
|
||||||
|
title = li.find('a', text=True).strip()
|
||||||
|
if not title == u'動新聞':
|
||||||
|
sectionList.append((title, a))
|
||||||
|
for title, url in sectionList:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
return feeds
|
||||||
|
|
||||||
#def feed_title(div):
|
def parse_section(self, url):
|
||||||
# return ''.join(div.findAll(text=True, recursive=False)).strip()
|
soup = self.index_to_soup(url)
|
||||||
|
ul = soup.find(attrs={'class':'list'})
|
||||||
|
current_articles = []
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = li.find('a', href=True)
|
||||||
|
title = li.find('p', text=True).strip()
|
||||||
|
if a is not None:
|
||||||
|
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
|
||||||
|
pass
|
||||||
|
return current_articles
|
||||||
|
|
||||||
articles = {}
|
def create_opf(self, feeds, dir=None):
|
||||||
key = None
|
if dir is None:
|
||||||
ans = []
|
dir = self.output_dir
|
||||||
for div in soup.findAll('li'):
|
title = self.short_title()
|
||||||
key = div.find(text=True, recursive=True);
|
if self.output_profile.periodical_date_in_title:
|
||||||
#if key == u'豪情':
|
title += strftime(self.timefmt)
|
||||||
# continue;
|
mi = MetaInformation(title, [__appname__])
|
||||||
|
mi.publisher = __appname__
|
||||||
|
mi.author_sort = __appname__
|
||||||
|
if self.publication_type:
|
||||||
|
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||||
|
mi.timestamp = nowf()
|
||||||
|
article_titles, aseen = [], set()
|
||||||
|
for f in feeds:
|
||||||
|
for a in f:
|
||||||
|
if a.title and a.title not in aseen:
|
||||||
|
aseen.add(a.title)
|
||||||
|
article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||||
|
|
||||||
print 'section=' + key
|
mi.comments = self.description
|
||||||
|
if not isinstance(mi.comments, unicode):
|
||||||
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
|
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||||
|
'\n\n'.join(article_titles))
|
||||||
|
|
||||||
articles[key] = []
|
language = canonicalize_lang(self.language)
|
||||||
|
if language is not None:
|
||||||
|
mi.language = language
|
||||||
|
# This one affects the pub date shown in kindle title
|
||||||
|
#mi.pubdate = nowf()
|
||||||
|
# now appears to need the time field to be > 12.00noon as well
|
||||||
|
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||||
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
|
||||||
ans.append(key)
|
opf = OPFCreator(dir, mi)
|
||||||
|
# Add mastheadImage entry to <guide> section
|
||||||
|
mp = getattr(self, 'masthead_path', None)
|
||||||
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
|
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||||
|
ref.type = 'masthead'
|
||||||
|
ref.title = 'Masthead Image'
|
||||||
|
opf.guide.append(ref)
|
||||||
|
|
||||||
a = div.find('a', href=True)
|
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||||
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
if not a:
|
# Get cover
|
||||||
continue
|
cpath = getattr(self, 'cover_path', None)
|
||||||
|
if cpath is None:
|
||||||
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
|
if self.default_cover(pf):
|
||||||
|
cpath = pf.name
|
||||||
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
|
opf.cover = cpath
|
||||||
|
manifest.append(cpath)
|
||||||
|
|
||||||
url = base + '/' + a['href']
|
# Get masthead
|
||||||
print 'url=' + url
|
mpath = getattr(self, 'masthead_path', None)
|
||||||
|
if mpath is not None and os.access(mpath, os.R_OK):
|
||||||
|
manifest.append(mpath)
|
||||||
|
|
||||||
if not articles.has_key(key):
|
opf.create_manifest_from_files_in(manifest)
|
||||||
articles[key] = []
|
for mani in opf.manifest:
|
||||||
else:
|
if mani.path.endswith('.ncx'):
|
||||||
# sub page
|
mani.id = 'ncx'
|
||||||
subSoup = self.index_to_soup(url)
|
if mani.path.endswith('mastheadImage.jpg'):
|
||||||
|
mani.id = 'masthead-image'
|
||||||
|
|
||||||
for subDiv in subSoup.findAll('li'):
|
entries = ['index.html']
|
||||||
subA = subDiv.find('a', href=True)
|
toc = TOC(base_path=dir)
|
||||||
subTitle = subDiv.find(text=True, recursive=True)
|
self.play_order_counter = 0
|
||||||
subUrl = base + '/' + subA['href']
|
self.play_order_map = {}
|
||||||
|
|
||||||
print 'subUrl' + subUrl
|
|
||||||
|
|
||||||
articles[key].append(
|
|
||||||
dict(title=subTitle,
|
|
||||||
url=subUrl,
|
|
||||||
date='',
|
|
||||||
description='',
|
|
||||||
content=''))
|
|
||||||
|
|
||||||
|
|
||||||
# elif div['class'] in ['story', 'story headline']:
|
def feed_index(num, parent):
|
||||||
# a = div.find('a', href=True)
|
f = feeds[num]
|
||||||
# if not a:
|
for j, a in enumerate(f):
|
||||||
# continue
|
if getattr(a, 'downloaded', False):
|
||||||
# url = re.sub(r'\?.*', '', a['href'])
|
adir = 'feed_%d/article_%d/'%(num, j)
|
||||||
# url += '?pagewanted=all'
|
auth = a.author
|
||||||
# title = self.tag_to_string(a, use_alt=True).strip()
|
if not auth:
|
||||||
# description = ''
|
auth = None
|
||||||
# pubdate = strftime('%a, %d %b')
|
desc = a.text_summary
|
||||||
# summary = div.find(True, attrs={'class':'summary'})
|
if not desc:
|
||||||
# if summary:
|
desc = None
|
||||||
# description = self.tag_to_string(summary, use_alt=False)
|
else:
|
||||||
#
|
desc = self.description_limiter(desc)
|
||||||
# feed = key if key is not None else 'Uncategorized'
|
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||||
# if not articles.has_key(feed):
|
entries.append('%sindex.html'%adir)
|
||||||
# articles[feed] = []
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
# if not 'podcasts' in url:
|
if po is None:
|
||||||
# articles[feed].append(
|
self.play_order_counter += 1
|
||||||
# dict(title=title, url=url, date=pubdate,
|
po = self.play_order_counter
|
||||||
# description=description,
|
parent.add_item('%sindex.html'%adir, None,
|
||||||
# content=''))
|
a.title if a.title else _('Untitled Article'),
|
||||||
# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
|
play_order=po, author=auth,
|
||||||
ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
|
description=desc, toc_thumbnail=tt)
|
||||||
return ans
|
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||||
|
for sp in a.sub_pages:
|
||||||
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
|
relp = sp[len(prefix):]
|
||||||
|
entries.append(relp.replace(os.sep, '/'))
|
||||||
|
last = sp
|
||||||
|
|
||||||
|
if os.path.exists(last):
|
||||||
|
with open(last, 'rb') as fi:
|
||||||
|
src = fi.read().decode('utf-8')
|
||||||
|
soup = BeautifulSoup(src)
|
||||||
|
body = soup.find('body')
|
||||||
|
if body is not None:
|
||||||
|
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||||
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
|
not self.has_single_feed,
|
||||||
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
|
center=self.center_navbar)
|
||||||
|
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||||
|
body.insert(len(body.contents), elem)
|
||||||
|
with open(last, 'wb') as fi:
|
||||||
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
|
if len(feeds) == 0:
|
||||||
|
raise Exception('All feeds are empty, aborting.')
|
||||||
|
|
||||||
|
if len(feeds) > 1:
|
||||||
|
for i, f in enumerate(feeds):
|
||||||
|
entries.append('feed_%d/index.html'%i)
|
||||||
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
|
if po is None:
|
||||||
|
self.play_order_counter += 1
|
||||||
|
po = self.play_order_counter
|
||||||
|
auth = getattr(f, 'author', None)
|
||||||
|
if not auth:
|
||||||
|
auth = None
|
||||||
|
desc = getattr(f, 'description', None)
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
|
else:
|
||||||
|
entries.append('feed_%d/index.html'%0)
|
||||||
|
feed_index(0, toc)
|
||||||
|
|
||||||
|
for i, p in enumerate(entries):
|
||||||
|
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||||
|
opf.create_spine(entries)
|
||||||
|
opf.set_toc(toc)
|
||||||
|
|
||||||
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     __author__ = 'Dave Asbury'
     cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
     oldest_article = 2
-    max_articles_per_feed = 12
+    max_articles_per_feed = 20
     linearize_tables = True
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
     auto_cleanup = True
     language = 'en_GB'
+    compress_news_images = True
     cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

     masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
@@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from collections import OrderedDict

@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
             title=self.tag_to_string(div.a).strip()
             url=div.a['href']
             soup0 = self.index_to_soup(url)
-            urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+            urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
             articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})

@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
             title=self.tag_to_string(div.a).strip()
             url=div.a['href']
             soup0 = self.index_to_soup(url)
-            urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+            urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
             articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})

         if articles:
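The Business Week fix above stops looking for a 'print tracked' list item and instead matches the print link by URL pattern. A small self-contained sketch of that BeautifulSoup idiom, using invented markup:

    import re
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div><a href="/articles/123/printer_friendly">Print</a></div>'  # invented example markup
    soup = BeautifulSoup(html)
    # first <a> whose href matches the pattern, as the updated recipe does
    link = soup.find('a', attrs={'href': re.compile('.*printer.*')})
    print link['href']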
@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
     __author__ = 'Dave Asbury'
     description = 'The official website of Countryfile Magazine'
-    # last updated 8/12/12
+    # last updated 19/10/12
     language = 'en_GB'
     oldest_article = 30
     max_articles_per_feed = 25
     remove_empty_feeds = True
     no_stylesheets = True
     auto_cleanup = True
+    compress_news_images = True
     ignore_duplicate_articles = {'title', 'url'}
     #articles_are_obfuscated = True
     #article_already_exists = False
@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

     masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

+    compress_news_images = True
     oldest_article = 1
-    max_articles_per_feed = 1
+    max_articles_per_feed = 12
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
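Several of the recipe tweaks in this commit simply add compress_news_images = True, which asks the news download system to recompress fetched images so the finished book stays small. The related knobs that appear later in this same commit (in the New Yorker recipe) are collected below as an illustrative sketch; the recipe name and values are examples, not recommendations.

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'                       # hypothetical recipe, for illustration only
        compress_news_images = True             # recompress downloaded images
        compress_news_images_auto_size = 8      # same value the New Yorker recipe in this commit uses
        scale_news_images_to_device = False     # do not rescale to the output profile's screen
        scale_news_images = (768, 1024)         # cap image dimensions instead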
recipes/diario_de_noticias.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
# vim:fileencoding=UTF-8

from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    title = 'Diário de Notícias'
    oldest_article = 7
    language = 'pt'
    __author__ = 'Jose Pinto'
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'}) ]
    remove_tags = [ dict(name='table', attrs={'class':'TabFerramentasInf'}) ]

    feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
             (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
             (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
             (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
             (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
             (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
             (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
             (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')
            ]
recipes/economia.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1314326622(BasicNewsRecipe):
    title = u'Economia'
    __author__ = 'Manish Bhattarai'
    description = 'Economia - Intelligence & Insight for ICAEW Members'
    language = 'en_GB'
    oldest_article = 7
    max_articles_per_feed = 25
    masthead_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
    cover_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
    no_stylesheets = True
    remove_empty_feeds = True
    remove_tags_before = dict(id='content')
    remove_tags_after = dict(id='stars-wrapper')
    remove_tags = [dict(attrs={'class':['floatR', 'sharethis', 'rating clearfix']})]
    feeds = [(u'News', u'http://feedity.com/icaew-com/VlNTVFRa.rss'),(u'Business', u'http://feedity.com/icaew-com/VlNTVFtS.rss'),(u'People', u'http://feedity.com/icaew-com/VlNTVFtX.rss'),(u'Opinion', u'http://feedity.com/icaew-com/VlNTVFtW.rss'),(u'Finance', u'http://feedity.com/icaew-com/VlNTVFtV.rss')]
@@ -8,6 +8,7 @@ import datetime
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict


 class FinancialTimes(BasicNewsRecipe):
     title = 'Financial Times (UK)'
@@ -93,7 +94,7 @@ class FinancialTimes(BasicNewsRecipe):
            try:
                urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
            except:
                continue
            title = self.tag_to_string(item)
            date = strftime(self.timefmt)
            articles.append({
|
|||||||
return articles
|
return articles
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = OrderedDict()
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
#dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||||
self.timefmt = ' [%s]'%dates
|
#self.timefmt = ' [%s]'%dates
|
||||||
wide = soup.find('div',attrs={'class':'wide'})
|
section_title = 'Untitled'
|
||||||
if not wide:
|
|
||||||
return feeds
|
for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
|
||||||
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
|
for section in column. findAll('div', attrs = {'class':'feedBox'}):
|
||||||
if not allsections:
|
sectiontitle=self.tag_to_string(section.find('h4'))
|
||||||
return feeds
|
if '...' not in sectiontitle: section_title=sectiontitle
|
||||||
count = 0
|
for article in section.ul.findAll('li'):
|
||||||
for item in allsections:
|
articles = []
|
||||||
count = count + 1
|
title=self.tag_to_string(article.a)
|
||||||
if self.test and count > 2:
|
url=article.a['href']
|
||||||
return feeds
|
articles.append({'title':title, 'url':url, 'description':'', 'date':''})
|
||||||
fitem = item.h3
|
|
||||||
if not fitem:
|
if articles:
|
||||||
fitem = item.h4
|
if section_title not in feeds:
|
||||||
ftitle = self.tag_to_string(fitem)
|
feeds[section_title] = []
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
feeds[section_title] += articles
|
||||||
feedarts = self.get_artlinks(item.ul)
|
|
||||||
feeds.append((ftitle,feedarts))
|
|
||||||
return feeds
|
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||||
|
return ans
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
items = ['promo-box','promo-title',
|
items = ['promo-box','promo-title',
|
||||||
@@ -174,9 +176,6 @@ class FinancialTimes(BasicNewsRecipe):
             count += 1
             tfile = PersistentTemporaryFile('_fa.html')
             tfile.write(html)
             tfile.close()
             self.temp_files.append(tfile)
             return tfile.name
-
-    def cleanup(self):
-        self.browser.open('https://registration.ft.com/registration/login/logout?location=')
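The reworked Financial Times parse_index above collects articles into an OrderedDict keyed by section title, so sections come out in the order they are first seen on the index page. A stripped-down sketch of the same grouping pattern with invented data:

    from collections import OrderedDict

    # (section, title) pairs in the order they appear on the page -- invented data
    found = [('World', 'Article A'), ('Companies', 'Article B'), ('World', 'Article C')]

    feeds = OrderedDict()
    for section_title, title in found:
        if section_title not in feeds:
            feeds[section_title] = []
        feeds[section_title].append({'title': title, 'url': '', 'description': '', 'date': ''})

    # calibre's parse_index must return a list of (section, list-of-articles) tuples
    ans = [(key, val) for key, val in feeds.items()]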
@@ -5,7 +5,6 @@ __license__ = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 import datetime
 import re
-from calibre.ebooks.BeautifulSoup import Comment


 class forbes_pl(BasicNewsRecipe):
     title = u'Forbes.pl'
@@ -26,9 +25,9 @@ class forbes_pl(BasicNewsRecipe):
     pages_count = 4
     keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
     remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

     feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

     '''def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
@@ -51,4 +50,4 @@ class forbes_pl(BasicNewsRecipe):
        appendtag.insert(pos, pagetext)
        if cleanup:
            for r in appendtag.findAll(attrs={'class':'paginator'}):
                r.extract()'''
recipes/galaxys_edge.recipe (new file, 108 lines)
@@ -0,0 +1,108 @@
from __future__ import with_statement

__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

from calibre.web.feeds.news import BasicNewsRecipe


class GalaxyEdge(BasicNewsRecipe):
    title = u'The Galaxy\'s Edge'
    language = 'en'

    oldest_article = 7
    __author__ = 'Krittika Goyal'
    no_stylesheets = True

    auto_cleanup = True

    #keep_only_tags = [dict(id='content')]
    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
        #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
        #'slidebox', 'th_footer'])]

    extra_css = '.photo-caption { font-size: smaller }'

    def parse_index(self):
        soup = self.index_to_soup('http://www.galaxysedge.com/')
        main = soup.find('table', attrs={'width':'911'})
        toc = main.find('td', attrs={'width':'225'})

        current_section = None
        current_articles = []
        feeds = []
        c = 0
        for x in toc.findAll(['p']):
            c = c+1
            if c == 5:
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                edwo = x.find('a')
                current_section = self.tag_to_string(edwo)
                current_articles = []
                self.log('\tFound section:', current_section)
                title = self.tag_to_string(edwo)
                url = edwo.get('href', True)
                url = 'http://www.galaxysedge.com/'+url
                print(title)
                print(c)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})
            elif c>5:
                current_section = self.tag_to_string(x.find('b'))
                current_articles = []
                self.log('\tFound section:', current_section)
                for y in x.findAll('a'):
                    title = self.tag_to_string(y)
                    url = y.get('href', True)
                    url = 'http://www.galaxysedge.com/'+url
                    print(title)
                    if not url or not title:
                        continue
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds


    #def preprocess_raw_html(self, raw, url):
        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')

    #def postprocess_html(self, soup, first_fetch):
        #for t in soup.findAll(['table', 'tr', 'td','center']):
            #t.name = 'div'
        #return soup

    #def parse_index(self):
        #today = time.strftime('%Y-%m-%d')
        #soup = self.index_to_soup(
            #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
        #div = soup.find(id='left-column')
        #feeds = []
        #current_section = None
        #current_articles = []
        #for x in div.findAll(['h3', 'div']):
            #if current_section and x.get('class', '') == 'tpaper':
                #a = x.find('a', href=True)
                #if a is not None:
                    #current_articles.append({'url':a['href']+'?css=print',
                        #'title':self.tag_to_string(a), 'date': '',
                        #'description':''})
            #if x.name == 'h3':
                #if current_section and current_articles:
                    #feeds.append((current_section, current_articles))
                #current_section = self.tag_to_string(x)
                #current_articles = []
        #return feeds
@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
|
||||||
from datetime import date, timedelta
|
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
|
|||||||
timefmt = ' [%B %Y]'
|
timefmt = ' [%B %Y]'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
# recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
|
||||||
# ' to log into their website. This is unsupported in calibre, so'
|
|
||||||
# ' this recipe has been disabled. If you would like to see '
|
|
||||||
# ' HBR supported in calibre, contact hbr.org and ask them'
|
|
||||||
# ' to provide a javascript free login method.')
|
|
||||||
|
|
||||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
||||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
||||||
|
|
||||||
INDEX = 'http://hbr.org/archive-toc/BR'
|
INDEX = 'http://hbr.org'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
'superNavHeadContainer', 'hbrDisqus',
|
'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
|
||||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
|
|||||||
if url.endswith('/ar/1'):
|
if url.endswith('/ar/1'):
|
||||||
return url[:-1]+'pr'
|
return url[:-1]+'pr'
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
|
||||||
# return self.index_to_soup(open('/t/toc.html').read())
|
|
||||||
|
|
||||||
today = date.today()
|
|
||||||
future = today + timedelta(days=30)
|
|
||||||
past = today - timedelta(days=30)
|
|
||||||
for x in [x.strftime('%y%m') for x in (future, today, past)]:
|
|
||||||
url = self.INDEX + x
|
|
||||||
soup = self.index_to_soup(url)
|
|
||||||
if (not soup.find(text='Issue Not Found') and not soup.find(
|
|
||||||
text="We're Sorry. There was an error processing your request")
|
|
||||||
and 'Exception: java.io.FileNotFoundException' not in
|
|
||||||
unicode(soup)):
|
|
||||||
return soup
|
|
||||||
raise Exception('Could not find current issue')
|
|
||||||
|
|
||||||
def hbr_parse_toc(self, soup):
|
def hbr_parse_toc(self, soup):
|
||||||
feeds = []
|
feeds = []
|
||||||
current_section = None
|
current_section = None
|
||||||
@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
articles.append({'title':title, 'url':url, 'description':desc,
|
articles.append({'title':title, 'url':url, 'description':desc,
|
||||||
'date':''})
|
'date':''})
|
||||||
|
|
||||||
|
if current_section is not None and articles:
|
||||||
|
feeds.append((current_section, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.hbr_get_toc()
|
soup0 = self.index_to_soup('http://hbr.org/magazine')
|
||||||
# open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
|
||||||
|
#find date & cover
|
||||||
|
self.cover_url=datencover.img['src']
|
||||||
|
dates=self.tag_to_string(datencover.img['alt'])
|
||||||
|
self.timefmt = u' [%s]'%dates
|
||||||
|
soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
|
||||||
feeds = self.hbr_parse_toc(soup)
|
feeds = self.hbr_parse_toc(soup)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
index = 'http://hbr.org/current'
|
|
||||||
soup = self.index_to_soup(index)
|
|
||||||
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
|
|
||||||
|
|
||||||
if link_item:
|
|
||||||
cover_url = 'http://hbr.org' + link_item['src']
|
|
||||||
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,33 +1,23 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe

 class AListApart (BasicNewsRecipe):
-    __author__ = u'Marc Busqué <marc@lamarciana.com>'
+    __author__ = 'Marc Busqué <marc@lamarciana.com>'
     __url__ = 'http://www.lamarciana.com'
-    __version__ = '1.0'
+    __version__ = '2.0'
     __license__ = 'GPL v3'
-    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
     title = u'A List Apart'
-    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
+    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.'
     language = 'en'
     tags = 'web development, software'
     oldest_article = 120
     remove_empty_feeds = True
-    no_stylesheets = True
     encoding = 'utf8'
     cover_url = u'http://alistapart.com/pix/alalogo.gif'
-    keep_only_tags = [
-        dict(name='div', attrs={'id': 'content'})
-    ]
-    remove_tags = [
-        dict(name='ul', attrs={'id': 'metastuff'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'id': 'learnmore'}),
-    ]
-    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
-    extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
+    extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

     feeds = [
-        (u'A List Apart', u'http://www.alistapart.com/site/rss'),
+        (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
     ]
@@ -6,10 +6,10 @@ import time

 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     title = u'Metro UK'
-    description = 'News as provided by The Metro -UK'
+    description = 'News from The Metro, UK'
     #timefmt = ''
-    __author__ = 'fleclerc & Dave Asbury'
-    #last update 20/1/13
+    __author__ = 'Dave Asbury'
+    #last update 4/4/13
     #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

     cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
@@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

     language = 'en_GB'
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
+    compress_news_images = True
     def parse_index(self):
         articles = {}
         key = None
@@ -1,64 +1,44 @@
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
newyorker.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||||
|
|
||||||
class NewYorker(BasicNewsRecipe):
|
class NewYorker(BasicNewsRecipe):
|
||||||
title = 'The New Yorker'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'The best of US journalism'
|
|
||||||
oldest_article = 15
|
|
||||||
language = 'en'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
publisher = 'Conde Nast Publications'
|
|
||||||
category = 'news, politics, USA'
|
|
||||||
encoding = 'cp1252'
|
|
||||||
publication_type = 'magazine'
|
|
||||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
|
||||||
extra_css = """
|
|
||||||
body {font-family: "Times New Roman",Times,serif}
|
|
||||||
.articleauthor{color: #9F9F9F;
|
|
||||||
font-family: Arial, sans-serif;
|
|
||||||
font-size: small;
|
|
||||||
text-transform: uppercase}
|
|
||||||
.rubric,.dd,h6#credit{color: #CD0021;
|
|
||||||
font-family: Arial, sans-serif;
|
|
||||||
font-size: small;
|
|
||||||
text-transform: uppercase}
|
|
||||||
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
|
||||||
.dd,h6#credit{color: gray}
|
|
||||||
.c{display: block}
|
|
||||||
.caption,h2#articleintro{font-style: italic}
|
|
||||||
.caption{font-size: small}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment' : description
|
|
||||||
, 'tags' : category
|
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'pagebody'})]
|
title = u'New Yorker Magazine'
|
||||||
remove_tags = [
|
newyorker_prefix = 'http://m.newyorker.com'
|
||||||
dict(name=['meta','iframe','base','link','embed','object'])
|
description = u'Content from the New Yorker website'
|
||||||
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons','social-utils-top','entry-keywords','entry-categories','utilsPrintEmail'] })
|
fp_tag = 'CAN_TC'
|
||||||
,dict(attrs={'id':['show-header','show-footer'] })
|
|
||||||
]
|
|
||||||
remove_tags_after = dict(attrs={'class':'entry-content'})
|
|
||||||
remove_attributes = ['lang']
|
|
||||||
feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
masthead_url = 'http://www.newyorker.com/images/elements/print/newyorker_printlogo.gif'
|
||||||
return url + '?printable=true¤tPage=all'
|
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
compress_news_images = True
|
||||||
return url.strip()
|
compress_news_images_auto_size = 8
|
||||||
|
scale_news_images_to_device = False
|
||||||
|
scale_news_images = (768, 1024)
|
||||||
|
|
||||||
|
url_list = []
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'Nick Redding'
|
||||||
|
no_stylesheets = True
|
||||||
|
timefmt = ' [%b %d]'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
extra_css = '''
|
||||||
|
.byline { font-size:xx-small; font-weight: bold;}
|
||||||
|
h3 { margin-bottom: 6px; }
|
||||||
|
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
|
'''
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':re.compile('pagebody')})]
|
||||||
|
|
||||||
|
remove_tags = [{'class':'socialUtils'},{'class':'entry-keywords'}]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
||||||
@ -68,13 +48,233 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def fixChars(self,string):
|
||||||
for item in soup.findAll(style=True):
|
# Replace lsquo (\x91)
|
||||||
del item['style']
|
fixed = re.sub("\x91","‘",string)
|
||||||
auth = soup.find(attrs={'id':'articleauthor'})
|
# Replace rsquo (\x92)
|
||||||
if auth:
|
fixed = re.sub("\x92","’",fixed)
|
||||||
alink = auth.find('a')
|
# Replace ldquo (\x93)
|
||||||
if alink and alink.string is not None:
|
fixed = re.sub("\x93","“",fixed)
|
||||||
txt = alink.string
|
# Replace rdquo (\x94)
|
||||||
alink.replaceWith(txt)
|
fixed = re.sub("\x94","”",fixed)
|
||||||
|
# Replace ndash (\x96)
|
||||||
|
fixed = re.sub("\x96","–",fixed)
|
||||||
|
# Replace mdash (\x97)
|
||||||
|
fixed = re.sub("\x97","—",fixed)
|
||||||
|
fixed = re.sub("’","’",fixed)
|
||||||
|
return fixed
|
||||||
|
|
||||||
|
def massageNCXText(self, description):
|
||||||
|
# Kindle TOC descriptions won't render certain characters
|
||||||
|
if description:
|
||||||
|
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||||
|
# Replace '&' with '&'
|
||||||
|
massaged = re.sub("&","&", massaged)
|
||||||
|
return self.fixChars(massaged)
|
||||||
|
else:
|
||||||
|
return description
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first:
|
||||||
|
picdiv = soup.find('body').find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
||||||
|
xtitle = article.text_summary.strip()
|
||||||
|
if len(xtitle) == 0:
|
||||||
|
desc = soup.find('meta',attrs={'property':'og:description'})
|
||||||
|
if desc is not None:
|
||||||
|
article.summary = article.text_summary = desc['content']
|
||||||
|
shortparagraph = ""
|
||||||
|
## try:
|
||||||
|
if len(article.text_summary.strip()) == 0:
|
||||||
|
articlebodies = soup.findAll('div',attrs={'class':'entry-content'})
|
||||||
|
if articlebodies:
|
||||||
|
for articlebody in articlebodies:
|
||||||
|
if articlebody:
|
||||||
|
paras = articlebody.findAll('p')
|
||||||
|
for p in paras:
|
||||||
|
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
|
||||||
|
#account for blank paragraphs and short paragraphs by appending them to longer ones
|
||||||
|
if len(refparagraph) > 0:
|
||||||
|
if len(refparagraph) > 70: #approximately one line of text
|
||||||
|
newpara = shortparagraph + refparagraph
|
||||||
|
article.summary = article.text_summary = newpara.strip()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
shortparagraph = refparagraph + " "
|
||||||
|
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
|
||||||
|
shortparagraph = shortparagraph + "- "
|
||||||
|
else:
|
||||||
|
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
|
||||||
|
## except:
|
||||||
|
## self.log("Error creating article descriptions")
|
||||||
|
## return
|
||||||
|
|
||||||
|
|
||||||
|
def strip_anchors(self,soup):
|
||||||
|
paras = soup.findAll(True)
|
||||||
|
for para in paras:
|
||||||
|
aTags = para.findAll('a')
|
||||||
|
for a in aTags:
|
||||||
|
if a.img is None:
|
||||||
|
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def preprocess_html(self,soup):
|
||||||
|
dateline = soup.find('div','published')
|
||||||
|
byline = soup.find('div','byline')
|
||||||
|
title = soup.find('h1','entry-title')
|
||||||
|
if title is None:
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
if byline is None:
|
||||||
|
title.append(dateline)
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
byline.append(dateline)
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
def load_global_nav(self,soup):
|
||||||
|
seclist = []
|
||||||
|
ul = soup.find('ul',attrs={'id':re.compile('global-nav-menu')})
|
||||||
|
if ul is not None:
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
if li.a is not None:
|
||||||
|
securl = li.a['href']
|
||||||
|
if securl != '/' and securl != '/magazine' and securl.startswith('/'):
|
||||||
|
seclist.append((self.tag_to_string(li.a),self.newyorker_prefix+securl))
|
||||||
|
return seclist
|
||||||
|
|
||||||
|
def exclude_url(self,url):
|
||||||
|
if url in self.url_list:
|
||||||
|
return True
|
||||||
|
if not url.endswith('html'):
|
||||||
|
return True
|
||||||
|
if 'goings-on-about-town-app' in url:
|
||||||
|
return True
|
||||||
|
if 'something-to-be-thankful-for' in url:
|
||||||
|
return True
|
||||||
|
if '/shouts/' in url:
|
||||||
|
return True
|
||||||
|
if 'out-loud' in url:
|
||||||
|
return True
|
||||||
|
if '/rss/' in url:
|
||||||
|
return True
|
||||||
|
if '/video-' in url:
|
||||||
|
return True
|
||||||
|
self.url_list.append(url)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def load_index_page(self,soup):
|
||||||
|
article_list = []
|
||||||
|
for div in soup.findAll('div',attrs={'class':re.compile('^rotator')}):
|
||||||
|
h2 = div.h2
|
||||||
|
if h2 is not None:
|
||||||
|
a = h2.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
byline = h2.span
|
||||||
|
if byline is not None:
|
||||||
|
author = self.tag_to_string(byline)
|
||||||
|
if author.startswith('by '):
|
||||||
|
author.replace('by ','')
|
||||||
|
byline.extract()
|
||||||
|
else:
|
||||||
|
author = ''
|
||||||
|
if h2.br is not None:
|
||||||
|
h2.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h2)
|
||||||
|
desc = div.find(attrs={'class':['rotator-ad-body','feature-blurb-text']})
|
||||||
|
if desc is not None:
|
||||||
|
description = self.tag_to_string(desc)
|
||||||
|
else:
|
||||||
|
description = ''
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description=description,author=author,content=''))
|
||||||
|
ul = div.find('ul','feature-blurb-links')
|
||||||
|
if ul is not None:
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = li.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
if a.br is not None:
|
||||||
|
a.br.replaceWith(' ')
|
||||||
|
title = '>>'+self.tag_to_string(a)
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||||
|
for h3 in soup.findAll('h3','header'):
|
||||||
|
a = h3.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
byline = h3.span
|
||||||
|
if byline is not None:
|
||||||
|
author = self.tag_to_string(byline)
|
||||||
|
if author.startswith('by '):
|
||||||
|
author = author.replace('by ','')
|
||||||
|
byline.extract()
|
||||||
|
else:
|
||||||
|
author = ''
|
||||||
|
if h3.br is not None:
|
||||||
|
h3.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h3).strip()
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author=author,content=''))
|
||||||
|
return article_list
|
||||||
|
|
||||||
|
def load_global_section(self,securl):
|
||||||
|
article_list = []
|
||||||
|
try:
|
||||||
|
soup = self.index_to_soup(securl)
|
||||||
|
except:
|
||||||
|
return article_list
|
||||||
|
if '/blogs/' not in securl:
|
||||||
|
return self.load_index_page(soup)
|
||||||
|
for div in soup.findAll('div',attrs={'id':re.compile('^entry')}):
|
||||||
|
h3 = div.h3
|
||||||
|
if h3 is not None:
|
||||||
|
a = h3.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
if h3.br is not None:
|
||||||
|
h3.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h3)
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||||
|
return article_list
|
||||||
|
|
||||||
|
def filter_ans(self, ans) :
|
||||||
|
total_article_count = 0
|
||||||
|
idx = 0
|
||||||
|
idx_max = len(ans)-1
|
||||||
|
while idx <= idx_max:
|
||||||
|
if True: #self.verbose
|
||||||
|
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
|
||||||
|
for article in ans[idx][1]:
|
||||||
|
total_article_count += 1
|
||||||
|
if True: #self.verbose
|
||||||
|
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
|
||||||
|
article['url'].replace('http://m.newyorker.com','').encode('cp1252','replace')))
|
||||||
|
idx = idx+1
|
||||||
|
self.log( "Queued %d articles" % total_article_count )
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
ans = []
|
||||||
|
try:
|
||||||
|
soup = self.index_to_soup(self.newyorker_prefix)
|
||||||
|
except:
|
||||||
|
return ans
|
||||||
|
seclist = self.load_global_nav(soup)
|
||||||
|
ans.append(('Front Page',self.load_index_page(soup)))
|
||||||
|
for (sectitle,securl) in seclist:
|
||||||
|
ans.append((sectitle,self.load_global_section(securl)))
|
||||||
|
return self.filter_ans(ans)
|
||||||
|
|
||||||
|
@@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
 max_articles_per_feed = 20
 #auto_cleanup = True
 language = 'en_GB'
+compress_news_images = True

 def get_cover_url(self):
 soup = self.index_to_soup('http://www.nme.com/component/subscribe')
@@ -27,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
 br.open_novisit(cov2)
 cover_url = str(cov2)
 except:
 cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
 return cover_url

 masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
@@ -1,30 +1,30 @@
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 __license__ = 'GPL v3'
-__copyright__ = '2011, Eddie Lau'
+__copyright__ = '2011-2013, Eddie Lau'

 # data source: normal, mobile
 __Source__ = 'mobile'
 # please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
 # Turn below to True if your device supports display of CJK titles (Default: False)
-__UseChineseTitle__ = False
+__UseChineseTitle__ = True
 # Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
 # Set it to True if you want to include a summary in Kindle's article view (Default: False)
-__IncludeSummary__ = False
+__IncludeSummary__ = True
 # Set it to True if you want thumbnail images in Kindle's article view (Default: True)
 __IncludeThumbnails__ = True


 '''
 Change Log:
+2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
 2011/12/29 -- first version done
-TODO:
-* use alternative source at http://m.singtao.com/index.php
 '''

 from calibre.utils.date import now as nowf
 import os, datetime, re
-from datetime import date
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
 title = 'Sing Tao Daily - Hong Kong'
 description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
 category = 'Chinese, News, Hong Kong'
-extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
+extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
 masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
 if __Source__ == 'normal':
 keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
@@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
 return self.get_dtlocal().strftime("%d")

 def get_cover_url(self):
-#cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
-base = 2660
-todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
-diff = todaydate - date(2011, 12, 29)
-base = base + int(diff.total_seconds()/(3600*24))
-cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
+soup = self.index_to_soup('http://m.singtao.com/')
+cover = soup.find(attrs={'class':'special'}).get('src', False)
 br = BasicNewsRecipe.get_browser(self)
 try:
 br.open(cover)
 except:
-cover = 'http://singtao.com/images/stlogo.gif'
+cover = None
 return cover

 def parse_index(self):
@@ -289,11 +285,11 @@ class STHKRecipe(BasicNewsRecipe):
 # the text may or may not be enclosed in <p></p> tag
 paras = articlebody.findAll('p')
 if not paras:
 paras = articlebody
 textFound = False
 for p in paras:
 if not textFound:
-summary_candidate = self.tag_to_string(p).strip()
+summary_candidate = self.tag_to_string(p).strip().replace(' ', '')
 if len(summary_candidate) > 0:
 summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
 article.summary = article.text_summary = summary_candidate
@@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):


+
@@ -2,6 +2,7 @@

 __license__ = 'GPL v3'

+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image

@@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
 no_stylesheets = True

 ignore_duplicate_articles = {'title','url'}
+compress_news_images = True

 extra_css = '''
 body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
@@ -36,47 +36,21 @@ class TheOnion(BasicNewsRecipe):
 , 'publisher': publisher
 , 'language' : language
 }
+keep_only_tags = [dict(name='article', attrs={'class':'full-article'})]
-keep_only_tags = [
-dict(name='h2', attrs={'class':['section_title','title']})
-,dict(attrs={'class':['main_image','meta','article_photo_lead','article_body']})
-,dict(attrs={'id':['entries']})
-]
-remove_attributes=['lang','rel']
-remove_tags_after = dict(attrs={'class':['article_body','feature_content']})
 remove_tags = [
-dict(name=['object','link','iframe','base','meta'])
-,dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']})
-,dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']})
-]
+dict(name=['nav', 'aside', 'section', 'meta']),
+{'attrs':{'class':lambda x: x and ('share-tools' in x or 'ad-zone' in x)}},
+]


 feeds = [
 (u'Daily' , u'http://feeds.theonion.com/theonion/daily' )
 ,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' )
 ]

-def get_article_url(self, article):
-artl = BasicNewsRecipe.get_article_url(self, article)
-if artl.startswith('http://www.theonion.com/audio/'):
-artl = None
-return artl
+def preprocess_html(self, soup, *args):
+for img in soup.findAll('img', attrs={'data-src':True}):
+if img['data-src']:
+img['src'] = img['data-src']

-def preprocess_html(self, soup):
-for item in soup.findAll(style=True):
-del item['style']
-for item in soup.findAll('a'):
-limg = item.find('img')
-if item.string is not None:
-str = item.string
-item.replaceWith(str)
-else:
-if limg:
-item.name = 'div'
-item.attrs = []
-if not limg.has_key('alt'):
-limg['alt'] = 'image'
-else:
-str = self.tag_to_string(item)
-item.replaceWith(str)
 return soup

recipes/universe_today.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class UniverseToday(BasicNewsRecipe):
    title = u'Universe Today'
    language = 'en'
    description = u'Space and astronomy news.'
    __author__ = 'seird'
    publisher = u'universetoday.com'
    category = 'science, astronomy, news, rss'
    oldest_article = 7
    max_articles_per_feed = 40
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [(u'Universe Today', u'http://feeds.feedburner.com/universetoday/pYdq')]
@@ -6,17 +6,62 @@ __license__ = 'GPL v3'
 www.canada.com
 '''
 import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe

 from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup


 class TimesColonist(BasicNewsRecipe):

+# Customization -- remove sections you don't want.
+# If your e-reader is an e-ink Kindle and your output profile is
+# set properly this recipe will not include images because the
+# resulting file is too large. If you have one of these and want
+# images you can set kindle_omit_images = False
+# and remove sections (typically the e-ink Kindles will
+# work with about a dozen of these, but your mileage may vary).
+
+kindle_omit_images = True
+
+section_list = [
+('','Web Front Page'),
+('news/','News Headlines'),
+('news/b-c/','BC News'),
+('news/national/','National News'),
+('news/world/','World News'),
+('opinion/','Opinion'),
+('opinion/letters/','Letters'),
+('business/','Business'),
+('business/money/','Money'),
+('business/technology/','Technology'),
+('business/working/','Working'),
+('sports/','Sports'),
+('sports/hockey/','Hockey'),
+('sports/football/','Football'),
+('sports/basketball/','Basketball'),
+('sports/golf/','Golf'),
+('entertainment/','entertainment'),
+('entertainment/go/','Go!'),
+('entertainment/music/','Music'),
+('entertainment/books/','Books'),
+('entertainment/Movies/','Movies'),
+('entertainment/television/','Television'),
+('life/','Life'),
+('life/health/','Health'),
+('life/travel/','Travel'),
+('life/driving/','Driving'),
+('life/homes/','Homes'),
+('life/food-drink/','Food & Drink')
+]
+
 title = u'Victoria Times Colonist'
 url_prefix = 'http://www.timescolonist.com'
 description = u'News from Victoria, BC'
 fp_tag = 'CAN_TC'

+masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'

 url_list = []
 language = 'en_CA'
 __author__ = 'Nick Redding'
@@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
 .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
 '''
 keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
-remove_tags = [{'class':'comments'},
-{'id':'photocredit'},
-dict(name='div', attrs={'class':re.compile('top.controls')}),
-dict(name='div', attrs={'class':re.compile('social')}),
-dict(name='div', attrs={'class':re.compile('tools')}),
-dict(name='div', attrs={'class':re.compile('bottom.tools')}),
-dict(name='div', attrs={'class':re.compile('window')}),
-dict(name='div', attrs={'class':re.compile('related.news.element')})]

+def __init__(self, options, log, progress_reporter):
+self.remove_tags = [{'class':'comments'},
+{'id':'photocredit'},
+dict(name='div', attrs={'class':re.compile('top.controls')}),
+dict(name='div', attrs={'class':re.compile('^comments')}),
+dict(name='div', attrs={'class':re.compile('social')}),
+dict(name='div', attrs={'class':re.compile('tools')}),
+dict(name='div', attrs={'class':re.compile('bottom.tools')}),
+dict(name='div', attrs={'class':re.compile('window')}),
+dict(name='div', attrs={'class':re.compile('related.news.element')})]
+print("PROFILE NAME = "+options.output_profile.short_name)
+if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
+self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
+BasicNewsRecipe.__init__(self, options, log, progress_reporter)
+
 def get_cover_url(self):
 from datetime import timedelta, date
@@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
 def preprocess_html(self,soup):
 byline = soup.find('p',attrs={'class':re.compile('ancillary')})
 if byline is not None:
-byline.find('a')
 authstr = self.tag_to_string(byline,False)
 authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
 authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
@@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
 atag = htag.a
 if atag is not None:
 url = atag['href']
-#print("Checking "+url)
-if atag['href'].startswith('/'):
-url = self.url_prefix+atag['href']
+url = url.strip()
+# print("Checking >>"+url+'<<\n\r')
+if url.startswith('/'):
+url = self.url_prefix+url
 if url in self.url_list:
 return
 self.url_list.append(url)
@@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
 if dtag is not None:
 description = self.tag_to_string(dtag,False)
 article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
-#print(sectitle+title+": description = "+description+" URL="+url)
+print(sectitle+title+": description = "+description+" URL="+url+'\n\r')

 def add_section_index(self,ans,securl,sectitle):
-print("Add section url="+self.url_prefix+'/'+securl)
+print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
 try:
 soup = self.index_to_soup(self.url_prefix+'/'+securl)
 except:
@@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):

 def parse_index(self):
 ans = []
-ans = self.add_section_index(ans,'','Web Front Page')
-ans = self.add_section_index(ans,'news/','News Headlines')
-ans = self.add_section_index(ans,'news/b-c/','BC News')
-ans = self.add_section_index(ans,'news/national/','Natioanl News')
-ans = self.add_section_index(ans,'news/world/','World News')
-ans = self.add_section_index(ans,'opinion/','Opinion')
-ans = self.add_section_index(ans,'opinion/letters/','Letters')
-ans = self.add_section_index(ans,'business/','Business')
-ans = self.add_section_index(ans,'business/money/','Money')
-ans = self.add_section_index(ans,'business/technology/','Technology')
-ans = self.add_section_index(ans,'business/working/','Working')
-ans = self.add_section_index(ans,'sports/','Sports')
-ans = self.add_section_index(ans,'sports/hockey/','Hockey')
-ans = self.add_section_index(ans,'sports/football/','Football')
-ans = self.add_section_index(ans,'sports/basketball/','Basketball')
-ans = self.add_section_index(ans,'sports/golf/','Golf')
-ans = self.add_section_index(ans,'entertainment/','entertainment')
-ans = self.add_section_index(ans,'entertainment/go/','Go!')
-ans = self.add_section_index(ans,'entertainment/music/','Music')
-ans = self.add_section_index(ans,'entertainment/books/','Books')
-ans = self.add_section_index(ans,'entertainment/Movies/','movies')
-ans = self.add_section_index(ans,'entertainment/television/','Television')
-ans = self.add_section_index(ans,'life/','Life')
-ans = self.add_section_index(ans,'life/health/','Health')
-ans = self.add_section_index(ans,'life/travel/','Travel')
-ans = self.add_section_index(ans,'life/driving/','Driving')
-ans = self.add_section_index(ans,'life/homes/','Homes')
-ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
+for (url,title) in self.section_list:
+ans = self.add_section_index(ans,url,title)
 return ans

@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-import re

 from calibre.web.feeds.news import BasicNewsRecipe

@@ -51,8 +50,8 @@ class WysokieObcasyRecipe(BasicNewsRecipe):
 printVerString=articleURL1 + ',' + articleURL2
 s= baseURL + subPath + printVerString + '.html'
 return s

 def get_cover_url(self):
 soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
 self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
 return getattr(self, 'cover_url', self.cover_url)
@@ -357,7 +357,7 @@
 <xsl:apply-templates/>
 </xsl:template>

 <xsl:template match="rtf:table">
 <xsl:element name="table">
 <xsl:attribute name="id">
 <xsl:value-of select="generate-id(.)"/>
@@ -390,7 +390,6 @@


 <xsl:output method = "xml"/>

 <xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>


@@ -415,13 +414,11 @@
 </xsl:template>

 <xsl:template match="rtf:page-break">
-<xsl:element name="br">
-<xsl:attribute name="style">page-break-after:always</xsl:attribute>
-</xsl:element>
+<br style = "page-break-after:always"/>
 </xsl:template>

 <xsl:template match="rtf:hardline-break">
-<xsl:element name="br"/>
+<br/>
 </xsl:template>

 <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
@@ -445,7 +442,7 @@
 </xsl:template>

 <xsl:template match = "rtf:field-block">
 <xsl:apply-templates/>
 </xsl:template>

 <xsl:template match = "rtf:field[@type='hyperlink']">
@@ -472,9 +469,7 @@
 </xsl:template>

 <xsl:template match="rtf:pict">
-<xsl:element name="img">
-<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
-</xsl:element>
+<img src = "{@num}"/>
 </xsl:template>

 <xsl:template match="*">
@@ -47,6 +47,10 @@ binary_includes = [
 '/usr/lib/libgthread-2.0.so.0',
 '/usr/lib/libpng14.so.14',
 '/usr/lib/libexslt.so.0',
+# Ensure that libimobiledevice is compiled against openssl, not gnutls
+'/usr/lib/libimobiledevice.so.3',
+'/usr/lib/libusbmuxd.so.2',
+'/usr/lib/libplist.so.1',
 MAGICK_PREFIX+'/lib/libMagickWand.so.5',
 MAGICK_PREFIX+'/lib/libMagickCore.so.5',
 '/usr/lib/libgcrypt.so.11',
@@ -399,7 +399,8 @@ class Py2App(object):
 @flush
 def add_fontconfig(self):
 info('\nAdding fontconfig')
-for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
+for x in ('fontconfig.1', 'freetype.6', 'expat.1',
+'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
 src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
 self.install_dylib(src)
 dst = os.path.join(self.resources_dir, 'fonts')
@@ -12,13 +12,13 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-03-27 13:07+0000\n"
+"PO-Revision-Date: 2013-03-28 13:01+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-03-28 04:41+0000\n"
+"X-Launchpad-Export-Date: 2013-03-29 04:36+0000\n"
 "X-Generator: Launchpad (build 16546)\n"
 "Language: ca\n"

@@ -1884,7 +1884,7 @@ msgstr "Awera"

 #. name for aws
 msgid "Awyu; South"
-msgstr "Awyu meridional"
+msgstr "Awyu; meridional"

 #. name for awt
 msgid "Araweté"
@@ -1892,7 +1892,7 @@ msgstr "Araweté"

 #. name for awu
 msgid "Awyu; Central"
-msgstr "Awyu central"
+msgstr "Awyu; Central"

 #. name for awv
 msgid "Awyu; Jair"
@@ -4052,7 +4052,7 @@ msgstr "Buginès"

 #. name for buh
 msgid "Bunu; Younuo"
-msgstr "Bunu; Younuo"
+msgstr "Bunu; Younou"

 #. name for bui
 msgid "Bongili"
@@ -4308,7 +4308,7 @@ msgstr "Bwa"

 #. name for bwx
 msgid "Bunu; Bu-Nao"
-msgstr "Bunu; Bu-Nao"
+msgstr "Bunu; Bu Nao"

 #. name for bwy
 msgid "Bwamu; Cwi"
@@ -19804,7 +19804,7 @@ msgstr "Minoà"

 #. name for omo
 msgid "Utarmbung"
-msgstr ""
+msgstr "Utarmbung"

 #. name for omp
 msgid "Manipuri; Old"
@@ -20344,7 +20344,7 @@ msgstr "Pear"

 #. name for pcc
 msgid "Bouyei"
-msgstr ""
+msgstr "Buyí"

 #. name for pcd
 msgid "Picard"
@@ -20456,11 +20456,11 @@ msgstr "Pengo"

 #. name for peh
 msgid "Bonan"
-msgstr ""
+msgstr "Bonan"

 #. name for pei
 msgid "Chichimeca-Jonaz"
-msgstr ""
+msgstr "Chichimec"

 #. name for pej
 msgid "Pomo; Northern"
@@ -20484,7 +20484,7 @@ msgstr "Persa Antic"

 #. name for pep
 msgid "Kunja"
-msgstr ""
+msgstr "Kunja"

 #. name for peq
 msgid "Pomo; Southern"
@@ -20536,7 +20536,7 @@ msgstr "Pagi"

 #. name for pgk
 msgid "Rerep"
-msgstr ""
+msgstr "Rerep"

 #. name for pgl
 msgid "Irish; Primitive"
@@ -20624,7 +20624,7 @@ msgstr "Pima Baix"

 #. name for pib
 msgid "Yine"
-msgstr ""
+msgstr "Yine"

 #. name for pic
 msgid "Pinji"
@@ -20660,7 +20660,7 @@ msgstr "Pijao"

 #. name for pil
 msgid "Yom"
-msgstr ""
+msgstr "Yom"

 #. name for pim
 msgid "Powhatan"
@@ -20760,7 +20760,7 @@ msgstr "Llenguatge de signes pakistaní"

 #. name for pkt
 msgid "Maleng"
-msgstr ""
+msgstr "Maleng"

 #. name for pku
 msgid "Paku"
@@ -20768,7 +20768,7 @@ msgstr "Paku"

 #. name for pla
 msgid "Miani"
-msgstr ""
+msgstr "Miani"

 #. name for plb
 msgid "Polonombauk"
@@ -20804,7 +20804,7 @@ msgstr "Polci"

 #. name for plk
 msgid "Shina; Kohistani"
-msgstr ""
+msgstr "Shina; Kohistani"

 #. name for pll
 msgid "Palaung; Shwe"
@@ -20852,7 +20852,7 @@ msgstr "Palawà; Brooke"

 #. name for ply
 msgid "Bolyu"
-msgstr ""
+msgstr "Bolyu"

 #. name for plz
 msgid "Paluan"
@@ -20896,7 +20896,7 @@ msgstr "Algonquí Carolina"

 #. name for pml
 msgid "Lingua Franca"
-msgstr ""
+msgstr "Aljamia"

 #. name for pmm
 msgid "Pomo"
@@ -20924,7 +20924,7 @@ msgstr "Piemontès"

 #. name for pmt
 msgid "Tuamotuan"
-msgstr ""
+msgstr "Tuamotu"

 #. name for pmu
 msgid "Panjabi; Mirpur"
@@ -20972,7 +20972,7 @@ msgstr "Penrhyn"

 #. name for pni
 msgid "Aoheng"
-msgstr ""
+msgstr "Aoheng"

 #. name for pnm
 msgid "Punan Batu 1"
@@ -21008,7 +21008,7 @@ msgstr "Pontic"

 #. name for pnu
 msgid "Bunu; Jiongnai"
-msgstr ""
+msgstr "Bunu; Jiongnai"

 #. name for pnv
 msgid "Pinigura"
@@ -21100,7 +21100,7 @@ msgstr "Potavatomi"

 #. name for pov
 msgid "Crioulo; Upper Guinea"
-msgstr ""
+msgstr "Crioll guineà"

 #. name for pow
 msgid "Popoloca; San Felipe Otlaltepec"
@@ -21128,7 +21128,7 @@ msgstr "Paipai"

 #. name for ppk
 msgid "Uma"
-msgstr ""
+msgstr "Uma"

 #. name for ppl
 msgid "Pipil"
@@ -21144,7 +21144,7 @@ msgstr "Papapana"

 #. name for ppo
 msgid "Folopa"
-msgstr ""
+msgstr "Folopa"

 #. name for ppp
 msgid "Pelende"
@@ -21180,7 +21180,7 @@ msgstr "Malecite-Passamaquoddy"

 #. name for prb
 msgid "Lua'"
-msgstr ""
+msgstr "Lua"

 #. name for prc
 msgid "Parachi"
@@ -21220,7 +21220,7 @@ msgstr "Llenguatge de signes peruà"

 #. name for prm
 msgid "Kibiri"
-msgstr ""
+msgstr "Kibiri"

 #. name for prn
 msgid "Prasuni"
@@ -21272,7 +21272,7 @@ msgstr "Llenguatge de signes de Providencia"

 #. name for psa
 msgid "Awyu; Asue"
-msgstr ""
+msgstr "Awyu; Asue"

 #. name for psc
 msgid "Persian Sign Language"
@@ -21328,7 +21328,7 @@ msgstr "Llenguatge de signes portuguès"

 #. name for pss
 msgid "Kaulong"
-msgstr ""
+msgstr "Kaulong"

 #. name for pst
 msgid "Pashto; Central"
@@ -21376,11 +21376,11 @@ msgstr "Pìamatsina"

 #. name for ptt
 msgid "Enrekang"
-msgstr ""
+msgstr "Enrekang"

 #. name for ptu
 msgid "Bambam"
-msgstr ""
+msgstr "Bambam"

 #. name for ptv
 msgid "Port Vato"
@@ -29584,7 +29584,7 @@ msgstr ""

 #. name for yir
 msgid "Awyu; North"
-msgstr ""
+msgstr "Awyu; Septentrional"

 #. name for yis
 msgid "Yis"
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = u'calibre'
-numeric_version = (0, 9, 25)
+numeric_version = (0, 9, 26)
 __version__ = u'.'.join(map(unicode, numeric_version))
 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

@@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
-# from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

-plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]

 # }}}

@@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
 config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
+'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
 ])

 def is_disabled(plugin):
@@ -97,6 +97,12 @@ class TXTInput(InputFormatPlugin):
 if not ienc:
 ienc = 'utf-8'
 log.debug('No input encoding specified and could not auto detect using %s' % ienc)
+# Remove BOM from start of txt as its presence can confuse markdown
+import codecs
+for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
+if txt.startswith(bom):
+txt = txt[len(bom):]
+break
 txt = txt.decode(ienc, 'replace')

 # Replace entities
@@ -132,7 +132,7 @@ class Worker(Thread): # Get details {{{
 text()="Détails sur le produit" or \
 text()="Detalles del producto" or \
 text()="Detalhes do produto" or \
-text()="登録情報"]/../div[@class="content"]
+starts-with(text(), "登録情報")]/../div[@class="content"]
 '''
 # Editor: is for Spanish
 self.publisher_xpath = '''
@@ -235,6 +235,12 @@ class Worker(Thread): # Get details {{{
 msg = 'Failed to parse amazon details page: %r'%self.url
 self.log.exception(msg)
 return
+if self.domain == 'jp':
+for a in root.xpath('//a[@href]'):
+if 'black-curtain-redirect.html' in a.get('href'):
+self.url = 'http://amazon.co.jp'+a.get('href')
+self.log('Black curtain redirect found, following')
+return self.get_details()

 errmsg = root.xpath('//*[@id="errorMessage"]')
 if errmsg:
@@ -252,8 +258,8 @@ class Worker(Thread): # Get details {{{
 self.log.exception('Error parsing asin for url: %r'%self.url)
 asin = None
 if self.testing:
-import tempfile
+import tempfile, uuid
-with tempfile.NamedTemporaryFile(prefix=asin + '_',
+with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
 suffix='.html', delete=False) as f:
 f.write(raw)
 print ('Downloaded html for', asin, 'saved in', f.name)
@@ -499,7 +505,7 @@ class Worker(Thread): # Get details {{{
 def parse_language(self, pd):
 for x in reversed(pd.xpath(self.language_xpath)):
 if x.tail:
-raw = x.tail.strip()
+raw = x.tail.strip().partition(',')[0].strip()
 ans = self.lang_map.get(raw, None)
 if ans:
 return ans
@@ -1004,6 +1010,11 @@ if __name__ == '__main__': # tests {{{
 ] # }}}

 jp_tests = [ # {{{
+( # Adult filtering test
+{'identifiers':{'isbn':'4799500066'}},
+[title_test(u'Bitch Trap'),]
+),
+
 ( # isbn -> title, authors
 {'identifiers':{'isbn': '9784101302720' }},
 [title_test(u'精霊の守り人',
@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
|
|||||||
# Google covers are often poor quality (scans/errors) but they have high
|
# Google covers are often poor quality (scans/errors) but they have high
|
||||||
# resolution, so they trump covers from better sources. So make sure they
|
# resolution, so they trump covers from better sources. So make sure they
|
||||||
# are only used if no other covers are found.
|
# are only used if no other covers are found.
|
||||||
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
|
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
|
||||||
|
|
||||||
def create_log(ostream=None):
|
def create_log(ostream=None):
|
||||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||||
@@ -429,6 +429,40 @@ class Source(Plugin):
         mi.tags = list(map(fixcase, mi.tags))
         mi.isbn = check_isbn(mi.isbn)

+    def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
+        if not urls:
+            log('No images found for, title: %r and authors: %r'%(title, authors))
+            return
+        from threading import Thread
+        import time
+        if prefs_name:
+            urls = urls[:self.prefs[prefs_name]]
+        if get_best_cover:
+            urls = urls[:1]
+        log('Downloading %d covers'%len(urls))
+        workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
+        for w in workers:
+            w.daemon = True
+            w.start()
+        alive = True
+        start_time = time.time()
+        while alive and not abort.is_set() and time.time() - start_time < timeout:
+            alive = False
+            for w in workers:
+                if w.is_alive():
+                    alive = True
+                    break
+            abort.wait(0.1)
+
+    def download_image(self, url, timeout, log, result_queue):
+        try:
+            ans = self.browser.open_novisit(url, timeout=timeout).read()
+            result_queue.put((self, ans))
+            log('Downloaded cover from: %s'%url)
+        except Exception:
+            self.log.exception('Failed to download cover from: %r'%url)
+
     # }}}

     # Metadata API {{{
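A minimal sketch (not from this commit) of how a cover source opts into the new helper: declare can_get_multiple_covers and hand the scraped URLs to download_multiple_covers(). The Big Book Search plugin added below, and the slimmed-down Google Images plugin later in this diff, both follow this shape.

from calibre.ebooks.metadata.sources.base import Source

class ExampleCovers(Source):  # hypothetical plugin, for illustration only
    name = 'Example Covers'
    capabilities = frozenset(['cover'])
    can_get_multiple_covers = True

    def download_cover(self, log, result_queue, abort, title=None,
            authors=None, identifiers={}, timeout=30, get_best_cover=False):
        urls = ['http://example.com/cover1.jpg']  # normally scraped from a website
        self.download_multiple_covers(title, authors, urls, get_best_cover,
                timeout, result_queue, abort, log)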
58  src/calibre/ebooks/metadata/sources/big_book_search.py  Normal file
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source, Option
+
+def get_urls(br, tokens):
+    from urllib import quote_plus
+    from mechanize import Request
+    from lxml import html
+    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
+    q = b'+'.join(escaped)
+    url = 'http://bigbooksearch.com/books/'+q
+    br.open(url).read()
+    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', url)
+    raw = br.open(req).read()
+    root = html.fromstring(raw.decode('utf-8'))
+    urls = [i.get('src') for i in root.xpath('//img[@src]')]
+    return urls
+
+class BigBookSearch(Source):
+
+    name = 'Big Book Search'
+    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
+    capabilities = frozenset(['cover'])
+    config_help_message = _('Configure the Big Book Search plugin')
+    can_get_multiple_covers = True
+    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
+                      _('The maximum number of covers to process from the search result')),
+    )
+    supports_gzip_transfer_encoding = True
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+        if not title:
+            return
+        br = self.browser
+        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
+        urls = get_urls(br, tokens)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
+
+def test():
+    from calibre import browser
+    import pprint
+    br = browser()
+    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
+    pprint.pprint(urls)
+
+if __name__ == '__main__':
+    test()
@@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to

 class Worker(Thread):

-    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
+    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
         Thread.__init__(self)
         self.daemon = True

         self.plugin = plugin
         self.abort = abort
+        self.get_best_cover = get_best_cover
         self.buf = BytesIO()
         self.log = create_log(self.buf)
         self.title, self.authors, self.identifiers = (title, authors,

@@ -37,7 +38,7 @@ class Worker(Thread):
         try:
             if self.plugin.can_get_multiple_covers:
                 self.plugin.download_cover(self.log, self.rq, self.abort,
-                        title=self.title, authors=self.authors, get_best_cover=True,
+                        title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
                         identifiers=self.identifiers, timeout=self.timeout)
             else:
                 self.plugin.download_cover(self.log, self.rq, self.abort,

@@ -72,7 +73,7 @@ def process_result(log, result):
     return (plugin, width, height, fmt, data)

 def run_download(log, results, abort,
-        title=None, authors=None, identifiers={}, timeout=30):
+        title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
     '''
     Run the cover download, putting results into the queue :param:`results`.

@@ -89,7 +90,7 @@ def run_download(log, results, abort,
     plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]

     rq = Queue()
-    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
+    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
             in plugins]
     for w in workers:
         w.start()

@@ -163,7 +164,7 @@ def download_cover(log,
     abort = Event()

     run_download(log, rq, abort, title=title, authors=authors,
-            identifiers=identifiers, timeout=timeout)
+            identifiers=identifiers, timeout=timeout, get_best_cover=True)

     results = []
@@ -106,6 +106,8 @@ class Worker(Thread): # {{{
         parts = pub.partition(':')[0::2]
         pub = parts[1] or parts[0]
         try:
+            if ', Ship Date:' in pub:
+                pub = pub.partition(', Ship Date:')[0]
             q = parse_only_date(pub, assume_utc=True)
             if q.year != UNDEFINED_DATE:
                 mi.pubdate = q
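A small sketch of the new guard with a hypothetical publisher/date string: anything after ', Ship Date:' is dropped before the value reaches parse_only_date().

pub = 'April 2013, Ship Date: May 2013'  # hypothetical value
if ', Ship Date:' in pub:
    pub = pub.partition(', Ship Date:')[0]
print(pub)  # 'April 2013'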
@@ -39,39 +39,11 @@ class GoogleImages(Source):
             title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         if not title:
             return
-        from threading import Thread
-        import time
         timeout = max(60, timeout) # Needs at least a minute
         title = ' '.join(self.get_title_tokens(title))
         author = ' '.join(self.get_author_tokens(authors))
         urls = self.get_image_urls(title, author, log, abort, timeout)
-        if not urls:
-            log('No images found in Google for, title: %r and authors: %r'%(title, author))
-            return
-        urls = urls[:self.prefs['max_covers']]
-        if get_best_cover:
-            urls = urls[:1]
-        workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
-        for w in workers:
-            w.daemon = True
-            w.start()
-        alive = True
-        start_time = time.time()
-        while alive and not abort.is_set() and time.time() - start_time < timeout:
-            alive = False
-            for w in workers:
-                if w.is_alive():
-                    alive = True
-                    break
-            abort.wait(0.1)
-
-    def download_image(self, url, timeout, log, result_queue):
-        try:
-            ans = self.browser.open_novisit(url, timeout=timeout).read()
-            result_queue.put((self, ans))
-            log('Downloaded cover from: %s'%url)
-        except Exception:
-            self.log.exception('Failed to download cover from: %r'%url)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

     def get_image_urls(self, title, author, log, abort, timeout):
         from calibre.utils.ipc.simple_worker import fork_job, WorkerError
@@ -262,6 +262,35 @@ def from_links(container):
             toc.remove(child)
     return toc

+def find_text(node):
+    LIMIT = 200
+    pat = re.compile(r'\s+')
+    for child in node:
+        if isinstance(child, etree._Element):
+            text = xml2text(child).strip()
+            text = pat.sub(' ', text)
+            if len(text) < 1:
+                continue
+            if len(text) > LIMIT:
+                # Look for less text in a child of this node, recursively
+                ntext = find_text(child)
+                return ntext or (text[:LIMIT] + '...')
+            else:
+                return text
+
+def from_files(container):
+    toc = TOC()
+    for spinepath in container.spine_items:
+        name = container.abspath_to_name(spinepath)
+        root = container.parsed(name)
+        body = XPath('//h:body')(root)
+        if not body:
+            continue
+        text = find_text(body[0])
+        if text:
+            toc.add(text, name)
+    return toc
+
 def add_id(container, name, loc):
     root = container.parsed(name)
     body = root.xpath('//*[local-name()="body"]')[0]
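A minimal sketch of using the new from_files() helper outside the GUI, assuming a local EPUB path; get_container and commit_toc are the same names imported by the ToC editor later in this diff, and container.commit() is assumed to write the result back as in calibre's polish pipeline.

from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.toc import from_files, commit_toc

container = get_container('/tmp/book.epub')  # hypothetical path
toc = from_files(container)                  # one entry per spine file with usable text
if len(toc):
    commit_toc(container, toc)
    container.commit()                       # assumed to persist the updated ToC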
@@ -333,8 +333,8 @@ class OEBReader(object):
         guide = self.oeb.guide
         manifest = self.oeb.manifest
         for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
-            href = elem.get('href')
-            path = urlnormalize(urldefrag(href)[0])
+            ref_href = elem.get('href')
+            path = urlnormalize(urldefrag(ref_href)[0])
             if path not in manifest.hrefs:
                 corrected_href = None
                 for href in manifest.hrefs:

@@ -342,12 +342,12 @@ class OEBReader(object):
                         corrected_href = href
                         break
                 if corrected_href is None:
-                    self.logger.warn(u'Guide reference %r not found' % href)
+                    self.logger.warn(u'Guide reference %r not found' % ref_href)
                     continue
-                href = corrected_href
+                ref_href = corrected_href
             typ = elem.get('type')
             if typ not in guide:
-                guide.add(typ, elem.get('title'), href)
+                guide.add(typ, elem.get('title'), ref_href)

     def _find_ncx(self, opf):
         result = xpath(opf, '/o2:package/o2:spine/@toc')
@@ -180,5 +180,6 @@ class BorderParse:
         elif 'single' in border_style_list:
             new_border_dict[att] = 'single'
         else:
-            new_border_dict[att] = border_style_list[0]
+            if border_style_list:
+                new_border_dict[att] = border_style_list[0]
         return new_border_dict
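A trivial before/after sketch of the guard (att is a placeholder attribute name): indexing an empty style list raised IndexError, now the attribute is simply left unset.

border_style_list = []
new_border_dict = {}
att = 'border-style'  # placeholder
if border_style_list:  # the old code did border_style_list[0] unconditionally
    new_border_dict[att] = border_style_list[0]
print(new_border_dict)  # {}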
@@ -88,9 +88,7 @@ class StoreAction(InterfaceAction):
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'author:"%s"' % self._get_author(row)
-        self.search(query)
+        self.search({ 'author': self._get_author(row) })

     def _get_title(self, row):
         title = ''

@@ -107,18 +105,14 @@ class StoreAction(InterfaceAction):
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'title:"%s"' % self._get_title(row)
-        self.search(query)
+        self.search({ 'title': self._get_title(row) })

     def search_author_title(self):
         row = self._get_selected_row()
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
-        self.search(query)
+        self.search({ 'author': self._get_author(row), 'title': self._get_title(row) })

     def choose(self):
         from calibre.gui2.store.config.chooser.chooser_dialog import StoreChooserDialog
@@ -62,16 +62,20 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.setup_store_checks()

         # Set the search query
+        if isinstance(query, (str, unicode)):
+            self.search_edit.setText(query)
+        elif isinstance(query, dict):
+            if 'author' in query:
+                self.search_author.setText(query['author'])
+            if 'title' in query:
+                self.search_title.setText(query['title'])
         # Title
-        self.search_title.setText(query)
         self.search_title.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_title.setMinimumContentsLength(25)
         # Author
-        self.search_author.setText(query)
         self.search_author.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_author.setMinimumContentsLength(25)
         # Keyword
-        self.search_edit.setText(query)
         self.search_edit.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_edit.setMinimumContentsLength(25)

@@ -408,7 +412,7 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.save_state()

     def exec_(self):
-        if unicode(self.search_edit.text()).strip():
+        if unicode(self.search_edit.text()).strip() or unicode(self.search_title.text()).strip() or unicode(self.search_author.text()).strip():
             self.do_search()
         return QDialog.exec_(self)
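For reference, the two query shapes the dialog now accepts (the branches shown in the hunk above): a plain string goes into the keyword field, a dict fills the dedicated author and title fields.

query = 'dune frank herbert'                          # keyword search, as before
query = {'author': 'Frank Herbert', 'title': 'Dune'}  # as the store actions now build it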
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'

@@ -67,7 +67,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):

                 cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
                 cover_url = re.sub(r'%2F', '/', cover_url)
-                cover_url = re.sub(r'\widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
+                cover_url = re.sub(r'widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
                 title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                 title = re.sub(r' - ebook$', '', title)
                 formats = ', '.join(data.xpath('.//ul[@class="formats_available"]/li//b/text()'))

@@ -82,7 +82,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):
                 counter -= 1

                 s = SearchResult()
-                s.cover_url = 'http://www.nexto.pl' + cover_url
+                s.cover_url = cover_url if cover_url[:4] == 'http' else 'http://www.nexto.pl' + cover_url
                 s.title = title.strip()
                 s.author = author.strip()
                 s.price = price
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'

@@ -41,7 +41,7 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
         url = 'http://virtualo.pl/?q=' + urllib.quote(query) + '&f=format_id:4,6,3'

         br = browser()
-        no_drm_pattern = re.compile("Znak wodny")
+        no_drm_pattern = re.compile(r'Znak wodny|Brak')

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:

@@ -58,8 +58,8 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
                 cover_url = ''.join(data.xpath('.//div[@class="list_middle_left"]//a//img/@src'))
                 title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()'))
                 author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()'))
-                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/img/@src')]
-                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]/div/div/text()')))
+                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/span/img/@src')]
+                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]//span[@class="prompt_preview"]/text()')))

                 counter -= 1

@@ -70,6 +70,6 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
                 s.price = price + ' zł'
                 s.detail_item = 'http://virtualo.pl' + id.strip().split('http://')[0]
                 s.formats = ', '.join(formats).upper()
-                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_UNKNOWN
+                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_LOCKED
                 yield s
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 1 # Needed for dynamic plugin loading
+store_version = 2 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'

@@ -24,8 +24,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class WaterstonesUKStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://clkuk.tradedoubler.com/click?p=51196&a=1951604&g=19333484'
-        url_details = 'http://clkuk.tradedoubler.com/click?p(51196)a(1951604)g(16460516)url({0})'
+        url = 'http://www.awin1.com/awclick.php?mid=3787&id=120917'
+        url_details = 'http://www.awin1.com/cread.php?awinmid=3787&awinaffid=120917&clickref=&p={0}'

         if external or self.config.get('open_external', False):
             if detail_item:
@@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,

 from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
 from calibre.ebooks.oeb.polish.toc import (
-    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links)
+    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
 from calibre.gui2 import Application, error_dialog, gprefs
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.gui2.toc.location import ItemEdit

@@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
     go_to_root = pyqtSignal()
     create_from_xpath = pyqtSignal(object)
     create_from_links = pyqtSignal()
+    create_from_files = pyqtSignal()
     flatten_toc = pyqtSignal()

     def __init__(self, parent):

@@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
             )))
         l.addWidget(b)

+        self.cfb = b = QPushButton(_('Generate ToC from &files'))
+        b.clicked.connect(self.create_from_files)
+        b.setToolTip(textwrap.fill(_(
+            'Generate a Table of Contents from individual files in the book.'
+            ' Each entry in the ToC will point to the start of the file, the'
+            ' text of the entry will be the "first line" of text from the file.'
+            )))
+        l.addWidget(b)
+
         self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
         b.clicked.connect(self.create_from_user_xpath)
         b.setToolTip(textwrap.fill(_(

@@ -549,11 +559,11 @@ class TOCView(QWidget): # {{{
         b.setToolTip(_('Remove all selected entries'))
         b.clicked.connect(self.del_items)

-        self.left_button = b = QToolButton(self)
+        self.right_button = b = QToolButton(self)
         b.setIcon(QIcon(I('forward.png')))
         b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
         l.addWidget(b, 4, 3)
-        b.setToolTip(_('Unindent the current entry [Ctrl+Left]'))
+        b.setToolTip(_('Indent the current entry [Ctrl+Right]'))
         b.clicked.connect(self.tocw.move_right)

         self.down_button = b = QToolButton(self)

@@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{
         i.add_new_item.connect(self.add_new_item)
         i.create_from_xpath.connect(self.create_from_xpath)
         i.create_from_links.connect(self.create_from_links)
+        i.create_from_files.connect(self.create_from_files)
         i.flatten_item.connect(self.flatten_item)
         i.flatten_toc.connect(self.flatten_toc)
         i.go_to_root.connect(self.go_to_root)

@@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{
                 _('No links were found that could be added to the Table of Contents.'), show=True)
         self.insert_toc_fragment(toc)

+    def create_from_files(self):
+        toc = from_files(self.ebook)
+        if len(toc) == 0:
+            return error_dialog(self, _('No items found'),
+                _('No files were found that could be added to the Table of Contents.'), show=True)
+        self.insert_toc_fragment(toc)
+
     # }}}

 class TOCEditor(QDialog): # {{{

@@ -54,7 +54,7 @@ def get_parser(usage):
 def get_db(dbpath, options):
     global do_notify
     if options.library_path is not None:
-        dbpath = options.library_path
+        dbpath = os.path.expanduser(options.library_path)
     if dbpath is None:
         raise ValueError('No saved library path, either run the GUI or use the'
                 ' --with-library option')
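A one-line illustration of what the expanduser() change buys for the --with-library option: a leading ~ in the supplied path is now resolved before the database is opened.

import os
print(os.path.expanduser('~/Calibre Library'))  # e.g. /home/user/Calibre Library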
File diff suppressed because it is too large (this notice applied to a number of additional files in the commit).
Some files were not shown because too many files have changed in this diff.