Merge from trunk
@@ -40,6 +40,7 @@ recipes/.gitignore
recipes/README.md
recipes/icon_checker.py
recipes/readme_updater.py
recipes/garfield.recipe
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
@@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/katalog_egazeciarz.png
recipes/icons/garfield.png
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png
@@ -20,6 +20,58 @@
# new recipes:
# - title:

- version: 0.9.26
  date: 2013-04-05

  new features:
    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."

    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
      tickets: [1163520]

    - title: "ToC Editor: Add buttons to indent/unindent the current entry"

    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

    - title: "Column icons: Allow use of wide images as column icons"

    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
      tickets: [1162293,1163115]

  bug fixes:
    - title: "PDF Output: Fix generating page numbers causing links to not work."
      tickets: [1162573]

    - title: "Wrong filename output in error message when 'Guide reference not found'"
      tickets: [1163659]

    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"

    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
      tickets: [1162054]

    - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
      tickets: [1161999]

  improved recipes:
    - Financial Times UK
    - Sing Tao Daily
    - Apple Daily
    - A List Apart
    - Business Week
    - Harpers printed edition
    - Harvard Business Review

  new recipes:
    - title: AM730
      author: Eddie Lau

    - title: Arret sur images
      author: Francois D

    - title: Diario de Noticias
      author: Jose Pinto

- version: 0.9.25
  date: 2013-03-29
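The PDF header/footer entry in 0.9.26 above is template driven. A minimal sketch of exercising it from Python, assuming calibre's ebook-convert is on PATH and that the matching switches are named --pdf-header-template/--pdf-footer-template with the _TITLE_/_PAGENUM_ placeholders (these names are assumptions, not taken from this diff):

```python
# Hedged sketch: drive the new PDF header/footer templates via ebook-convert.
# Option names and placeholders are assumed to match the 0.9.26 feature above.
import subprocess

subprocess.check_call([
    'ebook-convert', 'book.epub', 'book.pdf',
    '--pdf-header-template', '<p style="text-align:center">_TITLE_</p>',
    '--pdf-footer-template', '<p style="text-align:right">Page _PAGENUM_</p>',
])
```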
recipes/am730.recipe (new file, 290 lines)
@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'

    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ != '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ != '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ != '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ != '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if a is not None:
                image = a.find('img')
                if image is not None:
                    if __HiResImg__:
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if caption is not None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))
        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))

        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
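The get_fetch* helpers in the AM730 recipe above derive the edition date by shifting UTC to Hong Kong time (UTC+8) and then stepping back six hours, so the date only rolls over once the 6am HKT publication window has passed. A standalone sketch of that arithmetic:

```python
# Standalone sketch of the edition-date logic used by the recipe above:
# shift UTC to HKT (UTC+8), then subtract 6 hours so the date rolls over
# at 6am local time, when the day's articles are expected to be complete.
import datetime

def edition_date(utc_now=None):
    if utc_now is None:
        utc_now = datetime.datetime.utcnow()
    shifted = utc_now + datetime.timedelta(hours=8) - datetime.timedelta(hours=6)
    return shifted.strftime('%Y%m%d')

# 21:55 UTC on 2013-04-04 is 05:55 HKT on the 5th, still the April 4 edition:
print(edition_date(datetime.datetime(2013, 4, 4, 21, 55)))  # '20130404'
```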
@@ -1,161 +1,275 @@
# -*- coding: utf-8 -*-
import re
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

class AppleDaily(BasicNewsRecipe):

    title = u'蘋果日報'
    __author__ = u'蘋果日報'
    __publisher__ = u'蘋果日報'
    description = u'蘋果日報'
    masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
    language = 'zh_TW'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False
    title = u'蘋果日報 (香港)'
    __author__ = 'Eddie Lau'
    publisher = '蘋果日報'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    remove_tags_before = dict(name=['ul', 'h1'])
    remove_tags_after = dict(name='form')
    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
                   dict(name=['script', 'noscript', 'style', 'form'])]
    use_embedded_content = False
    no_stylesheets = True
    extra_css = '''
        @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
        body {margin-right: 8pt; font-family: 'uming', serif;}
        h1 {font-family: 'uming', serif, sans-serif}
        '''
    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
    description = 'http://hkm.appledaily.com/'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'

    preprocess_regexps = [
        (re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
         lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
    ]
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
    remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
                   dict(name='p', attrs={'class':'next'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ != '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ != '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ != '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ != '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def get_cover_url(self):
        return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'

    #def get_browser(self):
    #    br = BasicNewsRecipe.get_browser(self)
    #    if self.username is not None and self.password is not None:
    #        br.open('http://www.nytimes.com/auth/login')
    #        br.select_form(name='login')
    #        br['USERID'] = self.username
    #        br['PASSWORD'] = self.password
    #        br.submit()
    #    return br

    def preprocess_html(self, soup):
        #process all the images
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            #print 'checking image: ' + iurl

            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)

            m = p.search(iurl)

            if m is not None:
                iurl = 'http://' + m.group('server') + '/' + m.group('path')
                #print 'working! new url: ' + iurl
                tag['src'] = iurl
            #else:
            #    print 'not good'

        for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
            iurl = tag['href']
            #print 'checking image: ' + iurl

            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)

            m = p.search(iurl)

            if m is not None:
                iurl = 'http://' + m.group('server') + '/' + m.group('path')
                #print 'working! new url: ' + iurl
                tag['href'] = iurl
            #else:
            #    print 'not good'

        return soup
        soup = self.index_to_soup('http://hkm.appledaily.com/')
        cover = soup.find(attrs={'class':'top-news'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        base = 'http://news.hotpot.hk/fruit'
        soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
        feeds = []
        soup = self.index_to_soup('http://hkm.appledaily.com/')
        ul = soup.find(attrs={'class':'menu'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
            title = li.find('a', text=True).strip()
            if title != u'動新聞':
                sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    #def feed_title(div):
    #    return ''.join(div.findAll(text=True, recursive=False)).strip()
    def parse_section(self, url):
        soup = self.index_to_soup(url)
        ul = soup.find(attrs={'class':'list'})
        current_articles = []
        for li in ul.findAll('li'):
            a = li.find('a', href=True)
            title = li.find('p', text=True).strip()
            if a is not None:
                current_articles.append({'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
            pass
        return current_articles

        articles = {}
        key = None
        ans = []
        for div in soup.findAll('li'):
            key = div.find(text=True, recursive=True)
            #if key == u'豪情':
            #    continue
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        print 'section=' + key
        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        articles[key] = []
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        ans.append(key)
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        a = div.find('a', href=True)
        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        if not a:
            continue
        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        url = base + '/' + a['href']
        print 'url=' + url
        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        if not articles.has_key(key):
            articles[key] = []
        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        # sub page
                        subSoup = self.index_to_soup(url)
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    for subDiv in subSoup.findAll('li'):
                        subA = subDiv.find('a', href=True)
                        subTitle = subDiv.find(text=True, recursive=True)
                        subUrl = base + '/' + subA['href']
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))
        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        print 'subUrl' + subUrl
        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))

        articles[key].append(
            dict(title=subTitle,
                 url=subUrl,
                 date='',
                 description='',
                 content=''))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        # elif div['class'] in ['story', 'story headline']:
        #     a = div.find('a', href=True)
        #     if not a:
        #         continue
        #     url = re.sub(r'\?.*', '', a['href'])
        #     url += '?pagewanted=all'
        #     title = self.tag_to_string(a, use_alt=True).strip()
        #     description = ''
        #     pubdate = strftime('%a, %d %b')
        #     summary = div.find(True, attrs={'class':'summary'})
        #     if summary:
        #         description = self.tag_to_string(summary, use_alt=False)
        #
        #     feed = key if key is not None else 'Uncategorized'
        #     if not articles.has_key(feed):
        #         articles[feed] = []
        #     if not 'podcasts' in url:
        #         articles[feed].append(
        #             dict(title=title, url=url, date=pubdate,
        #                  description=description,
        #                  content=''))
        # ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
        ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
        return ans

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    __author__ = 'Dave Asbury'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
    oldest_article = 2
    max_articles_per_feed = 12
    max_articles_per_feed = 20
    linearize_tables = True
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    language = 'en_GB'

    compress_news_images = True
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

    masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
        , 'language' : language
        }

    #remove_tags = [
    #    dict(attrs={'class':'inStory'})
    #    ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
    #    ,dict(attrs={'id':['inset','videoDisplay']})
    #]
    #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
    remove_attributes = ['lang']
    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']

    feeds = [
        (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
        (u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'),
        (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
        (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
        (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
        (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
        (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
        (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
        (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
        (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
        (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
        (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
        (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
        (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
        (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
        (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
        (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
        (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
        (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
        (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
        (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        if 'podcasts' in url:
            return None
        if 'surveys' in url:
            return None
        if 'images' in url:
            return None
        if 'feedroom' in url:
            return None
        if '/magazine/toc/' in url:
            return None
        rurl, sep, rest = url.rpartition('?')
        if rurl:
            return rurl
        return rest

    def print_version(self, url):
        if '/news/' in url or '/blog/' in url:
            return url
        rurl = url.replace('http://www.businessweek.com/', 'http://www.businessweek.com/print/')
        return rurl.replace('/investing/', '/investor/')
        soup = self.index_to_soup(url)
        prntver = soup.find('li', attrs={'class':'print tracked'})
        rurl = prntver.find('a', href=True)['href']
        return rurl

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
@@ -1,3 +1,4 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict

@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
            title = self.tag_to_string(div.a).strip()
            url = div.a['href']
            soup0 = self.index_to_soup(url)
            urlprint = soup0.find('li', attrs={'class':'print tracked'}).a['href']
            urlprint = soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
            articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})

@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
            title = self.tag_to_string(div.a).strip()
            url = div.a['href']
            soup0 = self.index_to_soup(url)
            urlprint = soup0.find('li', attrs={'class':'print tracked'}).a['href']
            urlprint = soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
            articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})

        if articles:
@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 8/12/12
    # last updated 19/10/12
    language = 'en_GB'
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True
    ignore_duplicate_articles = {'title', 'url'}
    #articles_are_obfuscated = True
    #article_already_exists = False
@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

    compress_news_images = True
    oldest_article = 1
    max_articles_per_feed = 1
    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
recipes/diario_de_noticias.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
# vim:fileencoding=UTF-8

from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    title = 'Diário de Notícias'
    oldest_article = 7
    language = 'pt'
    __author__ = 'Jose Pinto'
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'})]
    remove_tags = [dict(name='table', attrs={'class':'TabFerramentasInf'})]

    feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
             (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
             (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
             (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
             (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
             (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
             (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
             (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')]
@@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
    language = 'pl'
    encoding = 'utf-8'
    INDEX = 'http://www.esensja.pl'
    extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
        .t-author {font-size: x-small; text-align: left}
        .t-title2 {font-size: x-small; font-style: italic; text-align: left}
        .text {font-size: small; text-align: left}
        .annot-ref {font-style: italic; text-align: left}
        '''
    cover_url = ''
    masthead_url = 'http://esensja.pl/img/wrss.gif'
    use_embedded_content = False
@@ -8,6 +8,7 @@ import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from collections import OrderedDict

class FinancialTimes(BasicNewsRecipe):
    title = 'Financial Times (UK)'

@@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
        return articles

    def parse_index(self):
        feeds = []
        feeds = OrderedDict()
        soup = self.index_to_soup(self.INDEX)
        dates = self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
        self.timefmt = ' [%s]'%dates
        wide = soup.find('div', attrs={'class':'wide'})
        if not wide:
            return feeds
        allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
        if not allsections:
            return feeds
        count = 0
        for item in allsections:
            count = count + 1
            if self.test and count > 2:
                return feeds
            fitem = item.h3
            if not fitem:
                fitem = item.h4
            ftitle = self.tag_to_string(fitem)
            self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
            feedarts = self.get_artlinks(item.ul)
            feeds.append((ftitle, feedarts))
        return feeds
        #dates = self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
        #self.timefmt = ' [%s]'%dates
        section_title = 'Untitled'

        for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
            for section in column.findAll('div', attrs = {'class':'feedBox'}):
                sectiontitle = self.tag_to_string(section.find('h4'))
                if '...' not in sectiontitle: section_title = sectiontitle
                for article in section.ul.findAll('li'):
                    articles = []
                    title = self.tag_to_string(article.a)
                    url = article.a['href']
                    articles.append({'title':title, 'url':url, 'description':'', 'date':''})

                    if articles:
                        if section_title not in feeds:
                            feeds[section_title] = []
                        feeds[section_title] += articles

        ans = [(key, val) for key, val in feeds.iteritems()]
        return ans

    def preprocess_html(self, soup):
        items = ['promo-box','promo-title',

@@ -177,6 +179,3 @@ class FinancialTimes(BasicNewsRecipe):
            tfile.close()
            self.temp_files.append(tfile)
            return tfile.name

    def cleanup(self):
        self.browser.open('https://registration.ft.com/registration/login/logout?location=')
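The parse_index rewrite above swaps the flat feed list for an OrderedDict keyed by section title, so articles scraped from repeated section boxes merge under one heading while first-seen section order is preserved. A minimal sketch of that accumulation pattern:

```python
# Minimal sketch of the section-grouping pattern in the new parse_index:
# articles accumulate under their section title, in first-seen order.
from collections import OrderedDict

feeds = OrderedDict()
scraped = [('World', 'story A'), ('UK', 'story B'), ('World', 'story C')]
for section_title, title in scraped:
    if section_title not in feeds:
        feeds[section_title] = []
    feeds[section_title].append({'title': title, 'url': '', 'description': '', 'date': ''})

ans = [(key, val) for key, val in feeds.items()]
# [('World', [story A, story C]), ('UK', [story B])]
```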
recipes/forbes_pl.recipe (new file, 53 lines)
@@ -0,0 +1,53 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe
import datetime
import re

class forbes_pl(BasicNewsRecipe):
    title = u'Forbes.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.'
    oldest_article = 1
    index = 'http://www.forbes.pl'
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
    preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
    remove_javascript = True
    no_stylesheets = True
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(hours=24)
    yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
    pages_count = 4
    keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
    remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

    '''def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        cleanup = False
        nexturl = appendtag.find('a', attrs={'class':'next'})
        if nexturl:
            cleanup = True
        while nexturl:
            soup2 = self.index_to_soup(self.index + nexturl['href'])
            nexturl = soup2.find('a', attrs={'class':'next'})
            pagetext = soup2.findAll(id='article-body-wrapper')
            if not pagetext:
                pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
            for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        if cleanup:
            for r in appendtag.findAll(attrs={'class':'paginator'}):
                r.extract()'''
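Note that the commented-out append_page helper above would fail if re-enabled as written: soup2.findAll returns a result list, which has no findAll method of its own, and inserting that list into appendtag would also misbehave. A corrected standalone sketch of the intended follow-the-next-link pagination, assuming the same selectors apply:

```python
# Corrected sketch of the commented-out pagination helper above: follow the
# 'next' links, strip HTML comments from each page body (a single element,
# not a ResultSet), and append it to the first page.
from calibre.ebooks.BeautifulSoup import Comment

def append_pages(recipe, index, appendtag):
    nexturl = appendtag.find('a', attrs={'class': 'next'})
    had_next = nexturl is not None
    while nexturl:
        soup2 = recipe.index_to_soup(index + nexturl['href'])
        nexturl = soup2.find('a', attrs={'class': 'next'})
        pagetext = soup2.find(id='article-body-wrapper')
        if pagetext is None:
            pagetext = soup2.find(attrs={'class': 'Article-Entry Styled'})
        if pagetext is None:
            break  # neither selector matched; stop rather than insert nothing
        for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()
        appendtag.insert(len(appendtag.contents), pagetext)
    if had_next:
        for r in appendtag.findAll(attrs={'class': 'paginator'}):
            r.extract()
```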
recipes/galaxys_edge.recipe (new file, 108 lines)
@@ -0,0 +1,108 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

from calibre.web.feeds.news import BasicNewsRecipe

class GalaxyEdge(BasicNewsRecipe):
    title = u'The Galaxy\'s Edge'
    language = 'en'

    oldest_article = 7
    __author__ = 'Krittika Goyal'
    no_stylesheets = True

    auto_cleanup = True

    #keep_only_tags = [dict(id='content')]
    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
    #               dict(id=['email-section', 'right-column', 'printfooter', 'topover',
    #                   'slidebox', 'th_footer'])]

    extra_css = '.photo-caption { font-size: smaller }'

    def parse_index(self):
        soup = self.index_to_soup('http://www.galaxysedge.com/')
        main = soup.find('table', attrs={'width':'911'})
        toc = main.find('td', attrs={'width':'225'})

        current_section = None
        current_articles = []
        feeds = []
        c = 0
        for x in toc.findAll(['p']):
            c = c+1
            if c == 5:
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                edwo = x.find('a')
                current_section = self.tag_to_string(edwo)
                current_articles = []
                self.log('\tFound section:', current_section)
                title = self.tag_to_string(edwo)
                url = edwo.get('href', True)
                url = 'http://www.galaxysedge.com/'+url
                print(title)
                print(c)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})
            elif c > 5:
                current_section = self.tag_to_string(x.find('b'))
                current_articles = []
                self.log('\tFound section:', current_section)
                for y in x.findAll('a'):
                    title = self.tag_to_string(y)
                    url = y.get('href', True)
                    url = 'http://www.galaxysedge.com/'+url
                    print(title)
                    if not url or not title:
                        continue
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    #def preprocess_raw_html(self, raw, url):
    #    return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')

    #def postprocess_html(self, soup, first_fetch):
    #    for t in soup.findAll(['table', 'tr', 'td','center']):
    #        t.name = 'div'
    #    return soup

    #def parse_index(self):
    #    today = time.strftime('%Y-%m-%d')
    #    soup = self.index_to_soup(
    #        'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
    #    div = soup.find(id='left-column')
    #    feeds = []
    #    current_section = None
    #    current_articles = []
    #    for x in div.findAll(['h3', 'div']):
    #        if current_section and x.get('class', '') == 'tpaper':
    #            a = x.find('a', href=True)
    #            if a is not None:
    #                current_articles.append({'url':a['href']+'?css=print',
    #                    'title':self.tag_to_string(a), 'date': '',
    #                    'description':''})
    #        if x.name == 'h3':
    #            if current_section and current_articles:
    #                feeds.append((current_section, current_articles))
    #            current_section = self.tag_to_string(x)
    #            current_articles = []
    #    return feeds
@@ -10,7 +10,7 @@ krakow.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe

class gw_krakow(BasicNewsRecipe):
    title = u'Gazeta.pl Kraków'
    title = u'Gazeta Wyborcza Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = u'Wiadomości z Krakowa na portalu Gazeta.pl.'

@@ -5,7 +5,7 @@ import string
from calibre.web.feeds.news import BasicNewsRecipe

class GazetaPlSzczecin(BasicNewsRecipe):
    title = u'Gazeta.pl Szczecin'
    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'

@@ -10,7 +10,7 @@ warszawa.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe

class gw_wawa(BasicNewsRecipe):
    title = u'Gazeta.pl Warszawa'
    title = u'Gazeta Wyborcza Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = 'Wiadomości z Warszawy na portalu Gazeta.pl.'

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment

class Gazeta_Wyborcza(BasicNewsRecipe):
    title = u'Gazeta.pl'
    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta

class HBR(BasicNewsRecipe):

@@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
    timefmt = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True
    # recipe_disabled = ('hbr.org has started requiring the use of javascript'
    #     ' to log into their website. This is unsupported in calibre, so'
    #     ' this recipe has been disabled. If you would like to see '
    #     ' HBR supported in calibre, contact hbr.org and ask them'
    #     ' to provide a javascript free login method.')

    LOGIN_URL = 'https://hbr.org/login?request_url=/'
    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'

    INDEX = 'http://hbr.org/archive-toc/BR'
    INDEX = 'http://hbr.org'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
        'mailingListTout', 'partnerCenter', 'pageFooter',
        'superNavHeadContainer', 'hbrDisqus',
        'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''

@@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
        if url.endswith('/ar/1'):
            return url[:-1]+'pr'

    def hbr_get_toc(self):
        # return self.index_to_soup(open('/t/toc.html').read())
        today = date.today()
        future = today + timedelta(days=30)
        past = today - timedelta(days=30)
        for x in [x.strftime('%y%m') for x in (future, today, past)]:
            url = self.INDEX + x
            soup = self.index_to_soup(url)
            if (not soup.find(text='Issue Not Found') and not soup.find(
                text="We're Sorry. There was an error processing your request")
                    and 'Exception: java.io.FileNotFoundException' not in
                    unicode(soup)):
                return soup
        raise Exception('Could not find current issue')

    def hbr_parse_toc(self, soup):
        feeds = []
        current_section = None

@@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):

            articles.append({'title':title, 'url':url, 'description':desc,
                'date':''})

        if current_section is not None and articles:
            feeds.append((current_section, articles))
        return feeds

    def parse_index(self):
        soup = self.hbr_get_toc()
        # open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
        soup0 = self.index_to_soup('http://hbr.org/magazine')
        datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
        # find date & cover
        self.cover_url = datencover.img['src']
        dates = self.tag_to_string(datencover.img['alt'])
        self.timefmt = u' [%s]'%dates
        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
        feeds = self.hbr_parse_toc(soup)
        return feeds

    def get_cover_url(self):
        cover_url = None
        index = 'http://hbr.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)

        if link_item:
            cover_url = 'http://hbr.org' + link_item['src']

        return cover_url
recipes/icons/forbes_pl.png (new binary file, 1.2 KiB)
(four modified icon files, 802 B -> 294 B each; filenames not shown in this view)
recipes/icons/slashdot.png (new binary file, 250 B)
recipes/icons/sportowefakty.png (new binary file, 511 B)
recipes/icons/wysokie_obcasy.png (new binary file, 205 B)
@@ -1,33 +1,23 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AListApart(BasicNewsRecipe):
    __author__ = u'Marc Busqué <marc@lamarciana.com>'
    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __version__ = '2.0'
    __license__ = 'GPL v3'
    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'A List Apart'
    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieves articles and columns.'
    language = 'en'
    tags = 'web development, software'
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True
    encoding = 'utf8'
    cover_url = u'http://alistapart.com/pix/alalogo.gif'
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'})
    ]
    remove_tags = [
        dict(name='ul', attrs={'id': 'metastuff'}),
        dict(name='div', attrs={'class': 'discuss'}),
        dict(name='div', attrs={'class': 'discuss'}),
        dict(name='div', attrs={'id': 'learnmore'}),
    ]
    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
    extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
    extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

    feeds = [
        (u'A List Apart', u'http://www.alistapart.com/site/rss'),
        (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
    ]
@@ -6,10 +6,10 @@ import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'
    description = 'News as provided by The Metro -UK'
    description = 'News from The Metro, UK'
    #timefmt = ''
    __author__ = 'fleclerc & Dave Asbury'
    #last update 20/1/13
    __author__ = 'Dave Asbury'
    #last update 4/4/13
    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'

@@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    compress_news_images = True
    def parse_index(self):
        articles = {}
        key = None
@@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    max_articles_per_feed = 20
    #auto_cleanup = True
    language = 'en_GB'
    compress_news_images = True

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.nme.com/component/subscribe')
@ -1,30 +1,30 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Eddie Lau'
__copyright__ = '2011-2013, Eddie Lau'

# data source: normal, mobile
__Source__ = 'mobile'
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
__UseChineseTitle__ = True
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
__IncludeSummary__ = True
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True


'''
Change Log:
2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
2011/12/29 -- first version done
TODO:
* use alternative source at http://m.singtao.com/index.php
'''

from calibre.utils.date import now as nowf
import os, datetime, re
from datetime import date
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
    title = 'Sing Tao Daily - Hong Kong'
    description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
    category = 'Chinese, News, Hong Kong'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
    if __Source__ == 'normal':
        keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        #cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
        base = 2660
        todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
        diff = todaydate - date(2011, 12, 29)
        base = base + int(diff.total_seconds()/(3600*24))
        cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
        soup = self.index_to_soup('http://m.singtao.com/')
        cover = soup.find(attrs={'class':'special'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            cover = 'http://singtao.com/images/stlogo.gif'
            cover = None
        return cover

    def parse_index(self):
@ -293,7 +289,7 @@ class STHKRecipe(BasicNewsRecipe):
        textFound = False
        for p in paras:
            if not textFound:
                summary_candidate = self.tag_to_string(p).strip()
                summary_candidate = self.tag_to_string(p).strip().replace('&nbsp;', '')
                if len(summary_candidate) > 0:
                    summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
                    article.summary = article.text_summary = summary_candidate
@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):



|
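Note on the cover hunk above: the old code computed the cover image number by date arithmetic from a fixed baseline, which drifts as soon as the numbering skips a day; the new code scrapes the mobile homepage and returns None on failure, so calibre falls back to its auto-generated cover instead of a stale logo. A minimal sketch of the same scrape-with-fallback pattern (the class name is illustrative, not part of the merge):

from calibre.web.feeds.recipes import BasicNewsRecipe

class MobileSiteCover(BasicNewsRecipe):  # illustrative subclass
    title = 'Example'

    def get_cover_url(self):
        soup = self.index_to_soup('http://m.singtao.com/')
        cover = soup.find(attrs={'class': 'special'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)      # verify the image URL actually resolves
        except Exception:
            cover = None        # None lets calibre generate a default cover
        return cover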
70
recipes/sportowefakty.recipe
Normal file
@ -0,0 +1,70 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image

class sportowefakty(BasicNewsRecipe):
    title = u'SportoweFakty'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
    language = 'pl'
    description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!'
    oldest_article = 1
    masthead_url='http://www.sportowefakty.pl/images/logo.png'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content=False
    remove_javascript=True
    no_stylesheets=True
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs = {'class' : 'box-article'})]
    remove_tags =[]
    remove_tags.append(dict(attrs = {'class' : re.compile(r'^newsStream')}))
    remove_tags.append(dict(attrs = {'target' : '_blank'}))

    feeds = [
        (u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'),
        (u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'),
        (u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'),
        (u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'),
        (u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'),
        (u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'),
        (u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'),
        (u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss')
    ]

    def get_article_url(self, article):
        link = article.get('link', None)
        if 'utm_source' in link:
            return link.split('?utm')[0]
        else:
            return link

    def print_version(self, url):
        print_url = url + '/drukuj'
        return print_url

    def preprocess_html(self, soup):
        head = soup.find('h1')
        if 'Fotorelacja' in self.tag_to_string(head):
            return None
        else:
            for alink in soup.findAll('a'):
                if alink.string is not None:
                    tstr = alink.string
                    alink.replaceWith(tstr)
            return soup

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup

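The get_article_url override in the new recipe strips Google Analytics tracking parameters so that ignore_duplicate_articles compares canonical URLs. The same logic as a self-contained sketch (the example URL is hypothetical):

def strip_tracking(link):
    # Cut the query string at the first utm_* parameter so duplicate
    # detection sees the same canonical URL for every syndicated copy.
    if link and 'utm_source' in link:
        return link.split('?utm')[0]
    return link

# Hypothetical example:
# strip_tracking('http://www.sportowefakty.pl/zuzel/artykul.html?utm_source=rss&utm_medium=feed')
#   -> 'http://www.sportowefakty.pl/zuzel/artykul.html'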
@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    no_stylesheets = True

    ignore_duplicate_articles = {'title','url'}

    compress_news_images = True

    extra_css = '''
        body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}

@ -6,17 +6,62 @@ __license__ = 'GPL v3'
www.canada.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe

from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup


class TimesColonist(BasicNewsRecipe):

    # Customization -- remove sections you don't want.
    # If your e-reader is an e-ink Kindle and your output profile is
    # set properly this recipe will not include images because the
    # resulting file is too large. If you have one of these and want
    # images you can set kindle_omit_images = False
    # and remove sections (typically the e-ink Kindles will
    # work with about a dozen of these, but your mileage may vary).

    kindle_omit_images = True

    section_list = [
        ('','Web Front Page'),
        ('news/','News Headlines'),
        ('news/b-c/','BC News'),
        ('news/national/','National News'),
        ('news/world/','World News'),
        ('opinion/','Opinion'),
        ('opinion/letters/','Letters'),
        ('business/','Business'),
        ('business/money/','Money'),
        ('business/technology/','Technology'),
        ('business/working/','Working'),
        ('sports/','Sports'),
        ('sports/hockey/','Hockey'),
        ('sports/football/','Football'),
        ('sports/basketball/','Basketball'),
        ('sports/golf/','Golf'),
        ('entertainment/','entertainment'),
        ('entertainment/go/','Go!'),
        ('entertainment/music/','Music'),
        ('entertainment/books/','Books'),
        ('entertainment/Movies/','Movies'),
        ('entertainment/television/','Television'),
        ('life/','Life'),
        ('life/health/','Health'),
        ('life/travel/','Travel'),
        ('life/driving/','Driving'),
        ('life/homes/','Homes'),
        ('life/food-drink/','Food & Drink')
    ]

    title = u'Victoria Times Colonist'
    url_prefix = 'http://www.timescolonist.com'
    description = u'News from Victoria, BC'
    fp_tag = 'CAN_TC'

    masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'


    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
    .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
    '''
    keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
    remove_tags = [{'class':'comments'},

    def __init__(self, options, log, progress_reporter):
        self.remove_tags = [{'class':'comments'},
                            {'id':'photocredit'},
                            dict(name='div', attrs={'class':re.compile('top.controls')}),
                            dict(name='div', attrs={'class':re.compile('^comments')}),
                            dict(name='div', attrs={'class':re.compile('social')}),
                            dict(name='div', attrs={'class':re.compile('tools')}),
                            dict(name='div', attrs={'class':re.compile('bottom.tools')}),
                            dict(name='div', attrs={'class':re.compile('window')}),
                            dict(name='div', attrs={'class':re.compile('related.news.element')})]

        print("PROFILE NAME = "+options.output_profile.short_name)
        if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
            self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
        BasicNewsRecipe.__init__(self, options, log, progress_reporter)

    def get_cover_url(self):
        from datetime import timedelta, date
@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
    def preprocess_html(self,soup):
        byline = soup.find('p',attrs={'class':re.compile('ancillary')})
        if byline is not None:
            byline.find('a')
            authstr = self.tag_to_string(byline,False)
            authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
            authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
        atag = htag.a
        if atag is not None:
            url = atag['href']
            #print("Checking "+url)
            if atag['href'].startswith('/'):
                url = self.url_prefix+atag['href']
            url = url.strip()
            # print("Checking >>"+url+'<<\n\r')
            if url.startswith('/'):
                url = self.url_prefix+url
            if url in self.url_list:
                return
            self.url_list.append(url)
@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
        if dtag is not None:
            description = self.tag_to_string(dtag,False)
        article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
        #print(sectitle+title+": description = "+description+" URL="+url)
        print(sectitle+title+": description = "+description+" URL="+url+'\n\r')

    def add_section_index(self,ans,securl,sectitle):
        print("Add section url="+self.url_prefix+'/'+securl)
        print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
        try:
            soup = self.index_to_soup(self.url_prefix+'/'+securl)
        except:
@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):

    def parse_index(self):
        ans = []
        ans = self.add_section_index(ans,'','Web Front Page')
        ans = self.add_section_index(ans,'news/','News Headlines')
        ans = self.add_section_index(ans,'news/b-c/','BC News')
        ans = self.add_section_index(ans,'news/national/','Natioanl News')
        ans = self.add_section_index(ans,'news/world/','World News')
        ans = self.add_section_index(ans,'opinion/','Opinion')
        ans = self.add_section_index(ans,'opinion/letters/','Letters')
        ans = self.add_section_index(ans,'business/','Business')
        ans = self.add_section_index(ans,'business/money/','Money')
        ans = self.add_section_index(ans,'business/technology/','Technology')
        ans = self.add_section_index(ans,'business/working/','Working')
        ans = self.add_section_index(ans,'sports/','Sports')
        ans = self.add_section_index(ans,'sports/hockey/','Hockey')
        ans = self.add_section_index(ans,'sports/football/','Football')
        ans = self.add_section_index(ans,'sports/basketball/','Basketball')
        ans = self.add_section_index(ans,'sports/golf/','Golf')
        ans = self.add_section_index(ans,'entertainment/','entertainment')
        ans = self.add_section_index(ans,'entertainment/go/','Go!')
        ans = self.add_section_index(ans,'entertainment/music/','Music')
        ans = self.add_section_index(ans,'entertainment/books/','Books')
        ans = self.add_section_index(ans,'entertainment/Movies/','movies')
        ans = self.add_section_index(ans,'entertainment/television/','Television')
        ans = self.add_section_index(ans,'life/','Life')
        ans = self.add_section_index(ans,'life/health/','Health')
        ans = self.add_section_index(ans,'life/travel/','Travel')
        ans = self.add_section_index(ans,'life/driving/','Driving')
        ans = self.add_section_index(ans,'life/homes/','Homes')
        ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
        for (url,title) in self.section_list:
            ans = self.add_section_index(ans,url,title)
        return ans


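The Times Colonist refactor above does two things: it replaces the hard-coded parse_index section calls with a data-driven section_list, and it moves remove_tags from a class attribute into __init__, because the conversion options (and hence the output profile) only exist at construction time. The construction-time pattern in isolation, as a sketch (class and section names are illustrative):

import re
from calibre.web.feeds.news import BasicNewsRecipe

class ProfileAwareRecipe(BasicNewsRecipe):  # illustrative subclass
    title = 'Example'
    kindle_omit_images = True

    def __init__(self, options, log, progress_reporter):
        # Build remove_tags per run: output_profile is only known here.
        self.remove_tags = [{'class': 'comments'}]
        if self.kindle_omit_images and options.output_profile.short_name in (
                'kindle', 'kindle_dx', 'kindle_pw'):
            self.remove_tags.append(
                dict(name='div', attrs={'class': re.compile('image-container')}))
        BasicNewsRecipe.__init__(self, options, log, progress_reporter)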
@ -1,144 +0,0 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class GazetaWyborczaDuzyForma(BasicNewsRecipe):
    cover_url = 'http://bi.gazeta.pl/im/8/5415/m5415058.gif'
    title = u"Gazeta Wyborcza Duzy Format"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from Gazeta's website"
    language = 'pl'
    max_articles_per_feed = 50 #you can increase it even up to maybe 600, should still work
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'id':['k1']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['zdjM', 'rel_video', 'zdjP', 'rel_box', 'index mod_zi_dolStrony']})
        ,dict(name='div', attrs={'id':['source', 'banP4', 'article_toolbar', 'rel', 'inContext_disabled']})
        ,dict(name='ul', attrs={'id':['articleToolbar']})
        ,dict(name='img', attrs={'class':['brand']})
        ,dict(name='h5', attrs={'class':['author']})
        ,dict(name='h6', attrs={'class':['date']})
        ,dict(name='p', attrs={'class':['txt_upl']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':['Str']}) #page-number navigator
    ]

    def load_article_links(self, url, count):
        print '--- load_article_links', url, count

        #page with links to articles
        soup = self.index_to_soup(url)

        #table with articles
        list = soup.find('div', attrs={'class':'GWdalt'})

        #single articles (link, title, ...)
        links = list.findAll('div', attrs={'class':['GWdaltE']})

        if len(links) < count:
            #load links to more articles...

            #find the 'next page' link
            pages_nav = list.find('div', attrs={'class':'pages'})
            next = pages_nav.find('a', attrs={'class':'next'})
            if next:
                print 'next=', next['href']
                url = 'http://wyborcza.pl' + next['href']
                #e.g. url = 'http://wyborcza.pl/0,75480.html?str=2'

                older_links = self.load_article_links(url, count - len(links))
                links.extend(older_links)

        return links

    #produce list of articles to download
    def parse_index(self):
        print '--- parse_index'

        max_articles = 8000
        links = self.load_article_links('http://wyborcza.pl/0,75480.html', max_articles)

        ans = []
        key = None
        articles = {}

        key = 'Uncategorized'
        articles[key] = []

        for div_art in links:
            div_date = div_art.find('div', attrs={'class':'kL'})
            div = div_art.find('div', attrs={'class':'kR'})

            a = div.find('a', href=True)

            url = a['href']
            title = a.string
            description = ''
            pubdate = div_date.string.rstrip().lstrip()
            summary = div.find('span', attrs={'class':'lead'})

            desc = summary.find('a', href=True)
            if desc:
                desc.extract()

            description = self.tag_to_string(summary, use_alt=False)
            description = description.rstrip().lstrip()

            feed = key if key is not None else 'Duzy Format'

            if not articles.has_key(feed):
                articles[feed] = []

            if description != '': # skip picture-only articles
                articles[feed].append(
                    dict(title=title, url=url, date=pubdate,
                        description=description,
                        content=''))

        ans = [(key, articles[key])]
        return ans

    def append_page(self, soup, appendtag, position):
        pager = soup.find('div',attrs={'id':'Str'})
        if pager:
            #look for an 'a' element containing 'nast' (next page); exit if not found
            list = pager.findAll('a')

            for elem in list:
                if 'nast' in elem.string:
                    nexturl = elem['href']

                    soup2 = self.index_to_soup('http://warszawa.gazeta.pl' + nexturl)

                    texttag = soup2.find('div', attrs={'id':'artykul'})

                    newpos = len(texttag.contents)
                    self.append_page(soup2,texttag,newpos)
                    texttag.extract()
                    appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body, 3)

        # finally remove some tags
        pager = soup.find('div',attrs={'id':'Str'})
        if pager:
            pager.extract()

        pager = soup.find('div',attrs={'class':'tylko_int'})
        if pager:
            pager.extract()

        return soup
57
recipes/wysokie_obcasy.recipe
Normal file
@ -0,0 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class WysokieObcasyRecipe(BasicNewsRecipe):
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Wysokie Obcasy'
    publisher = 'Agora SA'
    description = u'Serwis sobotniego dodatku do Gazety Wyborczej'
    category='magazine'
    language = 'pl'
    publication_type = 'magazine'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 5

    keep_only_tags =[]
    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'img'))
    remove_tags.append(dict(name = 'p', attrs = {'class' : 'info'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        h1{text-align: left;}
    '''

    feeds = [
        ('Wszystkie Artykuly', 'feed://www.wysokieobcasy.pl/pub/rss/wysokieobcasy.xml'),
    ]

    def print_version(self,url):
        baseURL='http://www.wysokieobcasy.pl/wysokie-obcasy'
        segments = url.split(',')
        subPath= '/2029020,'
        articleURL1 = segments[1]
        articleURL2 = segments[2]
        printVerString=articleURL1 + ',' + articleURL2
        s= baseURL + subPath + printVerString + '.html'
        return s

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
        self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
        return getattr(self, 'cover_url', self.cover_url)
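print_version above rebuilds the article URL around a fixed print-view path segment, keeping only the two numeric IDs from the original address. A hypothetical walk-through (the numeric IDs below are made up to show the shape of the rewrite, not taken from the site):

url = 'http://www.wysokieobcasy.pl/wysokie-obcasy/1,96856,13685360,Tytul.html'
segments = url.split(',')   # ['...wysokie-obcasy/1', '96856', '13685360', 'Tytul.html']
print_url = ('http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,'
             + segments[1] + ',' + segments[2] + '.html')
# -> 'http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,96856,13685360.html'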
@ -390,7 +390,6 @@


    <xsl:output method = "xml"/>

    <xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>


@ -415,13 +414,11 @@
    </xsl:template>

    <xsl:template match="rtf:page-break">
        <xsl:element name="br">
            <xsl:attribute name="style">page-break-after:always</xsl:attribute>
        </xsl:element>
        <br style = "page-break-after:always"/>
    </xsl:template>

    <xsl:template match="rtf:hardline-break">
        <xsl:element name="br"/>
        <br/>
    </xsl:template>

    <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
@ -472,9 +469,7 @@
    </xsl:template>

    <xsl:template match="rtf:pict">
        <xsl:element name="img">
            <xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
        </xsl:element>
        <img src = "{@num}"/>
    </xsl:template>

    <xsl:template match="*">

@ -47,6 +47,10 @@ binary_includes = [
    '/usr/lib/libgthread-2.0.so.0',
    '/usr/lib/libpng14.so.14',
    '/usr/lib/libexslt.so.0',
    # Ensure that libimobiledevice is compiled against openssl, not gnutls
    '/usr/lib/libimobiledevice.so.3',
    '/usr/lib/libusbmuxd.so.2',
    '/usr/lib/libplist.so.1',
    MAGICK_PREFIX+'/lib/libMagickWand.so.5',
    MAGICK_PREFIX+'/lib/libMagickCore.so.5',
    '/usr/lib/libgcrypt.so.11',

@ -399,7 +399,8 @@ class Py2App(object):
    @flush
    def add_fontconfig(self):
        info('\nAdding fontconfig')
        for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
        for x in ('fontconfig.1', 'freetype.6', 'expat.1',
                  'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
            src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
            self.install_dylib(src)
        dst = os.path.join(self.resources_dir, 'fonts')

@ -12,13 +12,13 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-03-27 13:07+0000\n"
"PO-Revision-Date: 2013-03-28 13:01+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-03-28 04:41+0000\n"
"X-Launchpad-Export-Date: 2013-03-29 04:36+0000\n"
"X-Generator: Launchpad (build 16546)\n"
"Language: ca\n"

@ -1884,7 +1884,7 @@ msgstr "Awera"

#. name for aws
msgid "Awyu; South"
msgstr "Awyu meridional"
msgstr "Awyu; meridional"

#. name for awt
msgid "Araweté"
@ -1892,7 +1892,7 @@ msgstr "Araweté"

#. name for awu
msgid "Awyu; Central"
msgstr "Awyu central"
msgstr "Awyu; Central"

#. name for awv
msgid "Awyu; Jair"
@ -4052,7 +4052,7 @@ msgstr "Buginès"

#. name for buh
msgid "Bunu; Younuo"
msgstr "Bunu; Younuo"
msgstr "Bunu; Younou"

#. name for bui
msgid "Bongili"
@ -4308,7 +4308,7 @@ msgstr "Bwa"

#. name for bwx
msgid "Bunu; Bu-Nao"
msgstr "Bunu; Bu-Nao"
msgstr "Bunu; Bu Nao"

#. name for bwy
msgid "Bwamu; Cwi"
@ -19804,7 +19804,7 @@ msgstr "Minoà"

#. name for omo
msgid "Utarmbung"
msgstr ""
msgstr "Utarmbung"

#. name for omp
msgid "Manipuri; Old"
@ -20344,7 +20344,7 @@ msgstr "Pear"

#. name for pcc
msgid "Bouyei"
msgstr ""
msgstr "Buyí"

#. name for pcd
msgid "Picard"
@ -20456,11 +20456,11 @@ msgstr "Pengo"

#. name for peh
msgid "Bonan"
msgstr ""
msgstr "Bonan"

#. name for pei
msgid "Chichimeca-Jonaz"
msgstr ""
msgstr "Chichimec"

#. name for pej
msgid "Pomo; Northern"
@ -20484,7 +20484,7 @@ msgstr "Persa Antic"

#. name for pep
msgid "Kunja"
msgstr ""
msgstr "Kunja"

#. name for peq
msgid "Pomo; Southern"
@ -20536,7 +20536,7 @@ msgstr "Pagi"

#. name for pgk
msgid "Rerep"
msgstr ""
msgstr "Rerep"

#. name for pgl
msgid "Irish; Primitive"
@ -20624,7 +20624,7 @@ msgstr "Pima Baix"

#. name for pib
msgid "Yine"
msgstr ""
msgstr "Yine"

#. name for pic
msgid "Pinji"
@ -20660,7 +20660,7 @@ msgstr "Pijao"

#. name for pil
msgid "Yom"
msgstr ""
msgstr "Yom"

#. name for pim
msgid "Powhatan"
@ -20760,7 +20760,7 @@ msgstr "Llenguatge de signes pakistaní"

#. name for pkt
msgid "Maleng"
msgstr ""
msgstr "Maleng"

#. name for pku
msgid "Paku"
@ -20768,7 +20768,7 @@ msgstr "Paku"

#. name for pla
msgid "Miani"
msgstr ""
msgstr "Miani"

#. name for plb
msgid "Polonombauk"
@ -20804,7 +20804,7 @@ msgstr "Polci"

#. name for plk
msgid "Shina; Kohistani"
msgstr ""
msgstr "Shina; Kohistani"

#. name for pll
msgid "Palaung; Shwe"
@ -20852,7 +20852,7 @@ msgstr "Palawà; Brooke"

#. name for ply
msgid "Bolyu"
msgstr ""
msgstr "Bolyu"

#. name for plz
msgid "Paluan"
@ -20896,7 +20896,7 @@ msgstr "Algonquí Carolina"

#. name for pml
msgid "Lingua Franca"
msgstr ""
msgstr "Aljamia"

#. name for pmm
msgid "Pomo"
@ -20924,7 +20924,7 @@ msgstr "Piemontès"

#. name for pmt
msgid "Tuamotuan"
msgstr ""
msgstr "Tuamotu"

#. name for pmu
msgid "Panjabi; Mirpur"
@ -20972,7 +20972,7 @@ msgstr "Penrhyn"

#. name for pni
msgid "Aoheng"
msgstr ""
msgstr "Aoheng"

#. name for pnm
msgid "Punan Batu 1"
@ -21008,7 +21008,7 @@ msgstr "Pontic"

#. name for pnu
msgid "Bunu; Jiongnai"
msgstr ""
msgstr "Bunu; Jiongnai"

#. name for pnv
msgid "Pinigura"
@ -21100,7 +21100,7 @@ msgstr "Potavatomi"

#. name for pov
msgid "Crioulo; Upper Guinea"
msgstr ""
msgstr "Crioll guineà"

#. name for pow
msgid "Popoloca; San Felipe Otlaltepec"
@ -21128,7 +21128,7 @@ msgstr "Paipai"

#. name for ppk
msgid "Uma"
msgstr ""
msgstr "Uma"

#. name for ppl
msgid "Pipil"
@ -21144,7 +21144,7 @@ msgstr "Papapana"

#. name for ppo
msgid "Folopa"
msgstr ""
msgstr "Folopa"

#. name for ppp
msgid "Pelende"
@ -21180,7 +21180,7 @@ msgstr "Malecite-Passamaquoddy"

#. name for prb
msgid "Lua'"
msgstr ""
msgstr "Lua"

#. name for prc
msgid "Parachi"
@ -21220,7 +21220,7 @@ msgstr "Llenguatge de signes peruà"

#. name for prm
msgid "Kibiri"
msgstr ""
msgstr "Kibiri"

#. name for prn
msgid "Prasuni"
@ -21272,7 +21272,7 @@ msgstr "Llenguatge de signes de Providencia"

#. name for psa
msgid "Awyu; Asue"
msgstr ""
msgstr "Awyu; Asue"

#. name for psc
msgid "Persian Sign Language"
@ -21328,7 +21328,7 @@ msgstr "Llenguatge de signes portuguès"

#. name for pss
msgid "Kaulong"
msgstr ""
msgstr "Kaulong"

#. name for pst
msgid "Pashto; Central"
@ -21376,11 +21376,11 @@ msgstr "Pìamatsina"

#. name for ptt
msgid "Enrekang"
msgstr ""
msgstr "Enrekang"

#. name for ptu
msgid "Bambam"
msgstr ""
msgstr "Bambam"

#. name for ptv
msgid "Port Vato"
@ -29584,7 +29584,7 @@ msgstr ""

#. name for yir
msgid "Awyu; North"
msgstr ""
msgstr "Awyu; Septentrional"

#. name for yis
msgid "Yis"

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 25)
numeric_version = (0, 9, 26)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
from calibre.ebooks.metadata.sources.ozon import Ozon
# from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]

# }}}

@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
    config['enabled_plugins'] = ep

default_disabled_plugins = set([
    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
])

def is_disabled(plugin):

@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
# Google covers are often poor quality (scans/errors) but they have high
# resolution, so they trump covers from better sources. So make sure they
# are only used if no other covers are found.
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}

def create_log(ostream=None):
    from calibre.utils.logging import ThreadSafeLog, FileStream
@ -429,6 +429,40 @@ class Source(Plugin):
        mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)

    def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
        if not urls:
            log('No images found for, title: %r and authors: %r'%(title, authors))
            return
        from threading import Thread
        import time
        if prefs_name:
            urls = urls[:self.prefs[prefs_name]]
        if get_best_cover:
            urls = urls[:1]
        log('Downloading %d covers'%len(urls))
        workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
        for w in workers:
            w.daemon = True
            w.start()
        alive = True
        start_time = time.time()
        while alive and not abort.is_set() and time.time() - start_time < timeout:
            alive = False
            for w in workers:
                if w.is_alive():
                    alive = True
                    break
            abort.wait(0.1)

    def download_image(self, url, timeout, log, result_queue):
        try:
            ans = self.browser.open_novisit(url, timeout=timeout).read()
            result_queue.put((self, ans))
            log('Downloaded cover from: %s'%url)
        except Exception:
            self.log.exception('Failed to download cover from: %r'%url)


    # }}}

    # Metadata API {{{

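The new Source.download_multiple_covers helper factors out the fan-out every multi-cover plugin needs: cap the URL list via the plugin's max_covers option, truncate to one URL when only the best cover is wanted, spawn a daemon thread per image and poll with the abort event until the workers finish or the timeout expires. A plugin with can_get_multiple_covers = True now only has to collect URLs; a sketch of a minimal caller (plugin name and URL list are hypothetical):

from calibre.ebooks.metadata.sources.base import Source, Option

class ExampleCovers(Source):  # hypothetical plugin, for illustration only
    name = 'Example Covers'
    capabilities = frozenset(['cover'])
    can_get_multiple_covers = True
    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
               _('The maximum number of covers to process from the search result')),)

    def download_cover(self, log, result_queue, abort, title=None, authors=None,
                       identifiers={}, timeout=30, get_best_cover=False):
        urls = ['http://example.com/cover1.jpg']  # placeholder: collect real URLs here
        # Thread management, the max_covers cap and the get_best_cover
        # truncation all happen inside the shared helper:
        self.download_multiple_covers(title, authors, urls, get_best_cover,
                                      timeout, result_queue, abort, log)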
58
src/calibre/ebooks/metadata/sources/big_book_search.py
Normal file
@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.metadata.sources.base import Source, Option

def get_urls(br, tokens):
    from urllib import quote_plus
    from mechanize import Request
    from lxml import html
    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
    q = b'+'.join(escaped)
    url = 'http://bigbooksearch.com/books/'+q
    br.open(url).read()
    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    req.add_header('Referer', url)
    raw = br.open(req).read()
    root = html.fromstring(raw.decode('utf-8'))
    urls = [i.get('src') for i in root.xpath('//img[@src]')]
    return urls

class BigBookSearch(Source):

    name = 'Big Book Search'
    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
    capabilities = frozenset(['cover'])
    config_help_message = _('Configure the Big Book Search plugin')
    can_get_multiple_covers = True
    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
               _('The maximum number of covers to process from the search result')),
    )
    supports_gzip_transfer_encoding = True

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        if not title:
            return
        br = self.browser
        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
        urls = get_urls(br, tokens)
        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

def test():
    from calibre import browser
    import pprint
    br = browser()
    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
    pprint.pprint(urls)

if __name__ == '__main__':
    test()

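get_urls drives bigbooksearch.com the way the site's own JavaScript does: fetch the search page first (to establish the session), then replay the AJAX query with the X-Requested-With and Referer headers and scrape the <img> sources out of the returned fragment. The bundled test() shows the intended use; the same check can be run by hand, e.g. (a sketch, assuming a calibre source checkout on the import path):

from calibre import browser
from calibre.ebooks.metadata.sources.big_book_search import get_urls

br = browser()
for url in get_urls(br, ['consider', 'phlebas', 'banks']):
    print(url)  # one image URL per cover candidate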
@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to

class Worker(Thread):

    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
        Thread.__init__(self)
        self.daemon = True

        self.plugin = plugin
        self.abort = abort
        self.get_best_cover = get_best_cover
        self.buf = BytesIO()
        self.log = create_log(self.buf)
        self.title, self.authors, self.identifiers = (title, authors,
@ -37,7 +38,7 @@ class Worker(Thread):
        try:
            if self.plugin.can_get_multiple_covers:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        title=self.title, authors=self.authors, get_best_cover=True,
                        title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
                        identifiers=self.identifiers, timeout=self.timeout)
            else:
                self.plugin.download_cover(self.log, self.rq, self.abort,
@ -72,7 +73,7 @@ def process_result(log, result):
    return (plugin, width, height, fmt, data)

def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
        title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
    '''
    Run the cover download, putting results into the queue :param:`results`.

@ -89,7 +90,7 @@ def run_download(log, results, abort,
    plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
            in plugins]
    for w in workers:
        w.start()
@ -163,7 +164,7 @@ def download_cover(log,
    abort = Event()

    run_download(log, rq, abort, title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)
            identifiers=identifiers, timeout=timeout, get_best_cover=True)

    results = []

@ -39,39 +39,11 @@ class GoogleImages(Source):
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        if not title:
            return
        from threading import Thread
        import time
        timeout = max(60, timeout) # Needs at least a minute
        title = ' '.join(self.get_title_tokens(title))
        author = ' '.join(self.get_author_tokens(authors))
        urls = self.get_image_urls(title, author, log, abort, timeout)
        if not urls:
            log('No images found in Google for, title: %r and authors: %r'%(title, author))
            return
        urls = urls[:self.prefs['max_covers']]
        if get_best_cover:
            urls = urls[:1]
        workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
        for w in workers:
            w.daemon = True
            w.start()
        alive = True
        start_time = time.time()
        while alive and not abort.is_set() and time.time() - start_time < timeout:
            alive = False
            for w in workers:
                if w.is_alive():
                    alive = True
                    break
            abort.wait(0.1)

    def download_image(self, url, timeout, log, result_queue):
        try:
            ans = self.browser.open_novisit(url, timeout=timeout).read()
            result_queue.put((self, ans))
            log('Downloaded cover from: %s'%url)
        except Exception:
            self.log.exception('Failed to download cover from: %r'%url)
        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

    def get_image_urls(self, title, author, log, abort, timeout):
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError

@ -262,6 +262,35 @@ def from_links(container):
            toc.remove(child)
    return toc

def find_text(node):
    LIMIT = 200
    pat = re.compile(r'\s+')
    for child in node:
        if isinstance(child, etree._Element):
            text = xml2text(child).strip()
            text = pat.sub(' ', text)
            if len(text) < 1:
                continue
            if len(text) > LIMIT:
                # Look for less text in a child of this node, recursively
                ntext = find_text(child)
                return ntext or (text[:LIMIT] + '...')
            else:
                return text

def from_files(container):
    toc = TOC()
    for spinepath in container.spine_items:
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        body = XPath('//h:body')(root)
        if not body:
            continue
        text = find_text(body[0])
        if text:
            toc.add(text, name)
    return toc

def add_id(container, name, loc):
    root = container.parsed(name)
    body = root.xpath('//*[local-name()="body"]')[0]

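from_files backs the ToC-editor feature added in this release: one entry per spine item, titled with the first reasonable run of text that find_text can extract (over-long blocks recurse into children and are truncated to 200 characters). A sketch of driving it directly (the book path is illustrative, and the title/dest attribute names follow the TOC nodes used elsewhere in this module):

from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.toc import from_files

container = get_container('book.epub')   # illustrative path
toc = from_files(container)
for entry in toc:                         # one entry per spine file with usable text
    print(entry.title, '->', entry.dest)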
@ -333,8 +333,8 @@ class OEBReader(object):
        guide = self.oeb.guide
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            href = elem.get('href')
            path = urlnormalize(urldefrag(href)[0])
            ref_href = elem.get('href')
            path = urlnormalize(urldefrag(ref_href)[0])
            if path not in manifest.hrefs:
                corrected_href = None
                for href in manifest.hrefs:
@ -342,12 +342,12 @@ class OEBReader(object):
                        corrected_href = href
                        break
                if corrected_href is None:
                    self.logger.warn(u'Guide reference %r not found' % href)
                    self.logger.warn(u'Guide reference %r not found' % ref_href)
                    continue
                href = corrected_href
                ref_href = corrected_href
            typ = elem.get('type')
            if typ not in guide:
                guide.add(typ, elem.get('title'), href)
                guide.add(typ, elem.get('title'), ref_href)

    def _find_ncx(self, opf):
        result = xpath(opf, '/o2:package/o2:spine/@toc')

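The rename above fixes a classic variable-shadowing bug: the original code used href both for the guide reference being processed and as the loop variable scanning manifest.hrefs, so by the time the 'Guide reference %r not found' warning fired, href held the last manifest entry rather than the missing reference. Distilled to a few lines (names are illustrative):

href = 'missing-chapter.html'                  # the guide reference being checked
for href in ('text/a.html', 'text/b.html'):    # reusing the name clobbers it
    pass
print('Guide reference %r not found' % href)   # reports 'text/b.html', not the real one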
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,26 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonDEKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale0a-21'}
    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    author_article = 'von '

    and_word = ' und '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonDEKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale0a-21'}
    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'von '

    and_word = ' und '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonESKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    aff_id = {'tag': 'charhale09-21'}
    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
                          '&linkCode=ur2&camp=3626&creative=24790')
    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' y '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonESKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale09-21'}
    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
                          '&linkCode=ur2&camp=3626&creative=24790')
    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' y '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,22 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonFRKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale-21'}
    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    author_article = 'de '

    and_word = ' et '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonFRKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale-21'}
    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' et '

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonITKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    aff_id = {'tag': 'httpcharles07-21'}
    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=3370&creative=23322')
    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'di '

    and_word = ' e '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin):

    def get_details(self, search_result, timeout):
        pass

class AmazonITKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'httpcharles07-21'}
    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=3370&creative=23322')
    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'di '

    and_word = ' e '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -19,11 +19,28 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult


# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.

class AmazonEUBase(StorePlugin):
class AmazonUKKindleStore(StorePlugin):
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'by '

    and_word = ' and '

    # This code is copy/pasted from here to the other amazon EU plugins. Do not
    # modify it in any other amazon EU plugin. Be sure to paste it into all
    # other amazon EU plugins when modified.

    # ---- Copy from here to end

    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonUKKindleStore(AmazonEUBase):
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'by '

    and_word = ' and '

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 1 # Needed for dynamic plugin loading
store_version = 2 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -24,8 +24,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WaterstonesUKStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://clkuk.tradedoubler.com/click?p=51196&a=1951604&g=19333484'
        url_details = 'http://clkuk.tradedoubler.com/click?p(51196)a(1951604)g(16460516)url({0})'
        url = 'http://www.awin1.com/awclick.php?mid=3787&id=120917'
        url_details = 'http://www.awin1.com/cread.php?awinmid=3787&awinaffid=120917&clickref=&p={0}'

        if external or self.config.get('open_external', False):
            if detail_item:

@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,

from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
from calibre.ebooks.oeb.polish.toc import (
    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links)
    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
from calibre.gui2 import Application, error_dialog, gprefs
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.toc.location import ItemEdit
@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
    go_to_root = pyqtSignal()
    create_from_xpath = pyqtSignal(object)
    create_from_links = pyqtSignal()
    create_from_files = pyqtSignal()
    flatten_toc = pyqtSignal()

    def __init__(self, parent):
@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
            )))
        l.addWidget(b)

        self.cfb = b = QPushButton(_('Generate ToC from &files'))
        b.clicked.connect(self.create_from_files)
        b.setToolTip(textwrap.fill(_(
            'Generate a Table of Contents from individual files in the book.'
            ' Each entry in the ToC will point to the start of the file, the'
            ' text of the entry will be the "first line" of text from the file.'
        )))
        l.addWidget(b)

        self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
        b.clicked.connect(self.create_from_user_xpath)
        b.setToolTip(textwrap.fill(_(
@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{
        i.add_new_item.connect(self.add_new_item)
        i.create_from_xpath.connect(self.create_from_xpath)
        i.create_from_links.connect(self.create_from_links)
        i.create_from_files.connect(self.create_from_files)
        i.flatten_item.connect(self.flatten_item)
        i.flatten_toc.connect(self.flatten_toc)
        i.go_to_root.connect(self.go_to_root)
@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{
            _('No links were found that could be added to the Table of Contents.'), show=True)
        self.insert_toc_fragment(toc)

    def create_from_files(self):
        toc = from_files(self.ebook)
        if len(toc) == 0:
            return error_dialog(self, _('No items found'),
                _('No files were found that could be added to the Table of Contents.'), show=True)
        self.insert_toc_fragment(toc)


# }}}

class TOCEditor(QDialog): # {{{

@ -54,7 +54,7 @@ def get_parser(usage):
def get_db(dbpath, options):
    global do_notify
    if options.library_path is not None:
        dbpath = options.library_path
        dbpath = os.path.expanduser(options.library_path)
    if dbpath is None:
        raise ValueError('No saved library path, either run the GUI or use the'
            ' --with-library option')
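The one-line change above runs --library-path through os.path.expanduser, so values like '~/Books' (quoted in a shell, or passed from a script where the shell never expands the tilde) resolve to a real path before the database is opened:

import os.path

# Before this change '~/Books' was used verbatim and the library open failed.
print(os.path.expanduser('~/Books'))   # e.g. '/home/user/Books'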