Merge from trunk

Charles Haley 2011-06-27 15:51:31 +01:00
commit 4bf6fe6eb4
76 changed files with 2247 additions and 694 deletions


@@ -1,32 +1,41 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
www.ft.com
'''
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe):
title = u'Financial Times'
__author__ = 'Darko Miletic and Sujata Raman'
description = ('Financial world news. Available after 5AM '
'GMT, daily.')
class FinancialTimes_rss(BasicNewsRecipe):
title = 'Financial Times'
__author__ = 'Darko Miletic'
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, World'
oldest_article = 2
language = 'en'
max_articles_per_feed = 100
max_articles_per_feed = 250
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
simultaneous_downloads= 1
delay = 1
encoding = 'utf8'
publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
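# Note (editorial comment, not in the original recipe): linearize_tables
# asks the conversion pipeline to unwrap table-based page layout into
# linear paragraphs, which FT's article markup otherwise relies on.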
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(name='loginForm')
@@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
]
remove_attributes = ['width','height','lang']
extra_css = '''
body{font-family:Arial,Helvetica,sans-serif;}
h2(font-size:large;}
.ft-story-header(font-size:xx-small;}
.ft-story-body(font-size:small;}
a{color:#003399;}
extra_css = """
body{font-family: Georgia,Times,"Times New Roman",serif}
h2{font-size:large}
.ft-story-header{font-size: x-small}
.container{font-size:x-small;}
h3{font-size:x-small;color:#003399;}
'''
.copyright{font-size: x-small}
img{margin-top: 0.8em; display: block}
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' )
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
]
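# Each (section title, RSS url) pair above becomes one section of the
# generated e-book; BasicNewsRecipe fetches up to max_articles_per_feed
# articles from each feed.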
def preprocess_html(self, soup):
content_type = soup.find('meta', {'http-equiv':'Content-Type'})
if content_type:
content_type['content'] = 'text/html; charset=utf-8'
items = ['promo-box','promo-title',
'promo-headline','promo-image',
'promo-intro','promo-link','subhead']
for item in items:
for it in soup.findAll(item):
it.name = 'div'
it.attrs = []
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def get_cover_url(self):
cdate = datetime.date.today()
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')
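# A worked example (date assumed for illustration): for Sunday 2011-06-26
# the recipe falls back to Saturday 2011-06-25, and strftime('%d%m%y')
# yields '250611', so the cover is fetched from
# http://specials.ft.com/vtf_pdf/250611_FRONT1_USA.pdf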


@@ -3,6 +3,8 @@ __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.ft.com/uk-edition
'''
import datetime
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@@ -20,7 +22,6 @@ class FinancialTimes(BasicNewsRecipe):
needs_subscription = True
encoding = 'utf8'
publication_type = 'newspaper'
cover_url = strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com/uk-edition'
@@ -128,3 +129,10 @@ class FinancialTimes(BasicNewsRecipe):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def get_cover_url(self):
cdate = datetime.date.today()
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')

Binary file not shown (image, 1.4 KiB).


@@ -1,17 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
@@ -34,29 +40,17 @@ Change Log:
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
@@ -65,11 +59,22 @@ class MPHKRecipe(BasicNewsRecipe):
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='table')] # for content fetched from life.mingpao.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
@@ -84,6 +89,55 @@ class MPHKRecipe(BasicNewsRecipe):
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
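# e.g. these regexps promote '<h5>headline</h5>' markup to '<h1>headline</h1>'
# and drop the self-link paragraph on entertainment pages before parsing.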
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
@@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
if __Region__ == 'Hong Kong':
# convert UTC to local HK time - by 4:30am HKT, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 4:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 4:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(4.5/24)
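# A worked example (time assumed for illustration): at 19:00 UTC on
# 2011-06-26 the Hong Kong branch gives 19:00 + 8:00 - 4:30 = 22:30 local
# on 2011-06-26, so get_fetchdate() keeps returning the previous day's
# date until 4:30am HKT, when the new issue is complete.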
return dt_local
def get_fetchdate(self):
@@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
# dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
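# A worked example (date assumed for illustration): with
# get_fetchdate() == '20110627' the Hong Kong cover URL becomes
# http://news.mingpao.com/20110627/20110627_27gacov.jpg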
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
@@ -153,6 +219,7 @@ class MPHKRecipe(BasicNewsRecipe):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
@@ -222,7 +289,34 @@ class MPHKRecipe(BasicNewsRecipe):
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
@@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
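# e.g. a hypothetical href of '../../../News/20110627/VAA1.htm' is rebased
# to 'http://www.mingpaovan.com//News/20110627/VAA1.htm'; the doubled
# slash (baseUrl already ends in '/') appears to be tolerated by the server.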
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
@@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title


@@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
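# A compact sketch of the same first-digit search attempted above, left
# commented out because the trick itself is disabled:
# m = re.search(r'\d', url)          # re is imported at the top
# minIdx = m.start() if m else 10000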
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local HK time - by 4:30am HKT, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 4:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 4:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(4.5/24)
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
def parse_ent_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_col_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True): # strip leftover fixed table widths
del item['width']
for item in soup.findAll(align='absmiddle'): # strip legacy align="absmiddle"
del item['align']
return soup
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
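# e.g. with __MakePeriodical__ == False on 2011-06-27 the title becomes
# 'Ming Pao - Toronto 2011-06-27', so each build is filed as a separate
# book rather than stacking under one periodical title.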
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)


@@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local HK time - by 4:30am HKT, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 4:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 4:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(4.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(4.5/24)
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and 'Redirect' not in url:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
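
A note on the reverse/de-duplicate/reverse idiom used by the parsers in this recipe: the index pages list newest items first, the links are walked in reverse so that for a duplicated URL the occurrence lowest on the page wins, and the list is flipped back into page order at the end. A standalone sketch (sample links hypothetical):

    links = ['c.htm', 'b.htm', 'a.htm', 'b.htm']  # page order
    links.reverse()
    seen, articles = set(), []
    for href in links:
        if href not in seen:
            seen.add(href)
            articles.append({'title': href, 'url': href, 'description': '', 'date': ''})
    articles.reverse()  # restore page order
    print([a['url'] for a in articles])  # -> ['c.htm', 'a.htm', 'b.htm']
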
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and ('.txt' in url) and (keystr in url):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
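
The life.mingpao.com parsers share one more idiom: links are accepted only if they look like .txt articles and contain the section key, and are then pointed at the printer-friendly page. A minimal sketch with hypothetical hrefs:

    hrefs = ['dailynews3.cfm?File=20110627/nalmr/a1.txt',
             'dailynews3.cfm?File=20110627/other/x.htm']
    keystr, seen, articles = 'nalmr', set(), []
    for href in hrefs:
        url = 'http://life.mingpao.com/cfm/' + href
        if url not in seen and '.txt' in url and keystr in url:
            seen.add(url)
            # swap in the printer-friendly page
            articles.append(url.replace('dailynews3.cfm', 'dailynews3a.cfm'))
    print(articles)  # one URL, rewritten to .../dailynews3a.cfm?File=.../nalmr/a1.txt
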
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and ('.txt' in url) and ('nal' in url):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and ('txt' in url) and ('nal' in url):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
def parse_ent_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and ('.txt' in url) and ('star' in url):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_col_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and ('.txt' in url) and ('ncl' in url):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
for item in soup.findAll(align='absmiddle'):
del item['align']
return soup
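
A quick way to check the attribute stripping above (recipes of this vintage use calibre's bundled BeautifulSoup 3; the sample markup is made up):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup('<p style="color:red" width="80" align="absmiddle">text</p>')
    for attr in ('style', 'width', 'align'):
        for item in soup.findAll(**{attr: True}):
            del item[attr]
    print(soup)  # -> <p>text</p>
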
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if not __MakePeriodical__:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
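
The play-order bookkeeping in create_opf, in isolation: every new spine entry gets the next NCX playOrder, while an entry seen before reuses its original number. A standalone sketch:

    play_order_map = {}
    play_order_counter = 0
    def play_order(entry):
        global play_order_counter
        po = play_order_map.get(entry)
        if po is None:
            play_order_counter += 1
            po = play_order_map[entry] = play_order_counter
        return po
    for entry in ('feed_0/index.html', 'feed_0/article_0/index.html', 'feed_0/index.html'):
        print(play_order(entry))  # -> 1, 2, then 1 again (reused)
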


@ -2,6 +2,7 @@
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe
import re
@ -30,15 +31,17 @@ class Wprost(BasicNewsRecipe):
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
(re.compile(r'display: block;'), lambda match: '')]
(re.compile(r'display: block;'), lambda match: ''),
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<tr>'), lambda match: ''),
(re.compile(r'\<td .*?\>'), lambda match: '')]
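
What the four new regexps buy: calibre applies each (pattern, replacement-function) pair to the raw page before parsing, so the stray table markup is gone by the time the HTML is parsed. A standalone sketch (sample HTML invented):

    import re
    regexps = [
        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
        (re.compile(r'\<tr>'), lambda match: ''),
        (re.compile(r'\<td .*?\>'), lambda match: ''),
    ]
    raw = '<table class="x"><tr><td class="y">text</td><tr></table>'
    for pat, func in regexps:
        raw = pat.sub(func, raw)
    print(raw)  # -> text</td></table>  (opening tags stripped; closers survive)
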
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
extra_css = '''
.div-header {font-size: x-small; font-weight: bold}
'''
@ -88,4 +91,3 @@ class Wprost(BasicNewsRecipe):
'description' : ''
}


@ -1148,7 +1148,7 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle'
description = u'Kindle books from Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_plugin:AmazonKindleStore'
headquarters = 'US'
formats = ['KINDLE']
@ -1158,7 +1158,7 @@ class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
author = 'Charles Haley'
description = u'Kindle Bücher von Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonDEKindleStore'
headquarters = 'DE'
formats = ['KINDLE']
@ -1168,7 +1168,7 @@ class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
author = 'Charles Haley'
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonUKKindleStore'
headquarters = 'UK'
formats = ['KINDLE']
@ -1177,7 +1177,7 @@ class StoreAmazonUKKindleStore(StoreBase):
class StoreArchiveOrgStore(StoreBase):
name = 'Archive.org'
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
actual_plugin = 'calibre.gui2.store.stores.archive_org_plugin:ArchiveOrgStore'
drm_free_only = True
headquarters = 'US'
@ -1186,7 +1186,7 @@ class StoreArchiveOrgStore(StoreBase):
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'
drm_free_only = True
headquarters = 'US'
@ -1195,7 +1195,7 @@ class StoreBaenWebScriptionStore(StoreBase):
class StoreBNStore(StoreBase):
name = 'Barnes and Noble'
description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
actual_plugin = 'calibre.gui2.store.stores.bn_plugin:BNStore'
headquarters = 'US'
formats = ['NOOK']
@ -1205,7 +1205,7 @@ class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE'
author = 'Charles Haley'
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
actual_plugin = 'calibre.gui2.store.stores.beam_ebooks_de_plugin:BeamEBooksDEStore'
drm_free_only = True
headquarters = 'DE'
@ -1215,7 +1215,7 @@ class StoreBeamEBooksDEStore(StoreBase):
class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books'
description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
actual_plugin = 'calibre.gui2.store.stores.bewrite_plugin:BeWriteStore'
drm_free_only = True
headquarters = 'US'
@ -1224,7 +1224,7 @@ class StoreBeWriteStore(StoreBase):
class StoreDieselEbooksStore(StoreBase):
name = 'Diesel eBooks'
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
actual_plugin = 'calibre.gui2.store.stores.diesel_ebooks_plugin:DieselEbooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF']
@ -1233,7 +1233,7 @@ class StoreDieselEbooksStore(StoreBase):
class StoreEbookscomStore(StoreBase):
name = 'eBooks.com'
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
actual_plugin = 'calibre.gui2.store.stores.ebooks_com_plugin:EbookscomStore'
headquarters = 'US'
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
@ -1243,7 +1243,7 @@ class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE'
author = 'Charles Haley'
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
drm_free_only = True
headquarters = 'DE'
@ -1254,7 +1254,7 @@ class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK'
author = u'Charles Haley'
description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
actual_plugin = 'calibre.gui2.store.ebookshoppe_uk_plugin:EBookShoppeUKStore'
actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1263,7 +1263,7 @@ class StoreEBookShoppeUKStore(StoreBase):
class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin'
description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
actual_plugin = 'calibre.gui2.store.stores.eharlequin_plugin:EHarlequinStore'
headquarters = 'CA'
formats = ['EPUB', 'PDF']
@ -1272,7 +1272,7 @@ class StoreEHarlequinStore(StoreBase):
class StoreEpubBudStore(StoreBase):
name = 'ePub Bud'
description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks". A not-for-profit organization devoted to bringing self-published children\'s books to the world.'
actual_plugin = 'calibre.gui2.store.epubbud_plugin:EpubBudStore'
actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'
drm_free_only = True
headquarters = 'US'
@ -1281,7 +1281,7 @@ class StoreEpubBudStore(StoreBase):
class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks'
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
actual_plugin = 'calibre.gui2.store.stores.feedbooks_plugin:FeedbooksStore'
headquarters = 'FR'
formats = ['EPUB', 'MOBI', 'PDF']
@ -1290,7 +1290,7 @@ class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
author = 'Charles Haley'
description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
actual_plugin = 'calibre.gui2.store.stores.foyles_uk_plugin:FoylesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1300,7 +1300,7 @@ class StoreGandalfStore(StoreBase):
name = 'Gandalf'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa Gandalf.'
actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'
actual_plugin = 'calibre.gui2.store.stores.gandalf_plugin:GandalfStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
@ -1308,7 +1308,7 @@ class StoreGandalfStore(StoreBase):
class StoreGoogleBooksStore(StoreBase):
name = 'Google Books'
description = u'Google Books'
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
actual_plugin = 'calibre.gui2.store.stores.google_books_plugin:GoogleBooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF', 'TXT']
@ -1316,7 +1316,7 @@ class StoreGoogleBooksStore(StoreBase):
class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
actual_plugin = 'calibre.gui2.store.stores.gutenberg_plugin:GutenbergStore'
drm_free_only = True
headquarters = 'US'
@ -1325,7 +1325,7 @@ class StoreGutenbergStore(StoreBase):
class StoreKoboStore(StoreBase):
name = 'Kobo'
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
actual_plugin = 'calibre.gui2.store.stores.kobo_plugin:KoboStore'
headquarters = 'CA'
formats = ['EPUB']
@ -1335,7 +1335,7 @@ class StoreLegimiStore(StoreBase):
name = 'Legimi'
author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
actual_plugin = 'calibre.gui2.store.stores.legimi_plugin:LegimiStore'
headquarters = 'PL'
formats = ['EPUB']
@ -1344,7 +1344,7 @@ class StoreLibreDEStore(StoreBase):
name = 'Libri DE'
author = 'Charles Haley'
description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.'
actual_plugin = 'calibre.gui2.store.libri_de_plugin:LibreDEStore'
actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'
headquarters = 'DE'
formats = ['EPUB', 'PDF']
@ -1353,7 +1353,7 @@ class StoreLibreDEStore(StoreBase):
class StoreManyBooksStore(StoreBase):
name = 'ManyBooks'
description = u'Public domain and creative commons works from many sources.'
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
actual_plugin = 'calibre.gui2.store.stores.manybooks_plugin:ManyBooksStore'
drm_free_only = True
headquarters = 'US'
@ -1362,7 +1362,7 @@ class StoreManyBooksStore(StoreBase):
class StoreMobileReadStore(StoreBase):
name = 'MobileRead'
description = u'Ebooks handcrafted with the utmost care.'
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
actual_plugin = 'calibre.gui2.store.stores.mobileread.mobileread_plugin:MobileReadStore'
drm_free_only = True
headquarters = 'CH'
@ -1372,7 +1372,7 @@ class StoreNextoStore(StoreBase):
name = 'Nexto'
author = u'Tomasz Długosz'
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
actual_plugin = 'calibre.gui2.store.stores.nexto_plugin:NextoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
@ -1381,7 +1381,7 @@ class StoreNextoStore(StoreBase):
class StoreOpenBooksStore(StoreBase):
name = 'Open Books'
description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.'
actual_plugin = 'calibre.gui2.store.open_books_plugin:OpenBooksStore'
actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'
drm_free_only = True
headquarters = 'US'
@ -1389,7 +1389,7 @@ class StoreOpenBooksStore(StoreBase):
class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
actual_plugin = 'calibre.gui2.store.stores.open_library_plugin:OpenLibraryStore'
drm_free_only = True
headquarters = 'US'
@ -1398,7 +1398,7 @@ class StoreOpenLibraryStore(StoreBase):
class StoreOReillyStore(StoreBase):
name = 'OReilly'
description = u'Programming and tech ebooks from OReilly.'
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
actual_plugin = 'calibre.gui2.store.stores.oreilly_plugin:OReillyStore'
drm_free_only = True
headquarters = 'US'
@ -1407,7 +1407,7 @@ class StoreOReillyStore(StoreBase):
class StorePragmaticBookshelfStore(StoreBase):
name = 'Pragmatic Bookshelf'
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books available as ebooks.'
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
actual_plugin = 'calibre.gui2.store.stores.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
drm_free_only = True
headquarters = 'US'
@ -1416,7 +1416,7 @@ class StorePragmaticBookshelfStore(StoreBase):
class StoreSmashwordsStore(StoreBase):
name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
actual_plugin = 'calibre.gui2.store.stores.smashwords_plugin:SmashwordsStore'
drm_free_only = True
headquarters = 'US'
@ -1427,7 +1427,7 @@ class StoreVirtualoStore(StoreBase):
name = 'Virtualo'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.'
actual_plugin = 'calibre.gui2.store.virtualo_plugin:VirtualoStore'
actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
@ -1436,7 +1436,7 @@ class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
author = 'Charles Haley'
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
actual_plugin = 'calibre.gui2.store.stores.waterstones_uk_plugin:WaterstonesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1444,7 +1444,7 @@ class StoreWaterstonesUKStore(StoreBase):
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
actual_plugin = 'calibre.gui2.store.stores.weightless_books_plugin:WeightlessBooksStore'
drm_free_only = True
headquarters = 'US'
@ -1454,7 +1454,7 @@ class StoreWHSmithUKStore(StoreBase):
name = 'WH Smith UK'
author = 'Charles Haley'
description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games"
actual_plugin = 'calibre.gui2.store.whsmith_uk_plugin:WHSmithUKStore'
actual_plugin = 'calibre.gui2.store.stores.whsmith_uk_plugin:WHSmithUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1462,7 +1462,7 @@ class StoreWHSmithUKStore(StoreBase):
class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books'
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
drm_free_only = True
headquarters = 'UK'
@ -1472,7 +1472,7 @@ class StoreWoblinkStore(StoreBase):
name = 'Woblink'
author = u'Tomasz Długosz'
description = u'Czytanie zdarza się wszędzie!'
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
headquarters = 'PL'
formats = ['EPUB']
@ -1481,7 +1481,7 @@ class StoreZixoStore(StoreBase):
name = 'Zixo'
author = u'Tomasz Długosz'
description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore'
actual_plugin = 'calibre.gui2.store.stores.zixo_plugin:ZixoStore'
headquarters = 'PL'
formats = ['PDF', 'ZIXO']


@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker'
__docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, re, sys, tempfile, time
import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, DEBUG
from calibre import fit_image, confirm_config_name
from calibre.constants import isosx, iswindows
@ -119,11 +119,17 @@ class DriverBase(DeviceConfig, DevicePlugin):
'iBooks Category'),
_('Cache covers from iTunes/iBooks') +
':::' +
_('Enable to cache and display covers from iTunes/iBooks')
_('Enable to cache and display covers from iTunes/iBooks'),
_("'Copy files to iTunes Media folder" u"\u2026" "' is enabled in iTunes Preferences|Advanced") +
':::' +
_("<p>This setting should match your iTunes <i>Preferences</i>|<i>Advanced</i> setting.</p>"
"<p>Disabling will store copies of books transferred to iTunes in your calibre configuration directory.</p>"
"<p>Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.</p>")
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
True,
False,
]
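
For orientation, the extra-customization pattern this hunk extends: option texts and defaults are parallel lists, and the symbolic indexes defined below pick the saved values back out. A minimal sketch:

    EXTRA_CUSTOMIZATION_DEFAULT = [True, True, False]
    USE_SERIES_AS_CATEGORY, CACHE_COVERS, USE_ITUNES_STORAGE = 0, 1, 2
    saved = list(EXTRA_CUSTOMIZATION_DEFAULT)  # stands in for the stored settings
    if not saved[USE_ITUNES_STORAGE]:
        print('iTunes does not copy files; calibre keeps private copies itself')
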
@ -193,6 +199,7 @@ class ITUNES(DriverBase):
# EXTRA_CUSTOMIZATION_MESSAGE indexes
USE_SERIES_AS_CATEGORY = 0
CACHE_COVERS = 1
USE_ITUNES_STORAGE = 2
OPEN_FEEDBACK_MESSAGE = _(
'Apple device detected, launching iTunes, please wait ...')
@ -281,6 +288,7 @@ class ITUNES(DriverBase):
description_prefix = "added by calibre"
ejected = False
iTunes= None
iTunes_local_storage = None
library_orphans = None
log = Log()
manual_sync_mode = False
@ -825,7 +833,7 @@ class ITUNES(DriverBase):
# Confirm/create thumbs archive
if not os.path.exists(self.cache_dir):
if DEBUG:
self.log.info(" creating thumb cache '%s'" % self.cache_dir)
self.log.info(" creating thumb cache at '%s'" % self.cache_dir)
os.makedirs(self.cache_dir)
if not os.path.exists(self.archive_path):
@ -837,6 +845,17 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If the 'Copy files to iTunes Media folder' option is disabled in the config options, create/confirm a local iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def remove_books_from_metadata(self, paths, booklists):
'''
Remove books from the metadata list. This function must not communicate
@ -1281,50 +1300,27 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info(" ITUNES._add_new_copy()")
def _save_last_known_iTunes_storage(lb_added):
if isosx:
fp = lb_added.location().path
index = fp.rfind('/Books') + len('/Books')
last_known_iTunes_storage = fp[:index]
elif iswindows:
fp = lb_added.Location
index = fp.rfind('\Books') + len('\Books')
last_known_iTunes_storage = fp[:index]
dynamic['last_known_iTunes_storage'] = last_known_iTunes_storage
self.log.warning(" last_known_iTunes_storage: %s" % last_known_iTunes_storage)
db_added = None
lb_added = None
# If using iTunes_local_storage, copy the file, redirect iTunes to use local copy
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1])
shutil.copyfile(fpath,local_copy)
fpath = local_copy
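# Worked example with hypothetical values: for fpath '/tmp/calibre_x/book.epub'
# and metadata.uuid '1a2b3c', local_copy is '<config_dir>/iTunes storage/1a2b3c.epub'.
# iTunes is then handed this stable private copy, so the temporary download can
# be cleaned up without orphaning the iTunes library entry.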
if self.manual_sync_mode:
'''
This is the unsupported direct-connect mode.
In an attempt to avoid resetting the iTunes library Media folder, don't try to
add the book to iTunes if the last_known_iTunes_storage path is inaccessible.
This means that the path has to be set at least once, probably by using
'Connect to iTunes' and doing a transfer.
Unsupported direct-connect mode.
'''
self.log.warning(" unsupported direct connect mode")
db_added = self._add_device_book(fpath, metadata)
last_known_iTunes_storage = dynamic.get('last_known_iTunes_storage', None)
if last_known_iTunes_storage is not None:
if os.path.exists(last_known_iTunes_storage):
if DEBUG:
self.log.warning(" iTunes storage online, adding to library")
lb_added = self._add_library_book(fpath, metadata)
else:
if DEBUG:
self.log.warning(" iTunes storage not online, can't add to library")
if lb_added:
_save_last_known_iTunes_storage(lb_added)
if not lb_added and DEBUG:
self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title)
else:
lb_added = self._add_library_book(fpath, metadata)
if lb_added:
_save_last_known_iTunes_storage(lb_added)
else:
if not lb_added:
raise UserFeedback("iTunes Media folder inaccessible",
details="Failed to add '%s' to iTunes" % metadata.title,
level=UserFeedback.WARN)
@ -1520,7 +1516,7 @@ class ITUNES(DriverBase):
else:
self.log.error(" book_playlist not found")
if len(dev_books):
if dev_books is not None and len(dev_books):
first_book = dev_books[0]
if False:
self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
@ -1551,7 +1547,7 @@ class ITUNES(DriverBase):
dev_books = pl.Tracks
break
if dev_books.Count:
if dev_books is not None and dev_books.Count:
first_book = dev_books.Item(1)
#if DEBUG:
#self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist))
@ -2526,7 +2522,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else:
# Delete from iTunes Media folder
if os.path.exists(fp):
os.remove(fp)
if DEBUG:
@ -2544,12 +2548,6 @@ class ITUNES(DriverBase):
os.rmdir(author_storage_path)
if DEBUG:
self.log.info(" removing empty author directory")
'''
else:
if DEBUG:
self.log.info(" author_storage_path not empty:")
self.log.info(" %s" % '\n'.join(author_files))
'''
else:
self.log.info(" '%s' does not exist at storage location" % cached_book['title'])
@ -2586,7 +2584,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else:
# Delete from iTunes Media folder
if os.path.exists(fp):
os.remove(fp)
if DEBUG:
@ -3234,6 +3240,17 @@ class ITUNES_ASYNC(ITUNES):
if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If the 'Copy files to iTunes Media folder' option is disabled in the config options, create/confirm a local iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def sync_booklists(self, booklists, end_session=True):
'''
Update metadata on device.


@ -20,11 +20,11 @@ class IRIVER_STORY(USBMS):
FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']
VENDOR_ID = [0x1006]
PRODUCT_ID = [0x4023, 0x4024, 0x4025]
BCD = [0x0323]
PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034]
BCD = [0x0323, 0x0326]
VENDOR_NAME = 'IRIVER'
WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI']
WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI', 'STORY_EB07']
WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']
#OSX_MAIN_MEM = 'Kindle Internal Storage Media'


@ -7,12 +7,13 @@ from urllib import unquote
from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
QByteArray, QTranslator, QCoreApplication, QThread,
QEvent, QTimer, pyqtSignal, QDate, QDesktopServices,
QFileDialog, QFileIconProvider,
QFileDialog, QFileIconProvider, QSettings,
QIcon, QApplication, QDialog, QUrl, QFont)
ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500'
from calibre.constants import islinux, iswindows, isbsd, isfrozen, isosx
from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
config_dir)
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata import MetaInformation
@ -192,6 +193,11 @@ def _config(): # {{{
config = _config()
# }}}
QSettings.setPath(QSettings.IniFormat, QSettings.UserScope, config_dir)
QSettings.setPath(QSettings.IniFormat, QSettings.SystemScope,
config_dir)
QSettings.setDefaultFormat(QSettings.IniFormat)
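
The net effect of the three calls above: from here on, any QSettings object the GUI creates reads and writes INI files under calibre's config_dir instead of the platform registry/plists. A sketch with an illustrative config_dir and hypothetical org/app names:

    from PyQt4.Qt import QSettings
    config_dir = '/home/user/.config/calibre'  # illustrative
    QSettings.setPath(QSettings.IniFormat, QSettings.UserScope, config_dir)
    QSettings.setDefaultFormat(QSettings.IniFormat)
    s = QSettings('calibre', 'viewer')
    print(s.fileName())  # -> /home/user/.config/calibre/calibre/viewer.ini
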
# Turn off DeprecationWarnings in windows GUI
if iswindows:
import warnings


@ -1,89 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class ArchiveOrgStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://www.archive.org/details/texts'
if detail_item:
detail_item = url_slash_cleaner('http://www.archive.org' + detail_item)
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
query = query + ' AND mediatype:texts'
url = 'http://www.archive.org/search.php?query=' + urllib.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//td[@class="hitCell"]'):
if counter <= 0:
break
id = ''.join(data.xpath('.//a[@class="titleLink"]/@href'))
if not id:
continue
title = ''.join(data.xpath('.//a[@class="titleLink"]//text()'))
authors = data.xpath('.//text()')
if not authors:
continue
author = None
for a in authors:
if '-' in a:
author = a.replace('-', ' ').strip()
if author:
break
if not author:
continue
counter -= 1
s = SearchResult()
s.title = title.strip()
s.author = author.strip()
s.price = '$0.00'
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s
def get_details(self, search_result, timeout):
url = url_slash_cleaner('http://www.archive.org' + search_result.detail_item)
br = browser()
with closing(br.open(url, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
search_result.formats = formats.upper()
return True


@ -1,78 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EpubBudStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://epubbud.com/'
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
'''
OPDS based search.
We really should get the catalog from http://pragprog.com/catalog.opds
and look for the application/opensearchdescription+xml entry.
Then get the opensearch description to get the search url and
format. However, we are going to be lazy and hard code it.
'''
url = 'http://www.epubbud.com/search.php?format=atom&q=' + urllib.quote_plus(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# Use html instead of etree as html allows us
# to ignore the namespace easily.
doc = html.fromstring(f.read())
for data in doc.xpath('//entry'):
if counter <= 0:
break
id = ''.join(data.xpath('.//id/text()'))
if not id:
continue
cover_url = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/thumbnail"]/@href'))
title = u''.join(data.xpath('.//title/text()'))
author = u''.join(data.xpath('.//author/name/text()'))
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = '$0.00'
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'EPUB'
yield s


@ -1,106 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class FeedbooksStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://m.feedbooks.com/'
ext_url = 'http://feedbooks.com/'
if external or self.config.get('open_external', False):
if detail_item:
ext_url = ext_url + detail_item
open_url(QUrl(url_slash_cleaner(ext_url)))
else:
detail_url = None
if detail_item:
detail_url = url + detail_item
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://m.feedbooks.com/search?query=' + urllib2.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//ul[@class="m-list"]//li'):
if counter <= 0:
break
data = html.fromstring(html.tostring(data))
id = ''
id_a = data.xpath('//a[@class="buy"]')
if id_a:
id = id_a[0].get('href', None)
id = id.split('/')[-2]
id = '/item/' + id
else:
id_a = data.xpath('//a[@class="download"]')
if id_a:
id = id_a[0].get('href', None)
id = id.split('/')[-1]
id = id.split('.')[0]
id = '/book/' + id
if not id:
continue
title = ''.join(data.xpath('//h5//a/text()'))
author = ''.join(data.xpath('//h6//a/text()'))
price = ''.join(data.xpath('//a[@class="buy"]/text()'))
formats = 'EPUB'
if not price:
price = '$0.00'
formats = 'EPUB, MOBI, PDF'
cover_url = ''
cover_url_img = data.xpath('//img')
if cover_url_img:
cover_url = cover_url_img[0].get('src')
cover_url = cover_url.split('?')[0]
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price.replace(' ', '').strip()
s.detail_item = id.strip()
s.formats = formats
yield s
def get_details(self, search_result, timeout):
url = 'http://m.feedbooks.com/'
br = browser()
with closing(br.open(url_slash_cleaner(url + search_result.detail_item), timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
if idata.xpath('boolean(//div[contains(@class, "m-description-long")]//p[contains(., "DRM") or contains(b, "Protection")])'):
search_result.drm = SearchResult.DRM_LOCKED
else:
search_result.drm = SearchResult.DRM_UNLOCKED
return True


@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import mimetypes
import urllib
from contextlib import closing
from lxml import etree
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query
class OpenSearchStore(StorePlugin):
open_search_url = ''
web_url = ''
def open(self, parent=None, detail_item=None, external=False):
if not self.web_url:
return
if external or self.config.get('open_external', False):
open_url(QUrl(detail_item if detail_item else self.web_url))
else:
d = WebStoreDialog(self.gui, self.web_url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
if not self.open_search_url:
return
description = Description(self.open_search_url)
url_template = description.get_best_template()
if not url_template:
return
oquery = Query(url_template)
# set up initial values
oquery.searchTerms = urllib.quote_plus(query)
oquery.count = max_results
url = oquery.url()
counter = max_results
br = browser()
with closing(br.open(url, timeout=timeout)) as f:
doc = etree.fromstring(f.read())
for data in doc.xpath('//*[local-name() = "entry"]'):
if counter <= 0:
break
counter -= 1
s = SearchResult()
s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()
for link in data.xpath('./*[local-name() = "link"]'):
rel = link.get('rel')
href = link.get('href')
type = link.get('type')
if rel and href and type:
if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
s.cover_url = href
elif rel == u'http://opds-spec.org/acquisition/buy':
s.detail_item = href
elif rel == u'http://opds-spec.org/acquisition':
if type:
ext = mimetypes.guess_extension(type)
if ext:
ext = ext[1:].upper().strip()
s.downloads[ext] = href
s.formats = ', '.join(s.downloads.keys()).strip()
s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()
price_e = data.xpath('.//*[local-name() = "price"][1]')
if price_e:
price_e = price_e[0]
currency_code = price_e.get('currencycode', '')
price = ''.join(price_e.xpath('.//text()')).strip()
s.price = currency_code + ' ' + price
s.price = s.price.strip()
yield s
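
The search flow above can be exercised directly with calibre's opensearch helpers; this mirrors the method line for line, using Archive.org's description document (the same URL the new archive_org plugin below points at):

    import urllib
    from calibre.utils.opensearch.description import Description
    from calibre.utils.opensearch.query import Query
    description = Description('http://bookserver.archive.org/catalog/opensearch.xml')
    url_template = description.get_best_template()
    oquery = Query(url_template)
    oquery.searchTerms = urllib.quote_plus('dickens')
    oquery.count = 10
    print(oquery.url())  # concrete Atom search URL, ready for br.open()
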


@ -1,84 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class PragmaticBookshelfStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://pragprog.com/'
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
'''
OPDS based search.
We really should get the catalog from http://pragprog.com/catalog.opds
and look for the application/opensearchdescription+xml entry.
Then get the opensearch description to get the search url and
format. However, we are going to be lazy and hard code it.
'''
url = 'http://pragprog.com/catalog/search?q=' + urllib.quote_plus(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# Use html instead of etree as html allows us
# to ignore the namespace easily.
doc = html.fromstring(f.read())
for data in doc.xpath('//entry'):
if counter <= 0:
break
id = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/acquisition/buy"]/@href'))
if not id:
continue
price = ''.join(data.xpath('.//price/@currencycode')).strip()
price += ' '
price += ''.join(data.xpath('.//price/text()')).strip()
if not price.strip():
continue
cover_url = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/cover"]/@href'))
title = ''.join(data.xpath('.//title/text()'))
author = ''.join(data.xpath('.//author//text()'))
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'EPUB, PDF, MOBI'
yield s


@ -45,6 +45,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
self.author_box.setText('')
self.price_box.setText('')
self.format_box.setText('')
self.download_combo.setCurrentIndex(0)
self.affiliate_combo.setCurrentIndex(0)
def tokens(self, raw):
@ -119,6 +120,9 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
format = unicode(self.format_box.text()).strip()
if format:
ans.append('format:"' + self.mc + format + '"')
download = unicode(self.download_combo.currentText()).strip()
if download:
ans.append('download:' + download)
affiliate = unicode(self.affiliate_combo.currentText()).strip()
if affiliate:
ans.append('affiliate:' + affiliate)
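
For reference, a sketch of what tokens() now emits when the Download combo is set (values illustrative; '~' as the contains match-char and the ' and ' joiner are assumptions about the rest of this builder):

    ans = ['title:"~dickens"', 'format:"~EPUB"', 'download:true']
    print(' and '.join(ans))  # -> title:"~dickens" and format:"~EPUB" and download:true
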


@ -226,7 +226,7 @@
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<item row="8" column="0" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout_6">
<item>
<widget class="QPushButton" name="clear_button">
@ -244,7 +244,7 @@
</item>
</layout>
</item>
<item row="6" column="1">
<item row="7" column="1">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -283,14 +283,14 @@
<item row="3" column="1">
<widget class="EnLineEdit" name="price_box"/>
</item>
<item row="5" column="0">
<item row="6" column="0">
<widget class="QLabel" name="label_9">
<property name="text">
<string>Affiliate:</string>
</property>
</widget>
</item>
<item row="5" column="1">
<item row="6" column="1">
<widget class="QComboBox" name="affiliate_combo">
<item>
<property name="text">
@ -309,6 +309,32 @@
</item>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>Download:</string>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QComboBox" name="download_combo">
<item>
<property name="text">
<string/>
</property>
</item>
<item>
<property name="text">
<string>true</string>
</property>
</item>
<item>
<property name="text">
<string>false</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</widget>


@ -33,7 +33,7 @@ class Matches(QAbstractItemModel):
total_changed = pyqtSignal(int)
HEADERS = [_('Cover'), _('Title'), _('Price'), _('DRM'), _('Store'), '']
HEADERS = [_('Cover'), _('Title'), _('Price'), _('DRM'), _('Store'), _('Download'), _('Affiliate')]
HTML_COLS = (1, 4)
def __init__(self, cover_thread_count=2, detail_thread_count=4):
@ -47,6 +47,8 @@ class Matches(QAbstractItemModel):
Qt.SmoothTransformation)
self.DONATE_ICON = QPixmap(I('donate.png')).scaledToHeight(16,
Qt.SmoothTransformation)
self.DOWNLOAD_ICON = QPixmap(I('arrow-down.png')).scaledToHeight(16,
Qt.SmoothTransformation)
# All matches. Used to determine the order to display
# self.matches because the SearchFilter returns
@ -181,9 +183,11 @@ class Matches(QAbstractItemModel):
elif result.drm == SearchResult.DRM_UNKNOWN:
return QVariant(self.DRM_UNKNOWN_ICON)
if col == 5:
if result.downloads:
return QVariant(self.DOWNLOAD_ICON)
if col == 6:
if result.affiliate:
return QVariant(self.DONATE_ICON)
return NONE
elif role == Qt.ToolTipRole:
if col == 1:
return QVariant('<p>%s</p>' % result.title)
@ -199,6 +203,9 @@ class Matches(QAbstractItemModel):
elif col == 4:
return QVariant('<p>%s</p>' % result.formats)
elif col == 5:
if result.downloads:
return QVariant('<p>' + _('The following formats can be downloaded directly: %s.') % ', '.join(result.downloads.keys()) + '</p>')
elif col == 6:
if result.affiliate:
return QVariant('<p>' + _('Buying from this store supports the calibre developer: %s.') % result.plugin_author + '</p>')
elif role == Qt.SizeHintRole:
@ -221,6 +228,11 @@ class Matches(QAbstractItemModel):
elif col == 4:
text = result.store_name
elif col == 5:
if result.downloads:
text = 'a'
else:
text = 'b'
elif col == 6:
if result.affiliate:
text = 'a'
else:
@ -257,6 +269,8 @@ class SearchFilter(SearchQueryParser):
'author',
'authors',
'cover',
'download',
'downloads',
'drm',
'format',
'formats',
@ -282,6 +296,8 @@ class SearchFilter(SearchQueryParser):
location = location.lower().strip()
if location == 'authors':
location = 'author'
elif location == 'downloads':
location = 'download'
elif location == 'formats':
location = 'format'
@ -308,12 +324,13 @@ class SearchFilter(SearchQueryParser):
'author': lambda x: x.author.lower(),
'cover': attrgetter('cover_url'),
'drm': attrgetter('drm'),
'download': attrgetter('downloads'),
'format': attrgetter('formats'),
'price': lambda x: comparable_price(x.price),
'store': lambda x: x.store_name.lower(),
'title': lambda x: x.title.lower(),
}
for x in ('author', 'format'):
for x in ('author', 'download', 'format'):
q[x+'s'] = q[x]
for sr in self.srs:
for locvalue in locations:
@ -347,7 +364,7 @@ class SearchFilter(SearchQueryParser):
matches.add(sr)
continue
# this is bool or treated as bool, so can't match below.
if locvalue in ('affiliate', 'drm'):
if locvalue in ('affiliate', 'drm', 'download', 'downloads'):
continue
try:
### Can't separate authors because comma is used for name sep and author sep
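
Like 'affiliate' and 'drm', the new 'download' location is boolean: it matches on the truthiness of the accessor above (a non-empty downloads dict), not on a substring. A minimal sketch of that semantics:

    from operator import attrgetter

    class Result(object):
        def __init__(self, downloads):
            self.downloads = downloads  # format -> URL, as in SearchResult

    results = [Result({'EPUB': 'http://example.invalid/a.epub'}), Result({})]
    accessor = attrgetter('downloads')
    print([bool(accessor(r)) for r in results])  # 'download:true' keeps only the first
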


@ -6,13 +6,18 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QTreeView)
from functools import partial
from PyQt4.Qt import (pyqtSignal, QMenu, QTreeView)
from calibre.gui2.metadata.single_download import RichTextDelegate
from calibre.gui2.store.search.models import Matches
class ResultsView(QTreeView):
download_requested = pyqtSignal(object)
open_requested = pyqtSignal(object)
def __init__(self, *args):
QTreeView.__init__(self,*args)
@ -24,3 +29,18 @@ class ResultsView(QTreeView):
for i in self._model.HTML_COLS:
self.setItemDelegateForColumn(i, self.rt_delegate)
def contextMenuEvent(self, event):
index = self.indexAt(event.pos())
if not index.isValid():
return
result = self.model().get_result(index)
menu = QMenu()
da = menu.addAction(_('Download...'), partial(self.download_requested.emit, result))
if not result.downloads:
da.setEnabled(False)
menu.addSeparator()
menu.addAction(_('Goto in store...'), partial(self.open_requested.emit, result))
menu.exec_(event.globalPos())


@ -14,6 +14,7 @@ from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QTimer, QCheckBox, QLabel,
QComboBox)
from calibre.gui2 import JSONConfig, info_dialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.store.config.chooser.chooser_widget import StoreChooserWidget
from calibre.gui2.store.config.search.search_widget import StoreConfigWidget
@ -72,7 +73,9 @@ class SearchDialog(QDialog, Ui_Dialog):
self.search.clicked.connect(self.do_search)
self.checker.timeout.connect(self.get_results)
self.progress_checker.timeout.connect(self.check_progress)
self.results_view.activated.connect(self.open_store)
self.results_view.activated.connect(self.result_item_activated)
self.results_view.download_requested.connect(self.download_book)
self.results_view.open_requested.connect(self.open_store)
self.results_view.model().total_changed.connect(self.update_book_total)
self.select_all_stores.clicked.connect(self.stores_select_all)
self.select_invert_stores.clicked.connect(self.stores_select_invert)
@ -129,11 +132,15 @@ class SearchDialog(QDialog, Ui_Dialog):
# Title / Author
self.results_view.setColumnWidth(1,int(total*.40))
# Price
self.results_view.setColumnWidth(2,int(total*.20))
self.results_view.setColumnWidth(2,int(total*.12))
# DRM
self.results_view.setColumnWidth(3, int(total*.15))
# Store / Formats
self.results_view.setColumnWidth(4, int(total*.25))
# Download
self.results_view.setColumnWidth(5, 20)
# Affiliate
self.results_view.setColumnWidth(6, 20)
def do_search(self):
# Stop all running threads.
@ -183,7 +190,7 @@ class SearchDialog(QDialog, Ui_Dialog):
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
query = query.replace('%s:' % loc, '')
# Remove the prefix and search text.
for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
query = re.sub(r'%s:"[^"]"' % loc, '', query)
query = re.sub(r'%s:[^\s]*' % loc, '', query)
# Remove logic.
@ -330,8 +337,21 @@ class SearchDialog(QDialog, Ui_Dialog):
def update_book_total(self, total):
self.total.setText('%s' % total)
def open_store(self, index):
def result_item_activated(self, index):
result = self.results_view.model().get_result(index)
if result.downloads:
self.download_book(result)
else:
self.open_store(result)
def download_book(self, result):
d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
if d.exec_() == d.Accepted:
ext = d.format()
self.gui.download_ebook(result.downloads[ext])
def open_store(self, result):
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
def check_progress(self):

View File

@ -22,6 +22,9 @@ class SearchResult(object):
self.detail_item = ''
self.drm = None
self.formats = ''
# key = format in upper case.
# value = url to download the file.
self.downloads = {}
self.affiliate = False
self.plugin_author = ''
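A small sketch of how a store plugin is expected to fill the new field; the URL here is made up:

    r = SearchResult()
    r.title = 'Example Book'
    r.formats = 'EPUB'
    r.downloads['EPUB'] = 'http://example.com/books/example.epub'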

View File

@ -0,0 +1,3 @@
'''
All store plugins are placed here.
'''

View File

@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import html
from calibre import browser
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchStore
from calibre.gui2.store.search_result import SearchResult
class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml'
web_url = 'http://www.archive.org/details/texts'
# http://bookserver.archive.org/catalog/
def search(self, query, max_results=10, timeout=60):
for s in OpenSearchStore.search(self, query, max_results, timeout):
s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1]
s.price = '$0.00'
s.drm = SearchResult.DRM_UNLOCKED
yield s
def get_details(self, search_result, timeout):
'''
The opensearch feed only returns a subset of formats that are available.
We want to get a list of all formats that the user can get.
'''
br = browser()
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
search_result.formats = formats.upper()
return True
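The same scrape, sketched standalone; the item identifier is hypothetical and the page structure is assumed as of this commit:

    from contextlib import closing
    from lxml import html
    from calibre import browser

    br = browser()
    url = 'http://www.archive.org/details/someitem'  # hypothetical item
    with closing(br.open(url, timeout=60)) as nf:
        idata = html.fromstring(nf.read())
    formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
    print(formats.upper())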

View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchStore
from calibre.gui2.store.search_result import SearchResult
class EpubBudStore(BasicStoreConfig, OpenSearchStore):
open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml'
web_url = 'http://www.epubbud.com/'
# http://www.epubbud.com/feeds/catalog.atom
def search(self, query, max_results=10, timeout=60):
for s in OpenSearchStore.search(self, query, max_results, timeout):
s.price = '$0.00'
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'EPUB'
# Download links are broken for this store.
s.downloads = {}
yield s

View File

@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchStore
from calibre.gui2.store.search_result import SearchResult
class FeedbooksStore(BasicStoreConfig, OpenSearchStore):
open_search_url = 'http://assets0.feedbooks.net/opensearch.xml?t=1253087147'
web_url = 'http://feedbooks.com/'
# http://www.feedbooks.com/catalog
def search(self, query, max_results=10, timeout=60):
for s in OpenSearchStore.search(self, query, max_results, timeout):
if s.downloads:
s.drm = SearchResult.DRM_UNLOCKED
s.price = '$0.00'
else:
s.drm = SearchResult.DRM_LOCKED
s.formats = 'EPUB'
yield s

View File

@ -79,7 +79,6 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
cover_name = cover_name.replace('etext', '')
cover_id = id.split('.')[0]
cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg'
print(cover_url)
counter -= 1

View File

@ -10,7 +10,7 @@ import re
from PyQt4.Qt import (QDialog, QDialogButtonBox)
from calibre.gui2.store.mobileread.adv_search_builder_ui import Ui_Dialog
from calibre.gui2.store.stores.mobileread.adv_search_builder_ui import Ui_Dialog
from calibre.library.caches import CONTAINS_MATCH, EQUALS_MATCH
class AdvSearchBuilderDialog(QDialog, Ui_Dialog):

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog
from calibre.gui2.store.mobileread.cache_progress_dialog_ui import Ui_Dialog
from calibre.gui2.store.stores.mobileread.cache_progress_dialog_ui import Ui_Dialog
class CacheProgressDialog(QDialog, Ui_Dialog):

View File

@ -15,10 +15,10 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.gui2.store.mobileread.models import SearchFilter
from calibre.gui2.store.mobileread.cache_progress_dialog import CacheProgressDialog
from calibre.gui2.store.mobileread.cache_update_thread import CacheUpdateThread
from calibre.gui2.store.mobileread.store_dialog import MobileReadStoreDialog
from calibre.gui2.store.stores.mobileread.models import SearchFilter
from calibre.gui2.store.stores.mobileread.cache_progress_dialog import CacheProgressDialog
from calibre.gui2.store.stores.mobileread.cache_update_thread import CacheUpdateThread
from calibre.gui2.store.stores.mobileread.store_dialog import MobileReadStoreDialog
class MobileReadStore(BasicStoreConfig, StorePlugin):

View File

@ -9,9 +9,9 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (Qt, QDialog, QIcon, QComboBox)
from calibre.gui2.store.mobileread.adv_search_builder import AdvSearchBuilderDialog
from calibre.gui2.store.mobileread.models import BooksModel
from calibre.gui2.store.mobileread.store_dialog_ui import Ui_Dialog
from calibre.gui2.store.stores.mobileread.adv_search_builder import AdvSearchBuilderDialog
from calibre.gui2.store.stores.mobileread.models import BooksModel
from calibre.gui2.store.stores.mobileread.store_dialog_ui import Ui_Dialog
class MobileReadStoreDialog(QDialog, Ui_Dialog):

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchStore
from calibre.gui2.store.search_result import SearchResult
class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore):
open_search_url = 'http://pragprog.com/catalog/search-description'
web_url = 'http://pragprog.com/'
# http://pragprog.com/catalog.opds
def search(self, query, max_results=10, timeout=60):
for s in OpenSearchStore.search(self, query, max_results, timeout):
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'EPUB, PDF, MOBI'
yield s

View File

@ -20,7 +20,7 @@ from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key, lower, strcmp
from calibre.library.field_metadata import TagsIcons, category_icon_map
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.formatter import eval_formatter
from calibre.utils.formatter import EvalFormatter
from calibre.utils.search_query_parser import saved_searches
TAG_SEARCH_STATES = {'clear': 0, 'mark_plus': 1, 'mark_plusplus': 2,
@ -224,6 +224,7 @@ class TagsModel(QAbstractItemModel): # {{{
self.row_map = []
self.root_item = self.create_node(icon_map=self.icon_state_map)
self.db = None
self._build_in_progress = False
self.reread_collapse_model({}, rebuild=False)
def reread_collapse_model(self, state_map, rebuild=True):
@ -257,9 +258,17 @@ class TagsModel(QAbstractItemModel): # {{{
self.endResetModel()
def rebuild_node_tree(self, state_map={}):
if self._build_in_progress:
print ('Tag Browser build already in progress')
traceback.print_stack()
return
#traceback.print_stack()
#print ()
self._build_in_progress = True
self.beginResetModel()
self._run_rebuild(state_map=state_map)
self.endResetModel()
self._build_in_progress = False
def _run_rebuild(self, state_map={}):
for node in self.node_map.itervalues():
@ -341,6 +350,8 @@ class TagsModel(QAbstractItemModel): # {{{
def _create_node_tree(self, data, state_map):
sort_by = config['sort_tags_by']
eval_formatter = EvalFormatter()
if data is None:
print ('_create_node_tree: no data!')
traceback.print_stack()
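The _build_in_progress flag is a plain re-entrancy guard. Its generic shape, as an illustrative class (the try/finally is an extra safety net the commit itself omits):

    class Rebuilder(object):
        def __init__(self):
            self._build_in_progress = False

        def rebuild(self):
            if self._build_in_progress:
                return  # drop re-entrant calls rather than rebuilding twice
            self._build_in_progress = True
            try:
                pass  # reset the model and rebuild the node tree here
            finally:
                self._build_in_progress = False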

View File

@ -91,10 +91,10 @@ class TagBrowserMixin(object): # {{{
# Add the new category
user_cats[new_cat] = []
db.prefs.set('user_categories', user_cats)
self.tags_view.set_new_model()
self.tags_view.recount()
m = self.tags_view.model()
idx = m.index_for_path(m.find_category_node('@' + new_cat))
m.show_item_at_index(idx)
self.tags_view.show_item_at_index(idx)
# Open the editor on the new item to rename it
if new_category_name is None:
self.tags_view.edit(idx)
@ -111,7 +111,7 @@ class TagBrowserMixin(object): # {{{
for k in d.categories:
db.field_metadata.add_user_category('@' + k, k)
db.data.change_search_locations(db.field_metadata.get_search_terms())
self.tags_view.set_new_model()
self.tags_view.recount()
def do_delete_user_category(self, category_name):
'''
@ -144,7 +144,7 @@ class TagBrowserMixin(object): # {{{
elif k.startswith(category_name + '.'):
del user_cats[k]
db.prefs.set('user_categories', user_cats)
self.tags_view.set_new_model()
self.tags_view.recount()
def do_del_item_from_user_cat(self, user_cat, item_name, item_category):
'''
@ -413,13 +413,14 @@ class TagBrowserWidget(QWidget): # {{{
txt = unicode(self.item_search.currentText()).strip()
if txt.startswith('*'):
self.tags_view.set_new_model(filter_categories_by=txt[1:])
model.filter_categories_by = txt[1:]
self.tags_view.recount()
self.current_find_position = None
return
if model.get_filter_categories_by():
self.tags_view.set_new_model(filter_categories_by=None)
if model.filter_categories_by:
model.filter_categories_by = None
self.tags_view.recount()
self.current_find_position = None
model = self.tags_view.model()
if not txt:
return
@ -437,8 +438,9 @@ class TagBrowserWidget(QWidget): # {{{
self.current_find_position = \
model.find_item_node(key, txt, self.current_find_position)
if self.current_find_position:
model.show_item_at_path(self.current_find_position, box=True)
self.tags_view.show_item_at_path(self.current_find_position, box=True)
elif self.item_search.text():
self.not_found_label.setVisible(True)
if self.tags_view.verticalScrollBar().isVisible():

View File

@ -71,7 +71,6 @@ class TagsView(QTreeView): # {{{
search_item_renamed = pyqtSignal()
drag_drop_finished = pyqtSignal(object)
restriction_error = pyqtSignal()
show_at_path = pyqtSignal()
def __init__(self, parent=None):
QTreeView.__init__(self, parent=None)
@ -96,8 +95,6 @@ class TagsView(QTreeView): # {{{
self.user_category_icon = QIcon(I('tb_folder.png'))
self.delete_icon = QIcon(I('list_remove.png'))
self.rename_icon = QIcon(I('edit-undo.png'))
self.show_at_path.connect(self.show_item_at_path,
type=Qt.QueuedConnection)
self._model = TagsModel(self)
self._model.search_item_renamed.connect(self.search_item_renamed)
@ -176,7 +173,8 @@ class TagsView(QTreeView): # {{{
state_map = self.get_state()[1]
self.db.prefs.set('user_categories', user_cats)
self._model.rebuild_node_tree(state_map=state_map)
self.show_at_path.emit('@'+nkey)
p = self._model.find_category_node('@'+nkey)
self.show_item_at_path(p)
@property
def match_all(self):
@ -501,6 +499,8 @@ class TagsView(QTreeView): # {{{
return
src_is_tb = event.mimeData().hasFormat('application/calibre+from_tag_browser')
item = index.data(Qt.UserRole).toPyObject()
if item.type == TagTreeItem.ROOT:
return
flags = self._model.flags(index)
if item.type == TagTreeItem.TAG and flags & Qt.ItemIsDropEnabled:
self.setDropIndicatorShown(not src_is_tb)
@ -570,7 +570,7 @@ class TagsView(QTreeView): # {{{
def show_item_at_index(self, idx, box=False,
position=QTreeView.PositionAtCenter):
if idx.isValid():
if idx.isValid() and idx.data(Qt.UserRole).toPyObject() is not self._model.root_item:
self.setCurrentIndex(idx)
self.scrollTo(idx, position)
self.setCurrentIndex(idx)

View File

View File

@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '''
2011, John Schember <john@nachtimwald.com>,
2006, Ed Summers <ehs@pobox.com>
'''
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import etree
from calibre import browser
from calibre.utils.opensearch.url import URL
class Description(object):
'''
A class for representing OpenSearch Description files.
'''
def __init__(self, url=""):
'''
The constructor, which may be given an optional url to load from.
d = Description("http://www.example.com/description")
'''
if url:
self.load(url)
def load(self, url):
'''
For loading up a description object from a url. Normally
you'll probably just want to pass a URL into the constructor.
'''
br = browser()
with closing(br.open(url, timeout=15)) as f:
doc = etree.fromstring(f.read())
# version 1.1 has repeating Url elements
self.urls = []
for element in doc.xpath('//*[local-name() = "Url"]'):
template = element.get('template')
type = element.get('type')
if template and type:
url = URL()
url.template = template
url.type = type
self.urls.append(url)
# this is version 1.0 specific
self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()'))
self.format = ''.join(doc.xpath('//*[local-name() = "Format"][1]//text()'))
self.shortname = ''.join(doc.xpath('//*[local-name() = "ShortName"][1]//text()'))
self.longname = ''.join(doc.xpath('//*[local-name() = "LongName"][1]//text()'))
self.description = ''.join(doc.xpath('//*[local-name() = "Description"][1]//text()'))
self.image = ''.join(doc.xpath('//*[local-name() = "Image"][1]//text()'))
self.samplesearch = ''.join(doc.xpath('//*[local-name() = "SampleSearch"][1]//text()'))
self.developer = ''.join(doc.xpath('//*[local-name() = "Developer"][1]//text()'))
self.contact = ''.join(doc.xpath('//*[local-name() = "Contact"][1]//text()'))
self.attribution = ''.join(doc.xpath('//*[local-name() = "Attribution"][1]//text()'))
self.syndicationright = ''.join(doc.xpath('//*[local-name() = "SyndicationRight"][1]//text()'))
tag_text = ' '.join(doc.xpath('//*[local-name() = "Tags"]//text()'))
if tag_text is not None:
self.tags = tag_text.split(' ')
self.adultcontent = doc.xpath('boolean(//*[local-name() = "AdultContent" and contains(., "true")])')
def get_url_by_type(self, type):
'''
Walks available urls and returns them by type. Only
appropriate in opensearch v1.1 where there can be multiple
query targets. Returns None if no such type is found.
url = description.get_url_by_type('application/rss+xml')
'''
for url in self.urls:
if url.type == type:
return url
return None
def get_best_template(self):
'''
OK, best is a value judgement, but so be it. You'll get
back either the atom, rss or first template available. This
method handles the main difference between opensearch v1.0 and v1.1
'''
# version 1.0
if self.url:
return self.url
# atom
if self.get_url_by_type('application/atom+xml'):
return self.get_url_by_type('application/atom+xml').template
# rss
if self.get_url_by_type('application/rss+xml'):
return self.get_url_by_type('application/rss+xml').template
# other possible rss type
if self.get_url_by_type('text/xml'):
return self.get_url_by_type('text/xml').template
# otherwise just the first one
if len(self.urls) > 0:
return self.urls[0].template
# out of luck
return None
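A hedged usage sketch, pointing at one of the OPDS feeds used by the store plugins above; the exact template returned depends on the live description document:

    from calibre.utils.opensearch.description import Description

    d = Description('http://bookserver.archive.org/catalog/opensearch.xml')
    template = d.get_best_template()  # e.g. an atom URL with {searchTerms}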

View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2006, Ed Summers <ehs@pobox.com>'
__docformat__ = 'restructuredtext en'
from urlparse import urlparse, urlunparse, parse_qs
from urllib import urlencode
class Query(object):
'''
Represents an opensearch query. Really this class is just a
helper for substituting values into the macros in a format.
format = 'http://beta.indeed.com/opensearch?q={searchTerms}&start={startIndex}&limit={count}'
q = Query(format)
q.searchTerms = 'zx81'
q.startIndex = 1
q.count = 25
print q.url()
'''
standard_macros = ['searchTerms', 'count', 'startIndex', 'startPage',
'language', 'outputEncoding', 'inputEncoding']
def __init__(self, format):
'''
Create a query object by passing it the url format obtained
from the opensearch Description.
'''
self.format = format
# unpack the url to a tuple
self.url_parts = urlparse(format)
# unpack the query string to a dictionary
self.query_string = parse_qs(self.url_parts[4])
# look for standard macros and create a mapping of the
# opensearch names to the service specific ones
# so q={searchTerms} will result in a mapping between searchTerms and q
self.macro_map = {}
for key,values in self.query_string.items():
# TODO eventually optional/required params should be
# distinguished somehow (the ones with/without a trailing ?)
macro = values[0].replace('{', '').replace('}', '').replace('?', '')
if macro in Query.standard_macros:
self.macro_map[macro] = key
def url(self):
# copy the original query string
query_string = dict(self.query_string)
# iterate through macros and set the position in the querystring
for macro, name in self.macro_map.items():
if hasattr(self, macro):
# set the name/value pair
query_string[name] = [getattr(self, macro)]
else:
# remove the name/value pair
del query_string[name]
# copy the url parts and substitute in our new query string
url_parts = list(self.url_parts)
url_parts[4] = urlencode(query_string, 1)
# recompose and return url
return urlunparse(tuple(url_parts))
def has_macro(self, macro):
return macro in self.macro_map
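A minimal end-to-end sketch of Query on a made-up template (the module path is assumed from the package layout above; parameter order in the output may vary):

    from calibre.utils.opensearch.query import Query

    q = Query('http://example.com/search?q={searchTerms}&start={startIndex?}')
    q.searchTerms = 'zx81'
    q.startIndex = 1
    print(q.url())  # -> http://example.com/search?q=zx81&start=1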

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2006, Ed Summers <ehs@pobox.com>'
__docformat__ = 'restructuredtext en'
class URL(object):
'''
Class for representing a URL in an opensearch v1.1 query
'''
def __init__(self, type='', template='', method='GET'):
self.type = type
self.template = template
self.method = method
self.params = []