added ebooksgratuits.com in the stores of "Get Books"

This commit is contained in:
Florent FAYOLLE 2012-08-16 22:37:21 +02:00
commit e7280044d5
44 changed files with 2823 additions and 926 deletions

View File

@ -16,6 +16,7 @@ class BusinessSpectator(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
@ -32,11 +33,11 @@ class BusinessSpectator(BasicNewsRecipe):
,'linearize_tables': False
}
keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
#keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
remove_tags = [dict(attrs={'class':'hql'})]
#remove_tags = [dict(attrs={'class':'hql'})]
remove_attributes = ['width','height','style']
#remove_attributes = ['width','height','style']
feeds = [
('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
@ -46,3 +47,4 @@ class BusinessSpectator(BasicNewsRecipe):
('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
]

View File

@ -1,35 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class CalgaryHerald(BasicNewsRecipe):
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
title = u'Calgary Herald'
oldest_article = 3
max_articles_per_feed = 100
url_prefix = 'http://www.calgaryherald.com'
description = u'News from Calgary, AB'
std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
logo_url = 'chlogo.jpg'
fp_tag = 'CAN_CH'
feeds = [
(u'News', u'http://rss.canada.com/get/?F233'),
(u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
(u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
(u'Politics', u'http://rss.canada.com/get/?F7551'),
(u'National', u'http://rss.canada.com/get/?F7552'),
(u'World', u'http://rss.canada.com/get/?F7553'),
]
__author__ = 'rty'
pubisher = 'Calgary Herald'
description = 'Calgary, Alberta, Canada'
category = 'News, Calgary, Alberta, Canada'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
##masthead_url = 'http://www.calgaryherald.com/index.html'
keep_only_tags = [
dict(name='div', attrs={'id':'storyheader'}),
dict(name='div', attrs={'id':'storycontent'})
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
]
remove_tags_after = {'class':"story_tool_hr"}
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
articles = {}
ans = []
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),,
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
logo_url = 'ejlogo.jpg'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,48 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Montreal Gazette
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following six lines for the Montreal Gazette
title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC'
std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
logo_url = 'mglogo.jpg'
fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
auto_cleanup = True
auto_cleanup_keep = '//*[@id="imageBox"]'
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
    """Clean one article page: drop empty ids, flatten photo-gallery
    pages into plain image+caption divs, and strip text anchors."""
    #delete empty id attributes--they screw up the TOC for unknown reasons
    divtags = soup.findAll('div',attrs={'id':''})
    if divtags:
        for div in divtags:
            del(div['id'])
    # A 'storyphoto' div with no 'storycontent' sibling indicates a
    # photo-gallery page rather than a normal article.
    pgall = soup.find('div',attrs={'id':'storyphoto'})
    if pgall is not None: # photo gallery perhaps
        if (soup.find('div',attrs={'id':'storycontent'}) is None):
            # Rebuild the gallery as a simple div of <img> + caption pairs.
            allpics = Tag(soup,'div')
            # NOTE(review): the lead 'storyimage' is dropped — presumably it
            # duplicates one of the related thumbnails; confirm.
            first_img = pgall.find('div','storyimage')
            if first_img is not None:
                first_img.extract()
            tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
            if tlist is not None:
                for atag in tlist.findAll('a'):
                    img = Tag(soup,'img')
                    # Drop the query string from the thumbnail src.
                    srcpre, sep, srcpost = atag.img['src'].partition('?')
                    img['src'] = srcpre
                    # The thumbnail's alt text becomes the caption.
                    pdesc = Tag(soup,'p')
                    pdesc.insert(0,atag.img['alt'])
                    pdesc['class']='photocaption'
                    div = Tag(soup,'div')
                    div.insert(0,pdesc)
                    div.insert(0,img)
                    allpics.append(div)
            pgall.replaceWith(allpics)
    # Remove any remaining inline photo blocks from regular articles.
    for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
        pg.extract()
    return self.strip_anchors(soup)
# RSS section feeds.  NOTE(review): a parse_index() is defined below,
# which normally takes precedence over 'feeds' in BasicNewsRecipe —
# confirm whether these are still used.
feeds = [
    ('News',
     'http://rss.canada.com/get/?F297'),
    ('Sports',
     'http://rss.canada.com/get/?F299'),
    ('Entertainment',
     'http://rss.canada.com/get/?F7366'),
    ('Business',
     'http://rss.canada.com/get/?F6939'),
]
def parse_index(self):
    """Build the recipe index by scraping each section page listed in
    self.postmedia_index_pages.

    Returns the list of (section_title, [article_dict, ...]) tuples
    that BasicNewsRecipe expects.  Sections whose index page cannot be
    fetched, or which yield no articles, are silently omitted.
    """
    articles = {}
    ans = []

    def handle_article(adiv, key):
        # Extract one article link from a feature div and file it under
        # the given section key.
        h1tag = adiv.h1
        if h1tag is None:
            return
        atag = h1tag.a
        if atag is None:
            return
        # Absolute (off-site) links are skipped entirely.
        if atag['href'].startswith('http'):
            return
        elif atag['href'].startswith('/'):
            url = self.url_prefix + atag['href']
        else:
            url = self.url_prefix + '/' + atag['href']
        # De-duplicate articles that appear in several sections.
        if url in self.url_list:
            return
        self.url_list.append(url)
        title = self.tag_to_string(atag, False)
        # Skip multimedia-only pages.
        if 'VIDEO' in title.upper():
            return
        if 'GALLERY' in title.upper():
            return
        if 'PHOTOS' in title.upper():
            return
        dtag = adiv.find('div', 'content')
        description = ''
        print("URL " + url)
        print("TITLE " + title)
        if dtag is not None:
            stag = dtag.span
            if stag is not None:
                # The span is either a timestamp (ignored) or the blurb.
                if stag['class'] != 'timestamp':
                    description = self.tag_to_string(stag, False)
            else:
                description = self.tag_to_string(dtag, False)
        print("DESCRIPTION: " + description)
        # Py2-only dict.has_key() replaced with the 'in' operator.
        if key not in articles:
            articles[key] = []
        articles[key].append(dict(title=title, url=url, date='',
                                  description=description, author='', content=''))

    def parse_web_index(key, keyurl):
        # Fetch one section index page; a failed fetch just drops the
        # section (deliberate best-effort behaviour).
        try:
            soup = self.index_to_soup(self.url_prefix + keyurl)
        except:
            return
        ans.append(key)
        mainsoup = soup.find('div', 'bodywrapper')
        footer = mainsoup.find(attrs={'id': 'footerfeature'})
        if footer is not None:
            footer.extract()
        print("Section: " + key)
        for wdiv in mainsoup.findAll('div', attrs={'id': re.compile('^HorizontalFeatureSlider_1_Story')}):
            handle_article(wdiv, key)
            wdiv.extract()
        for wdiv in mainsoup.findAll(attrs={'id': ['featurewidget', 'textfeature', 'textlinks_timestamp']}):
            for adiv in wdiv.findAll('div', 'featurecontent'):
                handle_article(adiv, key)

    for (k, url) in self.postmedia_index_pages:
        parse_web_index(k, url)
    # Preserve section order; drop sections that produced no articles.
    return [(key, articles[key]) for key in ans if key in articles]

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
title = u'Ottawa Citizen'
url_prefix = 'http://www.ottawacitizen.com'
description = u'News from Ottawa, ON'
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
logo_url = 'oclogo.jpg'
fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,136 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Vancouver Province
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
title = u'Vancouver Province'
url_prefix = 'http://www.theprovince.com'
description = u'News from Vancouver, BC'
std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
logo_url = 'vplogo.jpg'
fp_tag = 'CAN_TP'
# un-comment the following three lines for the Vancouver Sun
#title = u'Vancouver Sun'
#url_prefix = 'http://www.vancouversun.com'
#description = u'News from Vancouver, BC'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following three lines for the Edmonton Journal
#title = u'Edmonton Journal'
#url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Edmonton, AB'
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete iempty id attributes--they screw up the TOC for unknow reasons
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
title = u'Vancouver Sun'
url_prefix = 'http://www.vancouversun.com'
description = u'News from Vancouver, BC'
std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
logo_url = 'vslogo.jpg'
fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,35 +1,229 @@
body { background-color: white; }
/*
** The following rules apply principally to the line items shown in the
** Authors, Titles, Genres, Series, and Recently Added sections. Rules for the
** Descriptions section are grouped together later in the file.
** ------------------------------------------------------------------------
*/
/*
** <div> grouping an author's works together
** Used in Sections:
** Authors
**
** Minimize widows and orphans by logically grouping chunks
** Some reports of problems with Sony (ADE) ereaders
** ADE: page-break-inside:avoid;
** iBooks: display:inline-block;
** width:100%;
*/
div.author_logical_group {
page-break-inside:avoid;
}
/*
** Force page break when starting new initial letter
** Used in Sections:
** Authors
** Titles
*/
div.initial_letter {
page-break-before:always;
}
/*
** Author name
** Used in Sections:
** Authors
** Genres
** Recently Added
*/
p.author_index {
clear:both;
font-size:large;
font-weight:bold;
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
/*
** Index letter
** Used in Sections:
** Authors
** Titles
*/
p.author_title_letter_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
/*
** Index letter
** Used in Sections:
** Series
*/
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
/*
** Month-Year
** Used in Sections:
** Recently Added
*/
p.date_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.date_read {
clear:both;
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for ePub
*/
p.series {
clear:both;
font-style:italic;
margin-top:0.10em;
margin-bottom:0em;
margin-left:1.5em;
text-align:left;
text-indent:-1.25em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for mobi
*/
p.series_mobi {
clear:both;
font-style:italic;
margin-top:0em;
margin-bottom:0em;
margin-left:0em;
text-align:left;
text-indent:-30px;
}
/*
** Section title
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
** Descriptions
*/
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
/*
** Line item book listing
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
p.line_item {
clear: both;
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Prefix
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.prefix {
float:left;
margin-left: 0.25em;
text-align: left;
vertical-align: middle;
width: 1.5em;
}
/*
** Book details entry
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.entry {
font-family: serif;
vertical-align:middle;
}
/*
** The following rules apply to Descriptions
** -----------------------------------------
*/
/*
** Link to Series
*/
a.series_id {
font-style:normal;
font-size:large;
}
/*
* Minimize widows and orphans by logically grouping chunks
* Some reports of problems with Sony (ADE) ereaders
* ADE: page-break-inside:avoid;
* iBooks: display:inline-block;
* width:100%;
** Various dividers
*/
div.author_logical_group {
page-break-inside:avoid;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.initial_letter {
page-break-before:always;
}
hr.annotations_divider {
width:50%;
margin-left:1em;
@ -63,47 +257,21 @@ hr.merged_comments_divider {
border-left: solid white 0px;
}
p.date_read {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
/*
** Author name
*/
p.author {
clear:both;
font-size:large;
margin-top:0em;
margin-bottom:0em;
margin-bottom:0.1em;
text-align: center;
text-indent: 0em;
}
p.author_index {
font-size:large;
font-weight:bold;
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
p.author_title_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
p.date_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
/*
** Formats
*/
p.formats {
font-size:90%;
margin-top:0em;
@ -112,6 +280,9 @@ p.formats {
text-indent: 0.0in;
}
/*
** Genres
*/
p.genres {
font-style:normal;
margin-top:0.5em;
@ -120,68 +291,55 @@ p.genres {
text-indent: 0.0in;
}
p.series {
font-style:italic;
margin-top:0.25em;
margin-bottom:0em;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Series name
*/
p.series_id {
margin-top:0em;
margin-bottom:0em;
text-align:center;
}
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
p.wishlist_item, p.unread_book, p.read_book, p.line_item {
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
span.prefix {}
span.entry {
font-family: serif;
}
/*
* Book Descriptions
** Publisher, Publication Date
*/
td.publisher, td.date {
font-weight:bold;
text-align:center;
}
/*
** Rating
*/
td.rating{
text-align:center;
}
/*
** Additional notes
*/
td.notes {
font-size: 100%;
text-align:center;
}
/*
** Thumbnail
*/
td.thumbnail img {
-webkit-box-shadow: 4px 4px 12px #999;
}
/*
** Comments
*/
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}

View File

@ -172,6 +172,7 @@ if iswindows:
[
'calibre/devices/mtp/windows/utils.cpp',
'calibre/devices/mtp/windows/device_enumeration.cpp',
'calibre/devices/mtp/windows/device.cpp',
'calibre/devices/mtp/windows/wpd.cpp',
],
headers=[
@ -298,6 +299,7 @@ class Build(Command):
self.obj_dir = os.path.join(os.path.dirname(SRC), 'build', 'objects')
if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir)
if not opts.only:
self.build_style(self.j(self.SRC, 'calibre', 'plugins'))
for ext in extensions:
if opts.only != 'all' and opts.only != ext.name:

View File

@ -38,7 +38,7 @@ binary_includes = [
'/lib/libz.so.1',
'/usr/lib/libtiff.so.5',
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.25',
'/usr/lib/libpoppler.so.27',
'/usr/lib/libxml2.so.2',
'/usr/lib/libopenjpeg.so.2',
'/usr/lib/libxslt.so.1',

View File

@ -379,7 +379,7 @@ class Py2App(object):
@flush
def add_poppler(self):
info('\nAdding poppler')
for x in ('libpoppler.26.dylib',):
for x in ('libpoppler.27.dylib',):
self.install_dylib(os.path.join(SW, 'lib', x))
for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'):
self.install_dylib(os.path.join(SW, 'bin', x), False)

View File

@ -28,7 +28,8 @@ isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat
isnetbsd = 'netbsd' in _plat
isbsd = isfreebsd or isnetbsd
isdragonflybsd = 'dragonfly' in _plat
isbsd = isfreebsd or isnetbsd or isdragonflybsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
@ -215,3 +216,13 @@ def get_windows_temp_path():
ans = buf.value
return ans if ans else None
def get_windows_user_locale_name():
import ctypes
k32 = ctypes.windll.kernel32
n = 200
buf = ctypes.create_unicode_buffer(u'\0'*n)
n = k32.GetUserDefaultLocaleName(buf, n)
if n == 0:
return None
return u'_'.join(buf.value.split(u'-')[:2])

View File

@ -1353,7 +1353,7 @@ class StoreEbookscomStore(StoreBase):
class StoreEbooksGratuitsStore(StoreBase):
name = 'EbooksGratuits.com'
description = u''
description = u'Ebooks Libres et Gratuits'
actual_plugin = 'calibre.gui2.store.stores.ebooksgratuits_plugin:EbooksGratuitsStore'
headquarters = 'FR'

View File

@ -87,7 +87,7 @@ class ANDROID(USBMS):
# Google
0x18d1 : {
0x0001 : [0x0223, 0x9999],
0x0001 : [0x0223, 0x230, 0x9999],
0x0003 : [0x0230],
0x4e11 : [0x0100, 0x226, 0x227],
0x4e12 : [0x0100, 0x226, 0x227],
@ -196,7 +196,7 @@ class ANDROID(USBMS):
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI']
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -214,7 +214,8 @@ class ANDROID(USBMS):
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX',
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID']
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -224,7 +225,7 @@ class ANDROID(USBMS):
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID']
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,7 @@ class ControlError(ProtocolError):
def __init__(self, query=None, response=None, desc=None):
self.query = query
self.response = response
self.desc = desc
ProtocolError.__init__(self, desc)
def __str__(self):

View File

@ -39,6 +39,7 @@ class MTPDeviceBase(DevicePlugin):
def __init__(self, *args, **kwargs):
DevicePlugin.__init__(self, *args, **kwargs)
self.progress_reporter = None
self.current_friendly_name = None
def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None):
@ -47,3 +48,7 @@ class MTPDeviceBase(DevicePlugin):
def set_progress_reporter(self, report_progress):
self.progress_reporter = report_progress
def get_gui_name(self):
return self.current_friendly_name or self.name

View File

@ -14,7 +14,7 @@ from collections import deque, OrderedDict
from io import BytesIO
from calibre import prints
from calibre.devices.errors import OpenFailed
from calibre.devices.errors import OpenFailed, DeviceError
from calibre.devices.mtp.base import MTPDeviceBase, synchronous
from calibre.devices.mtp.unix.detect import MTPDetect
@ -102,11 +102,6 @@ class MTP_DEVICE(MTPDeviceBase):
if self.progress_reporter is not None:
self.progress_reporter(p)
@synchronous
def get_gui_name(self):
if self.dev is None or not self.dev.friendly_name: return self.name
return self.dev.friendly_name
@synchronous
def is_usb_connected(self, devices_on_system, debug=False,
only_presence=False):
@ -134,7 +129,7 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous
def post_yank_cleanup(self):
self.dev = self.filesystem_cache = None
self.dev = self.filesystem_cache = self.current_friendly_name = None
@synchronous
def startup(self):
@ -184,15 +179,18 @@ class MTP_DEVICE(MTPDeviceBase):
self._carda_id = storage[1]['id']
if len(storage) > 2:
self._cardb_id = storage[2]['id']
self.current_friendly_name = self.dev.name
@synchronous
def read_filesystem_cache(self):
try:
files, errs = self.dev.get_filelist(self)
if errs and not files:
raise OpenFailed('Failed to read files from device. Underlying errors:\n'
raise DeviceError('Failed to read files from device. Underlying errors:\n'
+self.format_errorstack(errs))
folders, errs = self.dev.get_folderlist()
if errs and not folders:
raise OpenFailed('Failed to read folders from device. Underlying errors:\n'
raise DeviceError('Failed to read folders from device. Underlying errors:\n'
+self.format_errorstack(errs))
self.filesystem_cache = FilesystemCache(files, folders)
except:
@ -202,15 +200,15 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous
def get_device_information(self, end_session=True):
d = self.dev
return (d.friendly_name, d.device_version, d.device_version, '')
return (self.current_friendly_name, d.device_version, d.device_version, '')
@synchronous
def card_prefix(self, end_session=True):
ans = [None, None]
if self._carda_id is not None:
ans[0] = 'mtp:%d:'%self._carda_id
ans[0] = 'mtp:::%d:::'%self._carda_id
if self._cardb_id is not None:
ans[1] = 'mtp:%d:'%self._cardb_id
ans[1] = 'mtp:::%d:::'%self._cardb_id
return tuple(ans)
@synchronous
@ -248,6 +246,7 @@ if __name__ == '__main__':
devs = linux_scanner()
mtp_devs = dev.detect(devs)
dev.open(list(mtp_devs)[0], 'xxx')
dev.read_filesystem_cache()
d = dev.dev
print ("Opened device:", dev.get_gui_name())
print ("Storage info:")

View File

@ -1,3 +1,11 @@
/*
* libmtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#include <Python.h>

View File

@ -0,0 +1,137 @@
/*
* device.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
extern IPortableDevice* wpd::open_device(const wchar_t *pnp_id, IPortableDeviceValues *client_information);
extern IPortableDeviceValues* wpd::get_client_information();
extern PyObject* wpd::get_device_information(IPortableDevice *device);
using namespace wpd;
// Device.__init__() {{{
static void
dealloc(Device* self)
{
if (self->pnp_id != NULL) free(self->pnp_id);
self->pnp_id = NULL;
if (self->device != NULL) {
Py_BEGIN_ALLOW_THREADS;
self->device->Close(); self->device->Release();
self->device = NULL;
Py_END_ALLOW_THREADS;
}
if (self->client_information != NULL) { self->client_information->Release(); self->client_information = NULL; }
Py_XDECREF(self->device_information); self->device_information = NULL;
self->ob_type->tp_free((PyObject*)self);
}
static int
init(Device *self, PyObject *args, PyObject *kwds)
{
PyObject *pnp_id;
int ret = -1;
if (!PyArg_ParseTuple(args, "O", &pnp_id)) return -1;
self->pnp_id = unicode_to_wchar(pnp_id);
if (self->pnp_id == NULL) return -1;
self->client_information = get_client_information();
if (self->client_information != NULL) {
self->device = open_device(self->pnp_id, self->client_information);
if (self->device != NULL) {
self->device_information = get_device_information(self->device);
if (self->device_information != NULL) ret = 0;
}
}
return ret;
}
// }}}
// update_device_data() {{{
static PyObject*
update_data(Device *self, PyObject *args, PyObject *kwargs) {
PyObject *di = NULL;
di = get_device_information(self->device);
if (di == NULL) return NULL;
Py_XDECREF(self->device_information); self->device_information = di;
Py_RETURN_NONE;
} // }}}
static PyMethodDef Device_methods[] = {
{"update_data", (PyCFunction)update_data, METH_VARARGS,
"update_data() -> Reread the basic device data from the device (total, space, free space, storage locations, etc.)"
},
{NULL}
};
// Device.data {{{
static PyObject *
Device_data(Device *self, void *closure) {
Py_INCREF(self->device_information); return self->device_information;
} // }}}
static PyGetSetDef Device_getsetters[] = {
{(char *)"data",
(getter)Device_data, NULL,
(char *)"The basic device information.",
NULL},
{NULL} /* Sentinel */
};
PyTypeObject wpd::DeviceType = { // {{{
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"wpd.Device", /*tp_name*/
sizeof(Device), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Device", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Device_methods, /* tp_methods */
0, /* tp_members */
Device_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
}; // }}}

View File

@ -2,7 +2,7 @@
* device_enumeration.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -72,15 +72,131 @@ IPortableDevice *open_device(const wchar_t *pnp_id, IPortableDeviceValues *clien
} // }}}
PyObject* get_storage_info(IPortableDevice *device) { // {{{
HRESULT hr, hr2;
IPortableDeviceContent *content = NULL;
IEnumPortableDeviceObjectIDs *objects = NULL;
IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *storage_properties = NULL;
IPortableDeviceValues *values = NULL;
PyObject *ans = NULL, *storage = NULL, *so = NULL, *desc = NULL, *soid = NULL;
DWORD fetched, i;
PWSTR object_ids[10];
GUID guid;
ULONGLONG capacity, free_space, capacity_objects, free_objects;
ULONG access;
LPWSTR storage_desc = NULL;
storage = PyList_New(0);
if (storage == NULL) { PyErr_NoMemory(); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Content(&content);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get content interface from device", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = content->Properties(&properties);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get properties interface", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = CoCreateInstance(CLSID_PortableDeviceKeyCollection, NULL,
CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&storage_properties));
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create storage properties collection", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = storage_properties->Add(WPD_OBJECT_CONTENT_TYPE);
hr = storage_properties->Add(WPD_FUNCTIONAL_OBJECT_CATEGORY);
hr = storage_properties->Add(WPD_STORAGE_DESCRIPTION);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_BYTES);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_ACCESS_CAPABILITY);
hr = storage_properties->Add(WPD_STORAGE_FILE_SYSTEM_TYPE);
hr = storage_properties->Add(WPD_OBJECT_NAME);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create collection of properties for storage query", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = content->EnumObjects(0, WPD_DEVICE_OBJECT_ID, NULL, &objects);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get objects from device", hr); goto end;}
hr = S_OK;
while (hr == S_OK) {
Py_BEGIN_ALLOW_THREADS;
hr = objects->Next(10, object_ids, &fetched);
Py_END_ALLOW_THREADS;
if (SUCCEEDED(hr)) {
for(i = 0; i < fetched; i++) {
Py_BEGIN_ALLOW_THREADS;
hr2 = properties->GetValues(object_ids[i], storage_properties, &values);
Py_END_ALLOW_THREADS;
if SUCCEEDED(hr2) {
if (
SUCCEEDED(values->GetGuidValue(WPD_OBJECT_CONTENT_TYPE, &guid)) && IsEqualGUID(guid, WPD_CONTENT_TYPE_FUNCTIONAL_OBJECT) &&
SUCCEEDED(values->GetGuidValue(WPD_FUNCTIONAL_OBJECT_CATEGORY, &guid)) && IsEqualGUID(guid, WPD_FUNCTIONAL_CATEGORY_STORAGE)
) {
capacity = 0; capacity_objects = 0; free_space = 0; free_objects = 0;
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY, &capacity);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY_IN_OBJECTS, &capacity_objects);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_BYTES, &free_space);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_OBJECTS, &free_objects);
desc = Py_False;
if (SUCCEEDED(values->GetUnsignedIntegerValue(WPD_STORAGE_ACCESS_CAPABILITY, &access)) && access == WPD_STORAGE_ACCESS_CAPABILITY_READWRITE) desc = Py_True;
soid = PyUnicode_FromWideChar(object_ids[i], wcslen(object_ids[i]));
if (soid == NULL) { PyErr_NoMemory(); goto end; }
so = Py_BuildValue("{s:K,s:K,s:K,s:K,s:O,s:N}",
"capacity", capacity, "capacity_objects", capacity_objects, "free_space", free_space, "free_objects", free_objects, "rw", desc, "id", soid);
if (so == NULL) { PyErr_NoMemory(); goto end; }
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_DESCRIPTION, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "description", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_OBJECT_NAME, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "name", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_FILE_SYSTEM_TYPE, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "filesystem", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
PyList_Append(storage, so);
Py_DECREF(so);
}
}
}
}
}
ans = storage;
end:
if (content != NULL) content->Release();
if (objects != NULL) objects->Release();
if (properties != NULL) properties->Release();
if (storage_properties != NULL) storage_properties->Release();
if (values != NULL) values->Release();
return ans;
} // }}}
PyObject* get_device_information(IPortableDevice *device) { // {{{
IPortableDeviceContent *content = NULL;
IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *keys = NULL;
IPortableDeviceValues *values = NULL;
IPortableDeviceCapabilities *capabilities = NULL;
IPortableDevicePropVariantCollection *categories = NULL;
HRESULT hr;
DWORD num_of_categories, i;
LPWSTR temp;
ULONG ti;
PyObject *t, *ans = NULL;
PyObject *t, *ans = NULL, *storage = NULL;
char *type;
Py_BEGIN_ALLOW_THREADS;
@ -117,6 +233,21 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device info", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Capabilities(&capabilities);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device capabilities", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = capabilities->GetFunctionalCategories(&categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = categories->GetCount(&num_of_categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories number", hr); goto end; }
ans = PyDict_New();
if (ans == NULL) {PyErr_NoMemory(); goto end;}
@ -184,11 +315,34 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
CoTaskMemFree(temp);
}
t = Py_False;
for (i = 0; i < num_of_categories; i++) {
PROPVARIANT pv;
PropVariantInit(&pv);
if (SUCCEEDED(categories->GetAt(i, &pv)) && pv.puuid != NULL) {
if (IsEqualGUID(WPD_FUNCTIONAL_CATEGORY_STORAGE, *pv.puuid)) {
t = Py_True;
}
}
PropVariantClear(&pv);
if (t == Py_True) break;
}
PyDict_SetItemString(ans, "has_storage", t);
if (t == Py_True) {
storage = get_storage_info(device);
if (storage == NULL) goto end;
PyDict_SetItemString(ans, "storage", storage);
}
end:
if (keys != NULL) keys->Release();
if (values != NULL) values->Release();
if (properties != NULL) properties->Release();
if (content != NULL) content->Release();
if (capabilities != NULL) capabilities->Release();
if (categories != NULL) categories->Release();
return ans;
} // }}}

View File

@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from threading import RLock
from calibre import as_unicode, prints
from calibre.constants import plugins, __appname__, numeric_version
from calibre.devices.errors import OpenFailed
from calibre.devices.mtp.base import MTPDeviceBase, synchronous
class MTP_DEVICE(MTPDeviceBase):
    # Windows driver for MTP devices, implemented on top of the native
    # 'wpd' (Windows Portable Devices) C extension plugin. All public
    # methods are serialized via @synchronous on self.lock.
    supported_platforms = ['windows']
    def __init__(self, *args, **kwargs):
        MTPDeviceBase.__init__(self, *args, **kwargs)
        self.dev = None
        self.lock = RLock()
        # PnP ids of devices that failed to open; never retried this session
        self.blacklisted_devices = set()
        # PnP ids the user ejected; ignored until physically re-plugged
        self.ejected_devices = set()
        self.currently_connected_pnp_id = None
        # Maps PnP id -> device info dict, or None (not yet queried),
        # or False (last query failed; retry on next poll)
        self.detected_devices = {}
        self.previous_devices_on_system = frozenset()
        self.last_refresh_devices_time = time.time()
        self.wpd = self.wpd_error = None
        # Storage object ids for main memory and the two card slots
        self._main_id = self._carda_id = self._cardb_id = None
    @synchronous
    def startup(self):
        # Load the wpd plugin and initialize the WPD COM machinery.
        # On failure, self.wpd_error holds a human readable message and
        # detect_managed_devices() becomes a no-op.
        self.wpd, self.wpd_error = plugins['wpd']
        if self.wpd is not None:
            try:
                self.wpd.init(__appname__, *(numeric_version[:3]))
            except self.wpd.NoWPD:
                self.wpd_error = _(
                    'The Windows Portable Devices service is not available'
                    ' on your computer. You may need to install Windows'
                    ' Media Player 11 or newer and/or restart your computer')
            except Exception as e:
                self.wpd_error = as_unicode(e)
    @synchronous
    def shutdown(self):
        # Drop any open device and uninitialize the WPD plugin.
        self.dev = self.filesystem_cache = None
        if self.wpd is not None:
            self.wpd.uninit()
    @synchronous
    def detect_managed_devices(self, devices_on_system):
        # Called repeatedly by the device scanner. Returns the PnP id of a
        # suitable MTP device to connect to, or None. Re-enumerates WPD
        # devices only when the system device set changed or at most every
        # 10 seconds, since enumeration is expensive.
        if self.wpd is None: return None
        devices_on_system = frozenset(devices_on_system)
        if (devices_on_system != self.previous_devices_on_system or time.time()
                - self.last_refresh_devices_time > 10):
            self.previous_devices_on_system = devices_on_system
            self.last_refresh_devices_time = time.time()
            try:
                pnp_ids = frozenset(self.wpd.enumerate_devices())
            except:
                return None
            # Keep cached info for still-present devices, forget the rest
            self.detected_devices = {dev:self.detected_devices.get(dev, None)
                    for dev in pnp_ids}
        # Get device data for detected devices. If there is an error, we will
        # try again for that device the next time this method is called.
        for dev in tuple(self.detected_devices.iterkeys()):
            data = self.detected_devices.get(dev, None)
            if data is None or data is False:
                try:
                    data = self.wpd.device_info(dev)
                except Exception as e:
                    prints('Failed to get device info for device:', dev,
                            as_unicode(e))
                    # First failure -> False (retry next poll); second
                    # consecutive failure -> {} (give up, treated as unsuitable)
                    data = {} if data is False else False
                self.detected_devices[dev] = data
        # Remove devices that have been disconnected from ejected
        # devices and blacklisted devices
        self.ejected_devices = set(self.detected_devices).intersection(
                self.ejected_devices)
        self.blacklisted_devices = set(self.detected_devices).intersection(
                self.blacklisted_devices)
        if self.currently_connected_pnp_id is not None:
            # Stay with the current device while it remains plugged in
            return (self.currently_connected_pnp_id if
                    self.currently_connected_pnp_id in self.detected_devices
                    else None)
        for dev, data in self.detected_devices.iteritems():
            if dev in self.blacklisted_devices or dev in self.ejected_devices:
                # Ignore blacklisted and ejected devices
                continue
            if data and self.is_suitable_wpd_device(data):
                return dev
        return None
    def is_suitable_wpd_device(self, devdata):
        # Return True if the device info dict describes an MTP device with
        # at least one read-write storage, i.e. one we can manage.
        # Check that protocol is MTP
        protocol = devdata.get('protocol', '').lower()
        if not protocol.startswith('mtp:'): return False
        # Check that the device has some read-write storage
        if not devdata.get('has_storage', False): return False
        has_rw_storage = False
        for s in devdata.get('storage', []):
            if s.get('rw', False):
                has_rw_storage = True
                break
        if not has_rw_storage: return False
        return True
    @synchronous
    def post_yank_cleanup(self):
        # The device was unplugged without an eject; reset all state.
        self.currently_connected_pnp_id = self.current_friendly_name = None
        self._main_id = self._carda_id = self._cardb_id = None
        self.dev = self.filesystem_cache = None
    @synchronous
    def eject(self):
        # User-requested eject: remember the PnP id so the device is ignored
        # until it is unplugged, then reset all connection state.
        if self.currently_connected_pnp_id is None: return
        self.ejected_devices.add(self.currently_connected_pnp_id)
        self.currently_connected_pnp_id = self.current_friendly_name = None
        self._main_id = self._carda_id = self._cardb_id = None
        self.dev = self.filesystem_cache = None
    @synchronous
    def open(self, connected_device, library_uuid):
        # Open the device identified by PnP id connected_device. One retry
        # after 2s, since WPD opens can fail transiently right after plug-in.
        # A device that cannot be opened, or that exposes no read-write
        # storage, is blacklisted for this session. Raises OpenFailed.
        self.dev = self.filesystem_cache = None
        try:
            self.dev = self.wpd.Device(connected_device)
        except self.wpd.WPDError:
            time.sleep(2)
            try:
                self.dev = self.wpd.Device(connected_device)
            except self.wpd.WPDError as e:
                self.blacklisted_devices.add(connected_device)
                raise OpenFailed('Failed to open %s with error: %s'%(
                    connected_device, as_unicode(e)))
        devdata = self.dev.data
        # Only read-write storages are usable; order determines the mapping
        # to main memory / card A / card B
        storage = [s for s in devdata.get('storage', []) if s.get('rw', False)]
        if not storage:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('No storage found for device %s'%(connected_device,))
        self._main_id = storage[0]['id']
        if len(storage) > 1:
            self._carda_id = storage[1]['id']
        if len(storage) > 2:
            self._cardb_id = storage[2]['id']
        self.current_friendly_name = devdata.get('friendly_name', None)
    @synchronous
    def get_device_information(self, end_session=True):
        # Returns (name, version, sw version, mime type) as expected by the
        # device interface; device_version is reused for both version slots.
        d = self.dev.data
        dv = d.get('device_version', '')
        return (self.current_friendly_name, dv, dv, '')
    @synchronous
    def card_prefix(self, end_session=True):
        # Path prefixes for the two card storages ('mtp:::<id>:::'), or None
        # for a slot that does not exist.
        ans = [None, None]
        if self._carda_id is not None:
            ans[0] = 'mtp:::%s:::'%self._carda_id
        if self._cardb_id is not None:
            ans[1] = 'mtp:::%s:::'%self._cardb_id
        return tuple(ans)
    @synchronous
    def total_space(self, end_session=True):
        # Total capacity in bytes for (main, card A, card B), 0 for
        # missing storages. Uses cached device data.
        ans = [0, 0, 0]
        dd = self.dev.data
        for s in dd.get('storage', []):
            i = {self._main_id:0, self._carda_id:1,
                    self._cardb_id:2}.get(s.get('id', -1), None)
            if i is not None:
                ans[i] = s['capacity']
        return tuple(ans)
    @synchronous
    def free_space(self, end_session=True):
        # Free space in bytes for (main, card A, card B). Refreshes the
        # device data first so the numbers are current.
        self.dev.update_data()
        ans = [0, 0, 0]
        dd = self.dev.data
        for s in dd.get('storage', []):
            i = {self._main_id:0, self._carda_id:1,
                    self._cardb_id:2}.get(s.get('id', -1), None)
            if i is not None:
                ans[i] = s['free_space']
        return tuple(ans)

View File

@ -2,7 +2,7 @@
* global.h
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#pragma once
@ -34,6 +34,18 @@ typedef struct {
} ClientInfo;
extern ClientInfo client_info;
// Device type
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
wchar_t *pnp_id;
IPortableDeviceValues *client_information;
IPortableDevice *device;
PyObject *device_information;
} Device;
extern PyTypeObject DeviceType;
// Utility functions
PyObject *hresult_set_exc(const char *msg, HRESULT hr);
wchar_t *unicode_to_wchar(PyObject *o);

View File

@ -7,39 +7,70 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import subprocess, sys, os
import subprocess, sys, os, pprint, signal, time, glob
pprint
def build():
def build(mod='wpd'):
master = subprocess.Popen('ssh -MN getafix'.split())
master2 = subprocess.Popen('ssh -MN xp_build'.split())
try:
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@xp_build*')):
time.sleep(0.05)
builder = subprocess.Popen('ssh xp_build ~/build-wpd'.split())
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if builder.wait() != 0:
raise Exception('Failed to build plugin')
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@getafix*')):
time.sleep(0.05)
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if syncer.wait() != 0:
raise Exception('Failed to rsync to getafix')
subprocess.check_call(
'scp xp_build:build/calibre/src/calibre/plugins/wpd.pyd /tmp'.split())
('scp xp_build:build/calibre/src/calibre/plugins/%s.pyd /tmp'%mod).split())
subprocess.check_call(
'scp /tmp/wpd.pyd getafix:calibre/src/calibre/devices/mtp/windows'.split())
('scp /tmp/%s.pyd getafix:calibre/src/calibre/devices/mtp/windows'%mod).split())
p = subprocess.Popen(
'ssh getafix calibre-debug -e calibre/src/calibre/devices/mtp/windows/remote.py'.split())
p.wait()
print()
finally:
for m in (master2, master):
m.send_signal(signal.SIGHUP)
for m in (master2, master):
m.wait()
def main():
import pprint
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
fp, d = os.path.abspath(__file__), os.path.dirname
if b'CALIBRE_DEVELOP_FROM' not in os.environ:
env = os.environ.copy()
env[b'CALIBRE_DEVELOP_FROM'] = bytes(d(d(d(d(d(fp))))))
subprocess.call(['calibre-debug', '-e', fp], env=env)
return
sys.path.insert(0, os.path.dirname(fp))
if 'wpd' in sys.modules:
del sys.modules['wpd']
import wpd
from calibre.constants import plugins
plugins._plugins['wpd'] = (wpd, '')
sys.path.pop(0)
wpd.init('calibre', 1, 0, 0)
from calibre.devices.scanner import win_scanner
from calibre.devices.mtp.windows.driver import MTP_DEVICE
dev = MTP_DEVICE(None)
dev.startup()
print (dev.wpd, dev.wpd_error)
try:
for pnp_id in wpd.enumerate_devices():
print (pnp_id)
pprint.pprint(wpd.device_info(pnp_id))
devices = win_scanner()
pnp_id = dev.detect_managed_devices(devices)
# pprint.pprint(dev.detected_devices)
print ('Trying to connect to:', pnp_id)
dev.open(pnp_id, '')
print ('Connected to:', dev.get_gui_name())
print ('Total space', dev.total_space())
print ('Free space', dev.free_space())
finally:
wpd.uninit()
dev.shutdown()
if __name__ == '__main__':
main()

View File

@ -2,7 +2,7 @@
* utils.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -33,13 +33,12 @@ PyObject *wpd::hresult_set_exc(const char *msg, HRESULT hr) {
wchar_t *wpd::unicode_to_wchar(PyObject *o) {
wchar_t *buf;
Py_ssize_t len;
if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The pnp id must be a unicode object"); return NULL;}
if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The python object must be a unicode object"); return NULL;}
len = PyUnicode_GET_SIZE(o);
if (len < 1) {PyErr_Format(PyExc_TypeError, "The pnp id must not be empty."); return NULL;}
buf = (wchar_t *)calloc(len+2, sizeof(wchar_t));
if (buf == NULL) { PyErr_NoMemory(); return NULL; }
len = PyUnicode_AsWideChar((PyUnicodeObject*)o, buf, len);
if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid pnp id."); return NULL; }
if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid python unicode object."); return NULL; }
return buf;
}

View File

@ -2,7 +2,7 @@
* mtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -92,14 +92,10 @@ wpd_enumerate_devices(PyObject *self, PyObject *args) {
ENSURE_WPD(NULL);
if (!PyArg_ParseTuple(args, "|O", &refresh)) return NULL;
if (refresh != NULL && PyObject_IsTrue(refresh)) {
Py_BEGIN_ALLOW_THREADS;
hr = portable_device_manager->RefreshDeviceList();
Py_END_ALLOW_THREADS;
if (FAILED(hr)) return hresult_set_exc("Failed to refresh the list of portable devices", hr);
}
hr = portable_device_manager->GetDevices(NULL, &num_of_devices);
num_of_devices += 15; // Incase new devices were connected between this call and the next
@ -148,6 +144,7 @@ wpd_device_info(PyObject *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "O", &py_pnp_id)) return NULL;
pnp_id = unicode_to_wchar(py_pnp_id);
if (wcslen(pnp_id) < 1) { PyErr_SetString(WPDError, "The PNP id must not be empty."); return NULL; }
if (pnp_id == NULL) return NULL;
client_information = get_client_information();
@ -174,7 +171,7 @@ static PyMethodDef wpd_methods[] = {
},
{"enumerate_devices", wpd_enumerate_devices, METH_VARARGS,
"enumerate_devices(refresh=False)\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. The result is cached, unless refresh=True. Do not call with refresh=True too often as it is resource intensive."
"enumerate_devices()\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. Do not call too often as it is resource intensive."
},
{"device_info", wpd_device_info, METH_VARARGS,
@ -189,6 +186,10 @@ PyMODINIT_FUNC
initwpd(void) {
PyObject *m;
wpd::DeviceType.tp_new = PyType_GenericNew;
if (PyType_Ready(&wpd::DeviceType) < 0)
return;
m = Py_InitModule3("wpd", wpd_methods, "Interface to the WPD windows service.");
if (m == NULL) return;
@ -197,6 +198,10 @@ initwpd(void) {
NoWPD = PyErr_NewException("wpd.NoWPD", NULL, NULL);
if (NoWPD == NULL) return;
Py_INCREF(&DeviceType);
PyModule_AddObject(m, "Device", (PyObject *)&DeviceType);
}

View File

@ -193,7 +193,11 @@ class PRST1(USBMS):
time_offsets = {}
for i, row in enumerate(cursor):
try:
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
except (OSError, IOError):
# In case the db has incorrect path info
continue
device_date = int(row[1]);
offset = device_date - comp_date
time_offsets.setdefault(offset, 0)

View File

@ -10,7 +10,8 @@ from threading import RLock
from collections import namedtuple
from calibre import prints, as_unicode
from calibre.constants import iswindows, isosx, plugins, islinux, isfreebsd
from calibre.constants import (iswindows, isosx, plugins, islinux, isfreebsd,
isnetbsd)
osx_scanner = win_scanner = linux_scanner = None
@ -253,13 +254,18 @@ freebsd_scanner = None
if isfreebsd:
freebsd_scanner = FreeBSDScanner()
netbsd_scanner = None
''' NetBSD support currently not written yet '''
if isnetbsd:
netbsd_scanner = None
class DeviceScanner(object):
def __init__(self, *args):
if isosx and osx_scanner is None:
raise RuntimeError('The Python extension usbobserver must be available on OS X.')
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else linux_scanner
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else netbsd_scanner if isnetbsd else linux_scanner
self.devices = []
def scan(self):

View File

@ -11,11 +11,12 @@ import socket, select, json, inspect, os, traceback, time, sys, random
import hashlib, threading
from base64 import b64encode, b64decode
from functools import wraps
from errno import EAGAIN, EINTR
from calibre import prints
from calibre.constants import numeric_version, DEBUG
from calibre.devices.errors import (OpenFailed, ControlError, TimeoutError,
InitialConnectionError)
InitialConnectionError, PacketError)
from calibre.devices.interface import DevicePlugin
from calibre.devices.usbms.books import Book, CollectionsBookList
from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -85,6 +86,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
MAX_CLIENT_COMM_TIMEOUT = 60.0 # Wait at most N seconds for an answer
MAX_UNSUCCESSFUL_CONNECTS = 5
SEND_NOOP_EVERY_NTH_PROBE = 5
DISCONNECT_AFTER_N_SECONDS = 30*60 # 30 minutes
opcodes = {
'NOOP' : 12,
'OK' : 0,
@ -120,7 +124,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('Use fixed network port') + ':::<p>' +
_('If checked, use the port number in the "Port" box, otherwise '
'the driver will pick a random port') + '</p>',
_('Port') + ':::<p>' +
_('Port number: ') + ':::<p>' +
_('Enter the port number the driver is to use if the "fixed port" box is checked') + '</p>',
_('Print extra debug information') + ':::<p>' +
_('Check this box if requested when reporting problems') + '</p>',
@ -131,7 +135,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('. Two special collections are available: %(abt)s:%(abtv)s and %(aba)s:%(abav)s. Add '
'these values to the list to enable them. The collections will be '
'given the name provided after the ":" character.')%dict(
abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR)
abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR),
'',
_('Enable the no-activity timeout') + ':::<p>' +
_('If this box is checked, calibre will automatically disconnect if '
'a connected device does nothing for %d minutes. Unchecking this '
' box disables this timeout, so calibre will never automatically '
'disconnect.')%(DISCONNECT_AFTER_N_SECONDS/60,) + '</p>',
]
EXTRA_CUSTOMIZATION_DEFAULT = [
False,
@ -141,7 +151,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
False, '9090',
False,
'',
''
'',
'',
True,
]
OPT_AUTOSTART = 0
OPT_PASSWORD = 2
@ -149,6 +161,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
OPT_PORT_NUMBER = 5
OPT_EXTRA_DEBUG = 6
OPT_COLLECTIONS = 8
OPT_AUTODISCONNECT = 10
def __init__(self, path):
self.sync_lock = threading.RLock()
@ -165,6 +178,15 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
inspect.stack()[1][3]), end='')
for a in args:
try:
if isinstance(a, dict):
printable = {}
for k,v in a.iteritems():
if isinstance(v, (str, unicode)) and len(v) > 50:
printable[k] = 'too long'
else:
printable[k] = v
prints('', printable, end='');
else:
prints('', a, end='')
except:
prints('', 'value too long', end='')
@ -339,6 +361,27 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
pos += len(v)
return data
def _send_byte_string(self, s):
if not isinstance(s, bytes):
self._debug('given a non-byte string!')
raise PacketError("Internal error: found a string that isn't bytes")
sent_len = 0;
total_len = len(s)
while sent_len < total_len:
try:
if sent_len == 0:
amt_sent = self.device_socket.send(s)
else:
amt_sent = self.device_socket.send(s[sent_len:])
if amt_sent <= 0:
raise IOError('Bad write on device socket');
sent_len += amt_sent
except socket.error as e:
self._debug('socket error', e, e.errno)
if e.args[0] != EAGAIN and e.args[0] != EINTR:
raise
time.sleep(0.1) # lets not hammer the OS too hard
def _call_client(self, op, arg, print_debug_info=True):
if op != 'NOOP':
self.noop_counter = 0
@ -355,9 +398,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if print_debug_info and extra_debug:
self._debug('send string', s)
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
self.device_socket.sendall(('%d' % len(s))+s)
self.device_socket.settimeout(None)
self._send_byte_string((b'%d' % len(s))+s)
v = self._read_string_from_net()
self.device_socket.settimeout(None)
if print_debug_info and extra_debug:
self._debug('received string', v)
if v:
@ -373,13 +416,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
except socket.error:
self._debug('device went away')
self._close_device_socket()
raise ControlError('Device closed the network connection')
raise ControlError(desc='Device closed the network connection')
except:
self._debug('other exception')
traceback.print_exc()
self._close_device_socket()
raise
raise ControlError('Device responded with incorrect information')
raise ControlError(desc='Device responded with incorrect information')
# Write a file as a series of base64-encoded strings.
def _put_file(self, infile, lpath, book_metadata, this_book, total_books):
@ -475,7 +518,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self.is_connected = False
if self.is_connected:
self.noop_counter += 1
if only_presence and (self.noop_counter % 5) != 1:
if only_presence and (
self.noop_counter % self.SEND_NOOP_EVERY_NTH_PROBE) != 1:
try:
ans = select.select((self.device_socket,), (), (), 0)
if len(ans[0]) == 0:
@ -486,6 +530,11 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
# This will usually toss an exception if the socket is gone.
except:
pass
if (self.settings().extra_customization[self.OPT_AUTODISCONNECT] and
self.noop_counter > self.DISCONNECT_AFTER_N_SECONDS):
self._close_device_socket()
self._debug('timeout -- disconnected')
else:
try:
if self._call_client('NOOP', dict())[0] is None:
self._close_device_socket()
@ -533,7 +582,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug()
if not self.is_connected:
# We have been called to retry the connection. Give up immediately
raise ControlError('Attempt to open a closed device')
raise ControlError(desc='Attempt to open a closed device')
self.current_library_uuid = library_uuid
self.current_library_name = current_library_name()
try:
@ -569,6 +618,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug('Protocol error - bogus book packet length')
self._close_device_socket()
return False
self._debug('CC version #:', result.get('ccVersionNumber', 'unknown'))
self.max_book_packet_len = result.get('maxBookContentPacketLen',
self.BASE_PACKET_LEN)
exts = result.get('acceptedExtensions', None)
@ -689,7 +739,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._set_known_metadata(book)
bl.add_book(book, replace_metadata=True)
else:
raise ControlError('book metadata not returned')
raise ControlError(desc='book metadata not returned')
return bl
@synchronous('sync_lock')
@ -720,7 +770,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
print_debug_info=False)
if opcode != 'OK':
self._debug('protocol error', opcode, i)
raise ControlError('sync_booklists')
raise ControlError(desc='sync_booklists')
@synchronous('sync_lock')
def eject(self):
@ -748,7 +798,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
book = Book(self.PREFIX, lpath, other=mdata)
length = self._put_file(infile, lpath, book, i, len(files))
if length < 0:
raise ControlError('Sending book %s to device failed' % lpath)
raise ControlError(desc='Sending book %s to device failed' % lpath)
paths.append((lpath, length))
# No need to deal with covers. The client will get the thumbnails
# in the mi structure
@ -789,7 +839,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if opcode == 'OK':
self._debug('removed book with UUID', result['uuid'])
else:
raise ControlError('Protocol error - delete books')
raise ControlError(desc='Protocol error - delete books')
@synchronous('sync_lock')
def remove_books_from_metadata(self, paths, booklists):
@ -825,7 +875,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
else:
eof = True
else:
raise ControlError('request for book data failed')
raise ControlError(desc='request for book data failed')
@synchronous('sync_lock')
def set_plugboards(self, plugboards, pb_func):

View File

@ -88,6 +88,15 @@ class MOBIOutput(OutputFormatPlugin):
'formats. This option tells calibre not to do this. '
'Useful if your document contains lots of GIF/PNG images that '
'become very large when converted to JPEG.')),
OptionRecommendation(name='mobi_file_type', choices=['old', 'both',
'new'], recommended_value='old',
help=_('By default calibre generates MOBI files that contain the '
'old MOBI 6 format. This format is compatible with all '
'devices. However, by changing this setting, you can tell '
'calibre to generate MOBI files that contain both MOBI 6 and '
'the new KF8 format, or only the new KF8 format. KF8 has '
'more features than MOBI 6, but only works with newer Kindles.')),
])
def check_for_periodical(self):
@ -165,11 +174,10 @@ class MOBIOutput(OutputFormatPlugin):
toc.nodes[0].href = toc.nodes[0].nodes[0].href
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.utils.config import tweaks
from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb
mobi_type = tweaks.get('test_mobi_output_type', 'old')
mobi_type = opts.mobi_file_type
if self.is_periodical:
mobi_type = 'old' # Amazon does not support KF8 periodicals
create_kf8 = mobi_type in ('new', 'both')

View File

@ -11,6 +11,7 @@ from collections import defaultdict
from lxml import etree
import cssutils
from cssutils.css import Property
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath)
@ -276,10 +277,16 @@ class CSSFlattener(object):
cssdict['font-family'] = node.attrib['face']
del node.attrib['face']
if 'color' in node.attrib:
cssdict['color'] = node.attrib['color']
try:
cssdict['color'] = Property('color', node.attrib['color']).value
except ValueError:
pass
del node.attrib['color']
if 'bgcolor' in node.attrib:
cssdict['background-color'] = node.attrib['bgcolor']
try:
cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
except ValueError:
pass
del node.attrib['bgcolor']
if cssdict.get('font-weight', '').lower() == 'medium':
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium

View File

@ -15,13 +15,15 @@ from calibre.utils.icu import sort_key
from catalog_epub_mobi_ui import Ui_Form
from PyQt4.Qt import (Qt, QAbstractItemView, QCheckBox, QComboBox,
QDoubleSpinBox, QIcon, QLineEdit, QRadioButton, QSize, QSizePolicy,
QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget)
QDoubleSpinBox, QIcon, QLineEdit, QObject, QRadioButton, QSize, QSizePolicy,
QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget,
SIGNAL)
class PluginWidget(QWidget,Ui_Form):
TITLE = _('E-book options')
HELP = _('Options specific to')+' AZW3/EPUB/MOBI '+_('output')
DEBUG = False
# Output synced to the connected device?
sync_enabled = True
@ -100,6 +102,39 @@ class PluginWidget(QWidget,Ui_Form):
self.OPTION_FIELDS = option_fields
def construct_tw_opts_object(self, c_name, opt_value, opts_dict):
'''
Build an opts object from the UI settings to pass to the catalog builder
Handles two types of rules sets, with and without ['prefix'] field
Store processed opts object to opt_dict
'''
rule_set = []
for stored_rule in opt_value:
rule = copy(stored_rule)
# Skip disabled and incomplete rules
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern']:
continue
elif 'prefix' in rule and not rule['prefix']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column friendly name
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'] in [_('any value'),_('any date')]:
rule_pattern = '.*'
elif rule['pattern'] == _('unspecified'):
rule['pattern'] = 'None'
if 'prefix' in rule:
pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
else:
pr = (rule['name'],rule['field'],rule['pattern'])
rule_set.append(pr)
opt_value = tuple(rule_set)
# Strip off the trailing '_tw'
opts_dict[c_name[:-3]] = opt_value
def fetchEligibleCustomFields(self):
self.all_custom_fields = self.db.custom_field_keys()
custom_fields = {}
@ -194,11 +229,10 @@ class PluginWidget(QWidget,Ui_Form):
def options(self):
# Save/return the current options
# exclude_genre stores literally
# generate_titles, generate_recently_added store as True/False
# Section switches store as True/False
# others store as lists
opts_dict = {}
# Save values to gprefs
prefix_rules_processed = False
exclusion_rules_processed = False
@ -229,56 +263,8 @@ class PluginWidget(QWidget,Ui_Form):
gprefs.set(self.name + '_' + c_name, opt_value)
# Construct opts object for catalog builder
if c_name == 'prefix_rules_tw':
rule_set = []
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern'] or not rule['prefix']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['prefix_rules'] = opt_value
elif c_name == 'exclusion_rules_tw':
rule_set = []
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['exclusion_rules'] = opt_value
if c_name in ['exclusion_rules_tw','prefix_rules_tw']:
self.construct_tw_opts_object(c_name, opt_value, opts_dict)
else:
opts_dict[c_name] = opt_value
@ -299,7 +285,7 @@ class PluginWidget(QWidget,Ui_Form):
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
except:
opts_dict['output_profile'] = ['default']
if False:
if self.DEBUG:
print "opts_dict"
for opt in sorted(opts_dict.keys(), key=sort_key):
print " %s: %s" % (opt, repr(opts_dict[opt]))
@ -343,7 +329,6 @@ class PluginWidget(QWidget,Ui_Form):
self.header_note_source_fields = custom_fields
self.header_note_source_field.currentIndexChanged.connect(self.header_note_source_field_changed)
# Populate the 'Merge with Comments' combo box
custom_fields = {}
for custom_field in self.all_custom_fields:
@ -450,10 +435,11 @@ class ComboBox(NoWheelComboBox):
class GenericRulesTable(QTableWidget):
'''
Generic methods for managing rows
Add QTableWidget, controls to parent QGroupBox
placeholders for basic methods to be overriden
Generic methods for managing rows in a QTableWidget
'''
DEBUG = False
MAXIMUM_TABLE_HEIGHT = 113
NAME_FIELD_WIDTH = 225
def __init__(self, parent_gb, object_name, rules, eligible_custom_fields, db):
self.rules = rules
@ -464,13 +450,12 @@ class GenericRulesTable(QTableWidget):
self.layout = parent_gb.layout()
# Add ourselves to the layout
#print("verticalHeader: %s" % dir(self.verticalHeader()))
sizePolicy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Minimum)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
#sizePolicy.setHeightForWidth(self.sizePolicy().hasHeightForWidth())
self.setSizePolicy(sizePolicy)
self.setMaximumSize(QSize(16777215, 113))
self.setMaximumSize(QSize(16777215, self.MAXIMUM_TABLE_HEIGHT))
self.setColumnCount(0)
self.setRowCount(0)
@ -481,6 +466,9 @@ class GenericRulesTable(QTableWidget):
self._init_controls()
# Hook check_box changes
QObject.connect(self, SIGNAL('cellChanged(int,int)'), self.enabled_state_changed)
def _init_controls(self):
# Add the control set
vbl = QVBoxLayout()
@ -517,6 +505,8 @@ class GenericRulesTable(QTableWidget):
def add_row(self):
self.setFocus()
row = self.last_row_selected + 1
if self.DEBUG:
print("%s:add_row(): at row: %d" % (self.objectName(), row))
self.insertRow(row)
self.populate_table_row(row, self.create_blank_row_data())
self.select_and_scroll_to_row(row)
@ -524,19 +514,10 @@ class GenericRulesTable(QTableWidget):
# In case table was empty
self.horizontalHeader().setStretchLastSection(True)
def convert_row_to_data(self):
'''
override
'''
pass
def create_blank_row_data(self):
'''
override
'''
pass
def delete_row(self):
if self.DEBUG:
print("%s:delete_row()" % self.objectName())
self.setFocus()
rows = self.last_rows_selected
if len(rows) == 0:
@ -545,10 +526,11 @@ class GenericRulesTable(QTableWidget):
first = rows[0].row() + 1
last = rows[-1].row() + 1
message = _('Are you sure you want to delete rule %d?') % first
first_rule_name = unicode(self.cellWidget(first-1,self.COLUMNS['NAME']['ordinal']).text()).strip()
message = _("Are you sure you want to delete '%s'?") % (first_rule_name)
if len(rows) > 1:
message = _('Are you sure you want to delete rules %d-%d?') % (first, last)
if not question_dialog(self, _('Are you sure?'), message, show_copy_button=False):
message = _('Are you sure you want to delete rules #%d-%d?') % (first, last)
if not question_dialog(self, _('Delete Rule'), message, show_copy_button=False):
return
first_sel_row = self.currentRow()
for selrow in reversed(rows):
@ -558,17 +540,24 @@ class GenericRulesTable(QTableWidget):
elif self.rowCount() > 0:
self.select_and_scroll_to_row(first_sel_row - 1)
def enabled_state_changed(self, row, col):
if col in [self.COLUMNS['ENABLED']['ordinal']]:
self.select_and_scroll_to_row(row)
if self.DEBUG:
print("%s:enabled_state_changed(): row %d col %d" %
(self.objectName(), row, col))
def focusInEvent(self,e):
if self.DEBUG:
print("%s:focusInEvent()" % self.objectName())
def focusOutEvent(self,e):
# Override of QTableWidget method - clear selection when table loses focus
self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows()
self.clearSelection()
def get_data(self):
'''
override
'''
pass
if self.DEBUG:
print("%s:focusOutEvent(): self.last_row_selected: %d" % (self.objectName(),self.last_row_selected))
def move_row_down(self):
self.setFocus()
@ -583,6 +572,8 @@ class GenericRulesTable(QTableWidget):
for selrow in reversed(rows):
dest_row = selrow.row() + 1
src_row = selrow.row()
if self.DEBUG:
print("%s:move_row_down() %d -> %d" % (self.objectName(),src_row, dest_row))
# Save the contents of the destination row
saved_data = self.convert_row_to_data(dest_row)
@ -596,11 +587,9 @@ class GenericRulesTable(QTableWidget):
# Populate it with the saved data
self.populate_table_row(src_row, saved_data)
self.blockSignals(False)
scroll_to_row = last_sel_row + 1
if scroll_to_row < self.rowCount() - 1:
scroll_to_row = scroll_to_row + 1
self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def move_row_up(self):
self.setFocus()
@ -611,7 +600,11 @@ class GenericRulesTable(QTableWidget):
if first_sel_row <= 0:
return
self.blockSignals(True)
for selrow in rows:
if self.DEBUG:
print("%s:move_row_up() %d -> %d" % (self.objectName(),selrow.row(), selrow.row()-1))
# Save the row above
saved_data = self.convert_row_to_data(selrow.row() - 1)
@ -621,33 +614,92 @@ class GenericRulesTable(QTableWidget):
# Delete the row above
self.removeRow(selrow.row() - 1)
self.blockSignals(False)
scroll_to_row = first_sel_row - 1
scroll_to_row = first_sel_row
if scroll_to_row > 0:
scroll_to_row = scroll_to_row - 1
self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def populate_table_row(self):
'''
override
'''
pass
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def resize_name(self, scale):
#current_width = self.columnWidth(1)
#self.setColumnWidth(1, min(225,int(current_width * scale)))
self.setColumnWidth(1, 225)
def resize_name(self):
self.setColumnWidth(1, self.NAME_FIELD_WIDTH)
def rule_name_edited(self):
if self.DEBUG:
print("%s:rule_name_edited()" % self.objectName())
current_row = self.currentRow()
self.cellWidget(current_row,1).home(False)
self.setFocus()
self.select_and_scroll_to_row(current_row)
def select_and_scroll_to_row(self, row):
self.setFocus()
self.selectRow(row)
self.scrollToItem(self.currentItem())
self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows()
def _source_index_changed(self, combo):
# Figure out which row we're in
for row in range(self.rowCount()):
if self.cellWidget(row, self.COLUMNS['FIELD']['ordinal']) is combo:
break
if self.DEBUG:
print("%s:_source_index_changed(): calling source_index_changed with row: %d " %
(self.objectName(), row))
self.source_index_changed(combo, row)
def source_index_changed(self, combo, row, pattern=''):
# Populate the Pattern field based upon the Source field
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = [_('True'),_('False'),_('unspecified')]
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = [_('any value'),_('unspecified')]
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = [_('any date'),_('unspecified')]
values_combo = ComboBox(self, values, pattern)
values_combo.currentIndexChanged.connect(partial(self.values_index_changed, values_combo))
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
self.select_and_scroll_to_row(row)
def values_index_changed(self, combo):
# After edit, select row
for row in range(self.rowCount()):
if self.cellWidget(row, self.COLUMNS['PATTERN']['ordinal']) is combo:
self.select_and_scroll_to_row(row)
break
if self.DEBUG:
print("%s:values_index_changed(): row %d " %
(self.objectName(), row))
class ExclusionRules(GenericRulesTable):
@ -658,6 +710,7 @@ class ExclusionRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(ExclusionRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("exclusion_rules_table")
self._init_table_widget()
self._initialize()
@ -672,7 +725,7 @@ class ExclusionRules(GenericRulesTable):
def _initialize(self):
self.populate_table()
self.resizeColumnsToContents()
self.resize_name(1.5)
self.resize_name()
self.horizontalHeader().setStretchLastSection(True)
self.clearSelection()
@ -706,20 +759,6 @@ class ExclusionRules(GenericRulesTable):
'pattern':data['pattern']})
return data_items
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def populate_table_row(self, row, data):
def set_rule_name_in_row(row, col, name=''):
@ -730,7 +769,7 @@ class ExclusionRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row))
source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo)
return source_combo
@ -738,7 +777,8 @@ class ExclusionRules(GenericRulesTable):
self.blockSignals(True)
# Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled']))
check_box = CheckableTableWidgetItem(data['enabled'])
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], check_box)
# Rule name
set_rule_name_in_row(row, self.COLUMNS['NAME']['ordinal'], name=data['name'])
@ -748,32 +788,10 @@ class ExclusionRules(GenericRulesTable):
# Pattern
# The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern'])
self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
# Populate the Pattern field based upon the Source field
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = ['True','False','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = ['any value','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = ['any date','unspecified']
values_combo = ComboBox(self, values, pattern)
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
class PrefixRules(GenericRulesTable):
COLUMNS = { 'ENABLED':{'ordinal': 0, 'name': ''},
@ -784,6 +802,7 @@ class PrefixRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(PrefixRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("prefix_rules_table")
self._init_table_widget()
self._initialize()
@ -799,14 +818,14 @@ class PrefixRules(GenericRulesTable):
self.generate_prefix_list()
self.populate_table()
self.resizeColumnsToContents()
self.resize_name(1.5)
self.resize_name()
self.horizontalHeader().setStretchLastSection(True)
self.clearSelection()
def convert_row_to_data(self, row):
data = self.create_blank_row_data()
data['ordinal'] = row
data['enabled'] = self.item(row,0).checkState() == Qt.Checked
data['enabled'] = self.item(row,self.COLUMNS['ENABLED']['ordinal']).checkState() == Qt.Checked
data['name'] = unicode(self.cellWidget(row,self.COLUMNS['NAME']['ordinal']).text()).strip()
data['prefix'] = unicode(self.cellWidget(row,self.COLUMNS['PREFIX']['ordinal']).currentText()).strip()
data['field'] = unicode(self.cellWidget(row,self.COLUMNS['FIELD']['ordinal']).currentText()).strip()
@ -970,20 +989,6 @@ class PrefixRules(GenericRulesTable):
'prefix':data['prefix']})
return data_items
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def populate_table_row(self, row, data):
def set_prefix_field_in_row(row, col, field=''):
@ -998,14 +1003,12 @@ class PrefixRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row))
source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo)
return source_combo
# Entry point
self.blockSignals(True)
#print("prefix_rules_populate_table_row processing rule:\n%s\n" % data)
# Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled']))
@ -1021,31 +1024,7 @@ class PrefixRules(GenericRulesTable):
# Pattern
# The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern'])
self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
# Populate the Pattern field based upon the Source field
# row, col are the control that changed
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = ['True','False','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = ['any value','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = ['any date','unspecified']
values_combo = ComboBox(self, values, pattern)
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)

View File

@ -25,7 +25,7 @@ class PluginWidget(Widget, Ui_Form):
'mobi_keep_original_images',
'mobi_ignore_margins', 'mobi_toc_at_start',
'dont_compress', 'no_inline_toc', 'share_not_sync',
'personal_doc']#, 'mobi_navpoints_only_deepest']
'personal_doc', 'mobi_file_type']
)
self.db, self.book_id = db, book_id
@ -48,6 +48,7 @@ class PluginWidget(Widget, Ui_Form):
self.font_family_model = font_family_model
self.opt_masthead_font.setModel(self.font_family_model)
'''
self.opt_mobi_file_type.addItems(['old', 'both', 'new'])
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,80 +14,10 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="8" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label_3">
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text">
<string>Put generated Table of Contents at &amp;start of book instead of end</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text">
<string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
@ -104,17 +34,24 @@
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text">
<string>Disable compression of the file contents</string>
<string>Put generated Table of Contents at &amp;start of book instead of end</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<item row="3" column="0">
<widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text">
<string>Do not add Table of Contents to book</string>
<string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property>
</widget>
</item>
@ -125,6 +62,55 @@
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QFormLayout" name="formLayout">
<property name="fieldGrowthPolicy">
<enum>QFormLayout::ExpandingFieldsGrow</enum>
</property>
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>MOBI file &amp;type:</string>
</property>
<property name="buddy">
<cstring>opt_mobi_file_type</cstring>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="opt_mobi_file_type"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -529,6 +529,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.remove_button.clicked.connect(self.s_r_remove_query)
self.queries = JSONConfig("search_replace_queries")
self.saved_search_name = ''
self.query_field.addItem("")
self.query_field_values = sorted([q for q in self.queries], key=sort_key)
self.query_field.addItems(self.query_field_values)
@ -1034,11 +1035,16 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.queries.commit()
def s_r_save_query(self, *args):
dex = self.query_field_values.index(self.saved_search_name)
names = ['']
names.extend(self.query_field_values)
try:
dex = names.index(self.saved_search_name)
except:
dex = 0
name = ''
while not name:
name, ok = QInputDialog.getItem(self, _('Save search/replace'),
_('Search/replace name:'), self.query_field_values, dex, True)
_('Search/replace name:'), names, dex, True)
if not ok:
return
if not name:
@ -1086,6 +1092,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def s_r_query_change(self, item_name):
if not item_name:
self.s_r_reset_query_fields()
self.saved_search_name = ''
return
item = self.queries.get(unicode(item_name), None)
if item is None:

View File

@ -1241,17 +1241,18 @@ not multiple and the destination field is multiple</string>
<tabstop>search_mode</tabstop>
<tabstop>s_r_src_ident</tabstop>
<tabstop>s_r_template</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>replace_with</tabstop>
<tabstop>replace_func</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>replace_mode</tabstop>
<tabstop>comma_separated</tabstop>
<tabstop>s_r_dst_ident</tabstop>
<tabstop>results_count</tabstop>
<tabstop>scrollArea11</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>starting_from</tabstop>
<tabstop>multiple_separator</tabstop>
<tabstop>scrollArea11</tabstop>
</tabstops>
<resources>
<include location="../../../../resources/images.qrc"/>

View File

@ -310,8 +310,18 @@ class MetadataSingleDialogBase(ResizableDialog):
self.update_from_mi(mi)
def cover_from_format(self, *args):
try:
mi, ext = self.formats_manager.get_selected_format_metadata(self.db,
self.book_id)
except (IOError, OSError) as err:
if getattr(err, 'errno', None) == errno.EACCES: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'
error_dialog(self, _('Permission denied'),
_('Could not open %s. Is it being used by another'
' program?')%fname, det_msg=traceback.format_exc(),
show=True)
return
if mi is None:
return
cdata = None

View File

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2012, Florent FAYOLLE <florent.fayolle69@gmail.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
from calibre.gui2.store.search_result import SearchResult
import unicodedata
#mimetypes.add_type('application/epub+zip', '.epub')
class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
open_search_url = 'http://www.ebooksgratuits.com/opds/opensearch.xml'
web_url = 'http://www.ebooksgratuits.com/'
def strip_accents(self, s):
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
def search(self, query, max_results=10, timeout=60):
query = self.strip_accents(unicode(query))
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
if s.downloads:
s.drm = SearchResult.DRM_UNLOCKED
s.price = '$0.00'
yield s

View File

@ -417,7 +417,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
vprefs.set('viewer_splitter_state',
bytearray(self.splitter.saveState()))
vprefs['multiplier'] = self.view.multiplier
vprefs['in_paged_mode1'] = not self.action_toggle_paged_mode.isChecked()
vprefs['in_paged_mode'] = not self.action_toggle_paged_mode.isChecked()
def restore_state(self):
state = vprefs.get('viewer_toolbar_state', None)
@ -434,8 +434,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
# specific location, ensure they are visible.
self.tool_bar.setVisible(True)
self.tool_bar2.setVisible(True)
self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode1',
False))
self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode',
True))
self.toggle_paged_mode(self.action_toggle_paged_mode.isChecked(),
at_start=True)

View File

@ -440,8 +440,7 @@ class KindlePage(QWizardPage, KindleUI):
x = unicode(self.to_address.text()).strip()
parts = x.split('@')
if (self.send_email_widget.set_email_settings(True) and len(parts) >= 2
and parts[0]):
if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
conf = smtp_prefs()
accounts = conf.parse().accounts
if not accounts: accounts = {}
@ -676,8 +675,9 @@ class LibraryPage(QWizardPage, LibraryUI):
self.language.blockSignals(True)
self.language.clear()
from calibre.utils.localization import (available_translations,
get_language, get_lang)
get_language, get_lang, get_lc_messages_path)
lang = get_lang()
lang = get_lc_messages_path(lang) if lang else lang
if lang is None or lang not in available_translations():
lang = 'en'
def get_esc_lang(l):

View File

@ -341,7 +341,8 @@ class EPUB_MOBI(CatalogPlugin):
recommendations.append(('comments', '', OptionRecommendation.HIGH))
# >>> Use to debug generated catalog code before conversion <<<
#setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
if False:
setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
dp = getattr(opts, 'debug_pipeline', None)
if dp is not None:

View File

@ -1188,11 +1188,11 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\s','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag)
else:
@ -1331,10 +1331,13 @@ Author '{0}':
current_series = new_entry['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\W','',new_entry['series']).lower())
if self.letter_or_symbol(new_entry['series']) == self.SYMBOLS:
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(new_entry['series']))
aTag.insert(0, new_entry['series'])
pSeriesTag.insert(0, aTag)
else:
@ -1741,17 +1744,6 @@ Author '{0}':
body = soup.find('body')
btc = 0
pTag = Tag(soup, "p")
pTag['style'] = 'display:none'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
body.insert(btc, pTag)
btc += 1
divTag = Tag(soup, "div")
dtc = 0
current_letter = ""
@ -1787,11 +1779,10 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
aTag = Tag(soup, 'a')
if self.letter_or_symbol(book['series']):
aTag['id'] = "symbol_%s_series" % re.sub('\W','',book['series']).lower()
else:
aTag['id'] = "%s_series" % re.sub('\W','',book['series']).lower()
aTag['id'] = self.generateSeriesAnchor(book['series'])
pSeriesTag.insert(0,aTag)
pSeriesTag.insert(1,NavigableString('%s' % book['series']))
divTag.insert(dtc,pSeriesTag)
@ -1847,16 +1838,20 @@ Author '{0}':
divTag.insert(dtc, pBookTag)
dtc += 1
if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head
#<h2><a name="byseries" id="byseries"></a>By Series</h2>
pTag = Tag(soup, "p")
pTag['class'] = 'title'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head
aTag = Tag(soup, "a")
anchor_name = friendly_name.lower()
aTag['id'] = anchor_name.replace(" ","")
pTag.insert(0,aTag)
#h2Tag.insert(1,NavigableString('%s (%d)' % (friendly_name, series_count)))
pTag.insert(1,NavigableString('%s' % friendly_name))
body.insert(btc,pTag)
btc += 1
@ -3353,15 +3348,23 @@ Author '{0}':
return codeTag
else:
spanTag = Tag(soup, "span")
spanTag['class'] = "prefix"
# color:white was the original technique used to align columns.
# The new technique is to float the prefix left with CSS.
if prefix_char is None:
if True:
prefix_char = "&nbsp;"
else:
del spanTag['class']
spanTag['style'] = "color:white"
prefix_char = self.defaultPrefix
spanTag.insert(0,NavigableString(prefix_char))
return spanTag
def generateAuthorAnchor(self, author):
# Strip white space to ''
return re.sub("\W","", author)
# Generate a legal XHTML id/href string
return re.sub("\W","", ascii_text(author))
def generateFormatArgs(self, book):
series_index = str(book['series_index'])
@ -3436,10 +3439,11 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\W','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries', self.generateSeriesAnchor(book['series']))
aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag)
else:
@ -3641,12 +3645,7 @@ Author '{0}':
if aTag:
if book['series']:
if self.opts.generate_series:
if self.letter_or_symbol(book['series']):
aTag['href'] = "%s.html#symbol_%s_series" % ('BySeries',
re.sub('\W','',book['series']).lower())
else:
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\s','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
else:
aTag.extract()
@ -3780,6 +3779,13 @@ Author '{0}':
pass
return rating
def generateSeriesAnchor(self, series):
# Generate a legal XHTML id/href string
if self.letter_or_symbol(series) == self.SYMBOLS:
return "symbol_%s_series" % re.sub('\W','',series).lower()
else:
return "%s_series" % re.sub('\W','',ascii_text(series)).lower()
def generateShortDescription(self, description, dest=None):
# Truncate the description, on word boundaries if necessary
# Possible destinations:

View File

@ -11,7 +11,7 @@ import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \
from collections import defaultdict
import threading, random
from itertools import repeat
from math import ceil
from math import ceil, floor
from calibre import prints, force_unicode
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
@ -640,12 +640,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if name and name != fname:
changed = True
break
if path == current_path and not changed:
return
tpath = os.path.join(self.library_path, *path.split('/'))
if not os.path.exists(tpath):
os.makedirs(tpath)
if path == current_path and not changed:
return
spath = os.path.join(self.library_path, *current_path.split('/'))
if current_path and os.path.exists(spath): # Migrate existing files
@ -1150,7 +1150,16 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
`data`: Can be either a QImage, QPixmap, file object or bytestring
'''
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
if not os.path.exists(base_path):
self.set_path(id, index_is_id=True)
base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
self.dirtied([id])
path = os.path.join(base_path, 'cover.jpg')
if callable(getattr(data, 'save', None)):
data.save(path)
else:
@ -2080,7 +2089,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return 1.0
series_indices = [x[0] for x in series_indices]
if tweaks['series_index_auto_increment'] == 'next':
return series_indices[-1] + 1
return floor(series_indices[-1]) + 1
if tweaks['series_index_auto_increment'] == 'first_free':
for i in range(1, 10000):
if i not in series_indices:

View File

@ -42,7 +42,7 @@ class Restore(Thread):
self.src_library_path = os.path.abspath(library_path)
self.progress_callback = progress_callback
self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
self.bad_ext_pat = re.compile(r'[^a-z0-9]+')
self.bad_ext_pat = re.compile(r'[^a-z0-9_]+')
if not callable(self.progress_callback):
self.progress_callback = lambda x, y: x
self.dirs = []

View File

@ -22,13 +22,18 @@ def available_translations():
_available_translations = [x for x in stats if stats[x] > 0.1]
return _available_translations
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
def get_system_locale():
from calibre.constants import iswindows
lang = None
if iswindows:
try:
from calibre.constants import get_windows_user_locale_name
lang = get_windows_user_locale_name()
lang = lang.strip()
if not lang: lang = None
except:
pass # Windows XP does not have the GetUserDefaultLocaleName fn
if lang is None:
try:
lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE',
'LC_MESSAGES', 'LANG'])[0]
@ -39,6 +44,25 @@ def get_lang():
lang = os.environ['LANG']
except:
pass
if lang:
lang = lang.replace('-', '_')
lang = '_'.join(lang.split('_')[:2])
return lang
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
try:
lang = get_system_locale()
except:
import traceback
traceback.print_exc()
lang = None
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
@ -55,7 +79,7 @@ def get_lc_messages_path(lang):
if lang in available_translations():
hlang = lang
else:
xlang = lang.split('_')[0]
xlang = lang.split('_')[0].lower()
if xlang in available_translations():
hlang = xlang
return hlang