Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit 682a31d9ed
Merge from trunk
@@ -19,6 +19,65 @@
# new recipes:
# - title:

- version: 0.8.23
  date: 2011-10-21

  new features:
    - title: "Drivers for T-Mobile Move, new Pandigital Novel, new Onyx Boox and Freescale MX 515"

    - title: "SONY T1 driver: Support for periodicals and better timezone detection"

    - title: "Add a remove cover entry to the right click menu of the cover display in the right panel"
      tickets: [874689]

  bug fixes:
    - title: "Amazon metadata download: Fix for a change to the Amazon website that broke downloading metadata."
      tickets: [878395]

    - title: "MOBI metadata: When reading titles from MOBI files, only use the title in the PDB header if there is no long title in the EXTH header"
      tickets: [875243]

    - title: "Fix regression that broke use of complex custom columns in save to disk templates."
      tickets: [877366]

    - title: "Fix regression that broke reading metadata from CHM files"

    - title: "Fix a bug that broke conversion of some zipped-up HTML files with non-ASCII filenames on certain Windows installs."
      tickets: [873288]

    - title: "RTF Input: Fix bug in handling of paragraph separators."
      tickets: [863735]

    - title: "Fix a regression that broke downloading certain periodicals for the Kindle."
      tickets: [875595]

    - title: "Fix regression that broke updating of covers inside ebook files when saving to disk"

    - title: "Fix regression that broke editing the 'show in tag browser' checkbox in custom column setup"

    - title: "Fix typo that broke stopping selected jobs in 0.8.22"

  improved recipes:
    - Columbus Dispatch
    - Ming Pao
    - La Republica
    - Korea Times
    - USA Today
    - CNN
    - Liberation
    - El Pais
    - Helsingin Sanomat

  new recipes:
    - title: Kyugyhang, Hankyoreh and Hankyoreh21
      author: Seongkyoun Yoo

    - title: English Katherimini
      author: Thomas Scholl

    - title: Various French news sources
      author: Aurelien Chabot

- version: 0.8.22
  date: 2011-10-14
@@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
20minutes.fr
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class Minutes(BasicNewsRecipe):
@@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 1.2
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # Seems to work best, but YMMV
    simultaneous_downloads = 2

    auto_cleanup = True
    #auto_cleanup_keep = '//div[@id="story-photos"]'
    # Feeds from http://www.dispatch.com/live/content/rss/index.html
    feeds = []
    feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'))
    feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'))
    feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'))
    feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'))
    feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'))
    feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'))
    feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'))
    feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'))
    feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'))
    feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'))
    feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'))
    feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'))
    feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'))
    feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'))
    feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'))
    feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'))
    feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'))
    feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'))
    feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'))
    feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'))
    feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'))
    feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'))
    feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'))
    feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'))
    feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'))
    feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'))
    feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'))
    feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'))
    feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'))
    feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'))
    #feeds.append((u'', u''))
    feeds = [
        ('Local',
            'http://www.dispatch.com/content/syndication/news_local-state.xml'),
        ('National',
            'http://www.dispatch.com/content/syndication/news_national.xml'),
        ('Business',
            'http://www.dispatch.com/content/syndication/news_business.xml'),
        ('Editorials',
            'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
        ('Columnists',
            'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
        ('Life and Arts',
            'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
        ('OSU Sports',
            'http://www.dispatch.com/content/syndication/sports_osu.xml'),
        ('Auto Racing',
            'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
        ('Outdoors',
            'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
        ('Bengals',
            'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
        ('Indians',
            'http://www.dispatch.com/content/syndication/sports_indians.xml'),
        ('Clippers',
            'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
        ('Crew',
            'http://www.dispatch.com/content/syndication/sports_crew.xml'),
        ('Reds',
            'http://www.dispatch.com/content/syndication/sports_reds.xml'),
        ('Blue Jackets',
            'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
    ]

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))

    extra_css = '''
                body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                a {text-decoration: none; color: blue;}
                div.colhed {font-weight: bold;}
                div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
                div.subhed {font-size: large;}
                div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
                div.byline, div.srcline {font-size: small; color: #696969;}
                '''
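In the hunk above, the append-style feed list (and, presumably, the hand-written keep_only_tags/remove_tags/extra_css rules further down) is the removed side; the plain feeds list plus auto_cleanup = True replaces it. As a rough, hedged sketch of the new shape (the class name and the two feeds chosen here are placeholders, not part of the commit):

from calibre.web.feeds.news import BasicNewsRecipe

class DispatchStyleExample(BasicNewsRecipe):  # hypothetical name, for illustration
    title = 'Example syndicated paper'
    oldest_article = 1.2           # days
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True            # heuristic article extraction instead of hand-written tag rules

    feeds = [
        ('Local', 'http://www.dispatch.com/content/syndication/news_local-state.xml'),
        ('National', 'http://www.dispatch.com/content/syndication/news_national.xml'),
    ]

The trade-off: auto_cleanup keeps working when the site renames its div classes, at the cost of fine-grained control over which page elements survive.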
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
class BasicUserRecipe1318572550(AutomaticNewsRecipe):

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318572550(BasicNewsRecipe):
    title = u'FrAndroid'
    oldest_article = 2
    max_articles_per_feed = 100
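This hunk, and the matching ones below for Google Mobile Blog, Korben and OmgUbuntu, fixes auto-generated recipes that subclassed AutomaticNewsRecipe, a name a standalone recipe file never imported; they now import and subclass BasicNewsRecipe explicitly. A hedged sketch of the repaired pattern — the last two lines are assumptions added for completeness, since the diff is truncated before them:

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318572550(BasicNewsRecipe):
    title = u'FrAndroid'
    oldest_article = 2              # days
    max_articles_per_feed = 100
    auto_cleanup = True             # assumed: auto-generated recipes lean on auto cleanup
    feeds = [(u'FrAndroid', u'http://feeds.feedburner.com/Frandroid')]  # placeholder URL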
@@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
class BasicUserRecipe1318572445(AutomaticNewsRecipe):

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318572445(BasicNewsRecipe):
    title = u'Google Mobile Blog'
    oldest_article = 7
    max_articles_per_feed = 100
@@ -3,34 +3,31 @@ __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class Hankyoreh(BasicNewsRecipe):
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 5
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True
    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea'])
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea'])
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]
    remove_tags_after = [
        dict(target='_top')
    ]
    feeds = [
        ('All News','http://www.hani.co.kr/rss/'),
        ('All News','http://www.hani.co.kr/rss/'),
        ('Politics','http://www.hani.co.kr/rss/politics/'),
        ('Economy','http://www.hani.co.kr/rss/economy/'),
        ('Society','http://www.hani.co.kr/rss/society/'),
@@ -3,7 +3,6 @@ __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class Hankyoreh21(BasicNewsRecipe):
@@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
        return rurl.partition('?')[0]

    def print_version(self, url):
        return url.replace('/cgi-bin/','/print/')
        if '/rss/' in url:
            return url.replace('.jp/rss/','.jp/print/')
        if '/text/' in url:
            return url.replace('.jp/text/','.jp/print/')
        return url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
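In this hunk the single '/cgi-bin/' → '/print/' rewrite is the removed version; the branches mapping '.jp/rss/' and '.jp/text/' URLs onto '.jp/print/' replace it. The same mapping in isolation, with an illustrative URL that is not taken from the commit:

def print_version(url):
    # Map Japan Times article URLs onto their printer-friendly twins.
    if '/rss/' in url:
        return url.replace('.jp/rss/', '.jp/print/')
    if '/text/' in url:
        return url.replace('.jp/text/', '.jp/print/')
    return url  # unrecognized form: fetch the page as-is

print(print_version('http://www.japantimes.co.jp/rss/nn20111021a1.html'))
# -> http://www.japantimes.co.jp/print/nn20111021a1.html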
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
class BasicUserRecipe1318619728(AutomaticNewsRecipe):

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318619728(BasicNewsRecipe):
    title = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
@@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
LePoint.fr
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class lepoint(BasicNewsRecipe):
@@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Lexpress.fr
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class lepoint(BasicNewsRecipe):
@@ -18,10 +18,14 @@ __InclPremium__ = False
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''


'''
Change Log:
2011/10/21: fix a bug where hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
@@ -170,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        return self.get_dtlocal().strftime("%d")
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
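In each helper above, the one-line return is the removed version and the if/else that follows replaces it: every fetch-date helper now honors the module-level __Date__ override (a YYYYMMDD string) before falling back to the localized current date. Note that <> is Python 2's inequality operator. The same pattern in isolation, in Python 3 syntax (a sketch, not the recipe's code):

from datetime import datetime

__Date__ = ''  # e.g. '20111021' pins the fetch to that day

def get_fetchformatteddate():
    # Prefer the manual YYYYMMDD override, else use today's date.
    if __Date__ != '':
        return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
    return datetime.now().strftime('%Y-%m-%d')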
@@ -477,53 +490,8 @@ class MPRecipe(BasicNewsRecipe):

    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', raw_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        raw_html = raw_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            raw_html = raw_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        #print 'Original: ', url
                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        raw_html = raw_html.replace(img, gifimg)
                    except:
                        #print 'GIF not found'
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'newimg: ', newimg
                        raw_html = raw_html.replace(img, newimg)
        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
            return raw_html
        else:
            new_html = raw_html
        if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
            if url.rfind('_print.htm') <> -1:
                # javascript based file
                splitter = re.compile(r'\n')

@@ -558,48 +526,113 @@ class MPRecipe(BasicNewsRecipe):
                photo = photo.replace('</td>', '<br>')
                photo = photo.replace('class="photo"', '')
                new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                return new_raw_html + '</body></html>'
                new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n')  # Match non-digits
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                next_is_mov_link = False
                next_is_img_txt = False
                title_started = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
                    item = item.strip()
                    if item.startswith(u'\u3010'):
                        met_article_start_char = True
                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False and next_is_mov_link == False:
                            item = item.strip()
                        if next_is_img_txt == False:
                            if item.startswith("=@"):
                                next_is_mov_link = True
                                print 'skip movie link'
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('=='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[2:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[1:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
                                if item <> '':
                                    if next_is_img_txt == False and met_article_start_char == False:
                                if next_is_img_txt == False and met_article_start_char == False:
                                    if item <> '':
                                        if title_started == False:
                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                    else:
                                        new_raw_html = new_raw_html + item + '<p>\n'
                                else:
                                    new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            if next_is_mov_link == True:
                                next_is_mov_link = False
                            else:
                                next_is_img_txt = False
                                new_raw_html = new_raw_html + item + '\n'
                return new_raw_html + '</div></body></html>'
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
                new_html = new_raw_html + '</div></body></html>'
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            new_html = new_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            new_html = new_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        new_html = new_html.replace(img, newimg)
                # repeat with src quoted by double quotes, for text parsed from src txt
                imglist = re.findall('src="?.*?jpg"', new_html)
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        #print 'url', url
                        pos = url.rfind('/')
                        gifurl = url[:pos+1]
                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.find('"')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'Use hi-res img', newimg
                        new_html = new_html.replace(img, newimg)
        return new_html

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
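The refactor above replaces the method's early returns with a single new_html value, so the hi-res substitution now runs after the txt/JavaScript sources have been converted to HTML and images in txt-parsed pages get upgraded too. The core trick: for every jpg reference, probe whether a higher-quality variant exists on the server and substitute it, falling back silently when the HTTP probe fails. A trimmed, hedged sketch of that probe-and-fallback pattern, using urllib rather than the recipe's mechanize browser:

import re
import urllib.request

def upgrade_images(html, base_url):
    # For each <img src="...jpg">, try the underscore-prefixed hi-res
    # variant; keep the original reference if the probe fails.
    for img in re.findall(r'src="[^"]*?\.jpg"', html):
        fname = img[5:-1]            # strip the leading src=" and trailing quote
        hires = '_' + fname
        try:
            urllib.request.urlopen(base_url + hires, timeout=5)  # probe only
            html = html.replace(fname, hires)
        except Exception:
            pass                     # hi-res variant not available
    return html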
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
class BasicUserRecipe1318619832(AutomaticNewsRecipe):

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318619832(BasicNewsRecipe):
    title = u'OmgUbuntu'
    oldest_article = 7
    max_articles_per_feed = 100
@@ -9,49 +9,49 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-27 18:23+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n"
"PO-Revision-Date: 2011-10-15 17:29+0000\n"
"Last-Translator: Devilinside <Unknown>\n"
"Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-28 04:50+0000\n"
"X-Generator: Launchpad (build 14049)\n"
"X-Launchpad-Export-Date: 2011-10-16 05:14+0000\n"
"X-Generator: Launchpad (build 14124)\n"
"X-Poedit-Country: HUNGARY\n"
"Language: hu\n"
"X-Poedit-Language: Hungarian\n"

#. name for aaa
msgid "Ghotuo"
msgstr ""
msgstr "Ghotuo"

#. name for aab
msgid "Alumu-Tesu"
msgstr ""
msgstr "Alumu-Tesu"

#. name for aac
msgid "Ari"
msgstr ""
msgstr "Ari"

#. name for aad
msgid "Amal"
msgstr ""
msgstr "Amal"

#. name for aae
msgid "Albanian; Arbëreshë"
msgstr ""
msgstr "Albán; Arbëreshë"

#. name for aaf
msgid "Aranadan"
msgstr ""
msgstr "Aranadan"

#. name for aag
msgid "Ambrak"
msgstr ""
msgstr "Ambrak"

#. name for aah
msgid "Arapesh; Abu'"
msgstr ""
msgstr "Arapesh; Abu'"

#. name for aai
msgid "Arifama-Miniafia"

@@ -75,7 +75,7 @@ msgstr ""

#. name for aao
msgid "Arabic; Algerian Saharan"
msgstr ""
msgstr "Arab; Algériai Szaharai"

#. name for aap
msgid "Arára; Pará"

@@ -87,7 +87,7 @@ msgstr ""

#. name for aar
msgid "Afar"
msgstr "afar"
msgstr "Afar"

#. name for aas
msgid "Aasáx"

@@ -498,10 +498,9 @@ msgstr ""
msgid "Tapei"
msgstr ""

# src/trans.h:281 src/trans.h:318
#. name for afr
msgid "Afrikaans"
msgstr "afrikaans"
msgstr "Afrikaans"

#. name for afs
msgid "Creole; Afro-Seminole"

@@ -801,7 +800,7 @@ msgstr ""

#. name for aka
msgid "Akan"
msgstr "akan"
msgstr "Akan"

#. name for akb
msgid "Batak Angkola"

@@ -1015,10 +1014,9 @@ msgstr ""
msgid "Amarag"
msgstr ""

# src/trans.h:283
#. name for amh
msgid "Amharic"
msgstr "amhara"
msgstr "Amhara"

#. name for ami
msgid "Amis"

@@ -1425,10 +1423,9 @@ msgstr ""
msgid "Arrarnta; Western"
msgstr ""

# src/trans.h:294
#. name for arg
msgid "Aragonese"
msgstr "aragóniai"
msgstr "Aragóniai"

#. name for arh
msgid "Arhuaco"

@@ -1548,7 +1545,7 @@ msgstr ""

#. name for asm
msgid "Assamese"
msgstr "asszámi"
msgstr "Asszámi"

#. name for asn
msgid "Asuriní; Xingú"

@@ -1790,10 +1787,9 @@ msgstr ""
msgid "Arabic; Uzbeki"
msgstr ""

# src/trans.h:283
#. name for ava
msgid "Avaric"
msgstr "avar"
msgstr "Avar"

#. name for avb
msgid "Avau"

@@ -1805,7 +1801,7 @@ msgstr ""

#. name for ave
msgid "Avestan"
msgstr "aveszti"
msgstr "Avesztai"

#. name for avi
msgid "Avikam"

@@ -1945,7 +1941,7 @@ msgstr ""

#. name for ayc
msgid "Aymara; Southern"
msgstr ""
msgstr "Ajmara; Déli"

#. name for ayd
msgid "Ayabadhu"

@@ -1977,7 +1973,7 @@ msgstr ""

#. name for aym
msgid "Aymara"
msgstr "aymara"
msgstr "Ajmara"

#. name for ayn
msgid "Arabic; Sanaani"

@@ -1997,7 +1993,7 @@ msgstr ""

#. name for ayr
msgid "Aymara; Central"
msgstr ""
msgstr "Ajmara; Közép"

#. name for ays
msgid "Ayta; Sorsogon"

@@ -2025,12 +2021,11 @@ msgstr ""

#. name for azb
msgid "Azerbaijani; South"
msgstr ""
msgstr "Azeri; Déli"

# src/trans.h:311
#. name for aze
msgid "Azerbaijani"
msgstr "azeri"
msgstr "Azeri"

#. name for azg
msgid "Amuzgo; San Pedro Amuzgos"

@@ -2038,7 +2033,7 @@ msgstr ""

#. name for azj
msgid "Azerbaijani; North"
msgstr ""
msgstr "Azeri; Északi"

#. name for azm
msgid "Amuzgo; Ipalapa"

@@ -2090,7 +2085,7 @@ msgstr ""

#. name for bak
msgid "Bashkir"
msgstr "baskír"
msgstr "Baskír"

#. name for bal
msgid "Baluchi"

@@ -2115,7 +2110,7 @@ msgstr ""

#. name for bar
msgid "Bavarian"
msgstr ""
msgstr "Bajor"

#. name for bas
msgid "Basa (Cameroon)"

@@ -2497,10 +2492,9 @@ msgstr "beja"
msgid "Bebeli"
msgstr ""

# src/trans.h:286
#. name for bel
msgid "Belarusian"
msgstr "belorusz"
msgstr "Belarusz"

#. name for bem
msgid "Bemba (Zambia)"

@@ -2508,7 +2502,7 @@ msgstr ""

#. name for ben
msgid "Bengali"
msgstr "bengáli"
msgstr "Bengáli"

#. name for beo
msgid "Beami"

@@ -3510,10 +3504,9 @@ msgstr ""
msgid "Borôro"
msgstr ""

# src/trans.h:309
#. name for bos
msgid "Bosnian"
msgstr "bosnyák"
msgstr "Bosnyák"

#. name for bot
msgid "Bongo"

@@ -3685,7 +3678,7 @@ msgstr ""

#. name for bqn
msgid "Bulgarian Sign Language"
msgstr ""
msgstr "Bolgár jelnyelv"

#. name for bqo
msgid "Balo"

@@ -4078,10 +4071,9 @@ msgstr ""
msgid "Bugawac"
msgstr ""

# src/trans.h:285
#. name for bul
msgid "Bulgarian"
msgstr "bolgár"
msgstr "Bolgár"

#. name for bum
msgid "Bulu (Cameroon)"

@@ -7445,10 +7437,9 @@ msgstr ""
msgid "Semimi"
msgstr ""

# src/trans.h:284
#. name for eus
msgid "Basque"
msgstr "baszk"
msgstr "Baszk"

#. name for eve
msgid "Even"

@@ -7534,10 +7525,9 @@ msgstr ""
msgid "Fang (Equatorial Guinea)"
msgstr ""

# src/trans.h:294
#. name for fao
msgid "Faroese"
msgstr "feröi"
msgstr "Feröeri"

#. name for fap
msgid "Palor"

@@ -29414,7 +29404,7 @@ msgstr ""

#. name for xzp
msgid "Zapotec; Ancient"
msgstr ""
msgstr "Zapoték; Ősi"

#. name for yaa
msgid "Yaminahua"

@@ -30326,27 +30316,27 @@ msgstr ""

#. name for zaa
msgid "Zapotec; Sierra de Juárez"
msgstr ""
msgstr "Zapoték; Sierra de Juárezi"

#. name for zab
msgid "Zapotec; San Juan Guelavía"
msgstr ""
msgstr "Zapoték; San Juan Guelavíai"

#. name for zac
msgid "Zapotec; Ocotlán"
msgstr ""
msgstr "Zapoték; Ocotláni"

#. name for zad
msgid "Zapotec; Cajonos"
msgstr "zapoték; Cajonos"
msgstr "Zapoték; Cajonesi"

#. name for zae
msgid "Zapotec; Yareni"
msgstr "zapoték; Yareni"
msgstr "Zapoték; Yareni"

#. name for zaf
msgid "Zapotec; Ayoquesco"
msgstr ""
msgstr "Zapoték; Ayoquescoi"

#. name for zag
msgid "Zaghawa"

@@ -30358,7 +30348,7 @@ msgstr "zangval"

#. name for zai
msgid "Zapotec; Isthmus"
msgstr "zapoték; Isthmus"
msgstr "Zapoték; Isthmusi"

#. name for zaj
msgid "Zaramo"

@@ -30374,31 +30364,31 @@ msgstr "zozu"

#. name for zam
msgid "Zapotec; Miahuatlán"
msgstr ""
msgstr "Zapoték; Miahuatláni"

#. name for zao
msgid "Zapotec; Ozolotepec"
msgstr ""
msgstr "Zapoték; Ozolotepeci"

#. name for zap
msgid "Zapotec"
msgstr "zapoték"
msgstr "Zapoték"

#. name for zaq
msgid "Zapotec; Aloápam"
msgstr ""
msgstr "Zapoték; Aloápami"

#. name for zar
msgid "Zapotec; Rincón"
msgstr "zapoték; Rincón"
msgstr "Zapoték; Rincóni"

#. name for zas
msgid "Zapotec; Santo Domingo Albarradas"
msgstr ""
msgstr "Zapoték; Santo Domingo Albarradasi"

#. name for zat
msgid "Zapotec; Tabaa"
msgstr "zapoték; Tabaa"
msgstr "Zapoték; Tabaa-i"

# src/trans.h:193
#. name for zau

@@ -30407,15 +30397,15 @@ msgstr "zangskari"

#. name for zav
msgid "Zapotec; Yatzachi"
msgstr ""
msgstr "Zapoték; Yatzachi-i"

#. name for zaw
msgid "Zapotec; Mitla"
msgstr "zapoték; Mitla"
msgstr "Zapoték; Mitlai"

#. name for zax
msgid "Zapotec; Xadani"
msgstr "zapoték; Xadani"
msgstr "Zapoték; Xadani-i"

#. name for zay
msgid "Zayse-Zergulla"

@@ -30991,7 +30981,7 @@ msgstr "tokano"

#. name for zul
msgid "Zulu"
msgstr "zulu"
msgstr "Zulu"

# src/trans.h:316
#. name for zum
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 22)
numeric_version = (0, 8, 23)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -77,7 +77,7 @@ class ANDROID(USBMS):

    # Samsung
    0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
               0x681c : [0x0222, 0x0224, 0x0400],
               0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
               0x6640 : [0x0100],
               0x685b : [0x0400],
               0x685e : [0x0400],
@@ -376,7 +376,7 @@ class KOBO(USBMS):
        path_prefix = '.kobo/images/'
        path = self._main_prefix + path_prefix + ImageID

        file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',)
        file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed', ' - N3_FULL.parsed',)

        for ending in file_endings:
            fpath = path + ending

@@ -852,6 +852,7 @@ class KOBO(USBMS):
                        ' - N3_LIBRARY_FULL.parsed':(355,530),
                        ' - N3_LIBRARY_GRID.parsed':(149,233),
                        ' - N3_LIBRARY_LIST.parsed':(60,90),
                        ' - N3_FULL.parsed':(600,800),
                        ' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}

        for ending, resize in file_endings.items():
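The two KOBO hunks add the full-screen ' - N3_FULL.parsed' cover variant, with a 600x800 target size, alongside the existing thumbnail endings. A hedged sketch of how such an ending-to-size map can drive cover generation; the PIL usage here is purely illustrative, not the driver's actual image handling:

from PIL import Image  # illustrative; the real driver does its own resizing

file_endings = {
    ' - N3_LIBRARY_FULL.parsed': (355, 530),
    ' - N3_LIBRARY_GRID.parsed': (149, 233),
    ' - N3_LIBRARY_LIST.parsed': (60, 90),
    ' - N3_FULL.parsed': (600, 800),           # new full-screen variant
    ' - N3_SOCIAL_CURRENTREAD.parsed': (120, 186),
}

def write_cover_variants(cover_path, image_id_path):
    # Emit one resized cover file per device-recognized ending.
    cover = Image.open(cover_path)
    for ending, size in file_endings.items():
        cover.resize(size, Image.LANCZOS).save(image_id_path + ending, 'JPEG')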
@@ -20,9 +20,8 @@ from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
from calibre.constants import islinux
from calibre.ebooks.metadata import authors_to_string, authors_to_sort_string

DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'

@@ -40,7 +39,8 @@ class PRST1(USBMS):
    path_sep = '/'
    booklist_class = CollectionsBookList

    FORMATS = ['epub', 'pdf', 'txt']
    FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
                                                    # used in japan
    CAN_SET_METADATA = ['collections']
    CAN_DO_DEVICE_DB_PLUGBOARD = True
@@ -30,7 +30,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
        'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
        'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
        'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
        'xps', 'oxps', 'azw4']
        'xps', 'oxps', 'azw4', 'book', 'zbf']

class HTMLRenderer(object):
@@ -30,9 +30,11 @@ class Worker(Thread): # Get details {{{
    Get book details from amazons book page in a separate thread
    '''

    def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20):
    def __init__(self, url, result_queue, browser, log, relevance, domain,
            plugin, timeout=20, testing=False):
        Thread.__init__(self)
        self.daemon = True
        self.testing = testing
        self.url, self.result_queue = url, result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin

@@ -189,10 +191,9 @@ class Worker(Thread): # Get details {{{
            self.log.exception(msg)
            return

        oraw = raw
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        #open('/t/t.html', 'wb').write(raw)

        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

@@ -211,14 +212,20 @@ class Worker(Thread): # Get details {{{
            self.log.error(msg)
            return

        self.parse_details(root)
        self.parse_details(oraw, root)

    def parse_details(self, root):
    def parse_details(self, raw, root):
        try:
            asin = self.parse_asin(root)
        except:
            self.log.exception('Error parsing asin for url: %r'%self.url)
            asin = None
        if self.testing:
            import tempfile
            with tempfile.NamedTemporaryFile(prefix=asin + '_',
                    suffix='.html', delete=False) as f:
                f.write(raw)
            print ('Downloaded html for', asin, 'saved in', f.name)

        try:
            title = self.parse_title(root)
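These hunks thread a testing flag from the Source plugin down into parse_details, which now also receives the undecoded page (oraw) and dumps it to a temp file during test runs, so a failing test leaves behind the exact markup that was fetched. The debugging pattern in isolation (a sketch, not the plugin's API):

import tempfile

def save_for_inspection(raw_bytes, tag):
    # Persist fetched HTML so a test failure can be diagnosed against
    # the exact bytes that were downloaded.
    with tempfile.NamedTemporaryFile(prefix=tag + '_', suffix='.html',
                                     delete=False) as f:
        f.write(raw_bytes)
    return f.name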
@@ -310,7 +317,7 @@ class Worker(Thread): # Get details {{{
        return l.get('href').rpartition('/')[-1]

    def parse_title(self, root):
        tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0]
        tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
        actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
        if actual_title:
            title = tostring(actual_title[0], encoding=unicode,

@@ -320,11 +327,11 @@ class Worker(Thread): # Get details {{{
        return re.sub(r'[(\[].*[)\]]', '', title).strip()

    def parse_authors(self, root):
        x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
        x = '//h1[contains(@class, "parseasinTitle")]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
        aname = root.xpath(x)
        if not aname:
            aname = root.xpath('''
            //h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
            //h1[contains(@class, "parseasinTitle")]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
            ''')
        for x in aname:
            x.tail = ''
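Both XPath fixes replace an exact class match with contains(@class, ...), so the lookup survives Amazon adding extra classes to the h1 (the deliberate loosening also matches substrings, which is acceptable here). A small illustration with made-up markup:

from lxml import html

root = html.fromstring(
    '<h1 class="parseasinTitle oneLine"><span id="btAsinTitle">X</span></h1>')
print(root.xpath('//h1[@class="parseasinTitle"]'))             # [] -- exact match fails
print(root.xpath('//h1[contains(@class, "parseasinTitle")]'))  # [<Element h1 ...>]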
@@ -666,7 +673,8 @@ class Amazon(Source):
            log.error('No matches found with query: %r'%query)
            return

        workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in
        workers = [Worker(url, result_queue, br, log, i, domain, self,
            testing=getattr(self, 'running_a_test', False)) for i, url in
            enumerate(matches)]

        for w in workers:
@@ -740,16 +748,6 @@ if __name__ == '__main__': # tests {{{

        ),

        ( # An e-book ISBN not on Amazon, the title/author search matches
          # the Kindle edition, which has different markup for ratings and
          # isbn
            {'identifiers':{'isbn': '9780307459671'},
                'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
            [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
                exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]

        ),

        ( # This isbn not on amazon
            {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
                'authors':['Lutz']},

@@ -783,7 +781,7 @@ if __name__ == '__main__': # tests {{{
    de_tests = [ # {{{
        (
            {'identifiers':{'isbn': '3548283519'}},
            [title_test('Wer Wind sät',
            [title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
                exact=True), authors_test(['Nele Neuhaus'])
            ]

@@ -835,6 +833,6 @@ if __name__ == '__main__': # tests {{{
    ] # }}}

    test_identify_plugin(Amazon.name, com_tests)
    #test_identify_plugin(Amazon.name, es_tests)
    #test_identify_plugin(Amazon.name, de_tests)
    # }}}
@@ -196,6 +196,7 @@ class Source(Plugin):

    def __init__(self, *args, **kwargs):
        Plugin.__init__(self, *args, **kwargs)
        self.running_a_test = False  # Set to True when using identify_test()
        self._isbn_to_identifier_cache = {}
        self._identifier_to_cover_url_cache = {}
        self.cache_lock = threading.RLock()

@@ -284,14 +285,15 @@ class Source(Plugin):

        if authors:
            # Leave ' in there for Irish names
            remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
            replace_pat = re.compile(r'[-+.:;]')
            remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
            replace_pat = re.compile(r'[-+.:;,]')
            if only_first_author:
                authors = authors[:1]
            for au in authors:
                has_comma = ',' in au
                au = replace_pat.sub(' ', au)
                parts = au.split()
                if ',' in au:
                if has_comma:
                    # au probably in ln, fn form
                    parts = parts[1:] + parts[:1]
                for tok in parts:
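The subtle point in this hunk: the comma moves from remove_pat to replace_pat, so 'Doe,John' now splits into two tokens instead of fusing into one, and because replace_pat now swallows the comma, the 'ln, fn' check must be captured in has_comma before the substitution erases it. A hedged sketch of the corrected flow:

import re

replace_pat = re.compile(r'[-+.:;,]')

def author_tokens(au):
    has_comma = ',' in au          # test before the comma is replaced
    au = replace_pat.sub(' ', au)
    parts = au.split()
    if has_comma:                  # 'Doe, John' is probably 'lastname, firstname'
        parts = parts[1:] + parts[:1]
    return parts

print(author_tokens('Doe, John'))  # ['John', 'Doe']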
@@ -183,7 +183,11 @@ def test_identify_plugin(name, tests): # {{{
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        err = plugin.identify(*args, **kwargs)
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
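The try/finally guarantees running_a_test is cleared even when identify raises. The same guard could be expressed as a context manager — an alternative sketch, not what the commit does:

from contextlib import contextmanager

@contextmanager
def running_a_test(plugin):
    plugin.running_a_test = True
    try:
        yield plugin
    finally:
        plugin.running_a_test = False   # cleared even on exceptions

# usage: with running_a_test(plugin): err = plugin.identify(*args, **kwargs)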
@@ -66,12 +66,15 @@ class EXTHHeader(object):
                # last update time
                pass
            elif id == 503: # Long title
                if not title or title == _('Unknown') or \
                        'USER_CONTENT' in title or title.startswith('dtp_'):
                    try:
                        title = content.decode(codec)
                    except:
                        pass
                # Amazon seems to regard this as the definitive book title
                # rather than the title from the PDB header. In fact when
                # sending MOBI files through Amazon's email service if the
                # title contains non ASCII chars or non filename safe chars
                # they are messed up in the PDB header
                try:
                    title = content.decode(codec)
                except:
                    pass
            #else:
            #    print 'unknown record', id, repr(content)
        if title:
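This hunk drops the old conditional, which only honored EXTH record 503 when the PDB title looked like a placeholder, and now always prefers the EXTH long title, since Amazon's pipeline mangles non-ASCII or non-filename-safe PDB titles. A hedged sketch of the resulting rule (function and argument names are illustrative):

def effective_title(pdb_title, exth_records, codec='utf-8'):
    # Record 503 (long title) wins over the PDB header title whenever it
    # is present and decodable; the PDB title is only a fallback.
    title = pdb_title
    for rec_id, content in exth_records:
        if rec_id == 503:
            try:
                title = content.decode(codec)
            except Exception:
                pass
    return title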
@@ -20,6 +20,7 @@ from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import guess_type, prints, prepare_string_for_xml
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding

TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'

@@ -180,6 +181,8 @@ class EbookIterator(object):
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        if not isinstance(self.base, unicode):
            self.base = self.base.decode(filesystem_encoding)
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
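The two added lines normalize the temporary directory path to unicode via the filesystem encoding before it is handed to the conversion Plumber, so later joins with unicode filenames cannot fall back to implicit ASCII decoding. The idiom in isolation, as a Python 2 sketch matching the hunk (where plain str is a byte string):

import sys

def ensure_unicode_path(path, fs_encoding=sys.getfilesystemencoding()):
    # Decode byte paths with the real filesystem encoding, never ASCII.
    if not isinstance(path, unicode):   # Python 2 check, as in the hunk
        path = path.decode(fs_encoding)
    return path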
(71 more file diffs in this commit were suppressed because they are too large.)