Merge from trunk

Charles Haley 2012-11-07 18:16:15 +01:00
commit 9868fffb02
29 changed files with 1287 additions and 578 deletions

View File

@@ -35,3 +35,7 @@ nbproject/
 .settings/
 *.DS_Store
 calibre_plugins/
+recipes/.git
+recipes/.gitignore
+recipes/README
+recipes/katalog_egazeciarz.recipe

View File

@@ -327,9 +327,8 @@ You can browse your |app| collection on your Android device by using the
 calibre content server, which makes your collection available over the net.
 First perform the following steps in |app|
-  * Set the :guilabel:`Preferred Output Format` in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
-  * Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
-  * Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
+  * Set the :guilabel:`Preferred Output Format` in |app| to EPUB for normal Android devices or MOBI for Kindles (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
+  * Convert the books you want to read on your device to EPUB/MOBI format by selecting them and clicking the Convert button.
   * Turn on the Content Server in |app|'s preferences and leave |app| running.
 Now on your Android device, open the browser and browse to

View File

@@ -2,7 +2,9 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class FocusRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
     __author__ = u'intromatyk <intromatyk@gmail.com>'
     language = 'pl'
@@ -12,10 +14,10 @@ class FocusRecipe(BasicNewsRecipe):
     publisher = u'Gruner + Jahr Polska'
     category = u'News'
     description = u'Newspaper'
-    category='magazine'
-    cover_url=''
-    remove_empty_feeds= True
-    no_stylesheets=True
+    category = 'magazine'
+    cover_url = ''
+    remove_empty_feeds = True
+    no_stylesheets = True
     oldest_article = 7
     max_articles_per_feed = 100000
     recursions = 0
@@ -27,15 +29,15 @@ class FocusRecipe(BasicNewsRecipe):
     simultaneous_downloads = 5
     r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
-    keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
-    remove_tags =[]
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
-    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
+    keep_only_tags = []
+    keep_only_tags.append(dict(name='div', attrs={'id': 'cll'}))
+    remove_tags = []
+    remove_tags.append(dict(name='div', attrs={'class': 'ulm noprint'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'txb'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'h2'}))
+    remove_tags.append(dict(name='ul', attrs={'class': 'txu'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'ulc'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -44,18 +46,17 @@ class FocusRecipe(BasicNewsRecipe):
         p.lead {font-weight: bold; text-align: left;}
         .authordate {font-size: small; color: #696969;}
         .fot{font-size: x-small; color: #666666;}
     '''
-
-    feeds = [
-        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
-    ]
+    feeds = [
+        ('Nauka', 'http://www.focus.pl/nauka/rss/'),
+        ('Historia', 'http://www.focus.pl/historia/rss/'),
+        ('Cywilizacja', 'http://www.focus.pl/cywilizacja/rss/'),
+        ('Sport', 'http://www.focus.pl/sport/rss/'),
+        ('Technika', 'http://www.focus.pl/technika/rss/'),
+        ('Przyroda', 'http://www.focus.pl/przyroda/rss/'),
+        ('Technologie', 'http://www.focus.pl/gadzety/rss/')
+    ]
 
     def skip_ad_pages(self, soup):
         if ('advertisement' in soup.find('title').string.lower()):
@@ -65,20 +66,20 @@ class FocusRecipe(BasicNewsRecipe):
             return None
 
     def get_cover_url(self):
-        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
-        tag=soup.find(name='div', attrs={'class':'clr fl'})
+        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
+        tag = soup.find(name='div', attrs={'class': 'clr fl'})
         if tag:
-            self.cover_url='http://www.focus.pl/' + tag.a['href']
+            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
         return getattr(self, 'cover_url', self.cover_url)
 
     def print_version(self, url):
-        if url.count ('focus.pl.feedsportal.com'):
+        if url.count('focus.pl.feedsportal.com'):
             u = url.find('focus0Bpl')
             u = 'http://www.focus.pl/' + url[u + 11:]
             u = u.replace('0C', '/')
             u = u.replace('A', '')
-            u = u.replace ('0E','-')
+            u = u.replace('0E', '-')
             u = u.replace('/nc/1//story01.htm', '/do-druku/1')
         else:
-            u = url.replace('/nc/1','/do-druku/1')
+            u = url.replace('/nc/1', '/do-druku/1')
         return u

View File

@@ -1,104 +1,107 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Gazeta_Wyborcza(BasicNewsRecipe):
     title = u'Gazeta Wyborcza'
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     language = 'pl'
-    description ='news from gazeta.pl'
-    category='newspaper'
+    description = 'news from gazeta.pl'
+    category = 'newspaper'
     publication_type = 'newspaper'
-    masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
-    INDEX='http://wyborcza.pl'
-    remove_empty_feeds= True
+    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
+    INDEX = 'http://wyborcza.pl'
+    remove_empty_feeds = True
     oldest_article = 3
     max_articles_per_feed = 100
-    remove_javascript=True
-    no_stylesheets=True
-    ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags = dict(id=['gazeta_article', 'article'])
-    remove_tags_after = dict(id='gazeta_article_share')
-    remove_tags = [dict(attrs={'class':['artReadMore', 'gazeta_article_related_new', 'txt_upl']}), dict(id=['gazeta_article_likes', 'gazeta_article_tools', 'rel', 'gazeta_article_tags', 'gazeta_article_share', 'gazeta_article_brand', 'gazeta_article_miniatures'])]
-
-    feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
-             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
-             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
-             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
-             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
-             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
-             #(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
-             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
-             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
-             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
-             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
-             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
-             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
-             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
-             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss')
-             ]
+    remove_javascript = True
+    no_stylesheets = True
+    remove_tags_before = dict(id='k0')
+    remove_tags_after = dict(id='banP4')
+    remove_tags = [dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
+    feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
+             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
+             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
+             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
+             ]
 
     def skip_ad_pages(self, soup):
-        tag=soup.find(name='a', attrs={'class':'btn'})
+        tag = soup.find(name='a', attrs={'class': 'btn'})
         if tag:
-            new_soup=self.index_to_soup(tag['href'], raw=True)
+            new_soup = self.index_to_soup(tag['href'], raw=True)
             return new_soup
 
     def append_page(self, soup, appendtag):
-        loop=False
-        tag = soup.find('div', attrs={'id':'Str'})
-        if appendtag.find('div', attrs={'id':'Str'}):
-            nexturl=tag.findAll('a')
-            appendtag.find('div', attrs={'id':'Str'}).extract()
-            loop=True
+        loop = False
+        tag = soup.find('div', attrs={'id': 'Str'})
+        if appendtag.find('div', attrs={'id': 'Str'}):
+            nexturl = tag.findAll('a')
+            appendtag.find('div', attrs={'id': 'Str'}).extract()
+            loop = True
         if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
-            loop=False
+            loop = False
            for link in nexturl:
                if u'następne' in link.string:
-                    url= self.INDEX + link['href']
+                    url = self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    pos = len(appendtag.contents)
                    appendtag.insert(pos, pagetext)
-                    tag = soup2.find('div', attrs={'id':'Str'})
-                    nexturl=tag.findAll('a')
-                    loop=True
+                    tag = soup2.find('div', attrs={'id': 'Str'})
+                    nexturl = tag.findAll('a')
+                    loop = True
 
     def gallery_article(self, appendtag):
-        tag=appendtag.find(id='container_gal')
+        tag = appendtag.find(id='container_gal')
         if tag:
-            nexturl=appendtag.find(id='gal_btn_next').a['href']
+            nexturl = appendtag.find(id='gal_btn_next').a['href']
             appendtag.find(id='gal_navi').extract()
             while nexturl:
-                soup2=self.index_to_soup(nexturl)
-                pagetext=soup2.find(id='container_gal')
-                nexturl=pagetext.find(id='gal_btn_next')
+                soup2 = self.index_to_soup(nexturl)
+                pagetext = soup2.find(id='container_gal')
+                nexturl = pagetext.find(id='gal_btn_next')
                 if nexturl:
-                    nexturl=nexturl.a['href']
+                    nexturl = nexturl.a['href']
                 pos = len(appendtag.contents)
                 appendtag.insert(pos, pagetext)
-            rem=appendtag.find(id='gal_navi')
+            rem = appendtag.find(id='gal_navi')
             if rem:
                 rem.extract()
 
     def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        if soup.find(id='container_gal'):
-            self.gallery_article(soup.body)
-        return soup
+        if soup.find(attrs={'class': 'piano_btn_1'}):
+            return None
+        else:
+            self.append_page(soup, soup.body)
+            if soup.find(id='container_gal'):
+                self.gallery_article(soup.body)
+            return soup
 
     def print_version(self, url):
-        if 'http://wyborcza.biz/biznes/' not in url:
-            return url
+        if url.count('rss.feedsportal.com'):
+            u = url.find('wyborcza0Bpl')
+            u = 'http://www.wyborcza.pl/' + url[u + 11:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0E', '-')
+            u = u.replace('0H', ',')
+            u = u.replace('0I', '_')
+            u = u.replace('0B', '.')
+            u = u.replace('/1,', '/2029020,')
+            u = u.replace('/story01.htm', '')
+            print(u)
+            return u
+        elif 'http://wyborcza.pl/1' in url:
+            return url.replace('http://wyborcza.pl/1', 'http://wyborcza.pl/2029020')
         else:
             return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
 
     def get_cover_url(self):
         soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
-        cover=soup.find(id='GWmini2')
-        soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])
-        self.cover_url='http://wyborcza.pl' + soup.img['src']
+        cover = soup.find(id='GWmini2')
+        soup = self.index_to_soup('http://wyborcza.pl/' + cover.contents[3].a['href'])
+        self.cover_url = 'http://wyborcza.pl' + soup.img['src']
         return getattr(self, 'cover_url', self.cover_url)

View File

@@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class FocusRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
     version = 1

View File

@@ -34,16 +34,20 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
     keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'}))
 
     remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
-    remove_tags.append(dict(name = 'div', attrs = {'id' : 'editorPicks'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
+    remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -67,3 +71,4 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
         return start + '/' + index + '?print=tak'

View File

@@ -1,34 +1,55 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image
 
 class tvn24(BasicNewsRecipe):
     title = u'TVN24'
     oldest_article = 7
     max_articles_per_feed = 100
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
     category = 'news'
     language = 'pl'
-    #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
-    cover_url= 'http://www.userlogos.org/files/logos/Struna/TVN24.jpg'
-    extra_css = 'ul {list-style:none;} \
-                li {list-style:none; float: left; margin: 0 0.15em;} \
-                h2 {font-size: medium} \
-                .date60m {float: left; margin: 0 10px 0 5px;}'
+    masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
-    use_embedded_content = False
-    ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags=[dict(name='h1', attrs={'class':['size30 mt10 pb10', 'size38 mt10 pb15']}), dict(name='figure', attrs={'class':'articleMainPhoto articleMainPhotoWide'}), dict(name='article', attrs={'class':['mb20', 'mb20 textArticleDefault']}), dict(name='ul', attrs={'class':'newsItem'})]
-    remove_tags = [dict(name='aside', attrs={'class':['innerArticleModule onRight cols externalContent', 'innerArticleModule center']}), dict(name='div', attrs={'class':['thumbsGallery', 'articleTools', 'article right rd7', 'heading', 'quizContent']}), dict(name='a', attrs={'class':'watchMaterial text'}), dict(name='section', attrs={'class':['quiz toCenter', 'quiz toRight']})]
-
-    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
-             (u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
+    keep_only_tags=[
+    #        dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
+            dict(name='div', attrs={'class':'mainContainer'}),
+    #        dict(name='p'),
+    #        dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
+            ]
+    remove_tags=[
+            dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
+            dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
+            dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
+            dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
+            ]
+    remove_tags_after=[dict(name='li', attrs={'class':'share'})]
+    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
+    #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
 
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
-        tag = soup.find(name='ul', attrs={'class':'newsItem'})
-        if tag:
-            tag.name='div'
-            tag.li.name='div'
+        return soup
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
+
+    def postprocess_html(self, soup, first):
+        #process all the images
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            if img < 0:
+                raise RuntimeError('Out of memory')
+            img.type = "GrayscaleType"
+            img.save(iurl)
         return soup

View File

@@ -3,6 +3,8 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, matek09, matek09@gmail.com'
 __copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
+__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
@@ -11,7 +13,7 @@ class Wprost(BasicNewsRecipe):
     EDITION = 0
     FIND_LAST_FULL_ISSUE = True
     EXCLUDE_LOCKED = True
-    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
+    ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
 
     title = u'Wprost'
     __author__ = 'matek09'
@@ -20,6 +22,7 @@ class Wprost(BasicNewsRecipe):
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
+    recursions = 0
 
     remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
@@ -35,13 +38,15 @@ class Wprost(BasicNewsRecipe):
         (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
         (re.compile(r'\<table .*?\>'), lambda match: ''),
         (re.compile(r'\<tr>'), lambda match: ''),
-        (re.compile(r'\<td .*?\>'), lambda match: '')]
+        (re.compile(r'\<td .*?\>'), lambda match: ''),
+        (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
 
     remove_tags =[]
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
 
     extra_css = '''
         .div-header {font-size: x-small; font-weight: bold}
     '''
@@ -59,27 +64,26 @@ class Wprost(BasicNewsRecipe):
         a = 0
         if self.FIND_LAST_FULL_ISSUE:
             ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
-            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         else:
-            a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = soup.find('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         self.EDITION = a['href'].replace('/tygodnik/?I=', '')
+        self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
         self.cover_url = a.img['src']
 
     def parse_index(self):
         self.find_last_issue()
         soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
         feeds = []
-        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
+        for main_block in soup.findAll(attrs={'id': 'content-main-column-element-content'}):
             articles = list(self.find_articles(main_block))
             if len(articles) > 0:
-                section = self.tag_to_string(main_block)
+                section = self.tag_to_string(main_block.find('h3'))
                 feeds.append((section, articles))
         return feeds
 
     def find_articles(self, main_block):
-        for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
+        for a in main_block.findAll('a'):
             if a.name in "td":
                 break
             if self.EXCLUDE_LOCKED & self.is_blocked(a):
@@ -91,3 +95,4 @@ class Wprost(BasicNewsRecipe):
             'description' : ''
         }

View File

@@ -901,8 +901,11 @@ class Device(DeviceConfig, DevicePlugin):
             for d in drives:
                 try:
                     winutil.eject_drive(bytes(d)[0])
-                except:
-                    pass
+                except Exception as e:
+                    try:
+                        prints(as_unicode(e))
+                    except:
+                        pass
 
         t = Thread(target=do_it, args=[drives])
         t.daemon = True

View File

@@ -150,8 +150,15 @@ class EPUBInput(InputFormatPlugin):
         from calibre import walk
         from calibre.ebooks import DRMError
         from calibre.ebooks.metadata.opf2 import OPF
-        zf = ZipFile(stream)
-        zf.extractall(os.getcwdu())
+        try:
+            zf = ZipFile(stream)
+            zf.extractall(os.getcwdu())
+        except:
+            log.exception('EPUB appears to be invalid ZIP file, trying a'
+                    ' more forgiving ZIP parser')
+            from calibre.utils.localunzip import extractall
+            stream.seek(0)
+            extractall(stream)
         encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
         opf = self.find_opf()
         if opf is None:

View File

@@ -10,6 +10,7 @@ from cStringIO import StringIO
 from contextlib import closing
 
 from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
+from calibre.utils.localunzip import LocalZipFile
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPF
@@ -105,10 +106,13 @@ class OCFReader(OCF):
 class OCFZipReader(OCFReader):
     def __init__(self, stream, mode='r', root=None):
-        try:
-            self.archive = ZipFile(stream, mode=mode)
-        except BadZipfile:
-            raise EPubException("not a ZIP .epub OCF container")
+        if isinstance(stream, (LocalZipFile, ZipFile)):
+            self.archive = stream
+        else:
+            try:
+                self.archive = ZipFile(stream, mode=mode)
+            except BadZipfile:
+                raise EPubException("not a ZIP .epub OCF container")
         self.root = root
         if self.root is None:
             name = getattr(stream, 'name', False)
@@ -119,8 +123,18 @@ class OCFZipReader(OCFReader):
         super(OCFZipReader, self).__init__()
 
     def open(self, name, mode='r'):
+        if isinstance(self.archive, LocalZipFile):
+            return self.archive.open(name)
         return StringIO(self.archive.read(name))
 
+def get_zip_reader(stream, root=None):
+    try:
+        zf = ZipFile(stream, mode='r')
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
+    return OCFZipReader(zf, root=root)
+
 class OCFDirReader(OCFReader):
     def __init__(self, path):
         self.root = path
@@ -184,7 +198,12 @@ def render_cover(opf, opf_path, zf, reader=None):
 def get_cover(opf, opf_path, stream, reader=None):
     raster_cover = opf.raster_cover
     stream.seek(0)
-    zf = ZipFile(stream)
+    try:
+        zf = ZipFile(stream)
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
     if raster_cover:
         base = posixpath.dirname(opf_path)
         cpath = posixpath.normpath(posixpath.join(base, raster_cover))
@@ -207,7 +226,7 @@ def get_cover(opf, opf_path, stream, reader=None):
 def get_metadata(stream, extract_cover=True):
     """ Return metadata as a :class:`Metadata` object """
     stream.seek(0)
-    reader = OCFZipReader(stream)
+    reader = get_zip_reader(stream)
     mi = reader.opf.to_book_metadata()
     if extract_cover:
         try:
@@ -232,7 +251,7 @@ def _write_new_cover(new_cdata, cpath):
 def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
     stream.seek(0)
-    reader = OCFZipReader(stream, root=os.getcwdu())
+    reader = get_zip_reader(stream, root=os.getcwdu())
     raster_cover = reader.opf.raster_cover
     mi = MetaInformation(mi)
     new_cdata = None
@@ -283,7 +302,11 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
         reader.opf.timestamp = mi.timestamp
     newopf = StringIO(reader.opf.render())
-    safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
-            extra_replacements=replacements)
+    if isinstance(reader.archive, LocalZipFile):
+        reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
+    else:
+        safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
     try:
         if cpath is not None:

View File

@@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form):
     def initialize(self, name, db):
         '''
         CheckBoxControls (c_type: check_box):
-            ['generate_titles','generate_series','generate_genres',
-             'generate_recently_added','generate_descriptions','include_hr']
+            ['cross_reference_authors',
+             'generate_titles','generate_series','generate_genres',
+             'generate_recently_added','generate_descriptions',
+             'include_hr']
         ComboBoxControls (c_type: combo_box):
             ['exclude_source_field','header_note_source_field',
              'merge_source_field']

View File

@@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
        <string>Other options</string>
       </property>
       <layout class="QGridLayout" name="gridLayout_3">
-       <item row="2" column="1">
+       <item row="3" column="1">
        <layout class="QHBoxLayout" name="merge_with_comments_hl">
         <item>
          <widget class="QComboBox" name="merge_source_field">
@@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
         </item>
        </layout>
       </item>
-      <item row="2" column="0">
+      <item row="3" column="0">
       <widget class="QLabel" name="label_9">
        <property name="minimumSize">
         <size>
@@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
        </property>
       </widget>
      </item>
-     <item row="0" column="0">
+     <item row="1" column="0">
      <widget class="QLabel" name="label_4">
       <property name="minimumSize">
        <size>
@@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
        </property>
       </widget>
      </item>
-     <item row="0" column="1">
+     <item row="1" column="1">
      <layout class="QHBoxLayout" name="replace_cover_hl">
       <item>
        <widget class="QRadioButton" name="generate_new_cover">
@@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
       </item>
      </layout>
     </item>
-    <item row="1" column="0">
+    <item row="2" column="0">
     <widget class="QLabel" name="label_3">
      <property name="text">
       <string>E&amp;xtra Description note:</string>
@@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
      </property>
     </widget>
    </item>
-   <item row="1" column="1">
+   <item row="2" column="1">
    <layout class="QHBoxLayout" name="horizontalLayout">
     <item>
      <widget class="QComboBox" name="header_note_source_field">
@@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
      </item>
     </layout>
    </item>
+   <item row="0" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>Author cross-references:</string>
+     </property>
+     <property name="alignment">
+      <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="1">
+    <layout class="QHBoxLayout" name="cross_references_hl">
+     <item>
+      <widget class="QCheckBox" name="cross_reference_authors">
+       <property name="text">
+        <string>For books with multiple authors, list each author separately</string>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
   </layout>
  </widget>
 </item>

View File

@@ -6,102 +6,19 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-class AmazonDEKindleStore(StorePlugin):
+class AmazonDEKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
-
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale0a-21'}
-        store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
-                      '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
-                      '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
-                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
-                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('von '):
-                    author = author[4:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-
-                yield s
-
-    def get_details(self, search_result, timeout):
-        drm_search_text = u'Gleichzeitige Verwendung von Geräten'
-        drm_free_text = u'Keine Einschränkung'
-        url = 'http://amazon.de/dp/'
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
+    aff_id = {'tag': 'charhale0a-21'}
+    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
+                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
+                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
+    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                  '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
+                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
+    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='

View File

@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-class AmazonESKindleStore(StorePlugin):
+class AmazonESKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
-
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale09-21'}
-        store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
+    aff_id = {'tag': 'charhale09-21'}
+    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
+    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
+                  '&linkCode=ur2&camp=3626&creative=24790')
+    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='

View File

@@ -6,79 +6,16 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-class AmazonFRKindleStore(StorePlugin):
+class AmazonFRKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
-
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale-21'}
-        store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
-
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
+    aff_id = {'tag': 'charhale-21'}
+    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
+    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
+    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='

View File

@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-class AmazonITKindleStore(StorePlugin):
+class AmazonITKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
    '''
-
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'httpcharles07-21'}
-        store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('di '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
+    aff_id = {'tag': 'httpcharles07-21'}
+    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
+    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
+                  'linkCode=ur2&camp=3370&creative=23322')
+    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='

View File

@ -6,8 +6,9 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from contextlib import closing import re
from contextlib import closing
from lxml import html from lxml import html
from PyQt4.Qt import QUrl from PyQt4.Qt import QUrl
@ -18,57 +19,80 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(StorePlugin): class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
''' '''
For comments on the implementation, please see amazon_plugin.py For comments on the implementation, please see amazon_plugin.py
''' '''
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'calcharles-21'}
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
store_link = self.store_link % self.aff_id
if detail_item: if detail_item:
aff_id['asin'] = detail_item self.aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
+            store_link = self.store_link_details % self.aff_id
             open_url(QUrl(store_link))

     def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))

-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = './/div[@class="image"]/a[1]'
             cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

             for data in doc.xpath(data_xpath):
                 if counter <= 0:
                     break

                 # Even though we are searching digital-text only Amazon will still
                 # put in results for non Kindle books (author pages). So we need
                 # to explicitly check if the item is a Kindle book and ignore it
                 # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
                     continue

                 # We must have an asin otherwise we can't easily reference the
                 # book later.
-                asin = ''.join(data.xpath("@name"))
+                asin_href = None
+                asin_a = data.xpath(asin_xpath)
+                if asin_a:
+                    asin_href = asin_a[0].get('href', '')
+                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
+                    if m:
+                        asin = m.group('asin')
+                    else:
+                        continue
+                else:
+                    continue

                 cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('by '):
-                    author = author[3:]
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    author = author.split('by ', 1)[1].split(" (")[0]
+                except:
+                    pass
+                price = ''.join(data.xpath(price_xpath))

                 counter -= 1

@@ -78,37 +102,10 @@ class AmazonUKKindleStore(StorePlugin):
                 s.author = author.strip()
                 s.price = price.strip()
                 s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
                 s.formats = 'Kindle'

                 yield s
     def get_details(self, search_result, timeout):
-        # We might already have been called.
-        if search_result.drm:
-            return
-
-        url = 'http://amazon.co.uk/dp/'
-        drm_search_text = u'Simultaneous Device Usage'
-        drm_free_text = u'Unlimited'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if not search_result.author:
-                search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()'))
-            is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])')
-            if is_kindle:
-                search_result.formats = 'Kindle'
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
+        pass
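The reworked search() above pulls the ASIN out of the product link rather than a @name attribute. A minimal standalone sketch of that extraction, reusing the regex from the diff (the href value is invented):

import re

# Hypothetical product link of the kind found under .//div[@class="image"]/a[1]
asin_href = 'http://www.amazon.co.uk/Some-Title/dp/B0012345AB/ref=sr_1_1'

m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m is not None:
    print(m.group('asin'))  # -> B0012345AB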
View File
@@ -25,7 +25,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
         url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
         url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
-                       'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
+                       'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')

         if external or self.config.get('open_external', False):
             if detail_item:
@@ -41,33 +41,38 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
             d.exec_()

     def search(self, query, max_results=10, timeout=60):
-        url = ('http://www.libri.de/shop/action/quickSearch?facetNodeId=6'
-               '&mainsearchSubmit=Los!&searchString=' + urllib2.quote(query))
+        url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
+               + urllib2.quote(query))
         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
             doc = html.fromstring(f.read())

-            for data in doc.xpath('//div[contains(@class, "item")]'):
+            for data in doc.xpath('//div[contains(@class, "articlecontainer")]'):
                 if counter <= 0:
                     break

-                details = data.xpath('./div[@class="beschreibungContainer"]')
+                details = data.xpath('./div[@class="articleinfobox"]')
                 if not details:
                     continue
                 details = details[0]
-                id = ''.join(details.xpath('./div[@class="text"]/a/@name')).strip()
-                if not id:
+                id_ = ''.join(details.xpath('./a/@name')).strip()
+                if not id_:
                     continue
-                cover_url = ''.join(details.xpath('.//div[@class="coverImg"]/a/img/@src'))
-                title = ''.join(details.xpath('./div[@class="text"]/span[@class="titel"]/a/text()')).strip()
-                author = ''.join(details.xpath('./div[@class="text"]/span[@class="author"]/text()')).strip()
+                title = ''.join(details.xpath('.//a[@class="su1_c_l_titel"]/text()')).strip()
+                author = ''.join(details.xpath('.//div[@class="author"]/text()')).strip()
+                if author.startswith('von'):
+                    author = author[4:]

                 pdf = details.xpath(
-                    'boolean(.//span[@class="format" and contains(text(), "pdf")]/text())')
+                    'boolean(.//span[@class="bindername" and contains(text(), "pdf")]/text())')
                 epub = details.xpath(
-                    'boolean(.//span[@class="format" and contains(text(), "epub")]/text())')
+                    'boolean(.//span[@class="bindername" and contains(text(), "epub")]/text())')
                 mobi = details.xpath(
-                    'boolean(.//span[@class="format" and contains(text(), "mobipocket")]/text())')
+                    'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
+                cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
                 price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()

                 counter -= 1

@@ -78,7 +83,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
                     s.author = author.strip()
                     s.price = price
                     s.drm = SearchResult.DRM_UNKNOWN
-                    s.detail_item = id
+                    s.detail_item = id_

                     formats = []
                     if epub:
                         formats.append('ePub')
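The new ebook.de endpoint is simply the quoted query appended to a fixed path. A quick sketch, under Python 2 as in the plugin (the query string is invented):

import urllib2

query = 'heinrich heine'
url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
       + urllib2.quote(query))
print(url)  # ...pathSearch?nav=52122&searchString=heinrich%20heine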
View File
@@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin):
                    help = _('Title of generated catalog used as title in metadata.\n'
                    "Default: '%default'\n"
                    "Applies to: AZW3, ePub, MOBI output formats")),
+            Option('--cross-reference-authors',
+                   default=False,
+                   dest='cross_reference_authors',
+                   action = 'store_true',
+                   help=_("Create cross-references in Authors section for books with multiple authors.\n"
+                   "Default: '%default'\n"
+                   "Applies to: AZW3, ePub, MOBI output formats")),
             Option('--debug-pipeline',
                    default=None,
                    dest='debug_pipeline',
@@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin):
                    help=_("Regex describing tags to exclude as genres.\n"
                    "Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
                    "Applies to: AZW3, ePub, MOBI output formats")),
-
             Option('--exclusion-rules',
                    default="(('Catalogs','Tags','Catalog'),)",
                    dest='exclusion_rules',
@@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin):
                    "When multiple rules are defined, all rules will be applied.\n"
                    "Default: \n" + '"' + '%default' + '"' + "\n"
                    "Applies to AZW3, ePub, MOBI output formats")),
-
             Option('--generate-authors',
                    default=False,
                    dest='generate_authors',
@@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin):
             build_log.append(" opts:")
             for key in keys:
                 if key in ['catalog_title','author_clip','connected_kindle','creator',
-                           'description_clip','exclude_book_marker','exclude_genre',
-                           'exclude_tags','exclusion_rules', 'fmt',
+                           'cross_reference_authors','description_clip','exclude_book_marker',
+                           'exclude_genre','exclude_tags','exclusion_rules', 'fmt',
                            'header_note_source_field','merge_comments_rule',
                            'output_profile','prefix_rules','read_book_marker',
                            'search_text','sort_by','sort_descriptions_by_author','sync',
View File
@@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
 from calibre.customize.ui import output_profiles
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
 from calibre.ebooks.chardet import substitute_entites
+from calibre.ebooks.metadata import author_to_author_sort
 from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.config import config_dir
 from calibre.utils.date import format_date, is_date_undefined, now as nowf
-from calibre.utils.filenames import ascii_text
+from calibre.utils.filenames import ascii_text, shorten_components_to
 from calibre.utils.icu import capitalize, collation_order, sort_key
 from calibre.utils.magick.draw import thumbnail
 from calibre.utils.zipfile import ZipFile
@@ -109,6 +110,7 @@ class CatalogBuilder(object):
         self.stylesheet = stylesheet
         self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
         self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
+        self.content_dir = os.path.join(self.catalog_path, "content")
         self.excluded_tags = self.get_excluded_tags()
         self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
                                                  _opts.output_profile and
@@ -127,12 +129,13 @@ class CatalogBuilder(object):
         self.books_by_title = None
         self.books_by_title_no_series_prefix = None
         self.books_to_catalog = None
-        self.content_dir = os.path.join(self.catalog_path, "content")
         self.current_step = 0.0
         self.error = []
         self.generate_recently_read = False
         self.genres = []
-        self.genre_tags_dict = None
+        self.genre_tags_dict = \
+            self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
+            if self.opts.generate_genres else None
         self.html_filelist_1 = []
         self.html_filelist_2 = []
         self.merge_comments_rule = dict(zip(['field','position','hr'],
@@ -505,7 +508,7 @@ class CatalogBuilder(object):
         if not os.path.isdir(images_path):
             os.makedirs(images_path)

-    def detect_author_sort_mismatches(self):
+    def detect_author_sort_mismatches(self, books_to_test):
         """ Detect author_sort mismatches.

         Sort by author, look for inconsistencies in author_sort among
@@ -513,17 +516,18 @@ class CatalogBuilder(object):
         annoyance for EPUB.

         Inputs:
-         self.books_to_catalog (list): list of books to catalog
+         books_by_author (list): list of books to test, possibly unsorted

         Output:
-         self.books_by_author (list): sorted by author
+         (none)

         Exceptions:
          AuthorSortMismatchException: author_sort mismatch detected
        """
-        self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
-        authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
+        books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)
+
+        authors = [(record['author'], record['author_sort']) for record in books_by_author]
         current_author = authors[0]
         for (i,author) in enumerate(authors):
             if author != current_author and i:
@@ -701,6 +705,7 @@ class CatalogBuilder(object):
     def fetch_books_by_author(self):
         """ Generate a list of books sorted by author.

+        For books with multiple authors, relist book with additional authors.
         Sort the database by author. Report author_sort inconsistencies as warning when
         building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
         to self.authors.
@@ -720,25 +725,30 @@ class CatalogBuilder(object):
         self.update_progress_full_step(_("Sorting database"))

-        self.detect_author_sort_mismatches()
+        books_by_author = list(self.books_to_catalog)
+        self.detect_author_sort_mismatches(books_by_author)
+
+        if self.opts.cross_reference_authors:
+            books_by_author = self.relist_multiple_authors(books_by_author)
+
+        #books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)
+
+        # Sort authors using sort_key to normalize accented letters
         # Determine the longest author_sort length before sorting
-        asl = [i['author_sort'] for i in self.books_by_author]
+        asl = [i['author_sort'] for i in books_by_author]
         las = max(asl, key=len)
-        self.books_by_author = sorted(self.books_to_catalog,
+
+        books_by_author = sorted(books_by_author,
             key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))

         if self.DEBUG and self.opts.verbose:
-            tl = [i['title'] for i in self.books_by_author]
+            tl = [i['title'] for i in books_by_author]
             lt = max(tl, key=len)
             fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
             print(fs.format('','Title','Author','Series'))
-            for i in self.books_by_author:
+            for i in books_by_author:
                 print(fs.format('', i['title'],i['author_sort'],i['series']))

         # Build the unique_authors set from existing data
-        authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
+        authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]

         # authors[] contains a list of all book authors, with multiple entries for multiple books by author
         # authors[]: (([0]:friendly [1]:sort))
@@ -776,6 +786,7 @@ class CatalogBuilder(object):
                                       author[2])).encode('utf-8'))

         self.authors = unique_authors
+        self.books_by_author = books_by_author
         return True

     def fetch_books_by_title(self):
@@ -863,15 +874,15 @@ class CatalogBuilder(object):
             this_title['series_index'] = 0.0

         this_title['title_sort'] = self.generate_sort_title(this_title['title'])
-        if 'authors' in record:
-            # from calibre.ebooks.metadata import authors_to_string
-            # return authors_to_string(self.authors)
+        # from calibre.ebooks.metadata import authors_to_string
+        # return authors_to_string(self.authors)
+        if 'authors' in record:
             this_title['authors'] = record['authors']
+            # Synthesize author attribution from authors list
             if record['authors']:
                 this_title['author'] = " &amp; ".join(record['authors'])
             else:
-                this_title['author'] = 'Unknown'
+                this_title['author'] = _('Unknown')
+                this_title['authors'] = [this_title['author']]

         if 'author_sort' in record and record['author_sort'].strip():
             this_title['author_sort'] = record['author_sort']
@@ -1093,7 +1104,7 @@ class CatalogBuilder(object):
         self.bookmarked_books = bookmarks

-    def filter_db_tags(self):
+    def filter_db_tags(self, max_len):
         """ Remove excluded tags from data set, return normalized genre list.

         Filter all db tags, removing excluded tags supplied in opts.
@@ -1101,13 +1112,13 @@ class CatalogBuilder(object):
         tags are flattened to alphanumeric ascii_text.

         Args:
-         (none)
+         max_len: maximum length of normalized tag to fit within OS constraints

         Return:
          genre_tags_dict (dict): dict of filtered, normalized tags in data set
        """

-        def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
+        def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
             def _next_tag(sorted_tags):
                 for (i, tag) in enumerate(sorted_tags):
                     if i < len(tags) - 1:
@@ -1126,6 +1137,31 @@ class CatalogBuilder(object):
                     out_str = ' ' * (indent + 1)
             return ans + out_str

+        def _normalize_tag(tag, max_len):
+            """ Generate an XHTML-legal anchor string from tag.
+
+            Parse tag for non-ascii, convert to unicode name.
+
+            Args:
+             tag (str): tag name, possibly containing symbols
+             max_len (int): maximum length of tag
+
+            Return:
+             normalized (str): unicode names substituted for non-ascii chars,
+              clipped to max_len
+            """
+            normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
+            if re.search('\W',normalized):
+                normalized = ''
+                for c in massaged:
+                    if re.search('\W',c):
+                        normalized += self.generate_unicode_name(c)
+                    else:
+                        normalized += c
+            shortened = shorten_components_to(max_len, [normalized])[0]
+            return shortened
+
         # Entry point
         normalized_tags = []
         friendly_tags = []
@@ -1144,7 +1180,7 @@ class CatalogBuilder(object):
             if tag == ' ':
                 continue

-            normalized_tags.append(self.normalize_tag(tag))
+            normalized_tags.append(_normalize_tag(tag, max_len))
             friendly_tags.append(tag)

         genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
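A rough illustration of what _normalize_tag() produces. ascii_text and generate_unicode_name are stand-ins here (assumed behavior: strip accents, and map a symbol to a u-prefixed codepoint token); only the shape of the output matters:

import re
import unicodedata

def ascii_text(s):
    # Stand-in for calibre.utils.filenames.ascii_text: drop accents.
    return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')

def generate_unicode_name(c):
    # Stand-in for the builder's helper: one legal ASCII token per symbol.
    return 'u%04x' % ord(c)

def normalize_tag(tag, max_len):
    normalized = massaged = re.sub(r'\s', '', ascii_text(tag).lower())
    if re.search(r'\W', normalized):
        normalized = ''.join(generate_unicode_name(c) if re.search(r'\W', c)
                             else c for c in massaged)
    return normalized[:max_len]

print(normalize_tag('Science Fiction', 245))   # sciencefiction
print(normalize_tag('Sci-Fi & Fantasy', 245))  # sciu002dfiu0026fantasy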
@@ -1941,8 +1977,6 @@ class CatalogBuilder(object):
         self.update_progress_full_step(_("Genres HTML"))

-        self.genre_tags_dict = self.filter_db_tags()
-
         # Extract books matching filtered_tags
         genre_list = []
         for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):
@@ -2024,10 +2058,11 @@ class CatalogBuilder(object):
                         books_by_current_author += 1

             # Write the genre book list as an article
-            titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
-                                        genre_tag_set[genre],
-                                        "%s/Genre_%s.html" % (self.content_dir,
-                                        genre))
+            outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
+            titles_spanned = self.generate_html_by_genre(genre,
+                                        True if index==0 else False,
+                                        genre_tag_set[genre],
+                                        outfile)

             tag_file = "content/Genre_%s.html" % genre
             master_genre_list.append({'tag':genre,
@@ -2549,7 +2584,7 @@ class CatalogBuilder(object):
             for (i, tag) in enumerate(sorted(book.get('tags', []))):
                 aTag = Tag(_soup,'a')
                 if self.opts.generate_genres:
-                    aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
+                    aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
                 aTag.insert(0,escape(NavigableString(tag)))
                 genresTag.insert(gtc, aTag)
                 gtc += 1
@@ -4603,28 +4638,6 @@ class CatalogBuilder(object):
         return merged

-    def normalize_tag(self, tag):
-        """ Generate an XHTML-legal anchor string from tag.
-
-        Parse tag for non-ascii, convert to unicode name.
-
-        Args:
-         tags (str): tag name possible containing symbols
-
-        Return:
-         normalized (str): unicode names substituted for non-ascii chars
-        """
-
-        normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
-        if re.search('\W',normalized):
-            normalized = ''
-            for c in massaged:
-                if re.search('\W',c):
-                    normalized += self.generate_unicode_name(c)
-                else:
-                    normalized += c
-        return normalized
-
     def process_exclusions(self, data_set):
         """ Filter data_set based on exclusion_rules.
@@ -4697,6 +4710,43 @@ class CatalogBuilder(object):
         else:
             return data_set

+    def relist_multiple_authors(self, books_by_author):
+        """ Create multiple entries for books with multiple authors
+
+        Given a list of books by author, scan list for books with multiple
+        authors. Add a cloned copy of the book per additional author.
+
+        Args:
+         books_by_author (list): book list possibly containing books
+         with multiple authors
+
+        Return:
+         (list): books_by_author with additional cloned entries for books with
+         multiple authors
+        """
+
+        multiple_author_books = []
+
+        # Find the multiple author books
+        for book in books_by_author:
+            if len(book['authors']) > 1:
+                multiple_author_books.append(book)
+
+        for book in multiple_author_books:
+            cloned_authors = list(book['authors'])
+            for x, author in enumerate(book['authors']):
+                if x:
+                    first_author = cloned_authors.pop(0)
+                    cloned_authors.append(first_author)
+                    new_book = deepcopy(book)
+                    new_book['author'] = ' & '.join(cloned_authors)
+                    new_book['authors'] = list(cloned_authors)
+                    asl = [author_to_author_sort(auth) for auth in cloned_authors]
+                    new_book['author_sort'] = ' & '.join(asl)
+                    books_by_author.append(new_book)
+
+        return books_by_author
+
     def update_progress_full_step(self, description):
         """ Update calibre's job status UI.
View File
@@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
def __init__(self, raw):
(self.major_version, self.minor_version, self.header_size,
self.offset_size) = unpack_from(b'>4B', raw)
if (self.major_version, self.minor_version) != (1, 0):
raise UnsupportedFont('The CFF table has unknown version: '
'(%d, %d)'%(self.major_version, self.minor_version))
offset = self.header_size
# Read Names Index
self.font_names = Index(raw, offset)
offset = self.font_names.pos
if len(self.font_names) > 1:
raise UnsupportedFont('CFF table has more than one font.')
# Read Top Dict
self.top_index = Index(raw, offset)
offset = self.top_index.pos
# Read strings
self.strings = Strings(raw, offset)
offset = self.strings.pos
print (self.strings[len(cff_standard_strings):])
class Index(list):
def __init__(self, raw, offset):
list.__init__(self)
count = unpack_from(b'>H', raw, offset)[0]
offset += 2
self.pos = offset
if count > 0:
self.offset_size = unpack_from(b'>B', raw, offset)[0]
offset += 1
if self.offset_size == 3:
offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
for i in xrange(offset, 3*(count+2), 3)]
else:
fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
offsets = unpack_from(fmt, raw, offset)
offset += self.offset_size * (count+1) - 1
for i in xrange(len(offsets)-1):
off, noff = offsets[i:i+2]
obj = raw[offset+off:offset+noff]
self.append(obj)
self.pos = offset + offsets[-1]
class Strings(Index):
def __init__(self, raw, offset):
super(Strings, self).__init__(raw, offset)
for x in reversed(cff_standard_strings):
self.insert(0, x)
class CFFTable(UnknownTable):
def decompile(self):
self.cff = CFF(self.raw)
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
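For readers following Index.__init__ above: a CFF INDEX is count (2 bytes), offSize (1 byte), count+1 one-based offsets, then the object data. A hand-built example (data invented) that the parser decodes to [b'foo', b'quux']:

from struct import pack

raw = (pack(b'>H', 2)           # count = 2 objects
       + pack(b'>B', 1)         # offSize = 1 byte per offset
       + pack(b'>3B', 1, 4, 8)  # count+1 offsets, 1-based into the data
       + b'fooquux')            # the object data itself

# The data begins at byte 6, so object 0 spans raw[6:9] == b'foo' and
# object 1 spans raw[9:13] == b'quux'; self.pos ends at len(raw).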
View File
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
View File
@@ -0,0 +1,201 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack
t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
t1_operand_encoding[32:247] = (247 - 32) * ["read_byte"]
t1_operand_encoding[247:251] = (251 - 247) * ["read_small_int1"]
t1_operand_encoding[251:255] = (255 - 251) * ["read_small_int2"]
t1_operand_encoding[255] = "read_long_int"
t2_operand_encoding = t1_operand_encoding[:]
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"
cff_dict_operand_encoding = t2_operand_encoding[:]
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"
real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'.', 'E', 'E-', None, '-']
class SimpleConverter(object):
def read(self, parent, value):
return value
def write(self, parent, value):
return value
class TODO(SimpleConverter):
pass
class Reader(dict):
def read_byte(self, b0, data, index):
return b0 - 139, index
def read_small_int1(self, b0, data, index):
b1 = ord(data[index])
return (b0-247)*256 + b1 + 108, index+1
def read_small_int2(self, b0, data, index):
b1 = ord(data[index])
return -(b0-251)*256 - b1 - 108, index+1
def read_short_int(self, b0, data, index):
bin = data[index] + data[index+1]
value, = unpack(b">h", bin)
return value, index+2
def read_long_int(self, b0, data, index):
bin = data[index] + data[index+1] + data[index+2] + data[index+3]
value, = unpack(b">l", bin)
return value, index+4
def read_fixed_1616(self, b0, data, index):
bin = data[index] + data[index+1] + data[index+2] + data[index+3]
value, = unpack(b">l", bin)
return value / 65536.0, index+4
def read_real_number(self, b0, data, index):
number = ''
while True:
b = ord(data[index])
index = index + 1
nibble0 = (b & 0xf0) >> 4
nibble1 = b & 0x0f
if nibble0 == 0xf:
break
number = number + real_nibbles[nibble0]
if nibble1 == 0xf:
break
number = number + real_nibbles[nibble1]
return float(number), index
class Dict(Reader):
operand_encoding = cff_dict_operand_encoding
TABLE = []
def __init__(self):
Reader.__init__(self)
table = self.TABLE[:]
for i in xrange(len(table)):
op, name, arg, default, conv = table[i]
if conv is not None:
continue
if arg in ("delta", "array", 'number', 'SID'):
conv = SimpleConverter()
else:
raise Exception('Should not happen')
table[i] = op, name, arg, default, conv
self.operators = {op:(name, arg) for op, name, arg, default, conv in
table}
def decompile(self, strings, global_subrs, data):
self.strings = strings
self.global_subrs = global_subrs
self.stack = []
index = 0
while index < len(data):
b0 = ord(data[index])
index += 1
handler = getattr(self, self.operand_encoding[b0])
value, index = handler(b0, data, index)
if value is not None:
self.stack.append(value)
def do_operator(self, b0, data, index):
if b0 == 12:
op = (b0, ord(data[index]))
index += 1
else:
op = b0
operator, arg_type = self.operators[op]
self.handle_operator(operator, arg_type)
return None, index
def handle_operator(self, operator, arg_type):
if isinstance(arg_type, tuple):
value = ()
for i in xrange(len(arg_type)-1, -1, -1):
arg = arg_type[i]
arghandler = getattr(self, 'arg_' + arg)
value = (arghandler(operator),) + value
else:
arghandler = getattr(self, 'arg_' + arg_type)
value = arghandler(operator)
self[operator] = value
def arg_number(self, name):
return self.stack.pop()
def arg_SID(self, name):
return self.strings[self.stack.pop()]
def arg_array(self, name):
ans = self.stack[:]
del self.stack[:]
return ans
def arg_delta(self, name):
out = []
current = 0
for v in self.stack:
current = current + v
out.append(current)
del self.stack[:]
return out
class TopDict(Dict):
TABLE = [
#opcode name argument type default converter
((12, 30), 'ROS', ('SID','SID','number'), None, SimpleConverter()),
((12, 20), 'SyntheticBase', 'number', None, None),
(0, 'version', 'SID', None, None),
(1, 'Notice', 'SID', None, None),
((12, 0), 'Copyright', 'SID', None, None),
(2, 'FullName', 'SID', None, None),
((12, 38), 'FontName', 'SID', None, None),
(3, 'FamilyName', 'SID', None, None),
(4, 'Weight', 'SID', None, None),
((12, 1), 'isFixedPitch', 'number', 0, None),
((12, 2), 'ItalicAngle', 'number', 0, None),
((12, 3), 'UnderlinePosition', 'number', None, None),
((12, 4), 'UnderlineThickness', 'number', 50, None),
((12, 5), 'PaintType', 'number', 0, None),
((12, 6), 'CharstringType', 'number', 2, None),
((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0], None),
(13, 'UniqueID', 'number', None, None),
(5, 'FontBBox', 'array', [0,0,0,0], None),
((12, 8), 'StrokeWidth', 'number', 0, None),
(14, 'XUID', 'array', None, None),
((12, 21), 'PostScript', 'SID', None, None),
((12, 22), 'BaseFontName', 'SID', None, None),
((12, 23), 'BaseFontBlend', 'delta', None, None),
((12, 31), 'CIDFontVersion', 'number', 0, None),
((12, 32), 'CIDFontRevision', 'number', 0, None),
((12, 33), 'CIDFontType', 'number', 0, None),
((12, 34), 'CIDCount', 'number', 8720, None),
(15, 'charset', 'number', 0, TODO()),
((12, 35), 'UIDBase', 'number', None, None),
(16, 'Encoding', 'number', 0, TODO()),
(18, 'Private', ('number','number'), None, TODO()),
((12, 37), 'FDSelect', 'number', None, TODO()),
((12, 36), 'FDArray', 'number', None, TODO()),
(17, 'CharStrings', 'number', None, TODO()),
]
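To make the operand encodings concrete: a byte in 32..246 encodes b0 - 139 directly, and 247..250 takes one following byte. A tiny sketch of those two rules from the Reader above:

def read_byte(b0):
    # 32 <= b0 <= 246: the operand is b0 - 139
    return b0 - 139

def read_small_int1(b0, b1):
    # 247 <= b0 <= 250: positive two-byte operand
    return (b0 - 247) * 256 + b1 + 108

assert read_byte(0x8b) == 0           # 139 -> 0
assert read_small_int1(0xf7, 0) == 108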
View File
@@ -0,0 +1,166 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
def __init__(self, raw):
(self.major_version, self.minor_version, self.header_size,
self.offset_size) = unpack_from(b'>4B', raw)
if (self.major_version, self.minor_version) != (1, 0):
raise UnsupportedFont('The CFF table has unknown version: '
'(%d, %d)'%(self.major_version, self.minor_version))
offset = self.header_size
# Read Names Index
self.font_names = Index(raw, offset)
offset = self.font_names.pos
if len(self.font_names) > 1:
raise UnsupportedFont('CFF table has more than one font.')
# Read Top Dict
self.top_index = Index(raw, offset)
self.top_dict = TopDict()
offset = self.top_index.pos
# Read strings
self.strings = Strings(raw, offset)
offset = self.strings.pos
# Read global subroutines
self.global_subrs = GlobalSubrs(raw, offset)
offset = self.global_subrs.pos
# Decompile Top Dict
self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
import pprint
pprint.pprint(self.top_dict)
class Index(list):
def __init__(self, raw, offset, prepend=()):
list.__init__(self)
self.extend(prepend)
count = unpack_from(b'>H', raw, offset)[0]
offset += 2
self.pos = offset
if count > 0:
self.offset_size = unpack_from(b'>B', raw, offset)[0]
offset += 1
if self.offset_size == 3:
offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
for i in xrange(offset, 3*(count+2), 3)]
else:
fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
offsets = unpack_from(fmt, raw, offset)
offset += self.offset_size * (count+1) - 1
for i in xrange(len(offsets)-1):
off, noff = offsets[i:i+2]
obj = raw[offset+off:offset+noff]
self.append(obj)
self.pos = offset + offsets[-1]
class Strings(Index):
def __init__(self, raw, offset):
super(Strings, self).__init__(raw, offset, prepend=cff_standard_strings)
class GlobalSubrs(Index):
pass
class CFFTable(UnknownTable):
def decompile(self):
self.cff = CFF(self.raw)
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
View File
@@ -21,6 +21,7 @@ from calibre.utils.fonts.sfnt.maxp import MaxpTable
 from calibre.utils.fonts.sfnt.loca import LocaTable
 from calibre.utils.fonts.sfnt.glyf import GlyfTable
 from calibre.utils.fonts.sfnt.cmap import CmapTable
+from calibre.utils.fonts.sfnt.cff.table import CFFTable

 # OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
@@ -42,6 +43,7 @@ class Sfnt(object):
                 b'loca' : LocaTable,
                 b'glyf' : GlyfTable,
                 b'cmap' : CmapTable,
+                b'CFF ' : CFFTable,
             }.get(table_tag, UnknownTable)(table)

     def __getitem__(self, key):
@@ -53,12 +55,24 @@ class Sfnt(object):
     def __delitem__(self, key):
         del self.tables[key]

+    def __iter__(self):
+        '''Iterate over the table tags in optimal order as per
+        http://partners.adobe.com/public/developer/opentype/index_recs.html'''
+        keys = list(self.tables.keys())
+        order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
+            b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
+            b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
+            b'gasp', b'PCLT', b'DSIG'))}
+        keys.sort(key=lambda x:order.get(x, 1000))
+        for x in keys:
+            yield x
+
     def pop(self, key, default=None):
         return self.tables.pop(key, default)

     def sizes(self):
         ans = OrderedDict()
-        for tag in sorted(self.tables):
+        for tag in self:
             ans[tag] = len(self[tag])
         return ans

@@ -82,7 +96,7 @@ class Sfnt(object):
         table_data = []
         offset = stream.tell() + ( calcsize(b'>4s3L') * num_tables )
         sizes = OrderedDict()
-        for tag in sorted(self.tables):
+        for tag in self:
             table = self.tables[tag]
             raw = table()
             table_len = len(raw)
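The effect of the new __iter__ is easiest to see in isolation; known tags keep the recommended order and unknown tags sort after every known one (the tag list below is invented):

order = {x: i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
    b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
    b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
    b'gasp', b'PCLT', b'DSIG'))}
tags = [b'name', b'glyf', b'head', b'XXXX']
tags.sort(key=lambda x: order.get(x, 1000))
print(tags)  # [b'head', b'glyf', b'name', b'XXXX']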
View File
@@ -66,6 +66,11 @@ def subset_truetype(sfnt, character_map):
 # }}}

+def subset_postscript(sfnt, character_map):
+    cff = sfnt[b'CFF ']
+    cff.decompile()
+    raise Exception('TODO: Implement CFF subsetting')
+
 def subset(raw, individual_chars, ranges=()):
     chars = list(map(ord, individual_chars))
     for r in ranges:
@@ -91,7 +96,11 @@ def subset(raw, individual_chars, ranges=()):
         subset_truetype(sfnt, character_map)
     elif b'CFF ' in sfnt:
         # PostScript Outlines
-        raise UnsupportedFont('This font contains PostScript outlines, '
-                'subsetting not supported')
+        from calibre.utils.config_base import tweaks
+        if tweaks['subset_cff_table']:
+            subset_postscript(sfnt, character_map)
+        else:
+            raise UnsupportedFont('This font contains PostScript outlines, '
+                'subsetting not supported')
     else:
         raise UnsupportedFont('This font does not contain TrueType '
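Since the CFF path is gated behind a tweak, trying it out presumably means setting the flag in calibre's tweaks.py (the tweak name comes from the diff; the plain-assignment form is an assumption about how tweaks are declared):

# In tweaks.py (Preferences->Advanced->Tweaks):
subset_cff_table = True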
View File
@@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.
Tries to only use the local headers to extract data from the damaged zip file.
'''
import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile
HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8
LocalHeader = namedtuple('LocalHeader',
'signature min_version flags compression_method mod_time mod_date '
'crc32 compressed_size uncompressed_size filename_length extra_length '
'filename extra')
def decode_arcname(name):
if isinstance(name, bytes):
from calibre.ebooks.chardet import detect
try:
name = name.decode('utf-8')
except:
res = detect(name)
encoding = res['encoding']
try:
name = name.decode(encoding)
except:
name = name.decode('utf-8', 'replace')
return name
def find_local_header(f):
pos = f.tell()
raw = f.read(50*1024)
try:
f.seek(pos + raw.index(HEADER_BYTE_SIG))
except ValueError:
f.seek(pos)
return
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature == HEADER_SIG:
return header
f.seek(pos)
def read_local_file_header(f):
pos = f.tell()
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature != HEADER_SIG:
f.seek(pos)
header = find_local_header(f)
if header is None:
return
if header.min_version > 20:
raise ValueError('This ZIP file uses unsupported features')
if header.flags & 0b1:
raise ValueError('This ZIP file is encrypted')
if header.flags & (1 << 3):
raise ValueError('This ZIP file uses data descriptors. This is unsupported')
if header.flags & (1 << 13):
raise ValueError('This ZIP file uses masking, unsupported.')
if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
raise ValueError('This ZIP file uses an unsupported compression method')
fname = extra = None
if header.filename_length > 0:
fname = f.read(header.filename_length)
if len(fname) != header.filename_length:
return
try:
fname = fname.decode('ascii')
except UnicodeDecodeError:
if header.flags & (1 << 11):
try:
fname = fname.decode('utf-8')
except UnicodeDecodeError:
pass
fname = decode_arcname(fname).replace('\\', '/')
if header.extra_length > 0:
extra = f.read(header.extra_length)
if len(extra) != header.extra_length:
return
return LocalHeader(*(
header[:-2] + (fname, extra)
))
def read_compressed_data(f, header):
cdata = f.read(header.compressed_size)
return cdata
def copy_stored_file(src, size, dest):
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw:
raise ValueError('Premature end of file')
dest.write(raw)
read += len(raw)
def copy_compressed_file(src, size, dest):
d = zlib.decompressobj(-15)
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
read += len(raw)
dest.write(d.decompress(raw, 200*1024))
count = 0
while d.unconsumed_tail:
count += 1
dest.write(d.decompress(d.unconsumed_tail, 200*1024))
if count > 100:
raise ValueError('This ZIP file contains a ZIP bomb in %s'%
os.path.basename(dest.name))
def _extractall(f, path=None, file_info=None):
found = False
while True:
header = read_local_file_header(f)
if not header:
break
found = True
parts = header.filename.split('/')
if header.uncompressed_size == 0:
# Directory
f.seek(f.tell() + header.compressed_size)
if path is not None:
bdir = os.path.join(path, *parts)
if not os.path.exists(bdir):
os.makedirs(bdir)
continue
# File
if file_info is not None:
file_info[header.filename] = (f.tell(), header)
if path is not None:
bdir = os.path.join(path, *(parts[:-1]))
if not os.path.exists(bdir):
os.makedirs(bdir)
dest = os.path.join(path, *parts)
with open(dest, 'wb') as o:
if header.compression_method == ZIP_STORED:
copy_stored_file(f, header.compressed_size, o)
else:
copy_compressed_file(f, header.compressed_size, o)
else:
f.seek(f.tell() + header.compressed_size)
if not found:
raise ValueError('Not a ZIP file')
def extractall(path_or_stream, path=None):
f = path_or_stream
close_at_end = False
if not hasattr(f, 'read'):
f = open(f, 'rb')
close_at_end = True
if path is None:
path = os.getcwdu()
pos = f.tell()
try:
_extractall(f, path)
finally:
f.seek(pos)
if close_at_end:
f.close()
class LocalZipFile(object):
def __init__(self, stream):
self.file_info = OrderedDict()
_extractall(stream, file_info=self.file_info)
self.stream = stream
def open(self, name, spool_size=5*1024*1024):
if isinstance(name, LocalHeader):
name = name.filename
try:
offset, header = self.file_info[name]
except KeyError:
raise ValueError('This ZIP container has no file named: %s'%name)
self.stream.seek(offset)
dest = SpooledTemporaryFile(max_size=spool_size)
if header.compression_method == ZIP_STORED:
copy_stored_file(self.stream, header.compressed_size, dest)
else:
copy_compressed_file(self.stream, header.compressed_size, dest)
dest.seek(0)
return dest
def getinfo(self, name):
try:
offset, header = self.file_info[name]
except KeyError:
raise ValueError('This ZIP container has no file named: %s'%name)
return header
def read(self, name, spool_size=5*1024*1024):
with self.open(name, spool_size=spool_size) as f:
return f.read()
def extractall(self, path=None):
self.stream.seek(0)
_extractall(self.stream, path=(path or os.getcwdu()))
def close(self):
pass
def safe_replace(self, name, datastream, extra_replacements={},
add_missing=False):
from calibre.utils.zipfile import ZipFile, ZipInfo
replacements = {name:datastream}
replacements.update(extra_replacements)
names = frozenset(replacements.keys())
found = set([])
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
ztemp = ZipFile(temp, 'w')
for offset, header in self.file_info.itervalues():
if header.filename in names:
zi = ZipInfo(header.filename)
zi.compress_type = header.compression_method
ztemp.writestr(zi, replacements[header.filename].read())
found.add(header.filename)
else:
ztemp.writestr(header.filename, self.read(header.filename,
spool_size=0))
if add_missing:
for name in names - found:
ztemp.writestr(name, replacements[name].read())
ztemp.close()
zipstream = self.stream
temp.seek(0)
zipstream.seek(0)
zipstream.truncate()
shutil.copyfileobj(temp, zipstream)
zipstream.flush()
if __name__ == '__main__':
extractall(sys.argv[-1])
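A quick sketch of how this module is meant to be driven, using only names defined above (the paths are invented):

# Extract a damaged ZIP, ignoring its central directory:
extractall('/tmp/broken.epub', path='/tmp/out')

# Or pull a single member through the local-header index:
with open('/tmp/broken.epub', 'rb') as stream:
    zf = LocalZipFile(stream)
    raw = zf.read('META-INF/container.xml')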
View File
@@ -467,11 +467,11 @@ eject_drive_letter(WCHAR DriveLetter) {
     DeviceNumber = -1;

-    hVolume = CreateFile(szVolumeAccessPath, 0,
+    hVolume = CreateFileW(szVolumeAccessPath, 0,
                 FILE_SHARE_READ | FILE_SHARE_WRITE,
                 NULL, OPEN_EXISTING, 0, NULL);
     if (hVolume == INVALID_HANDLE_VALUE) {
-        PyErr_SetString(PyExc_ValueError, "Invalid handle value for drive letter");
+        PyErr_SetFromWindowsErr(0);
         return FALSE;
     }

@@ -529,11 +529,17 @@ eject_drive_letter(WCHAR DriveLetter) {
 static PyObject *
 winutil_eject_drive(PyObject *self, PyObject *args) {
-    char DriveLetter;
+    char letter = '0';
+    WCHAR DriveLetter = L'0';

-    if (!PyArg_ParseTuple(args, "c", &DriveLetter)) return NULL;
+    if (!PyArg_ParseTuple(args, "c", &letter)) return NULL;

-    if (!eject_drive_letter((WCHAR)DriveLetter)) return NULL;
+    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, &letter, 1, &DriveLetter, 1) == 0) {
+        PyErr_SetFromWindowsErr(0);
+        return NULL;
+    }
+    if (!eject_drive_letter(DriveLetter)) return NULL;
     Py_RETURN_NONE;
 }
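From Python the calling convention is unchanged: the "c" format still expects a single-character string for the drive letter. A guess at a typical call (the plugins import path and the exported name eject_drive are assumptions; the drive letter is invented):

from calibre.constants import plugins

winutil, err = plugins['winutil']  # Windows only
if err:
    raise RuntimeError(err)
winutil.eject_drive(b'E')  # hypothetical drive letter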