Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 9868fffb02: Merge from trunk
@@ -35,3 +35,7 @@ nbproject/
 .settings/
 *.DS_Store
 calibre_plugins/
+recipes/.git
+recipes/.gitignore
+recipes/README
+recipes/katalog_egazeciarz.recipe
@@ -327,9 +327,8 @@ You can browse your |app| collection on your Android device by using the
 calibre content server, which makes your collection available over the net.
 First perform the following steps in |app|
 
-* Set the :guilabel:`Preferred Output Format` in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
-* Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
-* Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
+* Set the :guilabel:`Preferred Output Format` in |app| to EPUB for normal Android devices or MOBI for Kindles (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
+* Convert the books you want to read on your device to EPUB/MOBI format by selecting them and clicking the Convert button.
 * Turn on the Content Server in |app|'s preferences and leave |app| running.
 
 Now on your Android device, open the browser and browse to
@@ -2,7 +2,9 @@ import re
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class FocusRecipe(BasicNewsRecipe):
+
     __license__ = 'GPL v3'
     __author__ = u'intromatyk <intromatyk@gmail.com>'
     language = 'pl'
@@ -12,10 +14,10 @@ class FocusRecipe(BasicNewsRecipe):
     publisher = u'Gruner + Jahr Polska'
     category = u'News'
     description = u'Newspaper'
-    category='magazine'
-    cover_url=''
-    remove_empty_feeds= True
-    no_stylesheets=True
+    category = 'magazine'
+    cover_url = ''
+    remove_empty_feeds = True
+    no_stylesheets = True
     oldest_article = 7
     max_articles_per_feed = 100000
     recursions = 0
@@ -27,15 +29,15 @@ class FocusRecipe(BasicNewsRecipe):
     simultaneous_downloads = 5
 
     r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
-    keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
+    keep_only_tags = []
+    keep_only_tags.append(dict(name='div', attrs={'id': 'cll'}))
 
-    remove_tags =[]
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
-    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
+    remove_tags = []
+    remove_tags.append(dict(name='div', attrs={'class': 'ulm noprint'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'txb'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'h2'}))
+    remove_tags.append(dict(name='ul', attrs={'class': 'txu'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'ulc'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -46,15 +48,14 @@ class FocusRecipe(BasicNewsRecipe):
         .fot{font-size: x-small; color: #666666;}
     '''
 
-
     feeds = [
-        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+        ('Nauka', 'http://www.focus.pl/nauka/rss/'),
+        ('Historia', 'http://www.focus.pl/historia/rss/'),
+        ('Cywilizacja', 'http://www.focus.pl/cywilizacja/rss/'),
+        ('Sport', 'http://www.focus.pl/sport/rss/'),
+        ('Technika', 'http://www.focus.pl/technika/rss/'),
+        ('Przyroda', 'http://www.focus.pl/przyroda/rss/'),
+        ('Technologie', 'http://www.focus.pl/gadzety/rss/')
     ]
 
     def skip_ad_pages(self, soup):
@@ -65,20 +66,20 @@ class FocusRecipe(BasicNewsRecipe):
             return None
 
     def get_cover_url(self):
-        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
-        tag=soup.find(name='div', attrs={'class':'clr fl'})
+        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
+        tag = soup.find(name='div', attrs={'class': 'clr fl'})
         if tag:
-            self.cover_url='http://www.focus.pl/' + tag.a['href']
+            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
         return getattr(self, 'cover_url', self.cover_url)
 
     def print_version(self, url):
-        if url.count ('focus.pl.feedsportal.com'):
+        if url.count('focus.pl.feedsportal.com'):
             u = url.find('focus0Bpl')
             u = 'http://www.focus.pl/' + url[u + 11:]
             u = u.replace('0C', '/')
             u = u.replace('A', '')
-            u = u.replace ('0E','-')
+            u = u.replace('0E', '-')
             u = u.replace('/nc/1//story01.htm', '/do-druku/1')
         else:
-            u = url.replace('/nc/1','/do-druku/1')
+            u = url.replace('/nc/1', '/do-druku/1')
         return u
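Note: print_version above undoes feedsportal's URL escaping by plain string substitution ('0C' encodes '/', '0E' encodes '-', stray 'A' characters are padding). A standalone sketch of the same decoding, with an invented sample URL for illustration only:

def decode_feedsportal_focus(url):
    # Mirrors the substitutions in print_version above; the input
    # below is made up and only illustrates the shape of such URLs.
    start = url.find('focus0Bpl')
    u = 'http://www.focus.pl/' + url[start + 11:]
    for enc, dec in (('0C', '/'), ('A', ''), ('0E', '-')):
        u = u.replace(enc, dec)
    return u.replace('/nc/1//story01.htm', '/do-druku/1')

print(decode_feedsportal_focus(
    'http://focus.pl.feedsportal.com/c/32992/f/532693/s/0/focus0Bpl0Cnauka0Eprzyklad/nc/1//story01.htm'))
# -> http://www.focus.pl/nauka-przyklad/do-druku/1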
@@ -1,104 +1,107 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class Gazeta_Wyborcza(BasicNewsRecipe):
     title = u'Gazeta Wyborcza'
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     language = 'pl'
-    description ='news from gazeta.pl'
-    category='newspaper'
+    description = 'news from gazeta.pl'
+    category = 'newspaper'
     publication_type = 'newspaper'
-    masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
-    INDEX='http://wyborcza.pl'
-    remove_empty_feeds= True
+    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
+    INDEX = 'http://wyborcza.pl'
+    remove_empty_feeds = True
     oldest_article = 3
     max_articles_per_feed = 100
-    remove_javascript=True
-    no_stylesheets=True
-    ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags = dict(id=['gazeta_article', 'article'])
-    remove_tags_after = dict(id='gazeta_article_share')
-    remove_tags = [dict(attrs={'class':['artReadMore', 'gazeta_article_related_new', 'txt_upl']}), dict(id=['gazeta_article_likes', 'gazeta_article_tools', 'rel', 'gazeta_article_tags', 'gazeta_article_share', 'gazeta_article_brand', 'gazeta_article_miniatures'])]
-
+    remove_javascript = True
+    no_stylesheets = True
+    remove_tags_before = dict(id='k0')
+    remove_tags_after = dict(id='banP4')
+    remove_tags = [dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
     feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
              (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
              (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
              (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
-             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
-             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
-             #(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
-             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
-             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
-             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
-             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
-             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
-             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
-             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
-             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss')
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
              ]
 
     def skip_ad_pages(self, soup):
-        tag=soup.find(name='a', attrs={'class':'btn'})
+        tag = soup.find(name='a', attrs={'class': 'btn'})
         if tag:
-            new_soup=self.index_to_soup(tag['href'], raw=True)
+            new_soup = self.index_to_soup(tag['href'], raw=True)
             return new_soup
 
+
     def append_page(self, soup, appendtag):
-        loop=False
-        tag = soup.find('div', attrs={'id':'Str'})
-        if appendtag.find('div', attrs={'id':'Str'}):
-            nexturl=tag.findAll('a')
-            appendtag.find('div', attrs={'id':'Str'}).extract()
-            loop=True
+        loop = False
+        tag = soup.find('div', attrs={'id': 'Str'})
+        if appendtag.find('div', attrs={'id': 'Str'}):
+            nexturl = tag.findAll('a')
+            appendtag.find('div', attrs={'id': 'Str'}).extract()
+            loop = True
         if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
-           loop=False
+           loop = False
           for link in nexturl:
               if u'następne' in link.string:
-                  url= self.INDEX + link['href']
+                  url = self.INDEX + link['href']
                  soup2 = self.index_to_soup(url)
                  pagetext = soup2.find(id='artykul')
                  pos = len(appendtag.contents)
                  appendtag.insert(pos, pagetext)
-                 tag = soup2.find('div', attrs={'id':'Str'})
-                 nexturl=tag.findAll('a')
-                 loop=True
+                 tag = soup2.find('div', attrs={'id': 'Str'})
+                 nexturl = tag.findAll('a')
+                 loop = True
 
     def gallery_article(self, appendtag):
-        tag=appendtag.find(id='container_gal')
+        tag = appendtag.find(id='container_gal')
         if tag:
-            nexturl=appendtag.find(id='gal_btn_next').a['href']
+            nexturl = appendtag.find(id='gal_btn_next').a['href']
             appendtag.find(id='gal_navi').extract()
             while nexturl:
-                soup2=self.index_to_soup(nexturl)
-                pagetext=soup2.find(id='container_gal')
-                nexturl=pagetext.find(id='gal_btn_next')
+                soup2 = self.index_to_soup(nexturl)
+                pagetext = soup2.find(id='container_gal')
+                nexturl = pagetext.find(id='gal_btn_next')
                 if nexturl:
-                    nexturl=nexturl.a['href']
+                    nexturl = nexturl.a['href']
                 pos = len(appendtag.contents)
                 appendtag.insert(pos, pagetext)
-                rem=appendtag.find(id='gal_navi')
+                rem = appendtag.find(id='gal_navi')
                 if rem:
                     rem.extract()
 
     def preprocess_html(self, soup):
         if soup.find(attrs={'class': 'piano_btn_1'}):
             return None
         else:
             self.append_page(soup, soup.body)
             if soup.find(id='container_gal'):
                 self.gallery_article(soup.body)
             return soup
 
     def print_version(self, url):
         if 'http://wyborcza.biz/biznes/' not in url:
             return url
         if url.count('rss.feedsportal.com'):
             u = url.find('wyborcza0Bpl')
             u = 'http://www.wyborcza.pl/' + url[u + 11:]
             u = u.replace('0C', '/')
             u = u.replace('A', '')
             u = u.replace('0E', '-')
             u = u.replace('0H', ',')
             u = u.replace('0I', '_')
             u = u.replace('0B', '.')
             u = u.replace('/1,', '/2029020,')
             u = u.replace('/story01.htm', '')
             print(u)
             return u
         elif 'http://wyborcza.pl/1' in url:
             return url.replace('http://wyborcza.pl/1', 'http://wyborcza.pl/2029020')
         else:
             return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
 
     def get_cover_url(self):
         soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
-        cover=soup.find(id='GWmini2')
-        soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])
-        self.cover_url='http://wyborcza.pl' + soup.img['src']
+        cover = soup.find(id='GWmini2')
+        soup = self.index_to_soup('http://wyborcza.pl/' + cover.contents[3].a['href'])
+        self.cover_url = 'http://wyborcza.pl' + soup.img['src']
         return getattr(self, 'cover_url', self.cover_url)
@@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class FocusRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
     version = 1
 
@@ -34,16 +34,20 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
     keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'}))
 
     remove_tags =[]
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'editorPicks'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
     remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -67,3 +71,4 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
 
         return start + '/' + index + '?print=tak'
 
+
@@ -1,34 +1,55 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image
+
 class tvn24(BasicNewsRecipe):
     title = u'TVN24'
     oldest_article = 7
     max_articles_per_feed = 100
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
     category = 'news'
     language = 'pl'
-    #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
-    cover_url= 'http://www.userlogos.org/files/logos/Struna/TVN24.jpg'
-    extra_css = 'ul {list-style:none;} \
-                 li {list-style:none; float: left; margin: 0 0.15em;} \
-                 h2 {font-size: medium} \
-                 .date60m {float: left; margin: 0 10px 0 5px;}'
+    masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
     use_embedded_content = False
     ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags=[dict(name='h1', attrs={'class':['size30 mt10 pb10', 'size38 mt10 pb15']}), dict(name='figure', attrs={'class':'articleMainPhoto articleMainPhotoWide'}), dict(name='article', attrs={'class':['mb20', 'mb20 textArticleDefault']}), dict(name='ul', attrs={'class':'newsItem'})]
-    remove_tags = [dict(name='aside', attrs={'class':['innerArticleModule onRight cols externalContent', 'innerArticleModule center']}), dict(name='div', attrs={'class':['thumbsGallery', 'articleTools', 'article right rd7', 'heading', 'quizContent']}), dict(name='a', attrs={'class':'watchMaterial text'}), dict(name='section', attrs={'class':['quiz toCenter', 'quiz toRight']})]
 
-    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
-             (u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
+    keep_only_tags=[
+    #        dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
+            dict(name='div', attrs={'class':'mainContainer'}),
+    #        dict(name='p'),
+    #        dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
+    ]
+    remove_tags=[
+            dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
+            dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
+            dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
+            dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
+    ]
+    remove_tags_after=[dict(name='li', attrs={'class':'share'})]
+    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
+    #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
 
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        tag = soup.find(name='ul', attrs={'class':'newsItem'})
-        if tag:
-            tag.name='div'
-            tag.li.name='div'
-        return soup
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
 
+    def postprocess_html(self, soup, first):
+        #process all the images
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            if img < 0:
+                raise RuntimeError('Out of memory')
+            img.type = "GrayscaleType"
+            img.save(iurl)
+        return soup
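Note: postprocess_html above converts every downloaded image to grayscale via calibre's ImageMagick wrapper, a common recipe trick to shrink downloads for e-ink devices. An equivalent sketch using Pillow instead of calibre.utils.magick (an illustration, not the recipe's actual code):

from PIL import Image

def grayscale_in_place(path):
    # Same effect as img.type = "GrayscaleType" in the recipe above:
    # convert to 8-bit grayscale and overwrite the file.
    Image.open(path).convert('L').save(path)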
@@ -3,6 +3,8 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, matek09, matek09@gmail.com'
 __copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
+__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
+
 
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
@@ -11,7 +13,7 @@ class Wprost(BasicNewsRecipe):
     EDITION = 0
     FIND_LAST_FULL_ISSUE = True
     EXCLUDE_LOCKED = True
-    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
+    ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
 
     title = u'Wprost'
     __author__ = 'matek09'
@@ -20,6 +22,7 @@ class Wprost(BasicNewsRecipe):
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
+    recursions = 0
 
     remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
@@ -35,13 +38,15 @@ class Wprost(BasicNewsRecipe):
         (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
         (re.compile(r'\<table .*?\>'), lambda match: ''),
         (re.compile(r'\<tr>'), lambda match: ''),
-        (re.compile(r'\<td .*?\>'), lambda match: '')]
+        (re.compile(r'\<td .*?\>'), lambda match: ''),
+        (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
 
     remove_tags =[]
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
 
+
     extra_css = '''
         .div-header {font-size: x-small; font-weight: bold}
     '''
@@ -59,27 +64,26 @@ class Wprost(BasicNewsRecipe):
         a = 0
         if self.FIND_LAST_FULL_ISSUE:
             ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
-            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         else:
-            a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = soup.find('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         self.EDITION = a['href'].replace('/tygodnik/?I=', '')
         self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
         self.cover_url = a.img['src']
 
-
 
     def parse_index(self):
         self.find_last_issue()
         soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
         feeds = []
-        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
+        for main_block in soup.findAll(attrs={'id': 'content-main-column-element-content'}):
             articles = list(self.find_articles(main_block))
             if len(articles) > 0:
-                section = self.tag_to_string(main_block)
+                section = self.tag_to_string(main_block.find('h3'))
                 feeds.append((section, articles))
         return feeds
 
     def find_articles(self, main_block):
-        for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
+        for a in main_block.findAll('a'):
             if a.name in "td":
                 break
             if self.EXCLUDE_LOCKED & self.is_blocked(a):
@@ -91,3 +95,4 @@ class Wprost(BasicNewsRecipe):
               'description' : ''
             }
 
+
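Note: the parse_index changes above lean on the contract BasicNewsRecipe expects: parse_index must return a list of (section_title, articles) pairs, where each article is a dict with at least 'title' and 'url'. A minimal sketch of that contract (the section name and URL are placeholders):

from calibre.web.feeds.news import BasicNewsRecipe

class MinimalIndexRecipe(BasicNewsRecipe):
    title = u'Minimal parse_index example'

    def parse_index(self):
        # Each feed is (section_title, list_of_article_dicts); 'title'
        # and 'url' are required, 'date' and 'description' optional.
        articles = [{'title': 'Placeholder article',
                     'url': 'http://example.com/story',
                     'date': '', 'description': ''}]
        return [('Placeholder section', articles)]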
@@ -901,6 +901,9 @@ class Device(DeviceConfig, DevicePlugin):
         for d in drives:
             try:
                 winutil.eject_drive(bytes(d)[0])
+            except Exception as e:
+                try:
+                    prints(as_unicode(e))
                 except:
                     pass
 
@@ -150,8 +150,15 @@ class EPUBInput(InputFormatPlugin):
         from calibre import walk
         from calibre.ebooks import DRMError
         from calibre.ebooks.metadata.opf2 import OPF
-        zf = ZipFile(stream)
-        zf.extractall(os.getcwdu())
+        try:
+            zf = ZipFile(stream)
+            zf.extractall(os.getcwdu())
+        except:
+            log.exception('EPUB appears to be invalid ZIP file, trying a'
+                    ' more forgiving ZIP parser')
+            from calibre.utils.localunzip import extractall
+            stream.seek(0)
+            extractall(stream)
         encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
         opf = self.find_opf()
         if opf is None:
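Note: the hunk above follows a try-strict-then-fall-back shape: the fast ZipFile parser runs first, and only on failure is the stream rewound and handed to the slower, forgiving pure-Python parser. A minimal sketch of the same shape, mirroring the calls visible in the hunk (not the plugin itself):

import zipfile

def extract_epub(stream):
    # Fast path: the strict parser, extracting into the current directory.
    try:
        zipfile.ZipFile(stream).extractall()
        return
    except Exception:
        pass
    # Slow path: rewind and retry with calibre's forgiving parser,
    # exactly as EPUBInput does above.
    stream.seek(0)
    from calibre.utils.localunzip import extractall
    extractall(stream)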
@@ -10,6 +10,7 @@ from cStringIO import StringIO
 from contextlib import closing
 
 from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
+from calibre.utils.localunzip import LocalZipFile
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPF
@@ -105,6 +106,9 @@ class OCFReader(OCF):
 
 class OCFZipReader(OCFReader):
     def __init__(self, stream, mode='r', root=None):
+        if isinstance(stream, (LocalZipFile, ZipFile)):
+            self.archive = stream
+        else:
             try:
                 self.archive = ZipFile(stream, mode=mode)
             except BadZipfile:
@@ -119,8 +123,18 @@ class OCFZipReader(OCFReader):
         super(OCFZipReader, self).__init__()
 
     def open(self, name, mode='r'):
+        if isinstance(self.archive, LocalZipFile):
+            return self.archive.open(name)
         return StringIO(self.archive.read(name))
 
+
+def get_zip_reader(stream, root=None):
+    try:
+        zf = ZipFile(stream, mode='r')
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
+    return OCFZipReader(zf, root=root)
+
 class OCFDirReader(OCFReader):
     def __init__(self, path):
         self.root = path
@@ -184,7 +198,12 @@ def render_cover(opf, opf_path, zf, reader=None):
 def get_cover(opf, opf_path, stream, reader=None):
     raster_cover = opf.raster_cover
     stream.seek(0)
-    zf = ZipFile(stream)
+    try:
+        zf = ZipFile(stream)
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
+
     if raster_cover:
         base = posixpath.dirname(opf_path)
         cpath = posixpath.normpath(posixpath.join(base, raster_cover))
@@ -207,7 +226,7 @@ def get_cover(opf, opf_path, stream, reader=None):
 def get_metadata(stream, extract_cover=True):
     """ Return metadata as a :class:`Metadata` object """
     stream.seek(0)
-    reader = OCFZipReader(stream)
+    reader = get_zip_reader(stream)
     mi = reader.opf.to_book_metadata()
     if extract_cover:
         try:
@@ -232,7 +251,7 @@ def _write_new_cover(new_cdata, cpath):
 
 def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
     stream.seek(0)
-    reader = OCFZipReader(stream, root=os.getcwdu())
+    reader = get_zip_reader(stream, root=os.getcwdu())
     raster_cover = reader.opf.raster_cover
     mi = MetaInformation(mi)
     new_cdata = None
@@ -283,6 +302,10 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
         reader.opf.timestamp = mi.timestamp
 
     newopf = StringIO(reader.opf.render())
-    safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
-            extra_replacements=replacements)
+    if isinstance(reader.archive, LocalZipFile):
+        reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
+    else:
+        safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
     try:
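Note: after these hunks every metadata entry point builds its reader through get_zip_reader, so callers no longer care whether the strict ZipFile or the forgiving LocalZipFile ended up parsing the container. A short usage sketch (the file name is hypothetical):

from calibre.ebooks.metadata.epub import get_metadata

# Works even when the EPUB's ZIP central directory is damaged, because
# get_metadata now goes through get_zip_reader() internally.
with open('some_book.epub', 'rb') as stream:  # hypothetical path
    mi = get_metadata(stream)
    print(mi.title, mi.authors)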
@@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form):
 
     def initialize(self, name, db):
         '''
 
         CheckBoxControls (c_type: check_box):
-            ['generate_titles','generate_series','generate_genres',
-            'generate_recently_added','generate_descriptions','include_hr']
+            ['cross_reference_authors',
+            'generate_titles','generate_series','generate_genres',
+            'generate_recently_added','generate_descriptions',
+            'include_hr']
         ComboBoxControls (c_type: combo_box):
             ['exclude_source_field','header_note_source_field',
             'merge_source_field']
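Note: the docstring's inventory of widget names grouped by control type is what lets the catalog widget save and restore options generically instead of naming each control by hand. A simplified sketch of that dispatch pattern (the helper and the settings mapping are illustrative, not calibre's actual code):

CONTROLS = [('cross_reference_authors', 'check_box'),
            ('generate_titles', 'check_box'),
            ('exclude_source_field', 'combo_box')]

def restore_settings(widget, saved):
    # Generic restore: dispatch on control type; `saved` maps
    # option name -> previously stored value.
    for name, c_type in CONTROLS:
        control = getattr(widget, name)
        value = saved.get(name)
        if c_type == 'check_box':
            control.setChecked(bool(value))
        elif c_type == 'combo_box':
            control.setCurrentIndex(max(0, control.findText(value or '')))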
@@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
       <string>Other options</string>
      </property>
      <layout class="QGridLayout" name="gridLayout_3">
-      <item row="2" column="1">
+      <item row="3" column="1">
       <layout class="QHBoxLayout" name="merge_with_comments_hl">
        <item>
         <widget class="QComboBox" name="merge_source_field">
@@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
        </item>
       </layout>
      </item>
-     <item row="2" column="0">
+     <item row="3" column="0">
      <widget class="QLabel" name="label_9">
       <property name="minimumSize">
        <size>
@@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
       </property>
      </widget>
     </item>
-    <item row="0" column="0">
+    <item row="1" column="0">
     <widget class="QLabel" name="label_4">
      <property name="minimumSize">
       <size>
@@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
      </property>
     </widget>
    </item>
-   <item row="0" column="1">
+   <item row="1" column="1">
    <layout class="QHBoxLayout" name="replace_cover_hl">
     <item>
      <widget class="QRadioButton" name="generate_new_cover">
@@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
    </item>
   </layout>
  </item>
- <item row="1" column="0">
+ <item row="2" column="0">
  <widget class="QLabel" name="label_3">
   <property name="text">
    <string>E&xtra Description note:</string>
@@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
   </property>
  </widget>
 </item>
-<item row="1" column="1">
+<item row="2" column="1">
 <layout class="QHBoxLayout" name="horizontalLayout">
  <item>
   <widget class="QComboBox" name="header_note_source_field">
@@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
  </item>
  </layout>
 </item>
+<item row="0" column="0">
+ <widget class="QLabel" name="label_2">
+  <property name="text">
+   <string>Author cross-references:</string>
+  </property>
+  <property name="alignment">
+   <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+  </property>
+ </widget>
+</item>
+<item row="0" column="1">
+ <layout class="QHBoxLayout" name="cross_references_hl">
+  <item>
+   <widget class="QCheckBox" name="cross_reference_authors">
+    <property name="text">
+     <string>For books with multiple authors, list each author separately</string>
+    </property>
+   </widget>
+  </item>
+ </layout>
+</item>
 </layout>
 </widget>
 </item>
@@ -6,102 +6,19 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonDEKindleStore(StorePlugin):
+class AmazonDEKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale0a-21'}
-        store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
-                      '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
-                      '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'charhale0a-21'}
+    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
+    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
+                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
+    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('von '):
-                    author = author[4:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-
-                yield s
-
-    def get_details(self, search_result, timeout):
     drm_search_text = u'Gleichzeitige Verwendung von Geräten'
     drm_free_text = u'Keine Einschränkung'
-        url = 'http://amazon.de/dp/'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonESKindleStore(StorePlugin):
+class AmazonESKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale09-21'}
-        store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'charhale09-21'}
+    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
+    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
+                          '&linkCode=ur2&camp=3626&creative=24790')
+    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,79 +6,16 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonFRKindleStore(StorePlugin):
+class AmazonFRKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
     aff_id = {'tag': 'charhale-21'}
     store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
 
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
     search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonITKindleStore(StorePlugin):
+class AmazonITKindleStore(AmazonUKKindleStore):
     '''
    For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'httpcharles07-21'}
-        store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'httpcharles07-21'}
+    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
+    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=3370&creative=23322')
+    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('di '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,8 +6,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
 import re
 
+from contextlib import closing
 from lxml import html
 
 from PyQt4.Qt import QUrl
@@ -18,57 +19,80 @@ from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 
 class AmazonUKKindleStore(StorePlugin):
+    aff_id = {'tag': 'calcharles-21'}
+    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
+                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
+                  'linkCode=ur2&camp=1634&creative=19450')
+    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=1634&creative=6738')
+    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
+
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
     def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'calcharles-21'}
-        store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
-
+        store_link = self.store_link % self.aff_id
         if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
        open_url(QUrl(store_link))
 
     def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
 
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = './/div[@class="image"]/a[1]'
             cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
                     break
 
                 # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
+                # put in results for non Kindle books (author pages). Se we need
                 # to explicitly check if the item is a Kindle book and ignore it
                 # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
                     continue
 
                 # We must have an asin otherwise we can't easily reference the
                 # book later.
-                asin = ''.join(data.xpath("@name"))
+                asin_href = None
+                asin_a = data.xpath(asin_xpath)
+                if asin_a:
+                    asin_href = asin_a[0].get('href', '')
+                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
+                    if m:
+                        asin = m.group('asin')
+                    else:
+                        continue
+                else:
+                    continue
 
                 cover_url = ''.join(data.xpath(cover_xpath))
 
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    author = author.split('by ', 1)[1].split(" (")[0]
+                except:
+                    pass
 
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('by '):
-                    author = author[3:]
+                price = ''.join(data.xpath(price_xpath))
 
                 counter -= 1
 
@@ -78,37 +102,10 @@ class AmazonUKKindleStore(StorePlugin):
                 s.author = author.strip()
                 s.price = price.strip()
                 s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
                 s.formats = 'Kindle'
 
                 yield s
 
     def get_details(self, search_result, timeout):
-        # We might already have been called.
-        if search_result.drm:
-            return
-
-        url = 'http://amazon.co.uk/dp/'
-        drm_search_text = u'Simultaneous Device Usage'
-        drm_free_text = u'Unlimited'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if not search_result.author:
-                search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()'))
-            is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])')
-            if is_kindle:
-                search_result.formats = 'Kindle'
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
-
-
+        pass
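Note: with the refactor above, the country stores share all scraping logic through AmazonUKKindleStore and differ only in class attributes (aff_id, store_link, store_link_details, search_url). A minimal sketch of that pattern, with placeholder URLs:

class BaseStore(object):
    # Shared behaviour reads its configuration from class attributes.
    aff_id = {'tag': 'base-tag'}
    store_link = 'http://example.com/store?tag=%(tag)s'  # placeholder

    def open_store(self):
        return self.store_link % self.aff_id

class CountryStore(BaseStore):
    # A per-country plugin overrides the data, not the logic.
    aff_id = {'tag': 'country-tag'}
    store_link = 'http://example.de/store?tag=%(tag)s'  # placeholder

print(CountryStore().open_store())
# -> http://example.de/store?tag=country-tag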
@ -25,7 +25,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
        url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
                       'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
                       'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')

        if external or self.config.get('open_external', False):
            if detail_item:

@ -41,33 +41,38 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.libri.de/shop/action/quickSearch?facetNodeId=6'
               '&mainsearchSubmit=Los!&searchString=' + urllib2.quote(query))
        url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
               + urllib2.quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "item")]'):
            for data in doc.xpath('//div[contains(@class, "articlecontainer")]'):
                if counter <= 0:
                    break

                details = data.xpath('./div[@class="beschreibungContainer"]')
                details = data.xpath('./div[@class="articleinfobox"]')
                if not details:
                    continue
                details = details[0]
                id = ''.join(details.xpath('./div[@class="text"]/a/@name')).strip()
                if not id:
                id_ = ''.join(details.xpath('./a/@name')).strip()
                if not id_:
                    continue
                cover_url = ''.join(details.xpath('.//div[@class="coverImg"]/a/img/@src'))
                title = ''.join(details.xpath('./div[@class="text"]/span[@class="titel"]/a/text()')).strip()
                author = ''.join(details.xpath('./div[@class="text"]/span[@class="author"]/text()')).strip()
                title = ''.join(details.xpath('.//a[@class="su1_c_l_titel"]/text()')).strip()

                author = ''.join(details.xpath('.//div[@class="author"]/text()')).strip()
                if author.startswith('von'):
                    author = author[4:]

                pdf = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "pdf")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "pdf")]/text())')
                epub = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "epub")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "epub")]/text())')
                mobi = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "mobipocket")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')

                cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
                price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()

                counter -= 1

@ -78,7 +83,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
                    s.author = author.strip()
                    s.price = price
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.detail_item = id
                    s.detail_item = id_
                    formats = []
                    if epub:
                        formats.append('ePub')
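The url_details template above is presumably expanded with the store's article id via str.format; a minimal sketch with a made-up artiId:

# Sketch: how the zanox deep-link template above is presumably filled in
# for a detail page; '12345' is a made-up artiId.
url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
               'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')
print(url_details.format('12345'))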
@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin):
                help = _('Title of generated catalog used as title in metadata.\n'
                "Default: '%default'\n"
                "Applies to: AZW3, ePub, MOBI output formats")),
          Option('--cross-reference-authors',
                default=False,
                dest='cross_reference_authors',
                action = 'store_true',
                help=_("Create cross-references in Authors section for books with multiple authors.\n"
                "Default: '%default'\n"
                "Applies to: AZW3, ePub, MOBI output formats")),
          Option('--debug-pipeline',
                default=None,
                dest='debug_pipeline',

@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin):
                help=_("Regex describing tags to exclude as genres.\n"
                "Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
                "Applies to: AZW3, ePub, MOBI output formats")),

          Option('--exclusion-rules',
                default="(('Catalogs','Tags','Catalog'),)",
                dest='exclusion_rules',

@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin):
                "When multiple rules are defined, all rules will be applied.\n"
                "Default: \n" + '"' + '%default' + '"' + "\n"
                "Applies to AZW3, ePub, MOBI output formats")),

          Option('--generate-authors',
                default=False,
                dest='generate_authors',

@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin):
            build_log.append(" opts:")
            for key in keys:
                if key in ['catalog_title','author_clip','connected_kindle','creator',
                           'description_clip','exclude_book_marker','exclude_genre',
                           'exclude_tags','exclusion_rules', 'fmt',
                           'cross_reference_authors','description_clip','exclude_book_marker',
                           'exclude_genre','exclude_tags','exclusion_rules', 'fmt',
                           'header_note_source_field','merge_comments_rule',
                           'output_profile','prefix_rules','read_book_marker',
                           'search_text','sort_by','sort_descriptions_by_author','sync',

@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.metadata import author_to_author_sort
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, is_date_undefined, now as nowf
from calibre.utils.filenames import ascii_text
from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile

@ -109,6 +110,7 @@ class CatalogBuilder(object):
        self.stylesheet = stylesheet
        self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
        self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
        self.content_dir = os.path.join(self.catalog_path, "content")
        self.excluded_tags = self.get_excluded_tags()
        self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
                                                 _opts.output_profile and

@ -127,12 +129,13 @@ class CatalogBuilder(object):
        self.books_by_title = None
        self.books_by_title_no_series_prefix = None
        self.books_to_catalog = None
        self.content_dir = os.path.join(self.catalog_path, "content")
        self.current_step = 0.0
        self.error = []
        self.generate_recently_read = False
        self.genres = []
        self.genre_tags_dict = None
        self.genre_tags_dict = \
            self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
            if self.opts.generate_genres else None
        self.html_filelist_1 = []
        self.html_filelist_2 = []
        self.merge_comments_rule = dict(zip(['field','position','hr'],

@ -505,7 +508,7 @@ class CatalogBuilder(object):
        if not os.path.isdir(images_path):
            os.makedirs(images_path)

    def detect_author_sort_mismatches(self):
    def detect_author_sort_mismatches(self, books_to_test):
        """ Detect author_sort mismatches.

        Sort by author, look for inconsistencies in author_sort among

@ -513,17 +516,18 @@ class CatalogBuilder(object):
        annoyance for EPUB.

        Inputs:
         self.books_to_catalog (list): list of books to catalog
         books_by_author (list): list of books to test, possibly unsorted

        Output:
         self.books_by_author (list): sorted by author
         (none)

        Exceptions:
         AuthorSortMismatchException: author_sort mismatch detected
        """

        self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
        authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
        books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)

        authors = [(record['author'], record['author_sort']) for record in books_by_author]
        current_author = authors[0]
        for (i,author) in enumerate(authors):
            if author != current_author and i:

@ -701,6 +705,7 @@ class CatalogBuilder(object):
    def fetch_books_by_author(self):
        """ Generate a list of books sorted by author.

        For books with multiple authors, relist book with additional authors.
        Sort the database by author. Report author_sort inconsistencies as warning when
        building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
        to self.authors.

@ -720,25 +725,30 @@ class CatalogBuilder(object):

        self.update_progress_full_step(_("Sorting database"))

        self.detect_author_sort_mismatches()
        books_by_author = list(self.books_to_catalog)
        self.detect_author_sort_mismatches(books_by_author)
        if self.opts.cross_reference_authors:
            books_by_author = self.relist_multiple_authors(books_by_author)

        #books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)

        # Sort authors using sort_key to normalize accented letters
        # Determine the longest author_sort length before sorting
        asl = [i['author_sort'] for i in self.books_by_author]
        asl = [i['author_sort'] for i in books_by_author]
        las = max(asl, key=len)
        self.books_by_author = sorted(self.books_to_catalog,

        books_by_author = sorted(books_by_author,
            key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))

        if self.DEBUG and self.opts.verbose:
            tl = [i['title'] for i in self.books_by_author]
            tl = [i['title'] for i in books_by_author]
            lt = max(tl, key=len)
            fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
            print(fs.format('','Title','Author','Series'))
            for i in self.books_by_author:
            for i in books_by_author:
                print(fs.format('', i['title'],i['author_sort'],i['series']))

        # Build the unique_authors set from existing data
        authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
        authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]

        # authors[] contains a list of all book authors, with multiple entries for multiple books by author
        # authors[]: (([0]:friendly [1]:sort))

@ -776,6 +786,7 @@ class CatalogBuilder(object):
                             author[2])).encode('utf-8'))

        self.authors = unique_authors
        self.books_by_author = books_by_author
        return True

    def fetch_books_by_title(self):

@ -863,15 +874,15 @@ class CatalogBuilder(object):
            this_title['series_index'] = 0.0

        this_title['title_sort'] = self.generate_sort_title(this_title['title'])
        if 'authors' in record:
            # from calibre.ebooks.metadata import authors_to_string
            # return authors_to_string(self.authors)

        if 'authors' in record:
            this_title['authors'] = record['authors']
            # Synthesize author attribution from authors list
            if record['authors']:
                this_title['author'] = " & ".join(record['authors'])
            else:
                this_title['author'] = 'Unknown'
                this_title['author'] = _('Unknown')
                this_title['authors'] = [this_title['author']]

        if 'author_sort' in record and record['author_sort'].strip():
            this_title['author_sort'] = record['author_sort']

@ -1093,7 +1104,7 @@ class CatalogBuilder(object):

        self.bookmarked_books = bookmarks

    def filter_db_tags(self):
    def filter_db_tags(self, max_len):
        """ Remove excluded tags from data set, return normalized genre list.

        Filter all db tags, removing excluded tags supplied in opts.

@ -1101,13 +1112,13 @@ class CatalogBuilder(object):
        tags are flattened to alphanumeric ascii_text.

        Args:
         (none)
         max_len: maximum length of normalized tag to fit within OS constraints

        Return:
         genre_tags_dict (dict): dict of filtered, normalized tags in data set
        """

        def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
        def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
            def _next_tag(sorted_tags):
                for (i, tag) in enumerate(sorted_tags):
                    if i < len(tags) - 1:

@ -1126,6 +1137,31 @@ class CatalogBuilder(object):
                    out_str = ' ' * (indent + 1)
            return ans + out_str

        def _normalize_tag(tag, max_len):
            """ Generate an XHTML-legal anchor string from tag.

            Parse tag for non-ascii, convert to unicode name.

            Args:
             tags (str): tag name possible containing symbols
             max_len (int): maximum length of tag

            Return:
             normalized (str): unicode names substituted for non-ascii chars,
             clipped to max_len
            """

            normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
            if re.search('\W',normalized):
                normalized = ''
                for c in massaged:
                    if re.search('\W',c):
                        normalized += self.generate_unicode_name(c)
                    else:
                        normalized += c
            shortened = shorten_components_to(max_len, [normalized])[0]
            return shortened

        # Entry point
        normalized_tags = []
        friendly_tags = []

@ -1144,7 +1180,7 @@ class CatalogBuilder(object):
            if tag == ' ':
                continue

            normalized_tags.append(self.normalize_tag(tag))
            normalized_tags.append(_normalize_tag(tag, max_len))
            friendly_tags.append(tag)

        genre_tags_dict = dict(zip(friendly_tags,normalized_tags))

@ -1941,8 +1977,6 @@ class CatalogBuilder(object):

        self.update_progress_full_step(_("Genres HTML"))

        self.genre_tags_dict = self.filter_db_tags()

        # Extract books matching filtered_tags
        genre_list = []
        for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):

@ -2024,10 +2058,11 @@ class CatalogBuilder(object):
                    books_by_current_author += 1

            # Write the genre book list as an article
            titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
            outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
            titles_spanned = self.generate_html_by_genre(genre,
                                                         True if index==0 else False,
                                                         genre_tag_set[genre],
                                                         "%s/Genre_%s.html" % (self.content_dir,
                                                                               genre))
                                                         outfile)

            tag_file = "content/Genre_%s.html" % genre
            master_genre_list.append({'tag':genre,

@ -2549,7 +2584,7 @@ class CatalogBuilder(object):
            for (i, tag) in enumerate(sorted(book.get('tags', []))):
                aTag = Tag(_soup,'a')
                if self.opts.generate_genres:
                    aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
                    aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
                aTag.insert(0,escape(NavigableString(tag)))
                genresTag.insert(gtc, aTag)
                gtc += 1

@ -4603,28 +4638,6 @@ class CatalogBuilder(object):

        return merged

    def normalize_tag(self, tag):
        """ Generate an XHTML-legal anchor string from tag.

        Parse tag for non-ascii, convert to unicode name.

        Args:
         tags (str): tag name possible containing symbols

        Return:
         normalized (str): unicode names substituted for non-ascii chars
        """

        normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
        if re.search('\W',normalized):
            normalized = ''
            for c in massaged:
                if re.search('\W',c):
                    normalized += self.generate_unicode_name(c)
                else:
                    normalized += c
        return normalized

    def process_exclusions(self, data_set):
        """ Filter data_set based on exclusion_rules.

@ -4697,6 +4710,43 @@ class CatalogBuilder(object):
        else:
            return data_set

    def relist_multiple_authors(self, books_by_author):
        """ Create multiple entries for books with multiple authors

        Given a list of books by author, scan list for books with multiple
        authors. Add a cloned copy of the book per additional author.

        Args:
         books_by_author (list): book list possibly containing books
         with multiple authors

        Return:
         (list): books_by_author with additional cloned entries for books with
         multiple authors
        """

        multiple_author_books = []

        # Find the multiple author books
        for book in books_by_author:
            if len(book['authors']) > 1:
                multiple_author_books.append(book)

        for book in multiple_author_books:
            cloned_authors = list(book['authors'])
            for x, author in enumerate(book['authors']):
                if x:
                    first_author = cloned_authors.pop(0)
                    cloned_authors.append(first_author)
                    new_book = deepcopy(book)
                    new_book['author'] = ' & '.join(cloned_authors)
                    new_book['authors'] = list(cloned_authors)
                    asl = [author_to_author_sort(auth) for auth in cloned_authors]
                    new_book['author_sort'] = ' & '.join(asl)
                    books_by_author.append(new_book)

        return books_by_author

    def update_progress_full_step(self, description):
        """ Update calibre's job status UI.
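A standalone trace of the rotation in relist_multiple_authors above, with book records reduced to the fields the method touches and author_to_author_sort replaced by a trivial stand-in so the sketch runs outside calibre:

# Standalone trace of the author-rotation logic; author_to_author_sort is
# faked and the book dict is a minimal stand-in for calibre's records.
from copy import deepcopy

author_to_author_sort = lambda a: ', '.join(reversed(a.split()))  # stand-in

books = [{'author': 'A One & B Two', 'authors': ['A One', 'B Two'],
          'author_sort': 'One, A & Two, B', 'title': 'Demo'}]

for book in [b for b in books if len(b['authors']) > 1]:
    cloned_authors = list(book['authors'])
    for x, author in enumerate(book['authors']):
        if x:  # each author after the first gets a rotated clone
            cloned_authors.append(cloned_authors.pop(0))
            new_book = deepcopy(book)
            new_book['author'] = ' & '.join(cloned_authors)
            new_book['authors'] = list(cloned_authors)
            new_book['author_sort'] = ' & '.join(
                author_to_author_sort(a) for a in cloned_authors)
            books.append(new_book)

for b in books:
    print(b['author'])  # 'A One & B Two', then the clone 'B Two & A One'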
153  src/calibre/utils/fonts/sfnt/cff.py  Normal file
@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack_from, unpack

from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont

# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf

class CFF(object):

    def __init__(self, raw):
        (self.major_version, self.minor_version, self.header_size,
         self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')
        # Read Top Dict
        self.top_index = Index(raw, offset)
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos
        print (self.strings[len(cff_standard_strings):])

class Index(list):

    def __init__(self, raw, offset):
        list.__init__(self)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if count > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                        for i in xrange(offset, 3*(count+2), 3)]
            else:
                fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
                fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
                offsets = unpack_from(fmt, raw, offset)
            offset += self.offset_size * (count+1) - 1

            for i in xrange(len(offsets)-1):
                off, noff = offsets[i:i+2]
                obj = raw[offset+off:offset+noff]
                self.append(obj)

            self.pos = offset + offsets[-1]

class Strings(Index):

    def __init__(self, raw, offset):
        super(Strings, self).__init__(raw, offset)
        for x in reversed(cff_standard_strings):
            self.insert(0, x)

class CFFTable(UnknownTable):

    def decompile(self):
        self.cff = CFF(self.raw)

# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998

cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
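For reference, a hand-worked INDEX example (offsets in a CFF INDEX are 1-based and the object data starts right after the offset array, per the CFF spec linked above); the slicing below mirrors what Index computes once its running offset has absorbed the 1-based convention:

# Hand-built CFF INDEX: count=2, offSize=1, offsets 1,3,6, data b'abcde'.
# Object i spans data[off-1:noff-1] relative to the byte after the
# offset array, giving b'ab' and b'cde'.
from struct import pack

index = pack(b'>H', 2) + pack(b'>B', 1) + bytes(bytearray([1, 3, 6])) + b'abcde'
offsets = [1, 3, 6]
data_start = 2 + 1 + len(offsets)  # count field + offSize field + offset array
for off, noff in zip(offsets, offsets[1:]):
    print(index[data_start + off - 1:data_start + noff - 1])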
11  src/calibre/utils/fonts/sfnt/cff/__init__.py  Normal file
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
201  src/calibre/utils/fonts/sfnt/cff/dict_data.py  Normal file
@ -0,0 +1,201 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack

t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
t1_operand_encoding[32:247] = (247 - 32) * ["read_byte"]
t1_operand_encoding[247:251] = (251 - 247) * ["read_small_int1"]
t1_operand_encoding[251:255] = (255 - 251) * ["read_small_int2"]
t1_operand_encoding[255] = "read_long_int"

t2_operand_encoding = t1_operand_encoding[:]
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"

cff_dict_operand_encoding = t2_operand_encoding[:]
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"

real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                '.', 'E', 'E-', None, '-']

class SimpleConverter(object):

    def read(self, parent, value):
        return value

    def write(self, parent, value):
        return value

class TODO(SimpleConverter):
    pass

class Reader(dict):

    def read_byte(self, b0, data, index):
        return b0 - 139, index

    def read_small_int1(self, b0, data, index):
        b1 = ord(data[index])
        return (b0-247)*256 + b1 + 108, index+1

    def read_small_int2(self, b0, data, index):
        b1 = ord(data[index])
        return -(b0-251)*256 - b1 - 108, index+1

    def read_short_int(self, b0, data, index):
        bin = data[index] + data[index+1]
        value, = unpack(b">h", bin)
        return value, index+2

    def read_long_int(self, b0, data, index):
        bin = data[index] + data[index+1] + data[index+2] + data[index+3]
        value, = unpack(b">l", bin)
        return value, index+4

    def read_fixed_1616(self, b0, data, index):
        bin = data[index] + data[index+1] + data[index+2] + data[index+3]
        value, = unpack(b">l", bin)
        return value / 65536.0, index+4

    def read_real_number(self, b0, data, index):
        number = ''
        while True:
            b = ord(data[index])
            index = index + 1
            nibble0 = (b & 0xf0) >> 4
            nibble1 = b & 0x0f
            if nibble0 == 0xf:
                break
            number = number + real_nibbles[nibble0]
            if nibble1 == 0xf:
                break
            number = number + real_nibbles[nibble1]
        return float(number), index

class Dict(Reader):

    operand_encoding = cff_dict_operand_encoding
    TABLE = []

    def __init__(self):
        Reader.__init__(self)
        table = self.TABLE[:]
        for i in xrange(len(table)):
            op, name, arg, default, conv = table[i]
            if conv is not None:
                continue
            if arg in ("delta", "array", 'number', 'SID'):
                conv = SimpleConverter()
            else:
                raise Exception('Should not happen')
            table[i] = op, name, arg, default, conv

        self.operators = {op:(name, arg) for op, name, arg, default, conv in
                table}

    def decompile(self, strings, global_subrs, data):
        self.strings = strings
        self.global_subrs = global_subrs
        self.stack = []
        index = 0
        while index < len(data):
            b0 = ord(data[index])
            index += 1
            handler = getattr(self, self.operand_encoding[b0])
            value, index = handler(b0, data, index)
            if value is not None:
                self.stack.append(value)

    def do_operator(self, b0, data, index):
        if b0 == 12:
            op = (b0, ord(data[index]))
            index += 1
        else:
            op = b0
        operator, arg_type = self.operators[op]
        self.handle_operator(operator, arg_type)
        return None, index

    def handle_operator(self, operator, arg_type):
        if isinstance(arg_type, tuple):
            value = ()
            for i in xrange(len(arg_type)-1, -1, -1):
                arg = arg_type[i]
                arghandler = getattr(self, 'arg_' + arg)
                value = (arghandler(operator),) + value
        else:
            arghandler = getattr(self, 'arg_' + arg_type)
            value = arghandler(operator)
        self[operator] = value

    def arg_number(self, name):
        return self.stack.pop()

    def arg_SID(self, name):
        return self.strings[self.stack.pop()]

    def arg_array(self, name):
        ans = self.stack[:]
        del self.stack[:]
        return ans

    def arg_delta(self, name):
        out = []
        current = 0
        for v in self.stack:
            current = current + v
            out.append(current)
        del self.stack[:]
        return out

class TopDict(Dict):

    TABLE = [
    #opcode     name                  argument type       default    converter
    ((12, 30), 'ROS', ('SID','SID','number'), None, SimpleConverter()),
    ((12, 20), 'SyntheticBase', 'number', None, None),
    (0, 'version', 'SID', None, None),
    (1, 'Notice', 'SID', None, None),
    ((12, 0), 'Copyright', 'SID', None, None),
    (2, 'FullName', 'SID', None, None),
    ((12, 38), 'FontName', 'SID', None, None),
    (3, 'FamilyName', 'SID', None, None),
    (4, 'Weight', 'SID', None, None),
    ((12, 1), 'isFixedPitch', 'number', 0, None),
    ((12, 2), 'ItalicAngle', 'number', 0, None),
    ((12, 3), 'UnderlinePosition', 'number', None, None),
    ((12, 4), 'UnderlineThickness', 'number', 50, None),
    ((12, 5), 'PaintType', 'number', 0, None),
    ((12, 6), 'CharstringType', 'number', 2, None),
    ((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0], None),
    (13, 'UniqueID', 'number', None, None),
    (5, 'FontBBox', 'array', [0,0,0,0], None),
    ((12, 8), 'StrokeWidth', 'number', 0, None),
    (14, 'XUID', 'array', None, None),
    ((12, 21), 'PostScript', 'SID', None, None),
    ((12, 22), 'BaseFontName', 'SID', None, None),
    ((12, 23), 'BaseFontBlend', 'delta', None, None),
    ((12, 31), 'CIDFontVersion', 'number', 0, None),
    ((12, 32), 'CIDFontRevision', 'number', 0, None),
    ((12, 33), 'CIDFontType', 'number', 0, None),
    ((12, 34), 'CIDCount', 'number', 8720, None),
    (15, 'charset', 'number', 0, TODO()),
    ((12, 35), 'UIDBase', 'number', None, None),
    (16, 'Encoding', 'number', 0, TODO()),
    (18, 'Private', ('number','number'), None, TODO()),
    ((12, 37), 'FDSelect', 'number', None, TODO()),
    ((12, 36), 'FDArray', 'number', None, TODO()),
    (17, 'CharStrings', 'number', None, TODO()),
    ]
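The packed-BCD real-number encoding handled by read_real_number above can be checked by hand; a small standalone Python 3 re-implementation (so it does not depend on the Py2-style str indexing used in the module):

# Standalone Python 3 version of the nibble decoding in read_real_number:
# each byte carries two nibbles; 0x0-0x9 are digits, 0xa '.', 0xb 'E',
# 0xc 'E-', 0xe '-', and 0xf terminates the number.
REAL_NIBBLES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                '.', 'E', 'E-', None, '-']

def decode_real(data):
    number = ''
    for b in data:
        for nibble in ((b & 0xf0) >> 4, b & 0x0f):
            if nibble == 0xf:
                return float(number)
            number += REAL_NIBBLES[nibble]
    raise ValueError('unterminated real number')

print(decode_real(bytes([0xe2, 0xa2, 0x5f])))  # nibbles -,2,.,2,5,end -> -2.25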
166  src/calibre/utils/fonts/sfnt/cff/table.py  Normal file
@ -0,0 +1,166 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack_from, unpack

from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict

# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf

class CFF(object):

    def __init__(self, raw):
        (self.major_version, self.minor_version, self.header_size,
         self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')

        # Read Top Dict
        self.top_index = Index(raw, offset)
        self.top_dict = TopDict()
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos

        # Read global subroutines
        self.global_subrs = GlobalSubrs(raw, offset)
        offset = self.global_subrs.pos

        # Decompile Top Dict
        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
        import pprint
        pprint.pprint(self.top_dict)

class Index(list):

    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if count > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                        for i in xrange(offset, 3*(count+2), 3)]
            else:
                fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
                fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
                offsets = unpack_from(fmt, raw, offset)
            offset += self.offset_size * (count+1) - 1

            for i in xrange(len(offsets)-1):
                off, noff = offsets[i:i+2]
                obj = raw[offset+off:offset+noff]
                self.append(obj)

            self.pos = offset + offsets[-1]

class Strings(Index):

    def __init__(self, raw, offset):
        super(Strings, self).__init__(raw, offset, prepend=cff_standard_strings)

class GlobalSubrs(Index):
    pass

class CFFTable(UnknownTable):

    def decompile(self):
        self.cff = CFF(self.raw)

# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998

cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
@ -21,6 +21,7 @@ from calibre.utils.fonts.sfnt.maxp import MaxpTable
from calibre.utils.fonts.sfnt.loca import LocaTable
from calibre.utils.fonts.sfnt.glyf import GlyfTable
from calibre.utils.fonts.sfnt.cmap import CmapTable
from calibre.utils.fonts.sfnt.cff.table import CFFTable

# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm

@ -42,6 +43,7 @@ class Sfnt(object):
            b'loca' : LocaTable,
            b'glyf' : GlyfTable,
            b'cmap' : CmapTable,
            b'CFF ' : CFFTable,
        }.get(table_tag, UnknownTable)(table)

    def __getitem__(self, key):

@ -53,12 +55,24 @@ class Sfnt(object):
    def __delitem__(self, key):
        del self.tables[key]

    def __iter__(self):
        '''Iterate over the table tags in optimal order as per
        http://partners.adobe.com/public/developer/opentype/index_recs.html'''
        keys = list(self.tables.keys())
        order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
            b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
            b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
            b'gasp', b'PCLT', b'DSIG'))}
        keys.sort(key=lambda x:order.get(x, 1000))
        for x in keys:
            yield x

    def pop(self, key, default=None):
        return self.tables.pop(key, default)

    def sizes(self):
        ans = OrderedDict()
        for tag in sorted(self.tables):
        for tag in self:
            ans[tag] = len(self[tag])
        return ans

@ -82,7 +96,7 @@ class Sfnt(object):
        table_data = []
        offset = stream.tell() + ( calcsize(b'>4s3L') * num_tables )
        sizes = OrderedDict()
        for tag in sorted(self.tables):
        for tag in self:
            table = self.tables[tag]
            raw = table()
            table_len = len(raw)
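The ordering added in __iter__ above is just a sort keyed on a rank dict with a large default for unknown tags; a minimal sketch with a made-up table set:

# Sketch of the ordering used by Sfnt.__iter__: tags in the recommended
# sequence get their index as rank, anything else sorts last (rank 1000).
recommended = (b'head', b'hhea', b'maxp', b'OS/2', b'hmtx', b'cmap',
               b'loca', b'glyf', b'CFF ', b'name', b'post')
order = {x: i for i, x in enumerate(recommended)}
tags = [b'name', b'XYZ ', b'head', b'glyf']  # made-up table set
print(sorted(tags, key=lambda x: order.get(x, 1000)))
# [b'head', b'glyf', b'name', b'XYZ ']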
@ -66,6 +66,11 @@ def subset_truetype(sfnt, character_map):

# }}}

def subset_postscript(sfnt, character_map):
    cff = sfnt[b'CFF ']
    cff.decompile()
    raise Exception('TODO: Implement CFF subsetting')

def subset(raw, individual_chars, ranges=()):
    chars = list(map(ord, individual_chars))
    for r in ranges:

@ -91,6 +96,10 @@ def subset(raw, individual_chars, ranges=()):
        subset_truetype(sfnt, character_map)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        from calibre.utils.config_base import tweaks
        if tweaks['subset_cff_table']:
            subset_postscript(sfnt, character_map)
        else:
            raise UnsupportedFont('This font contains PostScript outlines, '
                    'subsetting not supported')
    else:
267  src/calibre/utils/localunzip.py  Normal file
@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.

Tries to only use the local headers to extract data from the damaged zip file.
'''

import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile

HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8

LocalHeader = namedtuple('LocalHeader',
    'signature min_version flags compression_method mod_time mod_date '
    'crc32 compressed_size uncompressed_size filename_length extra_length '
    'filename extra')

def decode_arcname(name):
    if isinstance(name, bytes):
        from calibre.ebooks.chardet import detect
        try:
            name = name.decode('utf-8')
        except:
            res = detect(name)
            encoding = res['encoding']
            try:
                name = name.decode(encoding)
            except:
                name = name.decode('utf-8', 'replace')
    return name

def find_local_header(f):
    pos = f.tell()
    raw = f.read(50*1024)
    try:
        f.seek(pos + raw.index(HEADER_BYTE_SIG))
    except ValueError:
        f.seek(pos)
        return
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature == HEADER_SIG:
        return header
    f.seek(pos)

def read_local_file_header(f):
    pos = f.tell()
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature != HEADER_SIG:
        f.seek(pos)
        header = find_local_header(f)
        if header is None:
            return
    if header.min_version > 20:
        raise ValueError('This ZIP file uses unsupported features')
    if header.flags & 0b1:
        raise ValueError('This ZIP file is encrypted')
    if header.flags & (1 << 3):
        raise ValueError('This ZIP file uses data descriptors. This is unsupported')
    if header.flags & (1 << 13):
        raise ValueError('This ZIP file uses masking, unsupported.')
    if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
        raise ValueError('This ZIP file uses an unsupported compression method')
    fname = extra = None
    if header.filename_length > 0:
        fname = f.read(header.filename_length)
        if len(fname) != header.filename_length:
            return
        try:
            fname = fname.decode('ascii')
        except UnicodeDecodeError:
            if header.flags & (1 << 11):
                try:
                    fname = fname.decode('utf-8')
                except UnicodeDecodeError:
                    pass
        fname = decode_arcname(fname).replace('\\', '/')
    if header.extra_length > 0:
        extra = f.read(header.extra_length)
        if len(extra) != header.extra_length:
            return
    return LocalHeader(*(
        header[:-2] + (fname, extra)
        ))

def read_compressed_data(f, header):
    cdata = f.read(header.compressed_size)
    return cdata

def copy_stored_file(src, size, dest):
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        if not raw:
            raise ValueError('Premature end of file')
        dest.write(raw)
        read += len(raw)

def copy_compressed_file(src, size, dest):
    d = zlib.decompressobj(-15)
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        read += len(raw)
        dest.write(d.decompress(raw, 200*1024))
        count = 0
        while d.unconsumed_tail:
            count += 1
            dest.write(d.decompress(d.unconsumed_tail, 200*1024))
            if count > 100:
                raise ValueError('This ZIP file contains a ZIP bomb in %s'%
                        os.path.basename(dest.name))

def _extractall(f, path=None, file_info=None):
    found = False
    while True:
        header = read_local_file_header(f)
        if not header:
            break
        found = True
        parts = header.filename.split('/')
        if header.uncompressed_size == 0:
            # Directory
            f.seek(f.tell() + header.compressed_size)
            if path is not None:
                bdir = os.path.join(path, *parts)
                if not os.path.exists(bdir):
                    os.makedirs(bdir)
            continue

        # File
        if file_info is not None:
            file_info[header.filename] = (f.tell(), header)
        if path is not None:
            bdir = os.path.join(path, *(parts[:-1]))
            if not os.path.exists(bdir):
                os.makedirs(bdir)
            dest = os.path.join(path, *parts)
            with open(dest, 'wb') as o:
                if header.compression_method == ZIP_STORED:
                    copy_stored_file(f, header.compressed_size, o)
                else:
                    copy_compressed_file(f, header.compressed_size, o)
        else:
            f.seek(f.tell() + header.compressed_size)

    if not found:
        raise ValueError('Not a ZIP file')

def extractall(path_or_stream, path=None):
    f = path_or_stream
    close_at_end = False
    if not hasattr(f, 'read'):
        f = open(f, 'rb')
        close_at_end = True
    if path is None:
        path = os.getcwdu()
    pos = f.tell()
    try:
        _extractall(f, path)
    finally:
        f.seek(pos)
        if close_at_end:
            f.close()

class LocalZipFile(object):

    def __init__(self, stream):
        self.file_info = OrderedDict()
        _extractall(stream, file_info=self.file_info)
        self.stream = stream

    def open(self, name, spool_size=5*1024*1024):
        if isinstance(name, LocalHeader):
            name = name.filename
        try:
            offset, header = self.file_info[name]
        except KeyError:
            raise ValueError('This ZIP container has no file named: %s'%name)

        self.stream.seek(offset)
        dest = SpooledTemporaryFile(max_size=spool_size)

        if header.compression_method == ZIP_STORED:
            copy_stored_file(self.stream, header.compressed_size, dest)
        else:
            copy_compressed_file(self.stream, header.compressed_size, dest)
        dest.seek(0)
        return dest

    def getinfo(self, name):
        try:
            offset, header = self.file_info[name]
        except KeyError:
            raise ValueError('This ZIP container has no file named: %s'%name)
        return header

    def read(self, name, spool_size=5*1024*1024):
        with self.open(name, spool_size=spool_size) as f:
            return f.read()

    def extractall(self, path=None):
        self.stream.seek(0)
        _extractall(self.stream, path=(path or os.getcwdu()))

    def close(self):
        pass

    def safe_replace(self, name, datastream, extra_replacements={},
            add_missing=False):
        from calibre.utils.zipfile import ZipFile, ZipInfo
        replacements = {name:datastream}
        replacements.update(extra_replacements)
        names = frozenset(replacements.keys())
        found = set([])
        with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
            ztemp = ZipFile(temp, 'w')
            for offset, header in self.file_info.itervalues():
                if header.filename in names:
                    zi = ZipInfo(header.filename)
                    zi.compress_type = header.compression_method
                    ztemp.writestr(zi, replacements[header.filename].read())
                    found.add(header.filename)
                else:
                    ztemp.writestr(header.filename, self.read(header.filename,
                        spool_size=0))
            if add_missing:
                for name in names - found:
                    ztemp.writestr(name, replacements[name].read())
            ztemp.close()
            zipstream = self.stream
            temp.seek(0)
            zipstream.seek(0)
            zipstream.truncate()
            shutil.copyfileobj(temp, zipstream)
            zipstream.flush()

if __name__ == '__main__':
    extractall(sys.argv[-1])
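Both entry points of the new module can presumably be driven like this (the file paths are made up; the EPUB stands in for one of the malformed B&N zips the docstring mentions):

# Hypothetical usage of the new module; paths are made up.
from calibre.utils.localunzip import extractall, LocalZipFile

extractall('/tmp/broken.epub', path='/tmp/out')  # best-effort full extract

with open('/tmp/broken.epub', 'rb') as stream:
    zf = LocalZipFile(stream)
    raw = zf.read('META-INF/container.xml')  # one member, via local headers only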
@ -467,11 +467,11 @@ eject_drive_letter(WCHAR DriveLetter) {

    DeviceNumber = -1;

    hVolume = CreateFile(szVolumeAccessPath, 0,
    hVolume = CreateFileW(szVolumeAccessPath, 0,
                FILE_SHARE_READ | FILE_SHARE_WRITE,
                NULL, OPEN_EXISTING, 0, NULL);
    if (hVolume == INVALID_HANDLE_VALUE) {
        PyErr_SetString(PyExc_ValueError, "Invalid handle value for drive letter");
        PyErr_SetFromWindowsErr(0);
        return FALSE;
    }

@ -529,11 +529,17 @@ eject_drive_letter(WCHAR DriveLetter) {

static PyObject *
winutil_eject_drive(PyObject *self, PyObject *args) {
    char DriveLetter;
    char letter = '0';
    WCHAR DriveLetter = L'0';

    if (!PyArg_ParseTuple(args, "c", &DriveLetter)) return NULL;
    if (!PyArg_ParseTuple(args, "c", &letter)) return NULL;

    if (!eject_drive_letter((WCHAR)DriveLetter)) return NULL;
    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, &letter, 1, &DriveLetter, 1) == 0) {
        PyErr_SetFromWindowsErr(0);
        return NULL;
    }

    if (!eject_drive_letter(DriveLetter)) return NULL;
    Py_RETURN_NONE;
}
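On the Python side nothing changes: the function still takes a single character, now transcoded to a WCHAR internally. A hedged usage sketch (the plugins[] access path is assumed from calibre's conventions; Windows only):

# Hypothetical caller for the patched eject_drive; the plugins[] access
# path is an assumption and this only works on Windows.
from calibre.constants import plugins

winutil, err = plugins['winutil']
if err:
    raise RuntimeError(err)
winutil.eject_drive(b'E')  # single drive-letter character, as before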