Merge from trunk

2025-08-30 23:00:21 -04:00 · 2013-05-24 15:56:52 +02:00 · 2013-05-24 15:56:52 +02:00 · 00c1d4ea54
commit 00c1d4ea54
parent 6bcf21cd74 ea7292f6ed
125 changed files with 33062 additions and 27652 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -20,6 +20,56 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.32
+  date: 2013-05-24
+
+  new features:
+    - title: "Show the number of currently selected books in the status bar at the bottom of the book list"
+
+    - title: "Driver for PocketBook Touch 623 and Yarvik tablet Xenta 13c"
+      tickets: [1182850, 1181669]
+
+    - title: "When editing dates such as published, allow pressing the minus key to clear the date and the = key to set the date to today."
+      tickets: [1181449]
+ 
+  bug fixes:
+    - title: "EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting."
+      tickets: [1181049]
+
+    - title: "When backing up metadata automatically remove XML invalid chars, instead of erroring out"
+
+    - title: "ebook-viewer: Fix --debug-javascript option causing an error when running from a binary build on os x and linux"
+
+    - title: "Fix switch library dialog and menu both popping up when clicking the library button in some window managers"
+
+    - title: "Apple driver: Fix a regression in 0.9.31 that could cause sending books to the device to hang"
+
+    - title: "When setting metadata using the edit metadata dialog, convert newlines, tabs etc. to normal spaces"
+      tickets: [1182268]
+
+    - title: "EPUB/AZW3 Output: Fix pages that contain only an svg image being regarded as empty and removed during splitting"
+
+    - title: "AZW3 Input: Handle files that use unnecessary svg: prefixes."
+      tickets: [1182257]
+
+    - title: "EPUB Input: Handle EPUB files that have no <metadata> section in their OPF."
+      tickets: [1181546]
+
+    - title: "Get Books: Fix Foyles UK store plugin."
+      tickets: [1181494]
+
+  improved recipes:
+    - Wall Street Journal
+    - Various Polish news sources
+    - Handelsblatt
+    - The Australian
+    - Las Vegas Review
+    - NME
+
+  new recipes:
+    - title: WirtschaftsWoche Online 
+      author: Hegi
+

 - version: 0.9.31
  date: 2013-05-17
--- a/manual/virtual_libraries.rst
+++ b/manual/virtual_libraries.rst
@ -57,6 +57,34 @@ library. The virtual library will then be created based on the search
 you just typed in. Searches are very powerful, for examples of the kinds 
 of things you can do with them, see :ref:`search_interface`. 

+Examples of useful Virtual Libraries
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+  * Books added to |app| in the last day::
+
+        date:>1daysago
+  * Books added to |app| in the last month::
+
+        date:>30daysago
+  * Books with a rating of 5 stars::
+
+        rating:5
+  * Books with a rating of at least 4 stars::
+
+        rating:>=4
+  * Books with no rating::
+
+        rating:false
+  * Periodicals downloaded by the Fetch News function in |app|::
+
+        tags:=News and author:=calibre
+  * Books with no tags::
+
+        tags:false
+  * Books with no covers::
+
+        cover:false
+
 Working with Virtual Libraries
 -------------------------------------

--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -1,47 +1,24 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 class Adventure_zone(BasicNewsRecipe):
    title          = u'Adventure Zone'
    __author__        = 'fenuks'
    description   = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
    category       = 'games'
    language       = 'pl'
+    BASEURL = 'http://www.adventure-zone.info/fusion/'
    no_stylesheets = True
+    extra_css = '.image {float: left; margin-right: 5px;}'
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
-    index = 'http://www.adventure-zone.info/fusion/'
+    remove_attributes = ['style']
    use_embedded_content = False
-    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
-    (re.compile(r'</?table.*?>'), lambda match: ''),
-    (re.compile(r'</?tbody.*?>'), lambda match: '')]
-    remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
-    remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
-    remove_tags_after = dict(id='comments')
-    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
-    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
-
-    '''def get_cover_url(self):
-        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
-        cover=soup.find(id='box_OstatninumerAZ')
-        self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
-        return getattr(self, 'cover_url', self.cover_url)'''
-
-    def populate_article_metadata(self, article, soup, first):
-        result = re.search('(.+) - Adventure Zone', soup.title.string)
-        if result:
-            result = result.group(1)
-        else:
-            result = soup.body.find('strong')
-            if result:
-                result = result.string
-        if result:
-            result = result.replace('&amp;', '&')
-            result = result.replace('&#39;', '’')
-            article.title = result
+    keep_only_tags = [dict(attrs={'class':'content'})]
+    remove_tags = [dict(attrs={'class':'footer'})]
+    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]

    def skip_ad_pages(self, soup):
-        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+        skip_tag = soup.body.find(attrs={'class':'content'})
        skip_tag = skip_tag.findAll(name='a')
        title = soup.title.string.lower()
        if (('zapowied' in title) or ('recenzj' in title)  or ('solucj' in title) or ('poradnik' in title)):
@ -49,20 +26,10 @@ class Adventure_zone(BasicNewsRecipe):
                if r.strong and r.strong.string:
                   word=r.strong.string.lower()
                   if (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word) or ('poradnik' in word)):
-                       return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
+                       return self.index_to_soup(self.BASEURL+r['href'], raw=True)

    def preprocess_html(self, soup):
-        footer=soup.find(attrs={'class':'news-footer middle-border'})
-        r = soup.find(name='td', attrs={'class':'capmain'})
-        if r:
-            r.name='h1'
-        for item in soup.findAll(name=['tr', 'td']):
-            item.name='div'
-        if footer and len(footer('a'))>=2:
-            footer('a')[1].extract()
-        for item in soup.findAll(style=True):
-            del item['style']
-        for a in soup('a'):
-            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
-                a['href']=self.index + a['href']
+        for link in soup.findAll('a', href=True):
+            if not link['href'].startswith('http'):
+                link['href'] = self.BASEURL + link['href']
        return soup
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@ -13,6 +13,7 @@ class Astroflesz(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
+    remove_empty_feeds = True
    remove_attributes = ['style']
    keep_only_tags = [dict(id="k2Container")]
    remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
--- a/recipes/gosc_niedzielny.recipe
+++ b/recipes/gosc_niedzielny.recipe
@ -6,12 +6,10 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
                 2013, Tomasz Długosz, tomek3d@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ptempfile import PersistentTemporaryFile
-from datetime import date
 import re
+from lxml import html

 class GN(BasicNewsRecipe):
-        EDITION = 0

        __author__ = 'Piotr Kontek, Tomasz Długosz'
        title = u'Gość Niedzielny'
@ -20,83 +18,23 @@ class GN(BasicNewsRecipe):
        no_stylesheets = True
        language = 'pl'
        remove_javascript = True
-        temp_files = []

-        articles_are_obfuscated = True
+        def find_last_issue(self):
+            raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
+            doc = html.fromstring(raw)
+            page = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href')

-        def get_obfuscated_article(self, url):
-            br = self.get_browser()
-            br.open(url)
-            source = br.response().read()
-            page = self.index_to_soup(source)
-
-            main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'})
-
-            title = main_section.find('h2')
-            info = main_section.find('div', attrs={'class' : 'cf doc_info'})
-            authors = info.find(attrs={'class':'l'})
-            article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
-            first = True
-            for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
-                if first and p.find('img') != None:
-                    article += '<p>'
-                    article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
-                    article += '<font size="-2">'
-                    for s in p.findAll('span'):
-                        article += self.tag_to_string(s)
-                    article += '</font></p>'
-                else:
-                    article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
-                first = False
-            limiter = main_section.find('p', attrs={'class' : 'limiter'})
-            if limiter:
-                article += str(limiter)
-
-            html = unicode(title)
-            #sometimes authors are not filled in:
-            if authors:
-                html += unicode(authors) + unicode(article)
-            else:
-                html += unicode(article)
-
-            self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
-            self.temp_files[-1].write(html)
-            self.temp_files[-1].close()
-            return self.temp_files[-1].name
-
-        def find_last_issue(self, year):
-                soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
-
-                #szukam zdjęcia i linka do poprzedniego pełnego numeru
-                first = True
-                for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
-                    img = d.find('img')
-                    if img != None:
-                        a = img.parent
-                        self.EDITION = a['href']
-                        #this was preventing kindles from moving old issues to 'Back Issues'  category:
-                        #self.title = img['alt']
-                        self.cover_url = 'http://www.gosc.pl' + img['src']
-                        if year != date.today().year or not first:
-                            break
-                        first = False
+            return page[1]

        def parse_index(self):
-                year = date.today().year
-                self.find_last_issue(year)
-                ##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
-                if self.EDITION == 0:
-                	self.find_last_issue(year-1)
-                soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
+                soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue())
                feeds = []
                #wstepniak
                a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
                articles = [
                            {'title' : self.tag_to_string(a),
-                             'url'   : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'),
-                             'date'  : '',
-                             'description' : ''}
-                            ]
+                             'url'   : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/')
+                            }]
                feeds.append((u'Wstępniak',articles))
                #kategorie
                for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
@ -113,16 +51,46 @@ class GN(BasicNewsRecipe):
 						art = a.find('a')
 						yield {
                                'title' : self.tag_to_string(art),
-                                'url'   : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
-                                'date'  : '',
-                                'description' : ''
+                                'url'   : 'http://www.gosc.pl' + art['href']
                                }
                for a in main_block.findAll('div', attrs={'class':'sr-document'}):
 						art = a.find('a')
 						yield {
                                'title' : self.tag_to_string(art),
-                                'url'   : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
-                                'date'  : '',
-                                'description' : ''
+                                'url'   : 'http://www.gosc.pl' + art['href']
                                }

+        def append_page(self, soup, appendtag):
+            chpage= appendtag.find(attrs={'class':'pgr_nrs'})
+            if chpage:
+                for page in chpage.findAll('a'):
+                    soup2 = self.index_to_soup('http://gosc.pl' + page['href'])
+                    pagetext = soup2.find(attrs={'class':'intextAd'})
+                    pos = len(appendtag.contents)
+                    appendtag.insert(pos, pagetext)
+
+        def preprocess_html(self, soup):
+            self.append_page(soup, soup.body)
+            '''
+            for image_div in soup.findAll(attrs={'class':'doc_image'}):
+                link =
+                if 'm.jpg' in image['src']:
+                    image['src'] = image['src'].replace('m.jpg', '.jpg')
+            '''
+            return soup
+
+        keep_only_tags = [
+            dict(name='div', attrs={'class':'cf txt'})
+        ]
+
+        remove_tags = [
+            dict(name='p', attrs={'class':['r tr', 'l l-2', 'wykop']}),
+            dict(name='div', attrs={'class':['doc_actions', 'pgr', 'fr1_cl']}),
+            dict(name='div', attrs={'id':'vote'})
+        ]
+
+        extra_css = '''
+            h1 {font-size:150%}
+            div#doc_image {font-style:italic; font-size:70%}
+            p.limiter {font-size:150%; font-weight: bold}
+        '''
--- a/recipes/handelsblatt.recipe
+++ b/recipes/handelsblatt.recipe
@ -1,16 +1,61 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class Handelsblatt(BasicNewsRecipe):
    title          = u'Handelsblatt'
-    __author__ = 'malfi'
-    oldest_article = 7
+    __author__ = 'malfi'  # modified by Hegi, last change 2013-05-20
+    description           = u'Handelsblatt - basierend auf den RRS-Feeds von Handelsblatt.de'
+    tags 	                = 'Nachrichten, Blog, Wirtschaft'
+    publisher             = 'Verlagsgruppe Handelsblatt GmbH'
+    category              = 'business, economy, news, Germany'
+    publication_type      = 'daily newspaper'
+    language              = 'de_DE'
+    oldest_article        = 7
    max_articles_per_feed = 100
-    no_stylesheets = True
-#    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
-    language = 'de'
+    simultaneous_downloads= 20

-    remove_tags_before =  dict(attrs={'class':'hcf-overline'})
-    remove_tags_after  =  dict(attrs={'class':'hcf-footer'})
+    auto_cleanup          = False
+    no_stylesheets        = True
+    remove_javascript     = True
+    remove_empty_feeds    = True
+
+    # don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
+    ignore_duplicate_articles = {'title', 'url'}
+
+    # if you want to reduce size for a b/w or E-ink device, uncomment this:
+    # compress_news_images  = True
+    # compress_news_images_auto_size = 16
+    # scale_news_images     = (400,300)
+
+    timefmt               = ' [%a, %d %b %Y]'
+
+    conversion_options    = {'smarten_punctuation' : True,
+                        'authors'		  : publisher,
+                        'publisher'  	  : publisher}
+    language              = 'de_DE'
+    encoding              = 'UTF-8'
+
+    cover_source          = 'http://www.handelsblatt-shop.com/epaper/482/'
+    # masthead_url          = 'http://www.handelsblatt.com/images/hb_logo/6543086/1-format3.jpg'
+    masthead_url          = 'http://www.handelsblatt-chemie.de/wp-content/uploads/2012/01/hb-logo.gif'
+
+    def get_cover_url(self):
+        cover_source_soup = self.index_to_soup(self.cover_source)
+        preview_image_div = cover_source_soup.find(attrs={'class':'vorschau'})
+        return 'http://www.handelsblatt-shop.com'+preview_image_div.a.img['src']
+
+    # remove_tags_before =  dict(attrs={'class':'hcf-overline'})
+    # remove_tags_after  =  dict(attrs={'class':'hcf-footer'})
+    # Alternatively use this:
+
+    keep_only_tags    = [
+                          dict(name='div', attrs={'class':['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
+                          dict(name='div', attrs={'id':['contentMain']})
+                        ]
+
+    remove_tags = [
+                    dict(name='div', attrs={'class':['hcf-link-block hcf-faq-open', 'hcf-article-related']})
+                  ]

    feeds          = [
                        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
@ -25,15 +70,19 @@ class Handelsblatt(BasicNewsRecipe):
                        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
                     ]

-    extra_css = '''
-        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-        '''
+    # Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
+    # If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
+    preprocess_regexps    = [(re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
+                              re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '. ' + match.group(2))]
+
+    extra_css      =  'h1 {font-size: 1.6em; text-align: left} \
+                       h2 {font-size: 1em; font-style: italic; font-weight: normal} \
+                       h3 {font-size: 1.3em;text-align: left} \
+                       h4, h5, h6, a {font-size: 1em;text-align: left} \
+                       .hcf-caption {font-size: 1em;text-align: left; font-style: italic} \
+                       .hcf-location-mark {font-style: italic}'

    def print_version(self, url):
-        url = url.split('/')
-        url[-1] = 'v_detail_tab_print,'+url[-1]
-        url = '/'.join(url)
-        return url
+        main, sep, id = url.rpartition('/')
+        return main + '/v_detail_tab_print/' + id
+
--- a/recipes/histmag.recipe
+++ b/recipes/histmag.recipe
@ -13,11 +13,12 @@ class Histmag(BasicNewsRecipe):
    __author__ = 'matek09'
    description = u"Artykuly historyczne i publicystyczne"
    encoding = 'utf-8'
+    extra_css = '''.center img {display: block;}'''
    #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    keep_only_tags=[dict(id='article')]
-    remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'})]
+    remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'}), dict(attrs={'class':'twitter-share-button'})]

    feeds          = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]
--- a/recipes/icons/geopolityka.png
+++ b/recipes/icons/geopolityka.png
--- a/recipes/icons/gs24_pl.png
+++ b/recipes/icons/gs24_pl.png
--- a/recipes/icons/homopedia_pl.png
+++ b/recipes/icons/homopedia_pl.png
--- a/recipes/icons/pc_lab.png
+++ b/recipes/icons/pc_lab.png
--- a/recipes/icons/polityka.png
+++ b/recipes/icons/polityka.png
--- a/recipes/icons/rynek_zdrowia.png
+++ b/recipes/icons/rynek_zdrowia.png
--- a/recipes/osnews_pl.recipe
+++ b/recipes/osnews_pl.recipe
@ -20,7 +20,7 @@ class OSNewsRecipe(BasicNewsRecipe):
    remove_javascript = True
    encoding = 'utf-8'
    use_embedded_content = False;
-
+    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url='http://osnews.pl/wp-content/themes/osnews/img/logo.png'
@ -31,22 +31,18 @@ class OSNewsRecipe(BasicNewsRecipe):
    '''

    feeds = [
-        (u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl')
+        (u'Niusy', u'http://feeds.feedburner.com/OSnewspl'),
+        (u'Wylęgarnia', u'http://feeds.feedburner.com/osnewspl_nowe')
    ]

    keep_only_tags = [
-        dict(name = 'a', attrs = {'class' : 'news-heading'}),
-        dict(name = 'div', attrs = {'class' : 'newsinformations'}),
-        dict(name = 'div', attrs = {'id' : 'news-content'})
+        dict(name = 'div', attrs = {'id' : 'content'})
    ]

    remove_tags = [
-        dict(name = 'div', attrs = {'class' : 'sociable'}),
-        dict(name = 'div', attrs = {'class' : 'post_prev'}),
-        dict(name = 'div', attrs = {'class' : 'post_next'}),
-        dict(name = 'div', attrs = {'class' : 'clr'}),
-        dict(name = 'div', attrs = {'class' : 'tw_button'}),
-        dict(name = 'div', attrs = {'style' : 'width:56px;height:60px;float:left;margin-right:10px'})
+        dict(name = 'div', attrs = {'class' : ['newstags', 'tw_button', 'post_prev']}),
+        dict(name = 'div', attrs = {'id' : 'newspage_upinfo'}),
    ]

-    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]
+    remove_tags_after = dict(name = 'div', attrs = {'class' : 'post_prev'})
+    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span'), (re.compile(u'<iframe.+?</iframe>'), lambda match: '')]
--- a/recipes/wirtscafts_woche.recipe
+++ b/recipes/wirtscafts_woche.recipe
@ -0,0 +1,86 @@
+__license__   = 'GPL v3'
+__copyright__ = '2013, Armin Geller'
+
+'''
+Fetch WirtschaftsWoche Online
+'''
+import re
+# import time
+from calibre.web.feeds.news import BasicNewsRecipe
+class WirtschaftsWocheOnline(BasicNewsRecipe):
+    title                 = u'WirtschaftsWoche Online'
+    __author__            = 'Hegi'  # Update AGE 2013-01-05; Modified by Hegi 2013-04-28
+    description           = u'Wirtschaftswoche Online - basierend auf den RRS-Feeds von Wiwo.de'
+    tags 	                = 'Nachrichten, Blog, Wirtschaft'
+    publisher             = 'Verlagsgruppe Handelsblatt GmbH / Redaktion WirtschaftsWoche Online'
+    category              = 'business, economy, news, Germany'
+    publication_type      = 'weekly magazine'
+    language              = 'de'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    simultaneous_downloads= 20
+
+    auto_cleanup          = False
+    no_stylesheets        = True
+    remove_javascript     = True
+    remove_empty_feeds    = True
+
+    # don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
+    ignore_duplicate_articles = {'title', 'url'}
+
+    # if you want to reduce size for a b/w or E-ink device, uncomment this:
+    # compress_news_images  = True
+    # compress_news_images_auto_size = 16
+    # scale_news_images     = (400,300)
+
+    timefmt               = ' [%a, %d %b %Y]'
+
+    conversion_options    = {'smarten_punctuation' : True,
+                        'authors'		  : publisher,
+                        'publisher'  	  : publisher}
+    language              = 'de_DE'
+    encoding              = 'UTF-8'
+    cover_source          = 'http://www.wiwo-shop.de/wirtschaftswoche/wirtschaftswoche-emagazin-p1952.html'
+    masthead_url          = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'
+
+    def get_cover_url(self):
+        cover_source_soup = self.index_to_soup(self.cover_source)
+        preview_image_div = cover_source_soup.find(attrs={'class':'container vorschau'})
+        return 'http://www.wiwo-shop.de'+preview_image_div.a.img['src']
+
+    # Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
+    # If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
+    preprocess_regexps    = [(re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
+                              re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '. ' + match.group(2))]
+
+    extra_css      =  'h1 {font-size: 1.6em; text-align: left} \
+                       h2 {font-size: 1em; font-style: italic; font-weight: normal} \
+                       h3 {font-size: 1.3em;text-align: left} \
+                       h4, h5, h6, a {font-size: 1em;text-align: left} \
+                       .hcf-caption {font-size: 1em;text-align: left; font-style: italic} \
+                       .hcf-location-mark {font-style: italic}'
+
+    keep_only_tags    = [
+                          dict(name='div', attrs={'class':['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
+                          dict(name='div', attrs={'id':['contentMain']})
+                        ]
+
+    remove_tags = [
+                    dict(name='div', attrs={'class':['hcf-link-block hcf-faq-open', 'hcf-article-related']})
+                  ]
+
+    feeds = [
+              (u'Schlagzeilen', u'http://www.wiwo.de/contentexport/feed/rss/schlagzeilen'),
+              (u'Exklusiv', u'http://www.wiwo.de/contentexport/feed/rss/exklusiv'),
+              # (u'Themen', u'http://www.wiwo.de/contentexport/feed/rss/themen'), # AGE no print version
+              (u'Unternehmen', u'http://www.wiwo.de/contentexport/feed/rss/unternehmen'),
+              (u'Finanzen', u'http://www.wiwo.de/contentexport/feed/rss/finanzen'),
+              (u'Politik', u'http://www.wiwo.de/contentexport/feed/rss/politik'),
+              (u'Erfolg', u'http://www.wiwo.de/contentexport/feed/rss/erfolg'),
+              (u'Technologie', u'http://www.wiwo.de/contentexport/feed/rss/technologie'),
+              # (u'Green-WiWo', u'http://green.wiwo.de/feed/rss/') # AGE no print version
+            ]
+    def print_version(self, url):
+        main, sep, id = url.rpartition('/')
+        return main + '/v_detail_tab_print/' + id
+
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@ -112,7 +112,7 @@ class WallStreetJournal(BasicNewsRecipe):
        if date is not None:
            self.timefmt = ' [%s]'%self.tag_to_string(date)

-        cov = soup.find('div', attrs={'class':'itpSectionHeaderPdf'})
+        cov = soup.find('div', attrs={'class':lambda x: x and 'itpSectionHeaderPdf' in x.split()})
        if cov is not None:
            a = cov.find('a', href=True)
            if a is not None:
--- a/setup/iso_639/ru.po
+++ b/setup/iso_639/ru.po
@ -13,14 +13,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-03-23 10:17+0000\n"
+"PO-Revision-Date: 2013-05-21 06:13+0000\n"
 "Last-Translator: Глория Хрусталёва <gloriya@hushmail.com>\n"
 "Language-Team: Russian <debian-l10n-russian@lists.debian.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-03-24 04:45+0000\n"
-"X-Generator: Launchpad (build 16540)\n"
+"X-Launchpad-Export-Date: 2013-05-22 04:38+0000\n"
+"X-Generator: Launchpad (build 16626)\n"
 "Language: ru\n"

 #. name for aaa
@ -5361,7 +5361,7 @@ msgstr ""

 #. name for coa
 msgid "Malay; Cocos Islands"
-msgstr ""
+msgstr "Малайский; Кокосовые острова"

 #. name for cob
 msgid "Chicomuceltec"
--- a/setup/iso_639/sv.po
+++ b/setup/iso_639/sv.po
@ -30,14 +30,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-05-13 05:58+0000\n"
+"PO-Revision-Date: 2013-05-19 09:23+0000\n"
 "Last-Translator: Merarom <Unknown>\n"
 "Language-Team: Swedish <sv@li.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-05-14 05:30+0000\n"
-"X-Generator: Launchpad (build 16617)\n"
+"X-Launchpad-Export-Date: 2013-05-20 05:34+0000\n"
+"X-Generator: Launchpad (build 16626)\n"
 "Language: sv\n"

 #. name for aaa
@ -4582,35 +4582,35 @@ msgstr ""

 #. name for bzl
 msgid "Boano (Sulawesi)"
-msgstr ""
+msgstr "Boano (Sulawesi/Celebes)"

 #. name for bzm
 msgid "Bolondo"
-msgstr ""
+msgstr "Bolondo"

 #. name for bzn
 msgid "Boano (Maluku)"
-msgstr ""
+msgstr "Boano (Maluku)"

 #. name for bzo
 msgid "Bozaba"
-msgstr ""
+msgstr "Bozaba"

 #. name for bzp
 msgid "Kemberano"
-msgstr ""
+msgstr "Kemberano"

 #. name for bzq
 msgid "Buli (Indonesia)"
-msgstr ""
+msgstr "Buli (Indonesien)"

 #. name for bzr
 msgid "Biri"
-msgstr ""
+msgstr "Biri"

 #. name for bzs
 msgid "Brazilian Sign Language"
-msgstr ""
+msgstr "Brasilianskt teckenspråk"

 #. name for bzt
 msgid "Brithenig"
@ -4618,39 +4618,39 @@ msgstr ""

 #. name for bzu
 msgid "Burmeso"
-msgstr ""
+msgstr "Burmanska"

 #. name for bzv
 msgid "Bebe"
-msgstr ""
+msgstr "Bebe"

 #. name for bzw
 msgid "Basa (Nigeria)"
-msgstr ""
+msgstr "Basa (Nigeria)"

 #. name for bzx
 msgid "Bozo; Kɛlɛngaxo"
-msgstr ""
+msgstr "Bozo; (Mali)"

 #. name for bzy
 msgid "Obanliku"
-msgstr ""
+msgstr "Obanliku"

 #. name for bzz
 msgid "Evant"
-msgstr ""
+msgstr "Evant"

 #. name for caa
 msgid "Chortí"
-msgstr ""
+msgstr "Chortí"

 #. name for cab
 msgid "Garifuna"
-msgstr ""
+msgstr "Garifuna"

 #. name for cac
 msgid "Chuj"
-msgstr ""
+msgstr "Chuj"

 #. name for cad
 msgid "Caddo"
@ -4658,59 +4658,59 @@ msgstr "Caddo"

 #. name for cae
 msgid "Lehar"
-msgstr ""
+msgstr "Lezginska"

 #. name for caf
 msgid "Carrier; Southern"
-msgstr ""
+msgstr "Carrier; södra"

 #. name for cag
 msgid "Nivaclé"
-msgstr ""
+msgstr "Nivaclé"

 #. name for cah
 msgid "Cahuarano"
-msgstr ""
+msgstr "Cahuarano; Peru"

 #. name for caj
 msgid "Chané"
-msgstr ""
+msgstr "Chané"

 #. name for cak
 msgid "Kaqchikel"
-msgstr ""
+msgstr "Kaqchikel"

 #. name for cal
 msgid "Carolinian"
-msgstr ""
+msgstr "Carolinian"

 #. name for cam
 msgid "Cemuhî"
-msgstr ""
+msgstr "Cemuhî"

 #. name for can
 msgid "Chambri"
-msgstr ""
+msgstr "Chambri"

 #. name for cao
 msgid "Chácobo"
-msgstr ""
+msgstr "Chácobo"

 #. name for cap
 msgid "Chipaya"
-msgstr ""
+msgstr "Chipaya"

 #. name for caq
 msgid "Nicobarese; Car"
-msgstr ""
+msgstr "Nicobarese; Car"

 #. name for car
 msgid "Carib; Galibi"
-msgstr ""
+msgstr "Carib; Galibi"

 #. name for cas
 msgid "Tsimané"
-msgstr ""
+msgstr "Tsimshian; Britiska Columbia"

 #. name for cat
 msgid "Catalan"
@ -4718,15 +4718,15 @@ msgstr "Katalanska"

 #. name for cav
 msgid "Cavineña"
-msgstr ""
+msgstr "Cavineña"

 #. name for caw
 msgid "Callawalla"
-msgstr ""
+msgstr "Callawalla; Bolivia"

 #. name for cax
 msgid "Chiquitano"
-msgstr ""
+msgstr "Chiquitano; Bolivia"

 #. name for cay
 msgid "Cayuga"
@ -4734,115 +4734,115 @@ msgstr ""

 #. name for caz
 msgid "Canichana"
-msgstr ""
+msgstr "Canichana"

 #. name for cbb
 msgid "Cabiyarí"
-msgstr ""
+msgstr "Cabiyarí"

 #. name for cbc
 msgid "Carapana"
-msgstr ""
+msgstr "Carapana; Colombia & Brasilien"

 #. name for cbd
 msgid "Carijona"
-msgstr ""
+msgstr "Carijona"

 #. name for cbe
 msgid "Chipiajes"
-msgstr ""
+msgstr "Chipiajes"

 #. name for cbg
 msgid "Chimila"
-msgstr ""
+msgstr "Chimila"

 #. name for cbh
 msgid "Cagua"
-msgstr ""
+msgstr "Cagua;Venezuela"

 #. name for cbi
 msgid "Chachi"
-msgstr ""
+msgstr "Chachi; Ecuador"

 #. name for cbj
 msgid "Ede Cabe"
-msgstr ""
+msgstr "Ede Cabe"

 #. name for cbk
 msgid "Chavacano"
-msgstr ""
+msgstr "Chavacano; Filippinerna"

 #. name for cbl
 msgid "Chin; Bualkhaw"
-msgstr ""
+msgstr "Chin; Bualkhaw"

 #. name for cbn
 msgid "Nyahkur"
-msgstr ""
+msgstr "Nyahkur; Thailand"

 #. name for cbo
 msgid "Izora"
-msgstr ""
+msgstr "Izora"

 #. name for cbr
 msgid "Cashibo-Cacataibo"
-msgstr ""
+msgstr "Cashibo-Cacataibo;Peru"

 #. name for cbs
 msgid "Cashinahua"
-msgstr ""
+msgstr "Cashinahua;Peru"

 #. name for cbt
 msgid "Chayahuita"
-msgstr ""
+msgstr "Chayahuita;Peru"

 #. name for cbu
 msgid "Candoshi-Shapra"
-msgstr ""
+msgstr "Candoshi-Shapra;Peru"

 #. name for cbv
 msgid "Cacua"
-msgstr ""
+msgstr "Cacua;Colombia"

 #. name for cbw
 msgid "Kinabalian"
-msgstr ""
+msgstr "Kinabalian;sydöstra Filippinerna"

 #. name for cby
 msgid "Carabayo"
-msgstr ""
+msgstr "Carabayo;Colombia"

 #. name for cca
 msgid "Cauca"
-msgstr ""
+msgstr "Cauca;Colombia & Panama"

 #. name for ccc
 msgid "Chamicuro"
-msgstr ""
+msgstr "Chamicuro;Peru"

 #. name for ccd
 msgid "Creole; Cafundo"
-msgstr ""
+msgstr "Creole; Cafundo; Brasilien"

 #. name for cce
 msgid "Chopi"
-msgstr ""
+msgstr "Chopi;Moçambique"

 #. name for ccg
 msgid "Daka; Samba"
-msgstr ""
+msgstr "Daka; Samba, Nigeria"

 #. name for cch
 msgid "Atsam"
-msgstr ""
+msgstr "Atsam"

 #. name for ccj
 msgid "Kasanga"
-msgstr ""
+msgstr "Kasanga"

 #. name for ccl
 msgid "Cutchi-Swahili"
-msgstr ""
+msgstr "Cutchi-Swahili"

 #. name for ccm
 msgid "Creole Malay; Malaccan"
@ -4850,75 +4850,75 @@ msgstr ""

 #. name for cco
 msgid "Chinantec; Comaltepec"
-msgstr ""
+msgstr "Chinantec; Comaltepec"

 #. name for ccp
 msgid "Chakma"
-msgstr ""
+msgstr "Chakma"

 #. name for ccq
 msgid "Chaungtha"
-msgstr ""
+msgstr "Chaungtha"

 #. name for ccr
 msgid "Cacaopera"
-msgstr ""
+msgstr "Cacaopera"

 #. name for cda
 msgid "Choni"
-msgstr ""
+msgstr "Choni"

 #. name for cde
 msgid "Chenchu"
-msgstr ""
+msgstr "Chenchu"

 #. name for cdf
 msgid "Chiru"
-msgstr ""
+msgstr "Chiru"

 #. name for cdg
 msgid "Chamari"
-msgstr ""
+msgstr "Chamari"

 #. name for cdh
 msgid "Chambeali"
-msgstr ""
+msgstr "Chambeali"

 #. name for cdi
 msgid "Chodri"
-msgstr ""
+msgstr "Chodri"

 #. name for cdj
 msgid "Churahi"
-msgstr ""
+msgstr "Churahi"

 #. name for cdm
 msgid "Chepang"
-msgstr ""
+msgstr "Chepang"

 #. name for cdn
 msgid "Chaudangsi"
-msgstr ""
+msgstr "Chaudangsi"

 #. name for cdo
 msgid "Chinese; Min Dong"
-msgstr ""
+msgstr "Kinesiska; Min Dong"

 #. name for cdr
 msgid "Cinda-Regi-Tiyal"
-msgstr ""
+msgstr "Cinda-Regi-Tiyal"

 #. name for cds
 msgid "Chadian Sign Language"
-msgstr ""
+msgstr "Tchadiskt teckenspråk"

 #. name for cdy
 msgid "Chadong"
-msgstr ""
+msgstr "Chadong"

 #. name for cdz
 msgid "Koda"
-msgstr ""
+msgstr "Koda"

 #. name for cea
 msgid "Chehalis; Lower"
@ -4930,11 +4930,11 @@ msgstr "Cebuano"

 #. name for ceg
 msgid "Chamacoco"
-msgstr ""
+msgstr "Chamacoco"

 #. name for cen
 msgid "Cen"
-msgstr ""
+msgstr "Cen"

 #. name for ces
 msgid "Czech"
@ -4942,7 +4942,7 @@ msgstr "Tjeckiska"

 #. name for cet
 msgid "Centúúm"
-msgstr ""
+msgstr "Centúúm"

 #. name for cfa
 msgid "Dijim-Bwilim"
@ -4950,31 +4950,31 @@ msgstr ""

 #. name for cfd
 msgid "Cara"
-msgstr ""
+msgstr "Cara"

 #. name for cfg
 msgid "Como Karim"
-msgstr ""
+msgstr "Como Karim"

 #. name for cfm
 msgid "Chin; Falam"
-msgstr ""
+msgstr "Chin; Falam"

 #. name for cga
 msgid "Changriwa"
-msgstr ""
+msgstr "Changriwa"

 #. name for cgc
 msgid "Kagayanen"
-msgstr ""
+msgstr "Kagayanen"

 #. name for cgg
 msgid "Chiga"
-msgstr ""
+msgstr "Chiga"

 #. name for cgk
 msgid "Chocangacakha"
-msgstr ""
+msgstr "Chocangacakha; Butan"

 #. name for cha
 msgid "Chamorro"
@ -4986,11 +4986,11 @@ msgstr "Chibcha"

 #. name for chc
 msgid "Catawba"
-msgstr ""
+msgstr "Catawba"

 #. name for chd
 msgid "Chontal; Highland Oaxaca"
-msgstr ""
+msgstr "Chontal; Highland Oaxaca; Mexico"

 #. name for che
 msgid "Chechen"
@ -4998,7 +4998,7 @@ msgstr "Tjetjenska"

 #. name for chf
 msgid "Chontal; Tabasco"
-msgstr ""
+msgstr "Chontal; Tabasco"

 #. name for chg
 msgid "Chagatai"
@ -5006,7 +5006,7 @@ msgstr "Chagatai"

 #. name for chh
 msgid "Chinook"
-msgstr ""
+msgstr "Chinook"

 #. name for chj
 msgid "Chinantec; Ojitlán"
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 31)
+numeric_version = (0, 9, 32)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -320,7 +320,7 @@ class ITUNES(DriverBase):
        self.verbose = self.settings().extra_customization[self.DEBUG_LOGGING]
        if self.verbose:
            logger().info("%s.__init__():" % self.__class__.__name__)
-            logger().info(" Debug logging enabled in iTunes plugin settings")
+            logger().info(" Debug logging enabled")

    @property
    def cache_dir(self):
@ -1288,7 +1288,7 @@ class ITUNES(DriverBase):
                        logger().error(" failed to add '%s' to Device|Books" % metadata.title)
                    raise UserFeedback("Unable to add '%s' in direct connect mode" % metadata.title,
                                        details=None, level=UserFeedback.ERROR)
-                self._wait_for_writable_metadata(added)
+                #self._wait_for_writable_metadata(added)
                return added

        elif iswindows:
@ -1471,6 +1471,7 @@ class ITUNES(DriverBase):

        if self.verbose:
            logger().info(" %s._cover_to_thumb()" % self.__class__.__name__)
+            #logger().info("db_added: %s  lb_added: %s" % (db_added, lb_added))

        thumb = None
        if metadata.cover:
@ -1514,13 +1515,13 @@ class ITUNES(DriverBase):
                    '''
                    if lb_added:
                        delay = 2.0
-                        self._wait_for_writable_metadata(db_added, delay=delay)

                        # Wait for updatable artwork
                        attempts = 9
                        while attempts:
                            try:
                                lb_added.artworks[1].data_.set(cover_data)
+                                break
                            except:
                                attempts -= 1
                                time.sleep(delay)
@ -3229,6 +3230,11 @@ class ITUNES(DriverBase):
        if self.verbose:
            logger().info(" %s._wait_for_writable_metadata()" % self.__class__.__name__)

+        if not db_added:
+            if self.verbose:
+                logger().info("called from %s() with null db_added" % sys._getframe(1).f_code.co_name)
+            return
+
        attempts = 9
        while attempts:
            try:
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -279,11 +279,11 @@ class POCKETBOOK602(USBMS):
 class POCKETBOOK622(POCKETBOOK602):

    name = 'PocketBook 622 Device Interface'
-    description    = _('Communicate with the PocketBook 622 reader.')
+    description    = _('Communicate with the PocketBook 622 and 623 readers.')
    EBOOK_DIR_MAIN = ''

    VENDOR_ID   = [0x0489]
-    PRODUCT_ID  = [0xe107]
+    PRODUCT_ID  = [0xe107, 0xcff1]
    BCD         = [0x0326]

    VENDOR_NAME = 'LINUX'
--- a/src/calibre/devices/idevice/libimobiledevice.py
+++ b/src/calibre/devices/idevice/libimobiledevice.py
@ -224,16 +224,19 @@ class libiMobileDevice():

    def copy_to_iDevice(self, src, dst):
        '''
-        High-level convenience method to copy src on local filesystem to
+        High-level convenience method to copy src from local filesystem to
        dst on iDevice.
+        Assumed to be a binary file (epub, sqlite, etc)
        src: file on local filesystem
        dst: file to be created on iOS filesystem
        '''
-        self._log_location("src='%s', dst='%s'" % (src, dst))
-        with open(src) as f:
+        self._log_location("src=%s, dst=%s" % (repr(src), repr(dst)))
+        mode = 'rb'
+        with open(src, mode) as f:
            content = bytearray(f.read())
+
        mode = 'wb'
-        handle = self._afc_file_open(dst, mode=mode)
+        handle = self._afc_file_open(str(dst), mode=mode)
        if handle is not None:
            success = self._afc_file_write(handle, content, mode=mode)
            if self.verbose:
@ -533,7 +536,7 @@ class libiMobileDevice():
        else:
            if self.verbose:
                self.log(" could not open file")
-            raise libiMobileDeviceIOException("could not open file '%s' for reading" % path)
+            raise libiMobileDeviceIOException("could not open file %s for reading" % repr(path))

        return data

@ -800,7 +803,7 @@ class libiMobileDevice():
         error:      (afc_error_t) AFC_E_SUCCESS (0) on success or AFC_E_* error value

        '''
-        self._log_location("'%s', mode='%s'" % (filename, mode))
+        self._log_location("%s, mode='%s'" % (repr(filename), mode))

        handle = c_ulonglong(0)

@ -1682,6 +1685,18 @@ class libiMobileDevice():
            raise libiMobileDeviceException(error_description)

    # ~~~ logging ~~~
+    def _log_diagnostic(self, msg=None):
+        '''
+        Emit a diagnostic line through debug_print when verbose logging is on.
+
+        msg: text to print, prefixed with a single space; when it is empty
+             or None a bare debug_print() call is made instead
+        '''
+        if self.verbose:
+            if msg:
+                debug_print(" %s" % msg)
+            else:
+                debug_print()
+
    def _log_location(self, *args):
        '''
        '''
--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@ -74,7 +74,7 @@ def read_border(parent, dest):

    for border in XPath('./w:pBdr')(parent):
        for edge in ('left', 'top', 'right', 'bottom'):
-            for elem in XPath('./w:%s' % edge):
+            for elem in XPath('./w:%s' % edge)(border):
                color = get(elem, 'w:color')
                if color is not None:
                    vals['border_%s_color' % edge] = simple_color(color)
@ -151,8 +151,8 @@ def read_spacing(parent, dest):

        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
        if l is not None:
-            lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
-            line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
+            lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
+            line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')

    setattr(dest, 'margin_top', padding_top)
    setattr(dest, 'margin_bottom', padding_bottom)
@ -189,6 +189,89 @@ def read_numbering(parent, dest):
    val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
    setattr(dest, 'numbering', val)

+class Frame(object):
+
+    '''
+    Frame properties of a paragraph, read from a w:framePr element. The
+    numeric lengths (h, w, x, y, h_space, v_space) are stored as the
+    attribute value divided by 20 and are emitted as pt lengths by css().
+    '''
+
+    all_attributes = ('drop_cap', 'h', 'w', 'h_anchor', 'h_rule', 'v_anchor', 'wrap',
+                      'h_space', 'v_space', 'lines', 'x_align', 'y_align', 'x', 'y')
+
+    def __init__(self, fp):
+        def scaled(attr, fallback):
+            # Integer attribute divided by 20, or fallback when the attribute
+            # is missing or is not an integer
+            try:
+                return int(get(fp, attr))/20
+            except (ValueError, TypeError):
+                return fallback
+
+        self.drop_cap = get(fp, 'w:dropCap', 'none')
+        self.h = scaled('w:h', 0)
+        self.w = scaled('w:w', None)
+        self.x = scaled('w:x', 0)
+        self.y = scaled('w:y', 0)
+
+        self.h_anchor = get(fp, 'w:hAnchor', 'page')
+        self.h_rule = get(fp, 'w:hRule', 'auto')
+        self.v_anchor = get(fp, 'w:vAnchor', 'page')
+        self.wrap = get(fp, 'w:wrap', 'around')
+        self.x_align = get(fp, 'w:xAlign')
+        self.y_align = get(fp, 'w:yAlign')
+
+        self.h_space = scaled('w:hSpace', 0)
+        self.v_space = scaled('w:vSpace', 0)
+        # The line count is used as is, it is not a length
+        try:
+            self.lines = int(get(fp, 'w:lines'))
+        except (ValueError, TypeError):
+            self.lines = 1
+
+    def css(self, page):
+        '''
+        Return a dict of CSS properties for this frame. page must provide a
+        numeric width attribute, it is used to decide the float side when no
+        explicit horizontal alignment is set.
+        '''
+        ans = {'overflow': 'hidden'}
+
+        # Drop caps get a fixed left float and ignore all size/position data
+        if self.drop_cap in {'drop', 'margin'}:
+            ans.update({'float': 'left', 'margin': '0', 'padding-right': '0.2em'})
+            return ans
+
+        if self.h_rule != 'auto':
+            prop = 'height' if self.h_rule != 'atLeast' else 'min-height'
+            ans[prop] = '%.3gpt' % self.h
+        if self.w is not None:
+            ans['width'] = '%.3gpt' % self.w
+        vpad = '%.3gpt' % self.v_space
+        ans['padding-top'] = ans['padding-bottom'] = vpad
+
+        # Without wrapping there is no float and no horizontal padding
+        if self.wrap in {None, 'none'}:
+            return ans
+
+        hpad = '%.3gpt' % self.h_space
+        ans['padding-left'] = ans['padding-right'] = hpad
+        if self.x_align is None:
+            side = 'left' if self.x/page.width < 0.5 else 'right'
+        else:
+            side = 'right' if self.x_align == 'right' else 'left'
+        ans['float'] = side
+        return ans
+
+    def __eq__(self, other):
+        # An attribute missing on other compares as the inherit sentinel
+        return not any(
+            getattr(other, x, inherit) != getattr(self, x)
+            for x in self.all_attributes)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+def read_frame(parent, dest):
+    ' Set dest.frame from the last w:framePr child of parent, or to inherit if there is none '
+    frames = XPath('./w:framePr')(parent)
+    dest.frame = Frame(frames[-1]) if frames else inherit
+
 # }}}

 class ParagraphStyle(object):
@ -208,7 +291,7 @@ class ParagraphStyle(object):

        # Misc.
        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
-        'numbering', 'font_family', 'font_size',
+        'numbering', 'font_family', 'font_size', 'frame',
    )

    def __init__(self, pPr=None):
@ -225,7 +308,7 @@ class ParagraphStyle(object):
            ):
                setattr(self, p, binary_property(pPr, p))

-            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
+            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'):
                f = globals()['read_%s' % x]
                f(pPr, self)

@ -286,5 +369,3 @@ class ParagraphStyle(object):
        return self._css

        # TODO: keepNext must be done at markup level
-
-
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@ -11,7 +11,7 @@ import os, sys, shutil
 from lxml import etree

 from calibre import walk, guess_type
-from calibre.ebooks.metadata import string_to_authors
+from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.docx import InvalidDOCX
 from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
@ -49,6 +49,7 @@ def read_doc_props(raw, mi):
            aut.extend(string_to_authors(author.text))
    if aut:
        mi.authors = aut
+        mi.author_sort = authors_to_sort_string(aut)

    desc = XPath('//dc:description')(root)
    if desc:
@ -181,7 +182,9 @@ class DOCX(object):
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
-                target = '/'.join((base, item.get('Target').lstrip('/')))
+                target = item.get('Target')
+                if item.get('TargetMode', None) != 'External':
+                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
--- a/src/calibre/ebooks/docx/footnotes.py
+++ b/src/calibre/ebooks/docx/footnotes.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import OrderedDict
+
+from calibre.ebooks.docx.names import get, XPath, descendants
+
+class Note(object):
+
+    ' A single footnote/endnote element. Iterating over it yields its w:p descendants. '
+
+    def __init__(self, parent):
+        self.parent = parent
+        # Notes with no w:type attribute are treated as 'normal'
+        self.type = get(parent, 'w:type', 'normal')
+
+    def __iter__(self):
+        return descendants(self.parent, 'w:p')
+
+class Footnotes(object):
+
+    '''
+    Registry of the footnotes and endnotes of a document, keyed by their
+    w:id, together with the ordered list of notes that have actually been
+    referenced via get_ref().
+    '''
+
+    def __init__(self):
+        self.footnotes = {}
+        self.endnotes = {}
+        self.counter = 0
+        self.notes = OrderedDict()
+
+    def __call__(self, footnotes, endnotes):
+        ' Index the notes found under the footnotes and endnotes roots (either may be None) '
+        sources = (
+            (footnotes, './w:footnote[@w:id]', self.footnotes),
+            (endnotes, './w:endnote[@w:id]', self.endnotes),
+        )
+        for root, expr, store in sources:
+            if root is None:
+                continue
+            for elem in XPath(expr)(root):
+                fid = get(elem, 'w:id')
+                if fid:
+                    store[fid] = Note(elem)
+
+    def get_ref(self, ref):
+        '''
+        Register a reference to a note. Returns (anchor, label) for a known
+        note of type 'normal', otherwise (None, None).
+        '''
+        fid = get(ref, 'w:id')
+        if ref.tag.endswith('}footnoteReference'):
+            store = self.footnotes
+        else:
+            store = self.endnotes
+        note = store.get(fid, None)
+        if note is None or note.type != 'normal':
+            return None, None
+        self.counter += 1
+        anchor = 'note_%d' % self.counter
+        label = type('')(self.counter)
+        self.notes[anchor] = (label, note)
+        return anchor, label
+
+    def __iter__(self):
+        # Yields (anchor, label, note) in the order the notes were referenced
+        for anchor, entry in self.notes.iteritems():
+            yield anchor, entry[0], entry[1]
+
+    @property
+    def has_notes(self):
+        return len(self.notes) > 0
+
--- a/src/calibre/ebooks/docx/images.py
+++ b/src/calibre/ebooks/docx/images.py
@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import os
+
+from lxml.html.builder import IMG
+
+from calibre.ebooks.docx.names import XPath, get, barename
+from calibre.utils.filenames import ascii_filename
+from calibre.utils.imghdr import what
+
+def emu_to_pt(x):
+    ' Convert the number x from EMU to pt by dividing by 12700 '
+    emu_per_pt = 12700
+    return x / emu_per_pt
+
+def get_image_properties(parent):
+    '''
+    Read the size and description of an image from the wp:extent and
+    wp:docPr children of parent. Returns (style, alt) where style is a dict
+    of CSS properties and alt is the description or None.
+    '''
+    dims = {'width': None, 'height': None}
+    for extent in XPath('./wp:extent')(parent):
+        for key, attr in (('width', 'cx'), ('height', 'cy')):
+            # A missing or malformed attribute leaves the dimension untouched
+            try:
+                dims[key] = emu_to_pt(int(extent.get(attr)))
+            except (TypeError, ValueError):
+                pass
+
+    ans = {}
+    for key in ('width', 'height'):
+        if dims[key] is not None:
+            ans[key] = '%.3gpt' % dims[key]
+
+    alt = None
+    for docPr in XPath('./wp:docPr')(parent):
+        alt = docPr.get('descr', None) or alt
+        if docPr.get('hidden', None) in {'true', 'on', '1'}:
+            ans['display'] = 'none'
+
+    return ans, alt
+
+
+def get_image_margins(elem):
+    '''
+    Return a dict of CSS padding properties built from the distL, distT,
+    distR and distB attributes of elem. Attributes that are missing or are
+    not integers are skipped.
+    '''
+    ans = {}
+    for w, css in {'L':'left', 'T':'top', 'R':'right', 'B':'bottom'}.iteritems():
+        val = elem.get('dist%s' % w, None)
+        if val is not None:
+            try:
+                # Attribute values are strings, they must be converted before
+                # the division in emu_to_pt or every value is silently dropped
+                val = emu_to_pt(int(val))
+            except (TypeError, ValueError):
+                continue
+            ans['padding-%s' % css] = '%.3gpt' % val
+    return ans
+
+def get_hpos(anchor, page_width):
+    '''
+    Return the horizontal position of anchor as a fraction of page_width
+    (0 is the left edge, 1 the right edge).
+
+    The wp:positionH children are consulted first (relativeFrom, then
+    wp:align, then wp:posOffset), followed by the x attribute of
+    wp:simplePos. If nothing usable is found 0 is returned.
+    '''
+    for ph in XPath('./wp:positionH')(anchor):
+        rp = ph.get('relativeFrom', None)
+        if rp == 'leftMargin':
+            return 0
+        if rp == 'rightMargin':
+            return 1
+        for align in XPath('./wp:align')(ph):
+            al = align.text
+            if al == 'left':
+                return 0
+            if al == 'center':
+                return 0.5
+            if al == 'right':
+                return 1
+        for po in XPath('./wp:posOffset')(ph):
+            try:
+                pos = emu_to_pt(int(po.text))
+            except (TypeError, ValueError):
+                continue
+            return pos/page_width
+
+    for sp in XPath('./wp:simplePos')(anchor):
+        try:
+            # The attribute is a string, without int() the division in
+            # emu_to_pt always fails and this fallback never produces a value
+            x = emu_to_pt(int(sp.get('x', None)))
+        except (TypeError, ValueError):
+            continue
+        return x/page_width
+
+    return 0
+
+
+class Images(object):
+
+    '''
+    Converts the pictures referenced from w:drawing elements into <img>
+    tags, writing the image data into an images sub-directory of the
+    destination directory.
+    '''
+
+    def __init__(self):
+        self.rid_map = {}  # relationship id -> target name, set by __call__
+        self.used = {}  # relationship id -> file name generated for it
+        self.names = set()
+        self.all_images = set()  # 'images/<file name>' of every file written
+
+    def __call__(self, relationships_by_id):
+        self.rid_map = relationships_by_id
+
+    def generate_filename(self, rid, base=None):
+        '''
+        Write the image with relationship id rid into self.dest_dir and
+        return the file name used. Repeated calls for the same rid return the
+        name generated the first time, without writing again.
+
+        base: preferred file name, defaults to the last component of the
+              relationship target
+        '''
+        if rid in self.used:
+            return self.used[rid]
+        raw = self.docx.read(self.rid_map[rid])
+        base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
+        ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
+        base = base.rpartition('.')[0] + '.' + ext
+        exists = frozenset(self.used.itervalues())
+        # Always derive the candidate from the unsuffixed name, otherwise
+        # the suffixes pile up (name-1-2.ext instead of name-2.ext)
+        n, e = base.rpartition('.')[0::2]
+        name = base
+        c = 1
+        while name in exists:
+            name = '%s-%d.%s' % (n, c, e)
+            c += 1
+        self.used[rid] = name
+        with open(os.path.join(self.dest_dir, name), 'wb') as f:
+            f.write(raw)
+        self.all_images.add('images/' + name)
+        return name
+
+    def pic_to_img(self, pic, alt=None):
+        '''
+        Return an <img> element for the first embedded blip of pic whose
+        relationship id is known, or None if there is none.
+
+        alt: alternate text to use when the picture has no description of
+             its own
+        '''
+        name = None
+        for pr in XPath('descendant::pic:cNvPr')(pic):
+            name = pr.get('name', None)
+            if name:
+                name = ascii_filename(name).replace(' ', '_')
+            # Keep the alt text supplied by the caller when the picture
+            # itself has no description
+            alt = pr.get('descr', None) or alt
+            for a in XPath('descendant::a:blip[@r:embed]')(pic):
+                rid = get(a, 'r:embed')
+                if rid in self.rid_map:
+                    src = self.generate_filename(rid, name)
+                    img = IMG(src='images/%s' % src)
+                    if alt:
+                        # Elements are not callable, attributes are set with set()
+                        img.set('alt', alt)
+                    return img
+
+    def drawing_to_html(self, drawing, page):
+        ' Yield an <img> element for every picture in the w:drawing element, inline pictures first '
+        # First process the inline pictures
+        for inline in XPath('./wp:inline')(drawing):
+            style, alt = get_image_properties(inline)
+            for pic in XPath('descendant::pic:pic')(inline):
+                ans = self.pic_to_img(pic, alt)
+                if ans is not None:
+                    if style:
+                        ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
+                    yield ans
+
+        # Now process the floats
+        for anchor in XPath('./wp:anchor')(drawing):
+            style, alt = get_image_properties(anchor)
+            self.get_float_properties(anchor, style, page)
+            for pic in XPath('descendant::pic:pic')(anchor):
+                ans = self.pic_to_img(pic, alt)
+                if ans is not None:
+                    if style:
+                        ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
+                    yield ans
+
+    def get_float_properties(self, anchor, style, page):
+        '''
+        Add the display, float/margin and padding properties of the
+        wp:anchor element to the style dict, in place.
+        '''
+        if 'display' not in style:
+            style['display'] = 'block'
+        padding = get_image_margins(anchor)
+        # style['width'] is a string of the form '<number>pt'
+        width = float(style.get('width', '100pt')[:-2])
+
+        page_width = page.width - page.margin_left - page.margin_right
+
+        # Position of the horizontal center of the image, as a fraction of
+        # the usable page width
+        hpos = get_hpos(anchor, page_width) + width/(2*page_width)
+
+        wrap_elem = None
+        dofloat = False
+
+        # The last wrap* child of the anchor decides the wrapping mode
+        for child in reversed(anchor):
+            bt = barename(child.tag)
+            if bt in {'wrapNone', 'wrapSquare', 'wrapThrough', 'wrapTight', 'wrapTopAndBottom'}:
+                wrap_elem = child
+                dofloat = bt not in {'wrapNone', 'wrapTopAndBottom'}
+                break
+
+        if wrap_elem is not None:
+            padding.update(get_image_margins(wrap_elem))
+            wt = wrap_elem.get('wrapText', None)
+            # Text wrapping only on the right puts the image at the left
+            # edge and vice versa
+            hpos = 0 if wt == 'right' else 1 if wt == 'left' else hpos
+            if dofloat:
+                style['float'] = 'left' if hpos < 0.65 else 'right'
+            else:
+                ml, mr = (None, None) if hpos < 0.34 else ('auto', None) if hpos > 0.65 else ('auto', 'auto')
+                if ml is not None:
+                    style['margin-left'] = ml
+                if mr is not None:
+                    style['margin-right'] = mr
+
+        style.update(padding)
+
+    def to_html(self, elem, page, docx, dest_dir):
+        '''
+        Yield the <img> elements for elem, creating the images directory
+        inside dest_dir if needed. Only elements whose tag ends with
+        }drawing are handled.
+        '''
+        dest = os.path.join(dest_dir, 'images')
+        if not os.path.exists(dest):
+            os.mkdir(dest)
+        self.dest_dir, self.docx = dest, docx
+        if elem.tag.endswith('}drawing'):
+            for tag in self.drawing_to_html(elem, page):
+                yield tag
+        # TODO: Handle w:pict
+
+
--- a/src/calibre/ebooks/docx/names.py
+++ b/src/calibre/ebooks/docx/names.py
@ -6,14 +6,23 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

+import re
+from future_builtins import map
+
 from lxml.etree import XPath as X

+from calibre.utils.filenames import ascii_text
+
 DOCUMENT  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
 DOCPROPS  = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
 APPPROPS  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 STYLES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
 FONTS     = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
+IMAGES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
+LINKS     = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
+FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
+ENDNOTES  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'

 namespaces = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
@ -65,7 +74,32 @@ def barename(x):
 def XML(x):
    return '{%s}%s' % (namespaces['xml'], x)

-def get(x, attr, default=None):
-    ns, name = attr.partition(':')[0::2]
-    return x.attrib.get('{%s}%s' % (namespaces[ns], name), default)
+def expand(name):
+    '''
+    Expand a prefixed name such as w:p into {namespace-uri}p form, looking
+    the prefix up in the namespaces map. A name without a prefix is returned
+    unchanged.
+    '''
+    ns, sep, tag = name.partition(':')
+    if not sep:
+        # No prefix: partition() puts the whole name into ns, which must not
+        # be looked up as a namespace prefix
+        return name
+    if ns:
+        tag = '{%s}%s' % (namespaces[ns], tag)
+    return tag

+def get(x, attr, default=None):
+    ' Return the value of the attribute with prefixed name attr on element x, or default '
+    key = expand(attr)
+    return x.attrib.get(key, default)
+
+def ancestor(elem, name):
+    ' Return the nearest ancestor of elem whose tag is the expanded form of name, or None '
+    wanted = expand(name)
+    node = elem.getparent() if elem is not None else None
+    while node is not None:
+        if getattr(node, 'tag', None) == wanted:
+            return node
+        node = node.getparent()
+    return None
+
+def generate_anchor(name, existing):
+    '''
+    Build an id of the form id_<name> from the ASCII letters, digits and
+    underscores of name. If it is already in existing, _1, _2, ... is
+    appended until an unused id is found.
+    '''
+    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
+    stem = 'id_' + cleaned.lstrip('_')
+    candidate, suffix = stem, 1
+    while candidate in existing:
+        candidate = '%s_%d' % (stem, suffix)
+        suffix += 1
+    return candidate
+
+def children(elem, *args):
+    ' Iterate over the children of elem whose tags match the prefixed names in args '
+    tags = map(expand, args)
+    return elem.iterchildren(*tags)
+
+def descendants(elem, *args):
+    ' Iterate over the descendants of elem whose tags match the prefixed names in args '
+    tags = map(expand, args)
+    return elem.iterdescendants(*tags)
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@ -13,6 +13,38 @@ from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
 from calibre.ebooks.docx.char_styles import RunStyle
 from calibre.ebooks.docx.names import XPath, get

+class PageProperties(object):
+
+    '''
+    Class representing page level properties (page size/margins) read from
+    sectPr elements.
+
+    All values are stored in pts (the attribute value divided by 20). When
+    several sectPr elements are passed, later ones override earlier ones.
+    Missing or non-integer attributes leave the current value untouched.
+    '''
+
+    def __init__(self, elems=()):
+        # Unpack the pair: a chained assignment would store the whole tuple
+        # in both width and height
+        self.width, self.height = 595.28, 841.89  # pts, A4
+        self.margin_left = self.margin_right = 72  # pts
+        for sectPr in elems:
+            for pgSz in XPath('./w:pgSz')(sectPr):
+                w, h = get(pgSz, 'w:w'), get(pgSz, 'w:h')
+                try:
+                    self.width = int(w)/20
+                except (ValueError, TypeError):
+                    pass
+                try:
+                    self.height = int(h)/20
+                except (ValueError, TypeError):
+                    pass
+            for pgMar in XPath('./w:pgMar')(sectPr):
+                l, r = get(pgMar, 'w:left'), get(pgMar, 'w:right')
+                try:
+                    self.margin_left = int(l)/20
+                except (ValueError, TypeError):
+                    pass
+                try:
+                    self.margin_right = int(r)/20
+                except (ValueError, TypeError):
+                    pass
+

 class Style(object):
    '''
@ -352,6 +384,19 @@ class Styles(object):
            p { text-indent: 1.5em }

            ul, ol, p { margin: 0; padding: 0 }
+
+            sup.noteref a { text-decoration: none }
+
+            h1.notes-header { page-break-before: always }
+
+            dl.notes dt { font-size: large }
+
+            dl.notes dt a { text-decoration: none }
+
+            dl.notes dd { page-break-after: always }
+
+            dl.notes dd:last-of-type { page-break-after: avoid }
+
            ''') % (self.body_font_family, self.body_font_size)
        if ef:
            prefix = ef + '\n' + prefix
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@ -7,17 +7,24 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 import sys, os, re
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict

 from lxml import html
 from lxml.html.builder import (
-    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
+    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)

 from calibre.ebooks.docx.container import DOCX, fromstring
-from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
-from calibre.ebooks.docx.styles import Styles, inherit
+from calibre.ebooks.docx.names import (
+    XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
+    descendants, ancestor, FOOTNOTES, ENDNOTES)
+from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
 from calibre.ebooks.docx.numbering import Numbering
 from calibre.ebooks.docx.fonts import Fonts
+from calibre.ebooks.docx.images import Images
+from calibre.ebooks.docx.footnotes import Footnotes
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1

 class Text:
@ -31,13 +38,15 @@ class Text:

 class Convert(object):

-    def __init__(self, path_or_stream, dest_dir=None, log=None):
+    def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
        self.docx = DOCX(path_or_stream, log=log)
        self.log = self.docx.log
+        self.notes_text = notes_text or _('Notes')
        self.dest_dir = dest_dir or os.getcwdu()
        self.mi = self.docx.metadata
        self.body = BODY()
        self.styles = Styles()
+        self.images = Images()
        self.object_map = OrderedDict()
        self.html = HTML(
            HEAD(
@ -64,12 +73,37 @@ class Convert(object):
        doc = self.docx.document
        relationships_by_id, relationships_by_type = self.docx.document_relationships
        self.read_styles(relationships_by_type)
+        self.images(relationships_by_id)
        self.layers = OrderedDict()
-        for wp in XPath('//w:p')(doc):
+        self.framed = [[]]
+        self.framed_map = {}
+        self.anchor_map = {}
+        self.link_map = defaultdict(list)
+
+        self.read_page_properties(doc)
+        for wp, page_properties in self.page_map.iteritems():
+            self.current_page = page_properties
            p = self.convert_p(wp)
            self.body.append(p)
+
+        notes_header = None
+        if self.footnotes.has_notes:
+            dl = DL()
+            dl.set('class', 'notes')
+            self.body.append(H1(self.notes_text))
+            notes_header = self.body[-1]
+            notes_header.set('class', 'notes-header')
+            self.body.append(dl)
+            for anchor, text, note in self.footnotes:
+                dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
+                dl[-1][0].tail = ']'
+                dl.append(DD())
+                for wp in note:
+                    p = self.convert_p(wp)
+                    dl[-1].append(p)
+
+        self.resolve_links(relationships_by_id)
        # TODO: tables <w:tbl> child of <w:body> (nested tables?)
-        # TODO: Last section properties <w:sectPr> child of <w:body>

        self.styles.cascade(self.layers)

@ -84,6 +118,7 @@ class Convert(object):
                    lvl = 0
                numbered.append((html_obj, num_id, lvl))
        self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
+        self.apply_frames()

        if len(self.body) > 0:
            self.body.text = '\n\t'
@ -100,7 +135,39 @@ class Convert(object):
                    cls = self.styles.class_name(css)
                    if cls:
                        html_obj.set('class', cls)
-        self.write()
+        for html_obj, css in self.framed_map.iteritems():
+            cls = self.styles.class_name(css)
+            if cls:
+                html_obj.set('class', cls)
+
+        if notes_header is not None:
+            for h in self.body.iterchildren('h1', 'h2', 'h3'):
+                notes_header.tag = h.tag
+                cls = h.get('class', None)
+                if cls and cls != 'notes-header':
+                    notes_header.set('class', '%s notes-header' % cls)
+                break
+
+        return self.write()
+
+    def read_page_properties(self, doc):
+        current = []
+        self.page_map = OrderedDict()
+
+        for p in descendants(doc, 'w:p'):
+            sect = tuple(descendants(p, 'w:sectPr'))
+            if sect:
+                pr = PageProperties(sect)
+                for x in current + [p]:
+                    self.page_map[x] = pr
+                current = []
+            else:
+                current.append(p)
+        if current:
+            last = XPath('./w:body/w:sectPr')(doc)
+            pr = PageProperties(last)
+            for x in current:
+                self.page_map[x] = pr

    def read_styles(self, relationships_by_type):

@ -109,16 +176,32 @@ class Convert(object):
            if name is None:
                cname = self.docx.document_name.split('/')
                cname[-1] = defname
-                if self.docx.exists(cname):
+                if self.docx.exists('/'.join(cname)):
                    name = name
            return name

        nname = get_name(NUMBERING, 'numbering.xml')
        sname = get_name(STYLES, 'styles.xml')
        fname = get_name(FONTS, 'fontTable.xml')
+        foname = get_name(FOOTNOTES, 'footnotes.xml')
+        enname = get_name(ENDNOTES, 'endnotes.xml')
        numbering = self.numbering = Numbering()
+        footnotes = self.footnotes = Footnotes()
        fonts = self.fonts = Fonts()

+        foraw = enraw = None
+        if foname is not None:
+            try:
+                foraw = self.docx.read(foname)
+            except KeyError:
+                self.log.warn('Footnotes %s do not exist' % foname)
+        if enname is not None:
+            try:
+                enraw = self.docx.read(enname)
+            except KeyError:
+                self.log.warn('Endnotes %s do not exist' % enname)
+        footnotes(fromstring(foraw) if foraw else None, fromstring(enraw) if enraw else None)
+
        if fname is not None:
            embed_relationships = self.docx.get_relationships(fname)[0]
            try:
@ -146,7 +229,48 @@ class Convert(object):

        self.styles.resolve_numbering(numbering)

+    def create_toc(self):
+        ' Create a TOC from headings in the document '
+        root = self.body
+        headings = ('h1', 'h2', 'h3')
+        tocroot = TOC()
+        xpaths = [XPath('//%s' % x) for x in headings]
+        level_prev = {i+1:None for i in xrange(len(xpaths))}
+        level_prev[0] = tocroot
+        level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
+        item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
+
+        self.idcount = 0
+
+        def ensure_id(elem):
+            ans = elem.get('id', None)
+            if not ans:
+                self.idcount += 1
+                ans = 'toc_id_%d' % self.idcount
+                elem.set('id', ans)
+            return ans
+
+        for item in root.iterdescendants(*headings):
+            lvl = plvl = item_level_map.get(item, None)
+            if lvl is None:
+                continue
+            parent = None
+            while parent is None:
+                plvl -= 1
+                parent = level_prev[plvl]
+            lvl = plvl + 1
+            elem_id = ensure_id(item)
+            text = elem_to_toc_text(item)
+            toc = parent.add_item('index.html', elem_id, text)
+            level_prev[lvl] = toc
+            for i in xrange(lvl+1, len(xpaths)+1):
+                level_prev[i] = None
+
+        if len(tuple(tocroot.flat())) > 1:
+            return tocroot
+
    def write(self):
+        toc = self.create_toc()
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
@ -155,19 +279,48 @@ class Convert(object):
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

+        opf = OPFCreator(self.dest_dir, self.mi)
+        opf.toc = toc
+        opf.create_manifest_from_files_in([self.dest_dir])
+        opf.create_spine(['index.html'])
+        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
+            opf.render(of, ncx, 'toc.ncx')
+        return os.path.join(self.dest_dir, 'metadata.opf')
+
    def convert_p(self, p):
        dest = P()
        self.object_map[dest] = p
        style = self.styles.resolve_paragraph(p)
        self.layers[p] = []
-        for run in XPath('descendant::w:r')(p):
-            span = self.convert_run(run)
-            dest.append(span)
-            self.layers[p].append(run)
+        self.add_frame(dest, style.frame)
+
+        current_anchor = None
+        current_hyperlink = None
+
+        for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
+            if x.tag.endswith('}r'):
+                span = self.convert_run(x)
+                if current_anchor is not None:
+                    (dest if len(dest) == 0 else span).set('id', current_anchor)
+                    current_anchor = None
+                if current_hyperlink is not None:
+                    hl = ancestor(x, 'w:hyperlink')
+                    if hl is not None:
+                        self.link_map[hl].append(span)
+                    else:
+                        current_hyperlink = None
+                dest.append(span)
+                self.layers[p].append(x)
+            elif x.tag.endswith('}bookmarkStart'):
+                anchor = get(x, 'w:name')
+                if anchor and anchor not in self.anchor_map:
+                    self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues()))
+            elif x.tag.endswith('}hyperlink'):
+                current_hyperlink = x

        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
        if m is not None:
-            n = min(1, max(6, int(m.group(1))))
+            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n

        if style.direction == 'rtl':
@ -208,6 +361,31 @@ class Convert(object):
        for elem in elems:
            p.remove(elem)
            wrapper.append(elem)
+        return wrapper
+
+    def resolve_links(self, relationships_by_id):
+        for hyperlink, spans in self.link_map.iteritems():
+            span = spans[0]
+            if len(spans) > 1:
+                span = self.wrap_elems(spans, SPAN())
+            span.tag = 'a'
+            tgt = get(hyperlink, 'w:tgtFrame')
+            if tgt:
+                span.set('target', tgt)
+            tt = get(hyperlink, 'w:tooltip')
+            if tt:
+                span.set('title', tt)
+            rid = get(hyperlink, 'r:id')
+            if rid and rid in relationships_by_id:
+                span.set('href', relationships_by_id[rid])
+                continue
+            anchor = get(hyperlink, 'w:anchor')
+            if anchor and anchor in self.anchor_map:
+                span.set('href', '#' + self.anchor_map[anchor])
+                continue
+            self.log.warn('Hyperlink with unknown target (%s, %s), ignoring' %
+                          (rid, anchor))
+            span.set('href', '#')

    def convert_run(self, run):
        ans = SPAN()
@ -239,6 +417,17 @@ class Convert(object):
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
+            elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
+                for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
+                    text.add_elem(img)
+                    ans.append(text.elem)
+            elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
+                anchor, name = self.footnotes.get_ref(child)
+                if anchor and name:
+                    l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
+                    l.set('class', 'noteref')
+                    text.add_elem(l)
+                    ans.append(text.elem)
        if text.buf:
            setattr(text.elem, text.attr, ''.join(text.buf))

@ -249,7 +438,39 @@ class Convert(object):
            ans.lang = style.lang
        return ans

+    def add_frame(self, html_obj, style):
+        last_run = self.framed[-1]
+        if style is inherit:
+            if last_run:
+                self.framed.append([])
+            return
+
+        if last_run:
+            if last_run[-1][1] == style:
+                last_run.append((html_obj, style))
+            else:
+                self.framed.append([(html_obj, style)])
+        else:
+            last_run.append((html_obj, style))
+
+    def apply_frames(self):
+        for run in filter(None, self.framed):
+            style = run[0][1]
+            paras = tuple(x[0] for x in run)
+            parent = paras[0].getparent()
+            idx = parent.index(paras[0])
+            frame = DIV(*paras)
+            parent.insert(idx, frame)
+            self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
+            self.styles.register(css, 'frame')
+
 if __name__ == '__main__':
+    import shutil
    from calibre.utils.logging import default_log
    default_log.filter_level = default_log.DEBUG
-    Convert(sys.argv[-1], log=default_log)()
+    dest_dir = os.path.join(os.getcwdu(), 'docx_input')
+    if os.path.exists(dest_dir):
+        shutil.rmtree(dest_dir)
+    os.mkdir(dest_dir)
+    Convert(sys.argv[-1], dest_dir=dest_dir, log=default_log)()
+
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -179,7 +179,7 @@ class Metadata(object):

    def deepcopy(self):
        ''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
-        this object, use :method:`deepcopy_metadata` instead. '''
+        this object, use :meth:`deepcopy_metadata` instead. '''
        m = Metadata(None)
        m.__dict__ = copy.deepcopy(self.__dict__)
        object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import parse_date, isoformat
 from calibre.utils.localization import get_lang, canonicalize_lang
 from calibre import prints, guess_type
-from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from calibre.utils.config import tweaks

 class Resource(object):  # {{{
@ -1436,7 +1436,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
            attrib['name'] = name
        if content:
            attrib['content'] = content
-        elem = metadata.makeelement(tag, attrib=attrib)
+        try:
+            elem = metadata.makeelement(tag, attrib=attrib)
+        except ValueError:
+            elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.iteritems()})
        elem.tail = '\n'+(' '*8)
        if text:
            try:
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@ -100,7 +100,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
    mr = mobi8_reader
    flows = []

-    img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
+    img_pattern = re.compile(r'''(<[img\s|image\s|svg:image\s][^>]*>)''', re.IGNORECASE)
    img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''', re.IGNORECASE)

    tag_pattern = re.compile(r'''(<[^>]*>)''')
@ -128,7 +128,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
        srcpieces = img_pattern.split(flow)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
-            if tag.startswith('<im'):
+            if tag.startswith('<im') or tag.startswith('<svg:image'):
                for m in img_index_pattern.finditer(tag):
                    num = int(m.group(1), 32)
                    href = resource_map[num-1]
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@ -228,7 +228,7 @@ class Mobi8Reader(object):

        self.flowinfo.append(FlowInfo(None, None, None, None))
        svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE)
-        image_tag_pattern = re.compile(br'''(<image[^>]*>)''', re.IGNORECASE)
+        image_tag_pattern = re.compile(br'''(<(?:svg:)?image[^>]*>)''', re.IGNORECASE)
        for j in xrange(1, len(self.flows)):
            flowpart = self.flows[j]
            nstr = '%04d' % j
@ -243,7 +243,7 @@ class Mobi8Reader(object):
                    dir = None
                    fname = None
                    # strip off anything before <svg if inlining
-                    flowpart = flowpart[start:]
+                    flowpart = re.sub(br'(</?)svg:', r'\1', flowpart[start:])
                else:
                    format = 'file'
                    dir = "images"
--- a/src/calibre/ebooks/oeb/display/webview.py
+++ b/src/calibre/ebooks/oeb/display/webview.py
@ -11,7 +11,7 @@ import re

 from calibre import guess_type

-class EntityDeclarationProcessor(object): # {{{
+class EntityDeclarationProcessor(object):  # {{{

    def __init__(self, html):
        self.declared_entities = {}
@ -51,7 +51,7 @@ def load_html(path, view, codec='utf-8', mime_type=None,
    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

-    if force_as_html or re.search(r'<[:a-zA-Z0-9-]*svg', html) is None:
+    if force_as_html or re.search(r'<[a-zA-Z0-9-]+:svg', html) is None:
        view.setHtml(html, loading_url)
    else:
        view.setContent(QByteArray(html.encode(codec)), mime_type,
@ -61,4 +61,3 @@ def load_html(path, view, codec='utf-8', mime_type=None,
        if not elem.isNull():
            return False
    return True
-
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -32,7 +32,8 @@ def dynamic_rescale_factor(node):
    classes = node.get('class', '').split(' ')
    classes = [x.replace('calibre_rescale_', '') for x in classes if
            x.startswith('calibre_rescale_')]
-    if not classes: return None
+    if not classes:
+        return None
    factor = 1.0
    for x in classes:
        try:
@ -54,7 +55,8 @@ class KeyMapper(object):
            return base
        size = float(size)
        base = float(base)
-        if abs(size - base) < 0.1: return 0
+        if abs(size - base) < 0.1:
+            return 0
        sign = -1 if size < base else 1
        endp = 0 if size < base else 36
        diff = (abs(base - size) * 3) + ((36 - size) / 100)
@ -110,7 +112,8 @@ class EmbedFontsCSSRules(object):
        self.href = None

    def __call__(self, oeb):
-        if not self.body_font_family: return None
+        if not self.body_font_family:
+            return None
        if not self.href:
            iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
            rules = [x.cssText for x in self.rules]
@ -228,10 +231,10 @@ class CSSFlattener(object):
            bs.append('margin-top: 0pt')
            bs.append('margin-bottom: 0pt')
            if float(self.context.margin_left) >= 0:
-                bs.append('margin-left : %gpt'%\
+                bs.append('margin-left : %gpt'%
                        float(self.context.margin_left))
            if float(self.context.margin_right) >= 0:
-                bs.append('margin-right : %gpt'%\
+                bs.append('margin-right : %gpt'%
                        float(self.context.margin_right))
            bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
            if self.page_break_on_body:
@ -277,8 +280,10 @@ class CSSFlattener(object):
        for kind in ('margin', 'padding'):
            for edge in ('bottom', 'top'):
                property = "%s-%s" % (kind, edge)
-                if property not in cssdict: continue
-                if '%' in cssdict[property]: continue
+                if property not in cssdict:
+                    continue
+                if '%' in cssdict[property]:
+                    continue
                value = style[property]
                if value == 0:
                    continue
@ -296,7 +301,7 @@ class CSSFlattener(object):
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
-               return
+            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
@ -360,12 +365,17 @@ class CSSFlattener(object):
                pass
            del node.attrib['bgcolor']
        if cssdict.get('font-weight', '').lower() == 'medium':
-            cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium
+            cssdict['font-weight'] = 'normal'  # ADE chokes on font-weight medium

        fsize = font_size
        is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in
                       cssdict and len(node) == 0 and node.text and
                       len(node.text) == 1)
+        is_drop_cap = is_drop_cap or (
+            # The docx input plugin generates drop caps that look like this
+            len(node) == 1 and not node.text and len(node[0]) == 0 and
+            node[0].text and not node[0].tail and len(node[0].text) == 1 and
+            'line-height' in cssdict and 'font-size' in cssdict)
        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
@ -436,8 +446,7 @@ class CSSFlattener(object):
            keep_classes = set()

            if cssdict:
-                items = cssdict.items()
-                items.sort()
+                items = sorted(cssdict.items())
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                classes = node.get('class', '').strip() or 'calibre'
                klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', '')))
@ -519,8 +528,7 @@ class CSSFlattener(object):
            if float(self.context.margin_bottom) >= 0:
                stylizer.page_rule['margin-bottom'] = '%gpt'%\
                        float(self.context.margin_bottom)
-            items = stylizer.page_rule.items()
-            items.sort()
+            items = sorted(stylizer.page_rule.items())
            css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
            css = ('@page {\n%s\n}\n'%css) if items else ''
            rules = [r.cssText for r in stylizer.font_face_rules +
@ -556,14 +564,14 @@ class CSSFlattener(object):
            body = html.find(XHTML('body'))
            fsize = self.context.dest.fbase
            self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id)
-        items = [(key, val) for (val, key) in styles.items()]
-        items.sort()
+        items = sorted([(key, val) for (val, key) in styles.items()])
        # :hover must come after link and :active must come after :hover
        psels = sorted(pseudo_styles.iterkeys(), key=lambda x :
                {'hover':1, 'active':2}.get(x, 0))
        for psel in psels:
            styles = pseudo_styles[psel]
-            if not styles: continue
+            if not styles:
+                continue
            x = sorted(((k+':'+psel, v) for v, k in styles.iteritems()))
            items.extend(x)

--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -375,6 +375,8 @@ class FlowSplitter(object):
        for img in root.xpath('//h:img', namespaces=NAMESPACES):
            if img.get('style', '') != 'display:none':
                return False
+        if root.xpath('//*[local-name() = "svg"]'):
+            return False
        return True

    def split_text(self, text, root, size):
--- a/src/calibre/gui2/actions/choose_library.py
+++ b/src/calibre/gui2/actions/choose_library.py
@ -22,7 +22,7 @@ from calibre.gui2 import (gprefs, warning_dialog, Dispatcher, error_dialog,
 from calibre.library.database2 import LibraryDatabase2
 from calibre.gui2.actions import InterfaceAction

-class LibraryUsageStats(object): # {{{
+class LibraryUsageStats(object):  # {{{

    def __init__(self):
        self.stats = {}
@ -92,7 +92,7 @@ class LibraryUsageStats(object): # {{{
        self.write_stats()
 # }}}

-class MovedDialog(QDialog): # {{{
+class MovedDialog(QDialog):  # {{{

    def __init__(self, stats, location, parent=None):
        QDialog.__init__(self, parent)
@ -161,13 +161,15 @@ class ChooseLibraryAction(InterfaceAction):
    def genesis(self):
        self.base_text = _('%d books')
        self.count_changed(0)
-        self.qaction.triggered.connect(self.choose_library,
-                type=Qt.QueuedConnection)
        self.action_choose = self.menuless_qaction

        self.stats = LibraryUsageStats()
        self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else
                QToolButton.MenuButtonPopup)
+        if len(self.stats.stats) > 1:
+            self.action_choose.triggered.connect(self.choose_library)
+        else:
+            self.qaction.triggered.connect(self.choose_library)

        self.choose_menu = self.qaction.menu()

@ -200,7 +202,6 @@ class ChooseLibraryAction(InterfaceAction):
                    type=Qt.QueuedConnection)
            self.choose_menu.addAction(ac)

-
        self.rename_separator = self.choose_menu.addSeparator()

        self.maintenance_menu = QMenu(_('Library Maintenance'))
@ -477,19 +478,20 @@ class ChooseLibraryAction(InterfaceAction):
            else:
                return

-        #from calibre.utils.mem import memory
-        #import weakref
-        #from PyQt4.Qt import QTimer
-        #self.dbref = weakref.ref(self.gui.library_view.model().db)
-        #self.before_mem = memory()/1024**2
+        # from calibre.utils.mem import memory
+        # import weakref
+        # from PyQt4.Qt import QTimer
+        # self.dbref = weakref.ref(self.gui.library_view.model().db)
+        # self.before_mem = memory()/1024**2
        self.gui.library_moved(loc, allow_rebuild=True)
-        #QTimer.singleShot(5000, self.debug_leak)
+        # QTimer.singleShot(5000, self.debug_leak)

    def debug_leak(self):
        import gc
        from calibre.utils.mem import memory
        ref = self.dbref
-        for i in xrange(3): gc.collect()
+        for i in xrange(3):
+            gc.collect()
        if ref() is not None:
            print 'DB object alive:', ref()
            for r in gc.get_referrers(ref())[:10]:
@ -500,7 +502,6 @@ class ChooseLibraryAction(InterfaceAction):
        print
        self.dbref = self.before_mem = None

-
    def qs_requested(self, idx, *args):
        self.switch_requested(self.qs_locations[idx])

@ -546,3 +547,4 @@ class ChooseLibraryAction(InterfaceAction):
            return False

        return True
+
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -907,7 +907,7 @@ class BooksModel(QAbstractTableModel):  # {{{
                if ht == 'timestamp':  # change help text because users know this field as 'date'
                    ht = 'date'
                if self.db.field_metadata[self.column_map[section]]['is_category']:
-                    is_cat = '.\n\n' + _('Click in this column and press Q to to Quickview books with the same %s' % ht)
+                    is_cat = '.\n\n' + _('Click in this column and press Q to Quickview books with the same %s') % ht
                else:
                    is_cat = ''
                return QVariant(_('The lookup/search name is "{0}"{1}').format(ht, is_cat))
@ -1029,7 +1029,7 @@ class BooksModel(QAbstractTableModel):  # {{{
                return False
            val = (int(value.toInt()[0]) if column == 'rating' else
                    value.toDateTime() if column in ('timestamp', 'pubdate')
-                    else unicode(value.toString()).strip())
+                    else re.sub(ur'\s', u' ', unicode(value.toString()).strip()))
            id = self.db.id(row)
            books_to_refresh = set([id])
            if column == 'rating':
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@ -45,6 +45,9 @@ def save_dialog(parent, title, msg, det_msg=''):
    d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
    return d.exec_()

+def clean_text(x):
+    return re.sub(r'\s', ' ', x.strip())
+
 '''
 The interface common to all widgets used to set basic metadata
 class BasicMetadataWidget(object):
@ -117,7 +120,7 @@ class TitleEdit(EnLineEdit):
    def current_val(self):

        def fget(self):
-            title = unicode(self.text()).strip()
+            title = clean_text(unicode(self.text()))
            if not title:
                title = self.get_default()
            return title
@ -289,7 +292,7 @@ class AuthorsEdit(EditWithComplete):
    def current_val(self):

        def fget(self):
-            au = unicode(self.text()).strip()
+            au = clean_text(unicode(self.text()))
            if not au:
                au = self.get_default()
            return string_to_authors(au)
@ -352,7 +355,7 @@ class AuthorSortEdit(EnLineEdit):
    def current_val(self):

        def fget(self):
-            return unicode(self.text()).strip()
+            return clean_text(unicode(self.text()))

        def fset(self, val):
            if not val:
@ -472,7 +475,7 @@ class SeriesEdit(EditWithComplete):
    def current_val(self):

        def fget(self):
-            return unicode(self.currentText()).strip()
+            return clean_text(unicode(self.currentText()))

        def fset(self, val):
            if not val:
@ -1135,7 +1138,7 @@ class TagsEdit(EditWithComplete):  # {{{
    @dynamic_property
    def current_val(self):
        def fget(self):
-            return [x.strip() for x in unicode(self.text()).split(',')]
+            return [clean_text(x) for x in unicode(self.text()).split(',')]
        def fset(self, val):
            if not val:
                val = []
@ -1237,7 +1240,7 @@ class IdentifiersEdit(QLineEdit):  # {{{
    def current_val(self):
        def fget(self):
            raw = unicode(self.text()).strip()
-            parts = [x.strip() for x in raw.split(',')]
+            parts = [clean_text(x) for x in raw.split(',')]
            ans = {}
            for x in parts:
                c = x.split(':')
@ -1376,7 +1379,7 @@ class PublisherEdit(EditWithComplete):  # {{{
    def current_val(self):

        def fget(self):
-            return unicode(self.currentText()).strip()
+            return clean_text(unicode(self.currentText()))

        def fset(self, val):
            if not val:
--- a/src/calibre/gui2/search_restriction_mixin.py
+++ b/src/calibre/gui2/search_restriction_mixin.py
@ -146,8 +146,12 @@ class CreateVirtualLibrary(QDialog):  # {{{

            <p>For example you can use a Virtual Library to only show you books with the Tag <i>"Unread"</i>
            or only books by <i>"My Favorite Author"</i> or only books in a particular series.</p>
+
+            <p>More information and examples are available in the
+            <a href="http://manual.calibre-ebook.com/virtual_libraries.html">User Manual</a>.</p>
            '''))
        hl.setWordWrap(True)
+        hl.setOpenExternalLinks(True)
        hl.setFrameStyle(hl.StyledPanel)
        gl.addWidget(hl, 0, 3, 4, 1)

--- a/src/calibre/gui2/viewer/javascript.py
+++ b/src/calibre/gui2/viewer/javascript.py
@ -41,7 +41,6 @@ class JavaScriptLoader(object):
            'hyphenation', 'hyphenator', 'utils', 'cfi', 'indexing', 'paged',
            'fs', 'math', 'extract')

-
    def __init__(self, dynamic_coffeescript=False):
        self._dynamic_coffeescript = dynamic_coffeescript
        if self._dynamic_coffeescript:
@ -68,7 +67,8 @@ class JavaScriptLoader(object):
                        allow_user_override=False).decode('utf-8')
            else:
                dynamic = (self._dynamic_coffeescript and
-                        os.path.exists(calibre.__file__))
+                           calibre.__file__ and not calibre.__file__.endswith('.pyo') and
+                           os.path.exists(calibre.__file__))
                ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8')
            self._cache[name] = ans

@ -105,4 +105,3 @@ class JavaScriptLoader(object):
        evaljs('\n\n'.join(self._hp_cache.itervalues()))

        return lang
-
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/ber.po
+++ b/src/calibre/translations/ber.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/cy.po
+++ b/src/calibre/translations/cy.po
--- a/src/calibre/translations/da.po
+++ b/src/calibre/translations/da.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/en_AU.po
+++ b/src/calibre/translations/en_AU.po
--- a/src/calibre/translations/en_CA.po
+++ b/src/calibre/translations/en_CA.po
--- a/src/calibre/translations/en_GB.po
+++ b/src/calibre/translations/en_GB.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/et.po
+++ b/src/calibre/translations/et.po
--- a/src/calibre/translations/eu.po
+++ b/src/calibre/translations/eu.po
--- a/src/calibre/translations/fa.po
+++ b/src/calibre/translations/fa.po
--- a/src/calibre/translations/fi.po
+++ b/src/calibre/translations/fi.po
--- a/src/calibre/translations/fo.po
+++ b/src/calibre/translations/fo.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/fr_CA.po
+++ b/src/calibre/translations/fr_CA.po
--- a/src/calibre/translations/fur.po
+++ b/src/calibre/translations/fur.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/gu.po
+++ b/src/calibre/translations/gu.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hi.po
+++ b/src/calibre/translations/hi.po
--- a/src/calibre/translations/him.po
+++ b/src/calibre/translations/him.po
--- a/src/calibre/translations/hr.po
+++ b/src/calibre/translations/hr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/id.po
+++ b/src/calibre/translations/id.po
--- a/src/calibre/translations/is.po
+++ b/src/calibre/translations/is.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/ja.po
+++ b/src/calibre/translations/ja.po
--- a/src/calibre/translations/jv.po
+++ b/src/calibre/translations/jv.po
--- a/src/calibre/translations/ka.po
+++ b/src/calibre/translations/ka.po
--- a/src/calibre/translations/kn.po
+++ b/src/calibre/translations/kn.po
--- a/src/calibre/translations/ko.po
+++ b/src/calibre/translations/ko.po
--- a/src/calibre/translations/ku.po
+++ b/src/calibre/translations/ku.po
--- a/src/calibre/translations/lt.po
+++ b/src/calibre/translations/lt.po
--- a/src/calibre/translations/ltg.po
+++ b/src/calibre/translations/ltg.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/mk.po
+++ b/src/calibre/translations/mk.po
--- a/src/calibre/translations/ml.po
+++ b/src/calibre/translations/ml.po
--- a/src/calibre/translations/mr.po
+++ b/src/calibre/translations/mr.po
--- a/src/calibre/translations/ms.po
+++ b/src/calibre/translations/ms.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/nn.po
+++ b/src/calibre/translations/nn.po
--- a/src/calibre/translations/oc.po
+++ b/src/calibre/translations/oc.po
--- a/src/calibre/translations/pa.po
+++ b/src/calibre/translations/pa.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/Show More
+++ b/Show More