Merge from trunk

2025-10-23 14:58:54 -04:00 · 2012-03-14 15:36:49 +01:00 · 2012-03-14 15:36:49 +01:00 · 6dd34d73e4
commit 6dd34d73e4
parent 06904f92a5 a8d46f2f40
171 changed files with 80642 additions and 39138 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,84 @@
 #   new recipes:
 #     - title: 

+- version: 0.8.42
+  date: 2012-03-12
+
+  new features:
+    - title: "Support for reading Amazon's new KF8 format"
+      type: major
+      description: "calibre can now both view and convert MOBI files that contain Amazon's new KF8 (Kindle Fire) format"
+
+    - title: "Add a tweak to Preferences->Tweaks to control the font size used in the book details panel"
+      tickets: [948357] 
+
+    - title: "Allow specifying a list of file types to exclude when automatically adding files from a folder"
+      tickets: [943025]
+
+    - title: "Show ratings in the book details panel as stars. Also allow the user to change the alignment of the ratings column in the main books list. No longer display the stars in blue, instead their color can be customized via the column coloring rules, like any other column"
+
+    - title: "When setting metadata in EPUB ensure that the <meta name=cover> tag has its name attribute first. Needed for the Nook."
+
+    - title: "Drivers for Novo 7, LG G2x and Zenithink T-280"
+      tickets: [941671, 940625, 940527]
+
+    - title: "Update linux binaries to Qt 4.8.0"
+    
+  bug fixes:
+    - title: "Fix some rar files causing crashes on OS X (updated libunrar.dylib in the OS X build)"
+      tickets: [951185]
+
+    - title: "MOBI Output: Ignore the Table of Contents pointed to by the guide, if it contains no links"
+
+    - title: "ODT Input: Ignore margin declaration in ODT styles if more specific margin-* declarations are present"
+      tickets: [941134]
+
+    - title: "Conversion pipeline: Fix @import rules in CSS stylesheets that have comments on their first few lines being ignored."
+
+    - title: "EPUB Input: When extracting the contents of epub files on windows, do not error out if one or more of the components in the epub file have filepaths containing characters that are invalid for the windows filesystem, instead, just replace those characters, since those entries are likely to be errors in the zip container anyway."
+      tickets: [950081]
+
+    - title: "Textile output: Fix issue with blockquotes and sentences getting removed."
+
+    - title: "MOBI Output: When using the prefer author sort conversion option, handle multiple authors better."
+      tickets: [947146]
+
+    - title: "Fix regression in 0.8.41 that broke direct connection to iDevices in windows"
+      tickets: [944534]
+
+    - title: "Fix the download bulk metadata completed popup causing a crash if the Esc key is pressed."
+      tickets: [943056]
+
+    - title: "Fix rating values doubled in CSV/XML catalogs"
+      tickets: [942790]
+
+    - title: "EPUB Input: Remove non markup documents from the spine automatically, instead of erroring out"
+
+    - title: "When formatting ratings in templates, etc., do not have an unnecessary .0"
+
+    - title: "Calibre portable: Do not allow calibre portable to run if it is placed in a location whose path is too long. Also hide the library location setup in the welcome wizard when running the portable build."
+
+    - title: "Fix regression in 0.8.41 that broke calibre if the TMP or TEMP environment variable is set to the root of a drive."
+      tickets: [952284]
+
+    - title: "Fix display of ratings type custom fields in the content server"
+      tickets: [940600]
+
+
+  improved recipes:
+    - La Jornada
+    - Chicago Tribune
+    - Mediapart 
+    - rue89
+
+  new recipes:
+    - title: Racjonalista 
+      author: Racjonalista
+
+    - title: JAAPA
+      author: adoucette
+
+
 - version: 0.8.41
  date: 2012-02-24

--- a/imgsrc/calibreSymbols.spd
+++ b/imgsrc/calibreSymbols.spd
@ -0,0 +1,152 @@
+SplineFontDB: 3.0
+FontName: calibreSymbols
+FullName: calibre Symbols
+FamilyName: calibre Symbols
+Weight: Medium
+Copyright: Created by Kovid Goyal with FontForge 2.0 (http://fontforge.sf.net)
+UComments: "2012-2-27: Created." 
+Version: 001.000
+ItalicAngle: 0
+UnderlinePosition: -100
+UnderlineWidth: 50
+Ascent: 800
+Descent: 200
+LayerCount: 2
+Layer: 0 0 "Back"  1
+Layer: 1 0 "Fore"  0
+NeedsXUIDChange: 1
+XUID: [1021 913 325894820 11538708]
+FSType: 0
+OS2Version: 0
+OS2_WeightWidthSlopeOnly: 0
+OS2_UseTypoMetrics: 1
+CreationTime: 1330331997
+ModificationTime: 1330487767
+OS2TypoAscent: 0
+OS2TypoAOffset: 1
+OS2TypoDescent: 0
+OS2TypoDOffset: 1
+OS2TypoLinegap: 90
+OS2WinAscent: 0
+OS2WinAOffset: 1
+OS2WinDescent: 0
+OS2WinDOffset: 1
+HheadAscent: 0
+HheadAOffset: 1
+HheadDescent: 0
+HheadDOffset: 1
+MarkAttachClasses: 1
+DEI: 91125
+Encoding: UnicodeFull
+UnicodeInterp: none
+NameList: Adobe Glyph List
+DisplaySize: -24
+AntiAlias: 1
+FitToEm: 1
+WidthSeparation: 150
+WinInfo: 9600 75 22
+BeginPrivate: 0
+EndPrivate
+BeginChars: 1114112 3
+
+StartChar: uni2605
+Encoding: 9733 9733 0
+Width: 979
+VWidth: -26
+Flags: W
+LayerCount: 2
+Fore
+SplineSet
+551.923 352.862 m 1
+ 749.497 369.592 l 2
+ 804.954 374.123 833.379 376.389 834.765 376.389 c 0
+ 852.095 376.389 860.761 368.896 860.761 353.907 c 0
+ 860.761 347.981 859.028 343.363 855.562 340.052 c 0
+ 852.095 336.74 825.578 319.225 776.012 287.506 c 2
+ 609.635 180.323 l 1
+ 716.22 -88.417 l 2
+ 717.606 -91.2051 718.301 -95.3877 718.301 -100.965 c 0
+ 718.301 -106.193 716.394 -110.725 712.58 -114.558 c 0
+ 708.769 -118.393 704.608 -120.31 700.104 -120.31 c 0
+ 695.943 -120.31 691.61 -118.828 687.103 -115.866 c 0
+ 682.598 -112.902 658.162 -92.251 613.795 -53.9082 c 2
+ 466.134 74.71 l 1
+ 320.554 -51.8184 l 2
+ 274.802 -91.5547 249.758 -112.902 245.426 -115.866 c 0
+ 241.092 -118.828 236.846 -120.31 232.688 -120.31 c 0
+ 227.835 -120.31 223.415 -118.306 219.429 -114.297 c 0
+ 215.442 -110.289 213.449 -105.844 213.449 -100.965 c 0
+ 213.449 -97.8281 223.329 -71.3379 243.087 -21.4932 c 2
+ 322.115 180.323 l 1
+ 152.618 289.598 l 2
+ 104.783 320.271 79.2217 337.176 75.9297 340.313 c 0
+ 72.6357 343.45 70.9893 347.981 70.9893 353.907 c 0
+ 70.9893 369.243 79.8291 376.912 97.5059 376.912 c 0
+ 98.8926 376.912 123.155 374.82 170.296 370.638 c 2
+ 379.825 352.862 l 1
+ 427.14 555.201 l 2
+ 439.271 607.834 446.811 636.764 449.757 641.992 c 0
+ 452.702 647.221 458.162 649.834 466.134 649.834 c 0
+ 474.454 649.834 480 646.96 482.772 641.208 c 0
+ 485.545 635.457 493.518 604.173 506.689 547.357 c 2
+ 551.923 352.862 l 1
+EndSplineSet
+Validated: 524289
+EndChar
+
+StartChar: zero
+Encoding: 48 48 1
+Width: 1303
+VWidth: 2048
+Flags: W
+HStem: -43.3789 76.7998<582.097 721.09> 623.341 76.7998<582.097 721.091>
+VStem: 403.82 97.4395<148.044 508.66> 802.221 96.959<148.044 508.659>
+LayerCount: 2
+Fore
+SplineSet
+651.5 623.341 m 0
+ 601.58 623.341 564.061 598.78 538.939 549.66 c 0
+ 513.82 500.541 501.26 426.7 501.26 328.141 c 0
+ 501.26 229.9 513.82 156.221 538.939 107.101 c 0
+ 564.061 57.9805 601.58 33.4209 651.5 33.4209 c 0
+ 701.74 33.4209 739.42 57.9805 764.54 107.101 c 0
+ 789.66 156.221 802.221 229.9 802.221 328.141 c 0
+ 802.221 426.7 789.66 500.541 764.54 549.66 c 0
+ 739.42 598.78 701.74 623.341 651.5 623.341 c 0
+651.5 700.141 m 0
+ 731.82 700.141 793.18 668.38 835.58 604.859 c 0
+ 877.979 541.341 899.18 449.101 899.18 328.141 c 0
+ 899.18 207.5 877.979 115.421 835.58 51.9004 c 0
+ 793.18 -11.6201 731.819 -43.3789 651.5 -43.3789 c 0
+ 571.18 -43.3789 509.82 -11.6201 467.42 51.9004 c 0
+ 425.021 115.421 403.82 207.5 403.82 328.141 c 0
+ 403.82 449.101 425.021 541.341 467.42 604.859 c 0
+ 509.82 668.38 571.18 700.141 651.5 700.141 c 0
+EndSplineSet
+Validated: 1
+EndChar
+
+StartChar: period
+Encoding: 46 46 2
+Width: 516
+VWidth: 2048
+Flags: W
+HStem: 53.4004 166.199<203.263 309.297>
+VStem: 174.6 163.801<82.9501 190.955>
+LayerCount: 2
+Fore
+SplineSet
+338.4 142.8 m 0
+ 338.4 119.2 330.5 98.4004 314.7 80.4004 c 0
+ 298.9 62.4004 277 53.4004 249 53.4004 c 0
+ 225.4 53.4004 207.1 61.2002 194.1 76.7998 c 0
+ 181.1 92.4004 174.6 111 174.6 132.6 c 0
+ 174.6 155.8 182.6 176.1 198.6 193.5 c 0
+ 214.6 210.9 236.8 219.6 265.2 219.6 c 0
+ 288.8 219.6 306.9 212.2 319.5 197.4 c 0
+ 332.1 182.6 338.4 164.4 338.4 142.8 c 0
+EndSplineSet
+Validated: 1
+EndChar
+EndChars
+EndSplineFont
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@ -3,6 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import urllib, re
 from calibre.web.feeds.news import BasicNewsRecipe

 class ChicagoTribune(BasicNewsRecipe):
@ -77,10 +78,17 @@ class ChicagoTribune(BasicNewsRecipe):


    def get_article_url(self, article):
-        url = article.get('feedburner_origlink', article.get('guid', article.get('link')))
-        if url.endswith('?track=rss'):
-            url = url.partition('?')[0]
-        return url
+        '''
+        Return the URL to download for a feed entry, or None if the entry
+        carries no usable link.
+
+        The URL-quoted ``link=`` parameter of the ``bookmark.cfm`` anchor in
+        the entry summary is preferred; otherwise the feedburner_origlink,
+        guid or link field is used, in that order. Any ``?track=rss`` marker
+        is removed from the result.
+        '''
+        ans = None
+        try:
+            match = re.search(
+                r'href=".+?bookmark.cfm.+?link=(.+?)"', article.summary)
+            if match is not None:
+                ans = urllib.unquote(match.group(1))
+        except Exception:
+            # Best effort only: an entry without a (usable) summary simply
+            # falls back to the standard feed fields below. Catching
+            # Exception instead of using a bare except keeps Ctrl-C and
+            # SystemExit working.
+            pass
+        if ans is None:
+            ans = article.get('feedburner_origlink', article.get('guid', article.get('link')))
+        if ans is None:
+            return None
+        return ans.replace('?track=rss', '')

    def skip_ad_pages(self, soup):
        text = soup.find(text='click here to continue to article')
--- a/recipes/icons/mediapart.png
+++ b/recipes/icons/mediapart.png
--- a/recipes/icons/racjonalista_pl.png
+++ b/recipes/icons/racjonalista_pl.png
--- a/recipes/icons/rue89.png
+++ b/recipes/icons/rue89.png
--- a/recipes/idg_se.recipe
+++ b/recipes/idg_se.recipe
@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class IDGse(BasicNewsRecipe):
    title               = 'IDG'
-    __author__ = 'zapt0'
+    __author__ = 'Stanislav Khromov'
    language = 'sv'
    description = 'IDG.se'
    oldest_article = 1
@ -15,6 +15,9 @@ class IDGse(BasicNewsRecipe):

    feeds          = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]

+    def get_article_url(self, article):
+        '''Use the feed entry's ``guid`` as the article URL (None when it is absent).'''
+        guid = article.get('guid')
+        return guid
+
    def print_version(self,url):
        return url + '?articleRenderMode=print&m=print'

--- a/recipes/instapaper.recipe
+++ b/recipes/instapaper.recipe
@ -1,8 +1,9 @@
+#v2 2011-07-25
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    title                             = u'Instapaper'
-    __author__                  = 'Darko Miletic'
+    __author__                  = 'Darko Miletic, Stanislav Khromov'
    publisher                     = 'Instapaper.com'
    category                      = 'info, custom, Instapaper'
    oldest_article               = 365
@ -15,6 +16,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
 	,dict(name='div', attrs={'id':'text_controls'})
 	,dict(name='div', attrs={'id':'editing_controls'})
 	,dict(name='div', attrs={'class':'bar bottom'})
+    ,dict(name='div', attrs={'id':'controlbar_container'})
+    ,dict(name='div', attrs={'id':'footer'})
 	 ]
    use_embedded_content  = False
    needs_subscription    = True
--- a/recipes/japaa.recipe
+++ b/recipes/japaa.recipe
@ -0,0 +1,99 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1330393641(BasicNewsRecipe):
+     title          = u'JAAPA'
+     __author__ = 'adoucette'
+     language = 'en'
+     oldest_article = 30
+     max_articles_per_feed = 100
+     auto_cleanup = True
+
+     def get_cover_url(self):
+         '''
+         Return the ``src`` of the first image on http://www.jaapa.com whose
+         URL matches the cover file-name pattern, or None if there is none.
+         '''
+         front_page = self.index_to_soup('http://www.jaapa.com')
+         cover_pattern = re.compile(r'\w*?cover\w{1,22}\.jpg')
+         image = front_page.find('img', src=cover_pattern)
+         if not image:
+             return None
+         return image['src']
+
+     feeds          = [
+             (u'CME Articles',
+                 u'http://feeds.feedburner.com/jaapacmearticles'),
+             (u'A Day in the Life',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=490'),
+                 (u'Ask A Librarian',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=847'),
+                 (u'Case of the Month',
+                     u'http://feeds.feedburner.com/jaapacaseofthemonth'),
+                 (u'Clinical Watch',
+                     u'http://feeds.feedburner.com/jaapaclinicalwatch'),
+                 (u'Commentary',
+                     u'http://feeds.feedburner.com/jaapacommentary'),
+                 (u'Critically Appraised Topic',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=699'),
+                 (u'Dermatology Digest',
+                     u'http://feeds.feedburner.com/jaapadermatologydigest'),
+                 (u'Diagnostic Imaging Review',
+                     u'http://feeds.feedburner.com/jaapadiagnosticimagingreview'),
+                 (u'Editorial',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=759'),
+                 (u'From the Academy',
+                     u'http://feeds.feedburner.com/jaapafromtheacademy'),
+                 (u'Genomics in PA Practice',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=760'),
+                 (u'Humane Medicine',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=758'),
+                 (u'Inside the AAPA Policy Manual',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=1546'),
+                 (u'Interpreting ECGs',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=1624'),
+                 (u'Letters',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=808'),
+                 (u'PA Quandaries',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=496'),
+                 (u'Pharmacology Consult',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=1614'),
+                 (u'POEMs', u'http://feeds.feedburner.com/jaapapoems'),
+                 (u'Quick Recertification',
+                     u'http://feeds.feedburner.com/jaapaquickrecertificationseries'),
+                 (u'Sounding Board',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=698'),
+                 (u'The Surgical Patient',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=499'),
+                 (u'Topics in Infectious Diseases',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=2495'),
+                 (u"What's New", u'http://feeds.feedburner.com/jaapawhatsnew'),
+                 (u'When the Patient Asks',
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=501'),
+                 (u"Women's Health",
+                     u'http://www.jaapa.com/pages/rss.aspx?sectionid=2176'),
+                 (u'AAPA Special Article',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=1453'),
+                 (u'Case Reports',
+                         u'http://feeds.feedburner.com/jaapacasereports'),
+                 (u'Review Articles',
+                         u'http://feeds.feedburner.com/jaapareviewarticles'),
+                 (u'Surgical Reviews',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=505'),
+                 (u'Brief Report',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=2353'),
+                 (u'Research Corner',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=498'),
+                 (u'Research Reports',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=1024'),
+                 (u'The Art of Medicine',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=1289'),
+                 (u'Clinical Practice Guidelines',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=2102'),
+                 (u'Complementary and Alternative Medicine',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=2123'),
+                 (u'Drug Information',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=2089'),
+                 (u'Evidence-Based Medicine',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=1288'),
+                 (u'Patient Information',
+                         u'http://www.jaapa.com/pages/rss.aspx?sectionid=2122')]
+
+     def print_version(self, url):
+         '''Return ``url`` with every ``/article/`` segment replaced by ``/printarticle/``.'''
+         printable = url.replace('/article/', '/printarticle/')
+         return printable
--- a/recipes/la_jornada.recipe
+++ b/recipes/la_jornada.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>, Rogelio Domínguez <rogelio.dominguez@gmail.com>'
+__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>, Rogelio Domínguez <rogelio.dominguez@gmail.com>'
 '''
 www.jornada.unam.mx
 '''
@ -86,6 +86,6 @@ class LaJornada_mx(BasicNewsRecipe):
        return soup

    def get_article_url(self, article):
-        rurl = article.get('link',  None)
-        return rurl.rpartition('&partner=')[0]
+        '''
+        Return the entry's guid with any trailing ``&partner=...`` suffix
+        removed, or None when the entry has no guid.
+        '''
+        rurl = article.get('guid',  None)
+        if rurl is None:
+            return None
+        # rpartition() yields ('', '', rurl) when the separator is absent, so
+        # taking element 0 unconditionally would turn such URLs into ''.
+        head, sep, _tail = rurl.rpartition('&partner=')
+        return head if sep else rurl

--- a/recipes/mediapart.recipe
+++ b/recipes/mediapart.recipe
@ -1,11 +1,12 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'
 '''
 Mediapart
 '''

-import re
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+__author__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'
+
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.web.feeds.news import BasicNewsRecipe

 class Mediapart(BasicNewsRecipe):
@ -15,8 +16,9 @@ class Mediapart(BasicNewsRecipe):
    oldest_article = 7
    language = 'fr'
    needs_subscription = True
-
    max_articles_per_feed = 50
+
+    use_embedded_content = False
    no_stylesheets = True

    cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'
@ -27,14 +29,9 @@ class Mediapart(BasicNewsRecipe):

 # -- print-version

-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-        [
-            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
-            (r'\'', lambda match: '&rsquo;')
-        ]
-    ]
+    conversion_options = { 'smarten_punctuation' : True }

-    remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}) ]
+    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]

    def print_version(self, url):
        raw = self.browser.open(url).read()
@ -55,3 +52,11 @@ class Mediapart(BasicNewsRecipe):
            br['pass'] = self.password
            br.submit()
        return br
+
+    def preprocess_html(self, soup):
+        '''
+        Retag ``p.titre_page`` elements as <h3> and ``span.legend`` elements
+        as <small> with a leading <br>, then return the modified soup.
+        '''
+        page_titles = soup.findAll('p', {'class':'titre_page'})
+        for heading in page_titles:
+            heading.name = 'h3'
+        legends = soup.findAll('span', {'class':'legend'})
+        for caption in legends:
+            caption.name = 'small'
+            # Line break in front of the legend text.
+            caption.insert(0, Tag(soup, 'br', []))
+        return soup
--- a/recipes/racjonalista_pl.recipe
+++ b/recipes/racjonalista_pl.recipe
@ -0,0 +1,54 @@
+__copyright__ = '2012, Micha\u0142 <webmaster@racjonalista.pl>'
+'''
+Racjonalista.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Racjonalista(BasicNewsRecipe):
+    __author__     = u'Micha\u0142 <webmaster@racjonalista.pl>'
+    publisher      = u'Fundacja Wolnej My\u015bli'
+    title          = u'Racjonalista.pl'
+    description    = u'Racjonalista.pl'
+    category       = 'newspaper'
+    language = 'pl'
+    encoding = 'iso-8859-2'
+    oldest_article = 7
+    max_articles_per_feed = 20
+    remove_javascript    = True
+    no_stylesheets       = True
+    use_embedded_content = False
+    simultaneous_downloads = 2
+    timeout = 30
+    cover_url      = 'http://www.racjonalista.pl/img/uimg/rac.gif'
+
+    feeds = [(u'Racjonalista.pl', u'http://www.racjonalista.pl/rss.php')]
+
+    match_regexps = [r'kk\.php']
+
+    def print_version(self, url):
+        '''Return ``url`` with each ``/s,`` path marker replaced by ``/t,``.'''
+        rewritten = url.replace('/s,', '/t,')
+        return rewritten
+
+    extra_css = 'h2 {font: serif large} .cytat {text-align: right}'
+
+    remove_attributes = ['target', 'width', 'height']
+
+    preprocess_regexps = [
+        (re.compile(i[0], re.DOTALL), i[1]) for i in
+            [ (r'<p[^>]*>&nbsp;</p>', lambda match: ''),
+              (r'&nbsp;', lambda match: ' '),
+              (r'<meta[^>]+>', lambda match: ''),
+              (r'<link[^>]+>', lambda match: ''),
+              (r'</?center>', lambda match: ''),
+              (r'<a href="[^"]+" rel=author><b>(?P<a>[^<]+)</b></a>', lambda match: '<b>' + match.group('a') + '</b>'),
+              (r'<div align=center style="font-size:18px">(?P<t>[^<]+)</div>', lambda match: '<h2>' + match.group('t') + '</h2>'),
+              (r'<table align=center width=700 border=0 cellpadding=0 cellspacing=0><tr><td width="100%" bgcolor="#edeceb" height="100%" style="font-size:12px">', lambda match: ''),
+              (r'</td></tr><tr><td>', lambda match: ''),
+              (r'</td></tr></table></body>', lambda match: '</body>'),
+              (r'<a[^>]+><sup>(?P<p>[^<]+)</sup></a>', lambda match: '<sup>' + match.group('p') + '</sup>'),
+              (r'<a name=p[^>]+>(?P<a>[^<]+)</a>', lambda match: match.group('a')),
+              (r'<a href="[^"]+" target=_blank class=linkext>Orygin[^<]+</a>', lambda match: ''),
+              (r'<a href="[^"]+" class=powiazanie>Poka[^<]+</a>', lambda match: '')]
+    ]
+
--- a/recipes/rue89.recipe
+++ b/recipes/rue89.recipe
@ -1,13 +1,11 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Louis Gesbert <meta at antislash dot info>'
+__copyright__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'
 '''
 Rue89
 '''

-__author__ = '2010, Louis Gesbert <meta at antislash dot info>'
+__author__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'

-import re
-from calibre.ebooks.BeautifulSoup import Tag
 from calibre.web.feeds.news import BasicNewsRecipe

 class Rue89(BasicNewsRecipe):
@ -17,37 +15,45 @@ class Rue89(BasicNewsRecipe):
    title = u'Rue89'
    language = 'fr'
    oldest_article = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 12

-    feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')]
+    use_embedded_content = False
+
+    # From http://www.rue89.com/les-flux-rss-de-rue89
+    feeds = [
+        (u'La Une',    u'http://www.rue89.com/feed'),
+        (u'Rue69',     u'http://www.rue89.com/rue69/feed'),
+        (u'Eco',       u'http://www.rue89.com/rue89-eco/feed'),
+        (u'Planète',   u'http://www.rue89.com/rue89-planete/feed'),
+        (u'Sport',     u'http://www.rue89.com/rue89-sport/feed'),
+        (u'Culture',   u'http://www.rue89.com/culture/feed'),
+        (u'Hi-tech',   u'http://www.rue89.com/hi-tech/feed'),
+        (u'Media',     u'http://www.rue89.com/medias/feed'),
+        (u'Monde',     u'http://www.rue89.com/monde/feed'),
+        (u'Politique', u'http://www.rue89.com/politique/feed'),
+        (u'Societe',   u'http://www.rue89.com/societe/feed'),
+    ]
+
+    # Follow redirection from feedsportal.com
+    def get_article_url(self, article):
+        '''Open the entry's link without recording a visit and return the final URL reached.'''
+        response = self.browser.open_novisit(article.link)
+        return response.geturl()
+
+    def print_version(self, url):
+        '''Return ``url`` with the ``?imprimer=1`` query string appended.'''
+        suffix = '?imprimer=1'
+        return url + suffix

    no_stylesheets = True

-    preprocess_regexps = [
-        (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<'+match.group(1)+'h3>'),
-        (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<h2>'+match.group(1)+'</h2>'),
-        (re.compile(r'<img[^>]+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<span style="font-family: Sans-serif; color: red; font-size:24pt; padding=2pt;">'+match.group(1)+'</span>'),
-        (re.compile(r'\''), lambda match: '&rsquo;'),
-        ]
+    conversion_options = { 'smarten_punctuation' : True }

-    def preprocess_html(self,soup):
-        body = Tag(soup, 'body')
-        title = soup.find('h1', {'class':'title'})
-        content = soup.find('div', {'class':'content'})
-        soup.body.replaceWith(body)
-        body.insert(0, title)
-        body.insert(1, content)
-        return soup
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'article'}),
+    ]

-    remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}),
-                    #dict(name='div', attrs={'class':'print-links'}),
-                    #dict(name='img', attrs={'class':'print-logo'}),
-                    dict(name='div', attrs={'class':'content_top'}),
-                    dict(name='div', attrs={'id':'sidebar-left'}), ]
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'plus_loin'}),
+    ]

-# -- print-version has poor quality on this website, better do the conversion ourselves
-#    def print_version(self, url):
-#        return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url)
+    remove_tags = [
+        dict(name='div', attrs={'id':'article_tools'}),
+        dict(name='div', attrs={'id':'plus_loin'}),
+    ]
--- a/recipes/satmagazine.recipe
+++ b/recipes/satmagazine.recipe
@ -0,0 +1,155 @@
+#!/usr/bin/env  python
+##
+## Title:        SatMagazine
+##
+## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+##
+## Written:      Feb 2012
+## Last Edited:  Mar 2012
+##
+
+# Feb 2012: Initial release
+
+__license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
+
+'''
+satmagazine.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SatMagazine(BasicNewsRecipe):
+
+    title            = u'SatMagazine'
+    description      = u'North American Satellite Markets...'
+    publisher        = 'Satnews Publishers'
+    publication_type = 'magazine'
+    INDEX            = 'http://www.satmagazine.com/cgi-bin/display_edition.cgi'
+    __author__ = 'kiavash'
+
+    language = 'en'
+    asciiize = True
+    timeout = 120
+    simultaneous_downloads = 2
+
+    # Flattens all the tables to make it compatible with Nook
+    conversion_options = {'linearize_tables' : True}
+
+    keep_only_tags = [dict(name='span', attrs={'class':'story'})]
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
+
+    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 .introduction, .first { font-weight: bold; } \
+                 .cross-head { font-weight: bold; font-size: 125%; } \
+                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
+                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
+                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
+                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
+                    font-size: 80%; font-style: italic; margin: 1px auto; } \
+                 .story-date, .published { font-size: 80%; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { font-size: 175%; font-weight: bold; } \
+                 h2 { font-size: 150%; font-weight: bold; } \
+                 h3 { font-size: 125%; font-weight: bold; } \
+                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
+
+    # Remove the line breaks, href links and float left/right and picture width/height.
+    #  - '<br clear...' is matched lazily so it stops at the first '/>' instead
+    #    of swallowing everything up to the last '/>' on the line.
+    #  - '<a\b' only matches anchor tags, not other tags starting with 'a'
+    #    such as <abbr>.
+    #  - the float rule removes the whole declaration; a bare lazy '.*?' at the
+    #    end of a pattern matches nothing and left the value behind.
+    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
+                              (re.compile(r'<br[ ]*clear.*?/>', re.IGNORECASE), lambda m: ''),
+                              (re.compile(r'<a\b.*?>'), lambda m: ''),
+                              (re.compile(r'</a>'), lambda m: ''),
+                              (re.compile(r'float:\s*(?:left|right)\s*;?'), lambda m: ''),
+                              (re.compile(r'width:.*?px'), lambda m: ''),
+                              (re.compile(r'height:.*?px'), lambda m: '')
+                              ]
+
+    def parse_index(self):
+        '''
+        Build the feed list from the current-edition page at ``self.INDEX``.
+
+        Side effects: sets ``self.cover_url`` when an image with
+        alt="Cover Image" is present, and sets ``self.timefmt`` from the
+        first two words of the ``master_heading`` span.
+
+        Returns a list holding one ``(self.title, articles)`` tuple, or an
+        empty list when no article was found. Each article is a dict with
+        the keys title, url, description and date.
+        '''
+        article_info = []
+        feeds = []
+
+        soup = self.index_to_soup(self.INDEX)
+
+        # Find Cover image
+        cover = soup.find('img', src=True, alt='Cover Image')
+        if cover is not None:
+            self.cover_url = cover['src']
+            self.log('Found Cover image:', self.cover_url)
+
+        soup = soup.find('div', attrs={'id':'middlecontent'}) # main part of the site that has the articles
+
+        #Find the Magazine date
+        ts = soup.find('span', attrs={'class':'master_heading'}) # contains the string with the date
+        ds = ' '.join(self.tag_to_string(ts).strip().split()[:2])
+        self.log('Found Current Issue:', ds)
+        self.timefmt = ' [%s]'%ds
+
+        #sections = soup.findAll('span', attrs={'class':'upper_heading'})
+
+        articles = soup.findAll('span', attrs={'class':'heading'})
+
+        descriptions = soup.findAll('span', attrs={'class':'story'})
+
+        # Headings and story blurbs are paired purely by position, so the
+        # index keeps advancing even when a heading is skipped.
+        for title_number, article in enumerate(articles):
+
+            title = self.tag_to_string(article)
+            link = article.find('a')
+            if link is None:
+                # A heading without a link has nothing to download.
+                self.log('\tSkipping heading without a link:', title)
+                continue
+            url = link.get('href')
+
+            self.log('\tFound article:', title, 'at', url)
+            # The page may carry fewer story spans than headings; fall back
+            # to an empty description instead of raising IndexError.
+            if title_number < len(descriptions):
+                desc = self.tag_to_string(descriptions[title_number])
+            else:
+                desc = ''
+
+            article_info.append({'title':title, 'url':url, 'description':desc,
+                                'date':self.timefmt})
+
+        if article_info:
+            feeds.append((self.title, article_info))
+
+        return feeds
+
+    def preprocess_html(self, soup):
+        '''
+        Clean up a downloaded article page and return the modified soup.
+
+        Images whose alt text contains ``_ad_`` or ``_snipe_`` are turned
+        into attribute-less <font> tags; all other images get a block
+        display style. Every <b> becomes an <h3>, and every link with an
+        href becomes a plain <font> tag.
+        '''
+        ad_attributes = ('src', 'alt', 'border', 'hspace', 'vspace', 'align', 'size')
+
+        # Finds all the images
+        for figure in soup.findAll('img', attrs = {'src' : True}):
+
+            # Only 'src' is guaranteed by the query above, so read 'alt'
+            # with a default instead of indexing (which raises KeyError).
+            alt = figure.get('alt', '')
+
+            # if the image is an ad then remove it.
+            if '_ad_' in alt or '_snipe_' in alt:
+                for attribute in ad_attributes:
+                    del figure[attribute]
+                figure.name = 'font'
+                continue
+
+            figure['style'] = 'display:block' # puts the image on a line of its own
+
+        # Makes the title standing out
+        for title in soup.findAll('b'):
+            title.name = 'h3'
+
+        # Removes all unrelated links
+        for link in soup.findAll('a', attrs = {'href': True}):
+            link.name = 'font'
+            del link['href']
+            del link['target']
+
+        return soup
--- a/recipes/sueddeutsche.recipe
+++ b/recipes/sueddeutsche.recipe
@ -11,7 +11,7 @@ class Sueddeutsche(BasicNewsRecipe):
    title                 = u'Süddeutsche.de'                 # 2012-01-26 AGe Correct Title
    description           = 'News from Germany, Access to online content' # 2012-01-26 AGe
    __author__            = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
-    publisher             = 'Süddeutsche Zeitung'             # 2012-01-26 AGe add
+    publisher             = u'Süddeutsche Zeitung'             # 2012-01-26 AGe add
    category              = 'news, politics, Germany'         # 2012-01-26 AGe add
    timefmt               = ' [%a, %d %b %Y]'                 # 2012-01-26 AGe add %a
    oldest_article        = 7
--- a/recipes/sueddeutschezeitung.recipe
+++ b/recipes/sueddeutschezeitung.recipe
@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre import strftime

 class SueddeutcheZeitung(BasicNewsRecipe):
-    title                  = 'Süddeutsche Zeitung'
+    title                  = u'Süddeutsche Zeitung'
    __author__             = 'Darko Miletic'
    description            = 'News from Germany. Access to paid content.'
-    publisher              = 'Süddeutsche Zeitung'
+    publisher              = u'Süddeutsche Zeitung'
    category               = 'news, politics, Germany'
    no_stylesheets         = True
    oldest_article         = 2
--- a/resources/calibre-portable.bat
+++ b/resources/calibre-portable.bat
@ -1,7 +1,10 @@
@echo OFF
+REM			Calibre-Portable.bat
+REM			¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬
+REM
 REM Batch File to start a Calibre configuration on Windows
 REM giving explicit control of the location of:
-REM  - Calibe Program Files
+REM  - Calibre Program Files
 REM  - Calibre Library Files
 REM  - Calibre Config Files
 REM  - Calibre Metadata database
@ -25,6 +28,19 @@ REM  - CalibreSource 		Location of Calibre Source files (Optional)
 REM
 REM This batch file is designed so that if you create the recommended
 REM folder structure then it can be used 'as is' without modification.
+REM
+REM More information on the Environment Variables used by Calibre can
+REM be found at:
+REM	http://manual.calibre-ebook.com/customize.html#environment-variables
+REM
+REM The documentation for this file in the Calibre manual can be found at:
+REM	http://manual.calibre-ebook.com/portable.html
+REM
+REM CHANGE HISTORY
+REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬
+REM 22 Jan 2012	itimpi	- Updated to keep it in line with the calibre-portable.sh
+REM			  file for Linux systems
+


 REM -------------------------------------
@ -36,7 +52,7 @@ REM -------------------------------------

 IF EXIST CalibreConfig (
 	SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
-	ECHO CONFIG FILES:    %cd%\CalibreConfig
+	ECHO CONFIG FILES:       %cd%\CalibreConfig
 )


@ -54,11 +70,11 @@ REM --------------------------------------------------------------

 IF EXIST U:\eBooks\CalibreLibrary (
 	SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
-	ECHO LIBRARY FILES:   U:\eBOOKS\CalibreLibrary
+	ECHO LIBRARY FILES:      U:\eBOOKS\CalibreLibrary
 )
 IF EXIST CalibreLibrary (
 	SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
-	ECHO LIBRARY FILES:   %cd%\CalibreLibrary
+	ECHO LIBRARY FILES:      %cd%\CalibreLibrary
 )


@ -67,20 +83,23 @@ REM Specify Location of metadata database (optional)
 REM
 REM Location where the metadata.db file is located.  If not set
 REM the same location as Books files will be assumed.  This.
-REM options is used to get better performance when the Library is
-REM on a (slow) network drive.  Putting the metadata.db file 
-REM locally then makes gives a big performance improvement.
+REM option is typically set to get better performance when the
+REM Library is on a (slow) network drive.  Putting the metadata.db 
+REM file locally then gives a big performance improvement.
 REM
 REM NOTE.  If you use this option, then the ability to switch
 REM        libraries within Calibre will be disabled.  Therefore
 REM        you do not want to set it if the metadata.db file
 REM        is at the same location as the book files.
+REM
+REM        Another point to watch is that plugins can cause problems
+REM        as they often store absolute path information
 REM --------------------------------------------------------------

 IF EXIST %cd%\CalibreMetadata\metadata.db (
 	IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" (
 		SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
-		ECHO DATABASE:        %cd%\CalibreMetadata\metadata.db
+		ECHO DATABASE:           %cd%\CalibreMetadata\metadata.db
 		ECHO '
 		ECHO ***CAUTION*** Library Switching will be disabled 
 		ECHO '
@ -94,61 +113,79 @@ REM It is easy to run Calibre from source
 REM Just set the environment variable to where the source is located
 REM When running from source the GUI will have a '*' after the version.
 REM number that is displayed at the bottom of the Calibre main screen.
+REM
+REM More information on setting up a development environment can
+REM be found at:
+REM	http://manual.calibre-ebook.com/develop.html#develop
 REM --------------------------------------------------------------

 IF EXIST CalibreSource\src (
 	SET CALIBRE_DEVELOP_FROM=%cd%\CalibreSource\src
-	ECHO SOURCE FILES:    %cd%\CalibreSource\src
+	ECHO SOURCE FILES:       %cd%\CalibreSource\src
+) ELSE (
+	ECHO SOURCE FILES:       *** Not being Used ***
 )


 REM --------------------------------------------------------------
-REM Specify Location of calibre binaries (optional)
+REM Specify Location of calibre Windows binaries (optional)
 REM
 REM To avoid needing Calibre to be set in the search path, ensure
 REM that Calibre Program Files is current directory when starting.
 REM The following test falls back to using search path .
 REM This folder can be populated by copying the Calibre2 folder from
 REM an existing installation or by installing direct to here.
+REM
+REM NOTE.  Do not try and put both Windows and Linux binaries into
+REM	   the same folder as this can cause problems.
 REM --------------------------------------------------------------

 IF EXIST %cd%\Calibre2 (
 	CD %cd%\Calibre2
-	ECHO PROGRAM FILES:   %cd%
+	ECHO PROGRAM FILES:      %cd%
+) ELSE (
+	ECHO PROGRAM FILES:      *** Use System search PATH ***
 )


 REM --------------------------------------------------------------
 REM Location of Calibre Temporary files  (optional)
 REM
-REM Calibre creates a lot of temproary files while running
+REM Calibre creates a lot of temporary files while running
 REM In theory these are removed when Calibre finishes, but
 REM in practise files can be left behind (particularily if
-REM any errors occur.  Using this option allows some
+REM any errors occur).  Using this option allows some
 REM explicit clean-up of these files.
 REM If not set Calibre uses the normal system TEMP location
 REM --------------------------------------------------------------

 SET CALIBRE_TEMP_DIR=%TEMP%\CALIBRE_TEMP
-ECHO TEMPORARY FILES: %CALIBRE_TEMP_DIR%
+ECHO TEMPORARY FILES:    %CALIBRE_TEMP_DIR%

-IF NOT "%CALIBRE_TEMP_DIR%" == "" (
-	IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
-	MKDIR "%CALIBRE_TEMP_DIR%"
-	REM set the following for any components that do
-	REM not obey the CALIBRE_TEMP_DIR setting
-	SET TMP=%CALIBRE_TEMP_DIR%
-	SET TEMP=%CALIBRE_TEMP_DIR%
-)
+IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
+MKDIR "%CALIBRE_TEMP_DIR%"
+REM set the following for any components that do
+REM not obey the CALIBRE_TEMP_DIR setting
+SET TMP=%CALIBRE_TEMP_DIR%
+SET TEMP=%CALIBRE_TEMP_DIR%


+REM --------------------------------------------------------------
+REM Set the Interface language (optional)
+REM
+REM If not set Calibre uses the language set in Preferences 
+REM --------------------------------------------------------------
+
+SET CALIBRE_OVERRIDE_LANG=EN
+ECHO INTERFACE LANGUAGE: %CALIBRE_OVERRIDE_LANG%
+
 REM ----------------------------------------------------------
 REM  The following gives a chance to check the settings before
 REM  starting Calibre.  It can be commented out if not wanted.
 REM ----------------------------------------------------------

 ECHO '
-ECHO "Press CTRL-C if you do not want to continue"
+ECHO Press CTRL-C if you do not want to continue
 PAUSE


@ -160,11 +197,12 @@ REM responsive while Calibre is running.  Within Calibre itself
 REM the backgound processes should be set to run with 'low' priority.

 REM Using the START command starts up Calibre in a separate process.
-REM If used without /WAIT opotion launches Calibre and contines batch file.
+REM If used without /WAIT option it launches Calibre and continues batch file.
+REM Normally this would simply run off the end and close the Command window.
 REM Use with /WAIT to wait until Calibre completes to run a task on exit
 REM --------------------------------------------------------

 ECHO "Starting up Calibre"
 ECHO OFF
 ECHO %cd%
-START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
+START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
--- a/resources/calibre-portable.sh
+++ b/resources/calibre-portable.sh
@ -0,0 +1,220 @@
+#!/bin/sh
+#			Calibre-Portable.sh
+#			¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬
+#
+# Shell script File to start a Calibre configuration on Linux
+# giving explicit control of the location of:
+#  - Calibre Program Files
+#  - Calibre Library Files
+#  - Calibre Config Files
+#  - Calibre Metadata database
+#  - Calibre Source files
+#  - Calibre Temp Files
+# By setting the paths correctly it can be used to run:
+#  - A "portable calibre" off a USB stick.
+#  - A network installation with local metadata database
+#    (for performance) and books stored on a network share 
+#  - A local installation using customised settings
+#
+# If trying to run off a USB stick then the folder structure
+# shown below is recommended (relative to the location of 
+# this script file).  This structure can also be used
+# when running off a local hard disk if you want to get the
+# level of control this script file provides.
+#  - Calibre			Location of linux program files
+#  - CalibreConfig		Location of Configuration files
+#  - CalibreLibrary		Location of Books and metadata
+#  - CalibreSource 		Location of Calibre Source files (Optional)
+#
+# This script file is designed so that if you create the recommended
+# folder structure then it can be used 'as is' without modification.
+#
+# More information on the Environment Variables used by Calibre can
+# be found at:
+#	http://manual.calibre-ebook.com/customize.html#environment-variables
+#
+# The documentation for this file in the Calibre manual can be found at:
+#	http://manual.calibre-ebook.com/portable.html
+#
+# NOTE: It is quite possible to have both Windows and Linux binaries on the same
+#	USB stick but set up to use the same calibre settings otherwise.
+#	In this case you use:
+#	- calibre-portable.bat		to run the Windows version
+#	- calibre-portable.sh		to run the Linux version
+#
+# CHANGE HISTORY
+# ¬¬¬¬¬¬¬¬¬¬¬¬¬¬
+# 22 Jan 2012	itimpi	- First version based on the calibre-portable.bat file for Windows
+#			  It should have identical functionality but for a linux environment.
+#			  It might work on MacOS but that has not been validated
+
+
+# -------------------------------------
+# Set up Calibre Config folder
+#
+# This is where user specific settings
+# are stored.
+# -------------------------------------
+
+if [ -d CalibreConfig ]
+then
+	# Store an absolute path: the script changes directory further down.
+	CALIBRE_CONFIG_DIRECTORY="`pwd`/CalibreConfig"
+	export CALIBRE_CONFIG_DIRECTORY
+	# Print the quoted variable; an unquoted `pwd` would be field-split
+	# and globbed, mangling paths that contain runs of whitespace.
+	echo "CONFIG FILES:       $CALIBRE_CONFIG_DIRECTORY"
+fi
+
+
+# --------------------------------------------------------------
+# Specify Location of ebooks
+#
+# Location where Book files are located
+# Either set explicit path, or if running from a USB stick
+# a relative path can be used to avoid need to know the
+# drive letter of the USB stick.
+#
+# Comment out any of the following that are not to be used
+# (although leaving them in does not really matter)
+# --------------------------------------------------------------
+
+if [ -d /eBooks/CalibreLibrary ]
+then
+	# Assign exactly the path that was tested: Linux file systems are
+	# case sensitive, so /eBOOKS and /eBooks are different directories.
+	# (Plain assignment; SET is batch syntax and not a shell command.)
+	CALIBRE_LIBRARY_DIRECTORY=/eBooks/CalibreLibrary
+	echo "LIBRARY FILES:      $CALIBRE_LIBRARY_DIRECTORY"
+	export CALIBRE_LIBRARY_DIRECTORY
+fi
+# A CalibreLibrary folder in the current directory is checked last and
+# therefore overrides the fixed location above.
+if [ -d "`pwd`/CalibreLibrary" ]
+then
+	CALIBRE_LIBRARY_DIRECTORY="`pwd`/CalibreLibrary"
+	echo "LIBRARY FILES:      $CALIBRE_LIBRARY_DIRECTORY"
+	export CALIBRE_LIBRARY_DIRECTORY
+fi
+
+
+# --------------------------------------------------------------
+# Specify Location of metadata database (optional)
+#
+# Location where the metadata.db file is located.  If not set
+# then the same location as Books files will be assumed.  This
+# option is typically used to get better performance when the
+# Library is on a (slow) network drive.  Putting the metadata.db
+# file locally then gives a big performance improvement.
+#
+# NOTE.  If you use this option, then the ability to switch
+#        libraries within Calibre will be disabled.  Therefore
+#        you do not want to set it if the metadata.db file
+#        is at the same location as the book files.
+#
+#	 Another point to watch is that plugins can cause problems
+#	 as they often store absolute path information
+# --------------------------------------------------------------
+
+# metadata.db is a regular file, so it is tested with -f (-d only
+# matches directories and would never succeed here).
+if [ -f "`pwd`/CalibreMetadata/metadata.db" ]
+then
+	# Both operands are quoted: the library variable may be unset, and an
+	# empty unquoted operand makes the comparison a syntax error.
+	if [ "$CALIBRE_LIBRARY_DIRECTORY" != "`pwd`/CalibreMetadata" ]
+	then
+		CALIBRE_OVERRIDE_DATABASE_PATH="`pwd`/CalibreMetadata/metadata.db"
+		echo "DATABASE:           $CALIBRE_OVERRIDE_DATABASE_PATH"
+		# Export the variable that was actually assigned above.
+		export CALIBRE_OVERRIDE_DATABASE_PATH
+		echo
+		echo "***CAUTION*** Library Switching will be disabled"
+		echo
+	fi
+fi
+
+# --------------------------------------------------------------
+# Specify Location of source (optional)
+#
+# It is easy to run Calibre from source
+# Just set the environment variable to where the source is located
+# When running from source the GUI will have a '*' after the version.
+# number that is displayed at the bottom of the Calibre main screen.
+#
+# More information on setting up a development environment can
+# be found at:
+#	http://manual.calibre-ebook.com/develop.html#develop
+# --------------------------------------------------------------
+
+if [ -d CalibreSource/src ]
+then
+	# Absolute path, because the script changes directory further down.
+	CALIBRE_DEVELOP_FROM="`pwd`/CalibreSource/src"
+	export CALIBRE_DEVELOP_FROM
+	# Quoted so that whitespace in the path is printed unchanged.
+	echo "SOURCE FILES:       $CALIBRE_DEVELOP_FROM"
+else
+	echo "SOURCE FILES:       *** Not being Used ***"
+fi
+
+
+
+# --------------------------------------------------------------
+# Specify Location of calibre linux binaries (optional)
+#
+# To avoid needing Calibre to be set in the search path, ensure
+# that Calibre Program Files is current directory when starting.
+# The following test falls back to using search path.
+#
+# This folder can be populated by copying the /opt/calibre folder
+# from an existing installation or by installing direct to here.
+#
+# NOTE.  Do not try and put both Windows and Linux binaries into
+#	 the same folder as this can cause problems.
+# --------------------------------------------------------------
+
+if [ -d "`pwd`/Calibre" ]
+then
+	cd "`pwd`/Calibre"
+	# Unlike the Windows command interpreter, a POSIX shell does not look
+	# in the current directory for commands.  Put the bundled binaries at
+	# the front of the search path so the calibre command resolves here.
+	PATH="`pwd`:$PATH"
+	export PATH
+	echo "PROGRAM FILES:      `pwd`"
+else
+	echo "PROGRAM FILES:      *** Using System search path ***"
+fi
+
+
+# --------------------------------------------------------------
+# Location of Calibre Temporary files  (optional)
+#
+# Calibre creates a lot of temporary files while running
+# In theory these are removed when Calibre finishes, but
+# in practice files can be left behind (particularly if
+# a crash occurs).  Using this option allows some
+# explicit clean-up of these files.
+# If not set Calibre uses the normal system TEMP location
+# --------------------------------------------------------------
+
+CALIBRE_TEMP_DIR=/tmp/CALIBRE_TEMP
+echo "TEMPORARY FILES:    $CALIBRE_TEMP_DIR"
+
+# Start from an empty directory: remove anything left by an earlier run.
+if [ -d  "$CALIBRE_TEMP_DIR" ]
+then
+	rm -fr "$CALIBRE_TEMP_DIR"
+fi
+mkdir "$CALIBRE_TEMP_DIR"
+# Without the export the variable stays local to this script and is not
+# passed on to the calibre process.
+export CALIBRE_TEMP_DIR
+# set the following for any components that do
+# not obey the CALIBRE_TEMP_DIR setting
+TMPDIR="$CALIBRE_TEMP_DIR"
+export TMPDIR
+
+
+# --------------------------------------------------------------
+# Set the Interface language (optional)
+#
+# If not set Calibre uses the language set in Preferences
+# --------------------------------------------------------------
+
+# Assign and export the interface language, then report the value.
+CALIBRE_OVERRIDE_LANG=EN
+export CALIBRE_OVERRIDE_LANG
+echo "INTERFACE LANGUAGE: $CALIBRE_OVERRIDE_LANG"
+
+# ----------------------------------------------------------
+#  The following gives a chance to check the settings before
+#  starting Calibre.  It can be commented out if not wanted.
+# ----------------------------------------------------------
+
+# Print a blank separator line and the two prompt lines, then block
+# until the user presses ENTER (CTRL-C aborts the script instead).
+printf '%s\n' "" \
+	"Press CTRL-C if you do not want to continue" \
+	"Press ENTER to continue and start Calibre"
+read DUMMY
+
+# --------------------------------------------------------
+# Start up the calibre program.
+# --------------------------------------------------------
+
+echo "Starting up Calibre"
+# Show the directory calibre is started from.  pwd is called directly:
+# passing it through an unquoted substitution would field-split the path.
+pwd
+calibre --with-library "$CALIBRE_LIBRARY_DIRECTORY"
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -496,8 +496,13 @@ gui_view_history_size = 15
 # prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
 tweak_book_prefer = 'epub'

-#: Compile General Program Mode templates to Python
+#: Change the font size of book details in the interface
+# Change the font size at which book details are rendered in the side panel and
+# comments are rendered in the metadata edit dialog. Set it to a positive or
+# negative number to increase or decrease the font size.
+change_book_details_font_size_by = 0

+#: Compile General Program Mode templates to Python
 # Compiled general program mode templates are significantly faster than
 # interpreted templates. Setting this tweak to True causes calibre to compile
 # (in most cases) general program mode templates. Setting it to False causes
--- a/resources/fonts/calibreSymbols.otf
+++ b/resources/fonts/calibreSymbols.otf
--- a/resources/images/lt.png
+++ b/resources/images/lt.png
--- a/setup/installer/windows/portable.c
+++ b/setup/installer/windows/portable.c
@ -53,7 +53,7 @@ void show_last_error(LPCTSTR preamble) {
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-        &msg,
+        (LPTSTR)&msg,
        0, NULL );

    show_detailed_error(preamble, msg, (int)dw);
@ -136,7 +136,7 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {

 int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine, int nCmdShow)
 {
-    LPTSTR app_dir, config_dir, exe, library_dir;
+    LPTSTR app_dir, config_dir, exe, library_dir, too_long;

    app_dir = get_app_dir();
    config_dir = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
@ -147,7 +147,15 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine
    _sntprintf_s(exe, BUFSIZE, _TRUNCATE, _T("%sCalibre\\calibre.exe"), app_dir);
    _sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, _T("%sCalibre Library"), app_dir);

-    launch_calibre(exe, config_dir, library_dir);
+    if ( _tcscnlen(library_dir, BUFSIZE) <= 74 ) {
+        launch_calibre(exe, config_dir, library_dir);
+    } else {
+        too_long = (LPTSTR)calloc(BUFSIZE+300, sizeof(TCHAR));
+        _sntprintf_s(too_long, BUFSIZE+300, _TRUNCATE, 
+                _T("Path to Calibre Portable (%s) too long. Must be less than 59 characters."), app_dir);
+
+        show_error(too_long);
+    }

    free(app_dir); free(config_dir); free(exe); free(library_dir);

--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
@ -18,14 +18,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-01-14 02:30+0000\n"
-"Last-Translator: Wolfgang Rohdewald <wolfgang@rohdewald.de>\n"
+"PO-Revision-Date: 2012-03-05 19:08+0000\n"
+"Last-Translator: Dennis Baudys <Unknown>\n"
 "Language-Team: German <debian-l10n-german@lists.debian.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-01-15 05:18+0000\n"
-"X-Generator: Launchpad (build 14664)\n"
+"X-Launchpad-Export-Date: 2012-03-06 04:47+0000\n"
+"X-Generator: Launchpad (build 14900)\n"
 "Language: de\n"

 #. name for aaa
@ -5871,7 +5871,7 @@ msgstr ""

 #. name for cym
 msgid "Welsh"
-msgstr "Kymrisch"
+msgstr "Walisisch"

 #. name for cyo
 msgid "Cuyonon"
--- a/setup/iso_639/en_GB.po
+++ b/setup/iso_639/en_GB.po
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
@ -9,67 +9,67 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2011-09-27 15:37+0000\n"
-"Last-Translator: Piarres Beobide <pi@beobide.net>\n"
+"PO-Revision-Date: 2012-03-06 13:55+0000\n"
+"Last-Translator: Asier Iturralde Sarasola <Unknown>\n"
 "Language-Team: Euskara <itzulpena@comtropos.com>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-11-26 05:07+0000\n"
-"X-Generator: Launchpad (build 14381)\n"
+"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n"
+"X-Generator: Launchpad (build 14907)\n"
 "Language: eu\n"

 #. name for aaa
 msgid "Ghotuo"
-msgstr ""
+msgstr "Ghotuo"

 #. name for aab
 msgid "Alumu-Tesu"
-msgstr ""
+msgstr "Alumu-Tesu"

 #. name for aac
 msgid "Ari"
-msgstr ""
+msgstr "Ari"

 #. name for aad
 msgid "Amal"
-msgstr ""
+msgstr "Amal"

 #. name for aae
 msgid "Albanian; Arbëreshë"
-msgstr ""
+msgstr "Albaniera; Arbëreshë"

 #. name for aaf
 msgid "Aranadan"
-msgstr ""
+msgstr "Aranadan"

 #. name for aag
 msgid "Ambrak"
-msgstr ""
+msgstr "Ambrak"

 #. name for aah
 msgid "Arapesh; Abu'"
-msgstr ""
+msgstr "Arapesh; Abu'"

 #. name for aai
 msgid "Arifama-Miniafia"
-msgstr ""
+msgstr "Arifama-Miniafia"

 #. name for aak
 msgid "Ankave"
-msgstr ""
+msgstr "Ankave"

 #. name for aal
 msgid "Afade"
-msgstr ""
+msgstr "Afade"

 #. name for aam
 msgid "Aramanik"
-msgstr ""
+msgstr "Aramanik"

 #. name for aan
 msgid "Anambé"
-msgstr ""
+msgstr "Anambé"

 #. name for aao
 msgid "Arabic; Algerian Saharan"
@ -77,107 +77,107 @@ msgstr ""

 #. name for aap
 msgid "Arára; Pará"
-msgstr ""
+msgstr "Arára; Pará"

 #. name for aaq
 msgid "Abnaki; Eastern"
-msgstr ""
+msgstr "Abnaki; Ekialdekoa"

 #. name for aar
 msgid "Afar"
-msgstr ""
+msgstr "Afarera"

 #. name for aas
 msgid "Aasáx"
-msgstr ""
+msgstr "Aasáx"

 #. name for aat
 msgid "Albanian; Arvanitika"
-msgstr ""
+msgstr "Albaniera; Arvanitika"

 #. name for aau
 msgid "Abau"
-msgstr ""
+msgstr "Abau"

 #. name for aaw
 msgid "Solong"
-msgstr ""
+msgstr "Solong"

 #. name for aax
 msgid "Mandobo Atas"
-msgstr ""
+msgstr "Mandobo Atas"

 #. name for aaz
 msgid "Amarasi"
-msgstr ""
+msgstr "Amarasi"

 #. name for aba
 msgid "Abé"
-msgstr ""
+msgstr "Abé"

 #. name for abb
 msgid "Bankon"
-msgstr ""
+msgstr "Bankon"

 #. name for abc
 msgid "Ayta; Ambala"
-msgstr ""
+msgstr "Ayta; Ambala"

 #. name for abd
 msgid "Manide"
-msgstr ""
+msgstr "Manide"

 #. name for abe
 msgid "Abnaki; Western"
-msgstr ""
+msgstr "Abnaki; Mendebaldekoa"

 #. name for abf
 msgid "Abai Sungai"
-msgstr ""
+msgstr "Abai Sungai"

 #. name for abg
 msgid "Abaga"
-msgstr ""
+msgstr "Abaga"

 #. name for abh
 msgid "Arabic; Tajiki"
-msgstr ""
+msgstr "Arabiera; Tajiki"

 #. name for abi
 msgid "Abidji"
-msgstr ""
+msgstr "Abidji"

 #. name for abj
 msgid "Aka-Bea"
-msgstr ""
+msgstr "Aka-Bea"

 #. name for abk
 msgid "Abkhazian"
-msgstr ""
+msgstr "Abkhazera"

 #. name for abl
 msgid "Lampung Nyo"
-msgstr ""
+msgstr "Lampung Nyo"

 #. name for abm
 msgid "Abanyom"
-msgstr ""
+msgstr "Abanyom"

 #. name for abn
 msgid "Abua"
-msgstr ""
+msgstr "Abua"

 #. name for abo
 msgid "Abon"
-msgstr ""
+msgstr "Abon"

 #. name for abp
 msgid "Ayta; Abellen"
-msgstr ""
+msgstr "Ayta; Abellen"

 #. name for abq
 msgid "Abaza"
-msgstr ""
+msgstr "Abazera"

 #. name for abr
 msgid "Abron"
--- a/setup/iso_639/oc.po
+++ b/setup/iso_639/oc.po
--- a/setup/iso_639/sr.po
+++ b/setup/iso_639/sr.po
@ -8,23 +8,23 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2011-09-27 15:42+0000\n"
-"Last-Translator: Kovid Goyal <Unknown>\n"
+"PO-Revision-Date: 2012-03-03 21:35+0000\n"
+"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
 "Language-Team: Serbian <gnu@prevod.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-11-26 05:36+0000\n"
-"X-Generator: Launchpad (build 14381)\n"
+"X-Launchpad-Export-Date: 2012-03-04 04:59+0000\n"
+"X-Generator: Launchpad (build 14886)\n"
 "Language: sr\n"

 #. name for aaa
 msgid "Ghotuo"
-msgstr ""
+msgstr "Ghotuo"

 #. name for aab
 msgid "Alumu-Tesu"
-msgstr ""
+msgstr "Alumu-Tesu"

 #. name for aac
 msgid "Ari"
@ -32,31 +32,31 @@ msgstr ""

 #. name for aad
 msgid "Amal"
-msgstr ""
+msgstr "Amal"

 #. name for aae
 msgid "Albanian; Arbëreshë"
-msgstr ""
+msgstr "Албански; Арбереше"

 #. name for aaf
 msgid "Aranadan"
-msgstr ""
+msgstr "Аранадан"

 #. name for aag
 msgid "Ambrak"
-msgstr ""
+msgstr "Амбрак"

 #. name for aah
 msgid "Arapesh; Abu'"
-msgstr ""
+msgstr "Арапеш; Абу'"

 #. name for aai
 msgid "Arifama-Miniafia"
-msgstr ""
+msgstr "Арифама-Миниафиа"

 #. name for aak
 msgid "Ankave"
-msgstr ""
+msgstr "Анкаве"

 #. name for aal
 msgid "Afade"
@ -72,7 +72,7 @@ msgstr ""

 #. name for aao
 msgid "Arabic; Algerian Saharan"
-msgstr ""
+msgstr "Арапски; Алжирска Сахара"

 #. name for aap
 msgid "Arára; Pará"
@ -84,7 +84,7 @@ msgstr ""

 #. name for aar
 msgid "Afar"
-msgstr "афар"
+msgstr "Афар"

 #. name for aas
 msgid "Aasáx"
@ -128,7 +128,7 @@ msgstr ""

 #. name for abe
 msgid "Abnaki; Western"
-msgstr ""
+msgstr "Абнаки; Западни"

 #. name for abf
 msgid "Abai Sungai"
@ -140,11 +140,11 @@ msgstr ""

 #. name for abh
 msgid "Arabic; Tajiki"
-msgstr ""
+msgstr "Арапски; Таџики"

 #. name for abi
 msgid "Abidji"
-msgstr ""
+msgstr "Абиџи"

 #. name for abj
 msgid "Aka-Bea"
@ -152,7 +152,7 @@ msgstr ""

 #. name for abk
 msgid "Abkhazian"
-msgstr "абкаски"
+msgstr "Абхазијски"

 #. name for abl
 msgid "Lampung Nyo"
@ -184,7 +184,7 @@ msgstr ""

 #. name for abs
 msgid "Malay; Ambonese"
-msgstr ""
+msgstr "Малајски; Амбонијски"

 #. name for abt
 msgid "Ambulas"
@ -228,15 +228,15 @@ msgstr ""

 #. name for ace
 msgid "Achinese"
-msgstr "акинески"
+msgstr "Акинески"

 #. name for acf
 msgid "Creole French; Saint Lucian"
-msgstr ""
+msgstr "Креолски француски; Сент Лусија"

 #. name for ach
 msgid "Acoli"
-msgstr "аколи"
+msgstr "Аколи"

 #. name for aci
 msgid "Aka-Cari"
--- a/setup/translations.py
+++ b/setup/translations.py
@ -151,7 +151,8 @@ class Translations(POT): # {{{
                    self.info('\tCopying ISO 639 translations')
                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
-                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku'):
+                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
+                    'fr_CA'):
                self.warn('No ISO 639 translations for locale:', locale)

        self.write_stats()
--- a/setup/upload.py
+++ b/setup/upload.py
@ -132,12 +132,15 @@ class UploadInstallers(Command): # {{{
        with open(os.path.join(tdir, 'fmap'), 'wb') as fo:
            for f, desc in files.iteritems():
                fo.write('%s: %s\n'%(f, desc))
-        try:
-            send_data(tdir)
-        except:
-            print('\nUpload to staging failed, retrying in a minute')
-            time.sleep(60)
-            send_data(tdir)
+
+        while True:
+            try:
+                send_data(tdir)
+            except:
+                print('\nUpload to staging failed, retrying in a minute')
+                time.sleep(60)
+            else:
+                break

    def upload_to_google(self, replace):
        gdata = get_google_data()
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 8, 41)
+numeric_version = (0, 8, 42)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -199,7 +199,5 @@ def get_windows_temp_path():
    buf = ctypes.create_unicode_buffer(u'\0'*n)
    ctypes.windll.kernel32.GetTempPathW(n, buf)
    ans = buf.value
-    if ans[-1] == u'\\':
-        ans = ans[:-1]
    return ans if ans else None

--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -263,7 +263,7 @@ class MOBIMetadataReader(MetadataReaderPlugin):
    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.mobi.reader import get_metadata
+        from calibre.ebooks.metadata.mobi import get_metadata
        return get_metadata(stream)

 class ODTMetadataReader(MetadataReaderPlugin):
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -84,13 +84,14 @@ class ANDROID(USBMS):
                0x4e22 : [0x0100, 0x226, 0x227],
                0xb058 : [0x0222, 0x226, 0x227],
                0x0ff9 : [0x0226],
+                0xdddd : [0x216],
            },

            # Samsung
            0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
                       0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
                       0x6640 : [0x0100],
-                       0x685b : [0x0400],
+                       0x685b : [0x0400, 0x0226],
                       0x685e : [0x0400],
                       0x6860 : [0x0400],
                       0x6877 : [0x0400],
@ -171,7 +172,7 @@ class ANDROID(USBMS):
            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
            'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
-            'POCKET', 'ONDA_MID']
+            'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -185,14 +186,15 @@ class ANDROID(USBMS):
            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
-            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T']
+            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
+            'KTABLET_PC', 'INGENIC']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
            '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
            'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
-            'USB_2.0_DRIVER', 'I9100T']
+            'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC']

    OSX_MAIN_MEM = 'Android Device Main Memory'

--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -594,8 +594,9 @@ class ITUNES(DriverBase):
        iPad, as we have to return True if we can handle device interaction, or False if not.

        '''
+        import pythoncom
+
        if self.iTunes:
-            import pythoncom
            # We've previously run, so the user probably ejected the device
            try:
                pythoncom.CoInitialize()
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -10,7 +10,7 @@ Generates and writes an APNX page mapping file.

 import struct

-from calibre.ebooks.mobi.reader import MobiReader
+from calibre.ebooks.mobi.reader.mobi6 import MobiReader
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.utils.logging import default_log

--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
                   'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
                   'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
-                   'textile', 'markdown']
+                   'textile', 'markdown', 'ibook', 'iba']

 class HTMLRenderer(object):

--- a/src/calibre/ebooks/conversion/plugins/djvu_input.py
+++ b/src/calibre/ebooks/conversion/plugins/djvu_input.py
@ -46,7 +46,7 @@ class DJVUInput(InputFormatPlugin):
            except:
                stream.seek(0) # retry with the pure python converter
        if ppdjvu:
-            from .djvu import DJVUFile
+            from calibre.ebooks.djvu.djvu import DJVUFile
            x = DJVUFile(stream)
            x.get_text(stdout)

--- a/src/calibre/ebooks/conversion/plugins/mobi_input.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py
@ -3,8 +3,26 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import os
+
 from calibre.customize.conversion import InputFormatPlugin

+def run_mobi_unpack(stream, options, log, accelerators):
+    '''
+    Convert a KF8 MOBI file by unpacking it to an EPUB with the external
+    mobiunpack code and then running the EPUB input plugin on the result.
+
+    :param stream: Open MOBI file, handed to mobiunpack's Mobi8Reader
+    :param options: Conversion options. Every option declared by the EPUB
+                    input plugin is reset to its recommended value and
+                    input_encoding is set to the codec mobiunpack reports
+    :param log: Passed through to the EPUB input plugin
+    :param accelerators: Passed through to the EPUB input plugin
+    :return: Whatever the EPUB input plugin's convert() returns
+    :raises ValueError: If mobiunpack does not report the file as KF8
+    '''
+    from mobiunpack.mobi_unpack import Mobi8Reader
+    from calibre.customize.ui import plugin_for_input_format
+    from calibre.ptempfile import PersistentTemporaryDirectory
+
+    wdir = PersistentTemporaryDirectory('_unpack_space')
+    m8r = Mobi8Reader(stream, wdir)
+    if not m8r.isK8():
+        # Fail loudly instead of falling off the end and returning None,
+        # which a caller would otherwise hand back as the conversion
+        # result. A caller that traps exceptions can then fall back to
+        # another reader.
+        raise ValueError('Not a KF8 MOBI file, cannot use mobi_unpack')
+
+    epub_path = m8r.processMobi8()
+    epub_input = plugin_for_input_format('epub')
+    for opt in epub_input.options:
+        setattr(options, opt.option.name, opt.recommended_value)
+    options.input_encoding = m8r.getCodec()
+    # Close the unpacked EPUB once the plugin is done with it rather than
+    # leaking the file handle
+    with open(epub_path, 'rb') as epub_stream:
+        return epub_input.convert(epub_stream, options,
+                'epub', log, accelerators)
+
 class MOBIInput(InputFormatPlugin):

    name        = 'MOBI Input'
@ -14,17 +32,34 @@ class MOBIInput(InputFormatPlugin):

    def convert(self, stream, options, file_ext, log,
                accelerators):
-        from calibre.ebooks.mobi.reader import MobiReader
+
+        if os.environ.get('USE_MOBIUNPACK', None) is not None:
+            pos = stream.tell()
+            try:
+                return run_mobi_unpack(stream, options, log, accelerators)
+            except Exception:
+                log.exception('mobi_unpack code not working')
+            stream.seek(pos)
+
+        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}
        try:
            mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_pipeline)
-            mr.extract_content(u'.', parse_cache)
+            if mr.kf8_type is None:
+                mr.extract_content(u'.', parse_cache)
+
        except:
            mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_pipeline, try_extra_data_fix=True)
-            mr.extract_content(u'.', parse_cache)
+            if mr.kf8_type is None:
+                mr.extract_content(u'.', parse_cache)
+
+        if mr.kf8_type is not None:
+            log('Found KF8 MOBI of type %r'%mr.kf8_type)
+            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
+            return os.path.abspath(Mobi8Reader(mr, log)())

        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@ -18,9 +18,6 @@ class MOBIOutput(OutputFormatPlugin):
    file_type = 'mobi'

    options = set([
-        OptionRecommendation(name='rescale_images', recommended_value=False,
-            help=_('Modify images to meet Palm device size limitations.')
-        ),
        OptionRecommendation(name='prefer_author_sort',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('When present, use author sort field as author.')
@ -59,7 +56,16 @@ class MOBIOutput(OutputFormatPlugin):
            help=_('Enable sharing of book content via Facebook etc. '
                ' on the Kindle. WARNING: Using this feature means that '
                ' the book will not auto sync its last read position '
-                ' on multiple devices. Complain to Amazon.'))
+                ' on multiple devices. Complain to Amazon.')
+        ),
+        OptionRecommendation(name='mobi_keep_original_images',
+            recommended_value=False,
+            help=_('By default calibre converts all images to JPEG format '
+                'in the output MOBI file. This is for maximum compatibility '
+                'as some older MOBI viewers have problems with other image '
+                'formats. This option tells calibre not to do this. '
+                'Useful if your document contains lots of GIF/PNG images that '
+                'become very large when converted to JPEG.')),
    ])

    def check_for_periodical(self):
@ -167,12 +173,7 @@ class MOBIOutput(OutputFormatPlugin):
        mobimlizer(oeb, opts)
        self.check_for_periodical()
        write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
-        from calibre.utils.config import tweaks
-        if tweaks.get('new_mobi_writer', True):
-            from calibre.ebooks.mobi.writer2.main import MobiWriter
-            MobiWriter
-        else:
-            from calibre.ebooks.mobi.writer import MobiWriter
+        from calibre.ebooks.mobi.writer2.main import MobiWriter
        writer = MobiWriter(opts,
                        write_page_breaks_after_item=write_page_breaks_after_item)
        writer(oeb, output_path)
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -289,10 +289,17 @@ class CSSPreProcessor(object):
        data = self.MS_PAT.sub(self.ms_sub, data)
        if not add_namespace:
            return data
+
+        # Remove comments as the following namespace logic will break if there
+        # are commented lines before the first @import or @charset rule. Since
+        # the conversion will remove all stylesheets anyway, we don't lose
+        # anything
+        data = re.sub(ur'/\*.*?\*/', u'', data, flags=re.DOTALL)
+
        ans, namespaced = [], False
        for line in data.splitlines():
            ll = line.lstrip()
-            if not (namespaced or ll.startswith('@import') or
+            if not (namespaced or ll.startswith('@import') or not ll or
                        ll.startswith('@charset')):
                ans.append(XHTML_CSS_NAMESPACE.strip())
                namespaced = True
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -550,7 +550,12 @@ class Metadata(object):
                        if meta['datatype'] == 'text' and meta['is_multiple']:
                            # Case-insensitive but case preserving merging
                            lotags = [t.lower() for t in other_tags]
-                            lstags = [t.lower() for t in self_tags]
+                            try:
+                                lstags = [t.lower() for t in self_tags]
+                            except TypeError:
+                                # Happens if x is not a text, is_multiple field
+                                # on self
+                                lstags = []
                            ot, st = map(frozenset, (lotags, lstags))
                            for t in st.intersection(ot):
                                sidx = lstags.index(t)
@ -661,7 +666,7 @@ class Metadata(object):
            elif datatype == 'bool':
                res = _('Yes') if res else _('No')
            elif datatype == 'rating':
-                res = res/2.0
+                res = u'%.2g'%(res/2.0)
            elif datatype in ['int', 'float']:
                try:
                    fmt = cmeta['display'].get('number_format', None)
@ -701,7 +706,7 @@ class Metadata(object):
            elif datatype == 'datetime':
                res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
            elif datatype == 'rating':
-                res = res/2.0
+                res = u'%.2g'%(res/2.0)
            elif key == 'size':
                res = human_readable(res)
            return (name, unicode(res), orig_res, fmeta)
@ -736,7 +741,8 @@ class Metadata(object):
        if not self.is_null('languages'):
            fmt('Languages', ', '.join(self.languages))
        if self.rating is not None:
-            fmt('Rating', self.rating)
+            fmt('Rating', (u'%.2g'%(float(self.rating)/2.0)) if self.rating
+                    else u'')
        if self.timestamp is not None:
            fmt('Timestamp', isoformat(self.timestamp))
        if self.pubdate is not None:
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@ -9,15 +9,21 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
    'Marshall T. Vandegrift <llasram@gmail.com>'
 __docformat__ = 'restructuredtext en'

+import os, cStringIO, imghdr
 from struct import pack, unpack
 from cStringIO import StringIO

 from calibre.ebooks import normalize
-from calibre.ebooks.mobi import MobiError
-from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
+from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
+from calibre.ebooks.mobi.utils import rescale_image
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.utils.date import now as nowf

+def is_image(ss):
+    '''
+    Return True if the first 200 bytes of ss are recognised by imghdr as an
+    image format. None is never an image.
+    '''
+    return ss is not None and imghdr.what(None, ss[:200]) is not None
+
 class StreamSlicer(object):

    def __init__(self, stream, start=0, stop=None):
@ -160,11 +166,10 @@ class MetadataUpdater(object):
            if id == 106:
                self.timestamp = content
            elif id == 201:
-                rindex, = self.cover_rindex, = unpack('>i', content)
-                if rindex > 0 :
-                    self.cover_record = self.record(rindex + image_base)
+                rindex, = self.cover_rindex, = unpack('>I', content)
+                self.cover_record = self.record(rindex + image_base)
            elif id == 202:
-                rindex, = self.thumbnail_rindex, = unpack('>i', content)
+                rindex, = self.thumbnail_rindex, = unpack('>I', content)
                if rindex > 0 :
                    self.thumbnail_record = self.record(rindex + image_base)

@ -415,17 +420,17 @@ class MetadataUpdater(object):
            except:
                pass
            else:
-                if self.cover_record is not None:
+                if is_image(self.cover_record):
                    size = len(self.cover_record)
                    cover = rescale_image(data, size)
                    if len(cover) <= size:
-                        cover += '\0' * (size - len(cover))
+                        cover += b'\0' * (size - len(cover))
                        self.cover_record[:] = cover
-                if self.thumbnail_record is not None:
+                if is_image(self.thumbnail_record):
                    size = len(self.thumbnail_record)
                    thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
                    if len(thumbnail) <= size:
-                        thumbnail += '\0' * (size - len(thumbnail))
+                        thumbnail += b'\0' * (size - len(thumbnail))
                        self.thumbnail_record[:] = thumbnail
                return

@ -433,3 +438,75 @@ def set_metadata(stream, mi):
    mu = MetadataUpdater(stream)
    mu.update(mi)
    return
+
+def get_metadata(stream):
+    '''
+    Read metadata and, where possible, a cover image from a MOBI file.
+
+    Files that start with ``TPZ`` are handed to the Topaz metadata reader
+    instead.
+
+    :param stream: A seekable file-like object. It is rewound before being
+                   read
+    :return: A MetaInformation object. ``cover_data`` is set to a
+             ``('jpg', data)`` tuple only if the candidate cover record could
+             be opened as an image
+    '''
+    from calibre.ebooks.metadata import MetaInformation
+    from calibre.ptempfile import TemporaryDirectory
+    from calibre.ebooks.mobi.reader.headers import MetadataHeader
+    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
+    from calibre import CurrentDir
+
+    # PIL may be importable either as a package or as a top-level Image
+    # module. The bare PILImage expression has no effect at runtime
+    # (presumably it only silences unused-import checkers)
+    try:
+        from PIL import Image as PILImage
+        PILImage
+    except ImportError:
+        import Image as PILImage
+
+
+    # Sniff the first three bytes to detect Topaz files
+    stream.seek(0)
+    try:
+        raw = stream.read(3)
+    except:
+        raw = ''
+    stream.seek(0)
+    if raw == b'TPZ':
+        from calibre.ebooks.metadata.topaz import get_metadata
+        return get_metadata(stream)
+    from calibre.utils.logging import Log
+    log = Log()
+    # Default title is the file name; streams without a usable name get
+    # the generic "Unknown" title
+    try:
+        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
+    except:
+        mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    mh = MetadataHeader(stream, log)
+    if mh.title and mh.title != _('Unknown'):
+        mi.title = mh.title
+
+    if mh.exth is not None:
+        # Metadata parsed from the EXTH header replaces the defaults
+        if mh.exth.mi is not None:
+            mi = mh.exth.mi
+    else:
+        # No EXTH header: fall back to metadata embedded in the book
+        # content, but only for files under 4MB since this requires
+        # extracting the whole book. The 1GB default means streams whose
+        # size cannot be measured are never extracted
+        size = 1024**3
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+            pos = stream.tell()
+            stream.seek(0, 2)
+            size = stream.tell()
+            stream.seek(pos)
+        if size < 4*1024*1024:
+            with TemporaryDirectory('_mobi_meta_reader') as tdir:
+                with CurrentDir(tdir):
+                    mr = MobiReader(stream, log)
+                    parse_cache = {}
+                    mr.extract_content(tdir, parse_cache)
+                    if mr.embedded_mi is not None:
+                        mi = mr.embedded_mi
+    # Locate the cover record: the EXTH cover offset is relative to the
+    # first image record. Without one, try the first image record itself
+    if hasattr(mh.exth, 'cover_offset'):
+        cover_index = mh.first_image_index + mh.exth.cover_offset
+        data  = mh.section_data(int(cover_index))
+    else:
+        try:
+            data  = mh.section_data(mh.first_image_index)
+        except:
+            data = ''
+    buf = cStringIO.StringIO(data)
+    try:
+        im = PILImage.open(buf)
+    except:
+        log.exception('Failed to read MOBI cover')
+    else:
+        # Re-encode whatever image format was found as JPEG
+        obuf = cStringIO.StringIO()
+        im.convert('RGB').save(obuf, format='JPEG')
+        mi.cover_data = ('jpg', obuf.getvalue())
+    return mi
+
+
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -1081,6 +1081,15 @@ class OPF(object): # {{{
        return elem

    def render(self, encoding='utf-8'):
+        for meta in self.raster_cover_path(self.metadata):
+            # Ensure that the name attribute occurs before the content
+            # attribute. Needed for Nooks.
+            a = meta.attrib
+            c = a.get('content', None)
+            if c is not None:
+                del a['content']
+                a['content'] = c
+
        self.write_user_metadata()
        raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
        if not raw.lstrip().startswith('<?xml '):
@ -1139,7 +1148,8 @@ class OPFCreator(Metadata):
        self.manifest = Manifest.from_paths(entries)
        self.manifest.set_basedir(self.base_path)

-    def create_manifest_from_files_in(self, files_and_dirs):
+    def create_manifest_from_files_in(self, files_and_dirs,
+            exclude=lambda x:False):
        entries = []

        def dodir(dir):
@ -1147,7 +1157,7 @@ class OPFCreator(Metadata):
                root, files = spec[0], spec[-1]
                for name in files:
                    path = os.path.join(root, name)
-                    if os.path.isfile(path):
+                    if os.path.isfile(path) and not exclude(path):
                        entries.append((path, None))

        for i in files_and_dirs:
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -466,7 +466,7 @@ class Worker(Thread): # Get details {{{
        for x in reversed(pd.xpath(self.publisher_xpath)):
            if x.tail:
                ans = x.tail
-                date = ans.partition('(')[-1].replace(')', '').strip()
+                date = ans.rpartition('(')[-1].replace(')', '').strip()
                date = self.delocalize_datestr(date)
                return parse_date(date, assume_utc=True)

--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@ -46,7 +46,7 @@ class TOC(list):
        self.toc_thumbnail = toc_thumbnail

    def __str__(self):
-        lines = ['TOC: %s#%s'%(self.href, self.fragment)]
+        lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)]
        for child in self:
            c = str(child).splitlines()
            for l in c:
--- a/src/calibre/ebooks/mobi/init.py
+++ b/src/calibre/ebooks/mobi/init.py
@ -6,3 +6,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 class MobiError(Exception):
    pass
+
+MAX_THUMB_SIZE = 16 * 1024
+MAX_THUMB_DIMEN = (180, 240)
+
+
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -15,7 +15,7 @@ from lxml import html
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
-        get_trailing_data, decode_tbs)
+        get_trailing_data, decode_tbs, read_font_record)
 from calibre.utils.magick.draw import identify_data

 def format_bytes(byts):
@ -1149,6 +1149,25 @@ class BinaryRecord(object): # {{{

 # }}}

+class FontRecord(object): # {{{
+
+    '''
+    A single FONT record from a MOBI file, run through read_font_record()
+    so that its contents can be written to disk for inspection.
+    '''
+
+    def __init__(self, idx, record):
+        # The zero padded record index is the base of the output file name
+        self.raw = record.raw
+        stem = '%06d'%idx
+        info = self.font = read_font_record(self.raw)
+        if info['err']:
+            raise ValueError('Failed to read font record: %s Headers: %s'%(
+                info['err'], info['headers']))
+        # Use font_data when it is non-empty, otherwise fall back to raw_data
+        self.payload = info['font_data'] or info['raw_data']
+        self.name = '%s.%s'%(stem, info['ext'])
+
+    def dump(self, folder):
+        # Write the payload into folder under the generated file name
+        dest = os.path.join(folder, self.name)
+        with open(dest, 'wb') as out:
+            out.write(self.payload)
+
+# }}}
+
 class TBSIndexing(object): # {{{

    def __init__(self, text_records, indices, doc_type):
@ -1410,6 +1429,7 @@ class MOBIFile(object): # {{{
            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
+        self.font_records = []
        image_index = 0
        for i in xrange(fntbr, len(self.records)):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
@ -1419,13 +1439,15 @@ class MOBIFile(object): # {{{
            fmt = None
            if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
-                    b'AUDI', b'VIDE'}:
+                    b'AUDI', b'VIDE', b'FONT'}:
                try:
                    width, height, fmt = identify_data(r.raw)
                except:
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
+            elif r.raw[:4] == b'FONT':
+                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

@ -1465,10 +1487,11 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
            of.write(rec.raw)
            alltext += rec.raw
        of.seek(0)
-    root = html.fromstring(alltext.decode('utf-8'))
-    with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
-        of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
-            include_meta_content_type=True))
+    if f.mobi_header.file_version < 8:
+        root = html.fromstring(alltext.decode('utf-8'))
+        with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
+            of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
+                include_meta_content_type=True))


    if f.index_header is not None:
@ -1490,7 +1513,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
        f.tbs_indexing.dump(ddir)

    for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
-            ('binary', 'binary_records')]:
+            ('binary', 'binary_records'), ('font', 'font_records')]:
        tdir = os.path.join(ddir, tdir)
        os.mkdir(tdir)
        for rec in getattr(f, attr):
--- a/src/calibre/ebooks/mobi/reader/init.py
+++ b/src/calibre/ebooks/mobi/reader/init.py
@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@ -0,0 +1,258 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (absolute_import, print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import struct, re, os
+
+from calibre import replace_entities
+from calibre.utils.date import parse_date
+from calibre.ebooks.mobi import MobiError
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
+
+NULL_INDEX = 0xffffffff
+
+class EXTHHeader(object): # {{{
+
+    '''
+    Parses the EXTH header of a MOBI file into a MetaInformation object
+    (self.mi) plus a few book level attributes (has_fake_cover,
+    start_offset, thumbnail_offset and, when present, cover_offset).
+
+    :param raw: Raw bytes starting at the EXTH header
+    :param codec: Codec used to decode textual records
+    :param title: Fallback title, used unless a record with id 503
+                  (long title) decodes successfully
+    '''
+
+    def __init__(self, raw, codec, title):
+        # Layout: 4 byte doctype, 4 byte header length, 4 byte record count,
+        # followed by the records themselves
+        self.doctype = raw[:4]
+        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
+        raw = raw[12:]
+        pos = 0
+        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
+        self.has_fake_cover = True
+        self.start_offset = None
+        left = self.num_items
+
+        while left > 0:
+            left -= 1
+            # Each record is: 4 byte id, 4 byte size, content. The size
+            # includes the 8 bytes of id and size themselves
+            id, size = struct.unpack('>LL', raw[pos:pos + 8])
+            content = raw[pos + 8:pos + size]
+            pos += size
+            if id >= 100 and id < 200:
+                self.process_metadata(id, content, codec)
+            elif id == 203:
+                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
+            elif id == 201:
+                # cover_offset is only set when the value is not NULL_INDEX,
+                # so its absence means the book declares no cover
+                co, = struct.unpack('>L', content)
+                if co < NULL_INDEX:
+                    self.cover_offset = co
+            elif id == 202:
+                self.thumbnail_offset, = struct.unpack('>L', content)
+            elif id == 501:
+                # cdetype
+                pass
+            elif id == 502:
+                # last update time
+                pass
+            elif id == 503: # Long title
+                # Amazon seems to regard this as the definitive book title
+                # rather than the title from the PDB header. In fact when
+                # sending MOBI files through Amazon's email service if the
+                # title contains non ASCII chars or non filename safe chars
+                # they are messed up in the PDB header
+                try:
+                    title = content.decode(codec)
+                except:
+                    pass
+            #else:
+            #    print 'unknown record', id, repr(content)
+        if title:
+            self.mi.title = replace_entities(title)
+
+    def process_metadata(self, id, content, codec):
+        # Handles records with ids in the range [100, 200). Text is decoded
+        # with errors ignored
+        if id == 100:
+            # Author. The first author record replaces the Unknown
+            # placeholder; later ones are appended
+            if self.mi.authors == [_('Unknown')]:
+                self.mi.authors = []
+            au = content.decode(codec, 'ignore').strip()
+            self.mi.authors.append(au)
+            # An author of the form "Last, First" is also used as the
+            # author sort value
+            if re.match(r'\S+?\s*,\s+\S+', au.strip()):
+                self.mi.author_sort = au.strip()
+        elif id == 101:
+            self.mi.publisher = content.decode(codec, 'ignore').strip()
+        elif id == 103:
+            self.mi.comments  = content.decode(codec, 'ignore')
+        elif id == 104:
+            self.mi.isbn      = content.decode(codec, 'ignore').strip().replace('-', '')
+        elif id == 105:
+            # Tags are a semicolon separated list. Duplicates are removed
+            # via set(), so the resulting order is not preserved
+            if not self.mi.tags:
+                self.mi.tags = []
+            self.mi.tags.extend([x.strip() for x in content.decode(codec,
+                'ignore').split(';')])
+            self.mi.tags = list(set(self.mi.tags))
+        elif id == 106:
+            # Publication date. Unparseable dates are silently ignored
+            try:
+                self.mi.pubdate = parse_date(content, as_utc=False)
+            except:
+                pass
+        elif id == 108:
+            pass # Producer
+        elif id == 113:
+            pass # ASIN or UUID
+        elif id == 116:
+            self.start_offset, = struct.unpack(b'>L', content)
+        #else:
+        #    print 'unhandled metadata record', id, repr(content)
+# }}}
+
+class BookHeader(object):
+
+    '''
+    Parses the first record of a MOBI file: the compression and encryption
+    fields at the start, followed (for all but the oldest files) by the MOBI
+    header, an optional EXTH header and, for version 8 files, the KF8
+    index record numbers.
+
+    :param raw: Raw bytes of the first record
+    :param ident: Book type identifier, compared against 'TEXTREAD'
+    :param user_encoding: Codec to assume when the header codepage is not
+                          recognized. Falls back to cp1252 if empty
+    :param log: Used to report an unknown codepage and EXTH parse failures
+    :param try_extra_data_fix: If True, extra data flags are ignored when
+                               the header length is exactly 0xE4
+    '''
+
+    def __init__(self, raw, ident, user_encoding, log, try_extra_data_fix=False):
+        self.log = log
+        self.compression_type = raw[:2]
+        self.records, self.records_size = struct.unpack('>HH', raw[8:12])
+        self.encryption_type, = struct.unpack('>H', raw[12:14])
+        # Only takes effect for headers of 16 bytes or fewer; otherwise
+        # codepage is overwritten from the header below
+        if ident == 'TEXTREAD':
+            self.codepage = 1252
+        if len(raw) <= 16:
+            # No MOBI header at all, use defaults for everything
+            self.codec = 'cp1252'
+            self.extra_flags = 0
+            self.title = _('Unknown')
+            self.language = 'ENGLISH'
+            self.sublanguage = 'NEUTRAL'
+            self.exth_flag, self.exth = 0, None
+            self.ancient = True
+            self.first_image_index = -1
+            self.mobi_version = 1
+        else:
+            self.ancient = False
+            self.doctype = raw[16:20]
+            self.length, self.type, self.codepage, self.unique_id, \
+                self.version = struct.unpack('>LLLLL', raw[20:40])
+
+            try:
+                self.codec = {
+                    1252: 'cp1252',
+                    65001: 'utf-8',
+                    }[self.codepage]
+            except (IndexError, KeyError):
+                self.codec = 'cp1252' if not user_encoding else user_encoding
+                log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
+                    self.codec))
+            # There exists some broken DRM removal tool that removes DRM but
+            # leaves the DRM fields in the header yielding a header size of
+            # 0xF8. The actual value of max_header_length should be 0xE8 but
+            # it's changed to accommodate this silly tool. Hopefully that will
+            # not break anything else.
+            max_header_length = 0xF8
+
+            # Extra data flags are only read when the header length is in
+            # the accepted range
+            if (ident == 'TEXTREAD' or self.length < 0xE4 or
+                    self.length > max_header_length or
+                    (try_extra_data_fix and self.length == 0xE4)):
+                self.extra_flags = 0
+            else:
+                self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4])
+
+            if self.compression_type == 'DH':
+                self.huff_offset, self.huff_number = struct.unpack('>LL',
+                        raw[0x70:0x78])
+
+            # Title is stored as an (offset, length) pair into this record.
+            # NOTE(review): a title ending exactly at the end of the record
+            # (tend == len(raw)) is treated as missing - confirm intended
+            toff, tlen = struct.unpack('>II', raw[0x54:0x5c])
+            tend = toff + tlen
+            self.title = raw[toff:tend] if tend < len(raw) else _('Unknown')
+            # Main language id is the low byte of the language code, the
+            # sub language id is taken from bit 10 upwards
+            langcode  = struct.unpack('!L', raw[0x5C:0x60])[0]
+            langid    = langcode & 0xFF
+            sublangid = (langcode >> 10) & 0xFF
+            self.language = main_language.get(langid, 'ENGLISH')
+            self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+            self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
+            self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c + 4])[0]
+
+            self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
+            self.exth = None
+            if not isinstance(self.title, unicode):
+                self.title = self.title.decode(self.codec, 'replace')
+            # Bit 0x40 of the EXTH flags signals that an EXTH header
+            # follows the MOBI header. A malformed EXTH header is logged
+            # and the flag cleared
+            if self.exth_flag & 0x40:
+                try:
+                    self.exth = EXTHHeader(raw[16 + self.length:], self.codec,
+                            self.title)
+                    self.exth.mi.uid = self.unique_id
+                    try:
+                        self.exth.mi.language = mobi2iana(langid, sublangid)
+                    except:
+                        self.log.exception('Unknown language code')
+                except:
+                    self.log.exception('Invalid EXTH header')
+                    self.exth_flag = 0
+
+            self.ncxidx = NULL_INDEX
+            if len(raw) >= 0xF8:
+                self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
+
+            # NOTE(review): unlike ncxidx above, the reads below are not
+            # guarded by a length check on raw - confirm that version 8
+            # headers are always long enough
+            if self.mobi_version >= 8:
+                self.skelidx, = struct.unpack_from('>L', raw, 0xFC)
+
+                # Index into <div> sections in raw_ml
+                self.dividx, = struct.unpack_from('>L', raw, 0xF8)
+
+                # Index into Other files
+                self.othidx, = struct.unpack_from('>L', raw, 0x104)
+
+                # need to use the FDST record to find out how to properly
+                # unpack the raw_ml into pieces it is simply a table of start
+                # and end locations for each flow piece
+                self.fdstidx, = struct.unpack_from('>L', raw, 0xC0)
+                self.fdstcnt, = struct.unpack_from('>L', raw, 0xC4)
+                # if cnt is 1 or less, fdst section number can be garbage
+                if self.fdstcnt <= 1:
+                    self.fdstidx = NULL_INDEX
+            else: # Null values
+                self.skelidx = self.dividx = self.othidx = self.fdstidx = \
+                        NULL_INDEX
+
+class MetadataHeader(BookHeader):
+
+    '''
+    A BookHeader that reads its header record directly from a stream, using
+    the section table at the start of the file. Also gives access to the
+    raw data of any section via section_data().
+
+    :param stream: Seekable stream containing the MOBI file
+    :param log: Passed on to BookHeader
+    :raises MobiError: If the book type identifier is not BOOKMOBI or
+                       TEXTREAD
+    '''
+
+    def __init__(self, stream, log):
+        self.stream = stream
+        self.ident = self.identity()
+        self.num_sections = self.section_count()
+        if self.num_sections >= 2:
+            header = self.header()
+            BookHeader.__init__(self, header, self.ident, None, log)
+        else:
+            # The header record is delimited by the offsets of the first
+            # two sections, so it cannot be read. BookHeader.__init__ is
+            # not run; only exth is set
+            self.exth = None
+
+    def identity(self):
+        # The 8 byte book type identifier is at offset 60
+        self.stream.seek(60)
+        ident = self.stream.read(8).upper()
+        if ident not in ['BOOKMOBI', 'TEXTREAD']:
+            raise MobiError('Unknown book type: %s' % ident)
+        return ident
+
+    def section_count(self):
+        # The number of sections is a 2 byte big-endian value at offset 76
+        self.stream.seek(76)
+        return struct.unpack('>H', self.stream.read(2))[0]
+
+    def section_offset(self, number):
+        # The section table starts at offset 78 with 8 bytes per entry, of
+        # which the first four are the section's offset in the file
+        self.stream.seek(78 + number * 8)
+        return struct.unpack('>LBBBB', self.stream.read(8))[0]
+
+    def header(self):
+        section_headers = []
+        # First section with the metadata
+        section_headers.append(self.section_offset(0))
+        # Second section used to get the length of the first
+        section_headers.append(self.section_offset(1))
+
+        end_off = section_headers[1]
+        off = section_headers[0]
+        self.stream.seek(off)
+        return self.stream.read(end_off - off)
+
+    def section_data(self, number):
+        '''
+        Return the raw bytes of section `number`. A section runs up to the
+        start of the next section, or to the end of the stream for the
+        last section.
+        '''
+        start = self.section_offset(number)
+        if number == self.num_sections -1:
+            # Measure the stream by seeking to its end rather than calling
+            # os.stat() on stream.name, so that streams that have no file
+            # name (such as in-memory streams) also work
+            self.stream.seek(0, 2)
+            end = self.stream.tell()
+        else:
+            end = self.section_offset(number + 1)
+        self.stream.seek(start)
+        try:
+            return self.stream.read(end - start)
+        except OverflowError:
+            # Fall back to reading everything up to the end of the stream
+            self.stream.seek(start)
+            return self.stream.read()
+
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@ -0,0 +1,195 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+from collections import OrderedDict
+
+from calibre.ebooks.mobi.utils import decint, count_set_bits
+
+class InvalidFile(ValueError):
+    ''' Raised when a section does not start with the expected signature. '''
+
+def check_signature(data, signature):
+    ''' Raise InvalidFile unless `data` begins with `signature`. '''
+    prefix = data[:len(signature)]
+    if prefix == signature:
+        return
+    raise InvalidFile('Not a valid %r section'%signature)
+
+class NotAnINDXRecord(InvalidFile):
+    ''' InvalidFile subclass for data that is not an INDX record. '''
+
+class NotATAGXSection(InvalidFile):
+    ''' InvalidFile subclass for data that is not a TAGX section. '''
+
+def format_bytes(byts):
+    '''
+    Render a byte string as space separated lowercase hex numbers without
+    zero padding, e.g. the bytes 0x00 0x0f 0xff become '0 f ff'.
+    '''
+    return ' '.join('%x' % b for b in bytearray(byts))
+
+def parse_indx_header(data):
+    '''
+    Decode the fixed header of an INDX record into a dict.
+
+    After the 4 byte INDX signature the header is read as thirteen
+    consecutive big-endian unsigned longs, returned under the keys listed
+    below. Raises InvalidFile if the signature is missing.
+    '''
+    check_signature(data, b'INDX')
+    field_names = (
+            'len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
+            'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
+    )
+    count = len(field_names)
+    fmt = b'>%dL' % count
+    # Fields start right after the signature: bytes 4 .. 4*(count+1)
+    fields = struct.unpack(fmt, data[4:4*(count+1)])
+    return dict(zip(field_names, fields))
+
+class CNCX(object): # {{{
+
+    '''
+    Parses the records that contain the compiled NCX (all strings from the
+    NCX). Presents a simple offset : string mapping interface to access the
+    data.
+
+    Keys are the offset of a string within its record plus 0x10000 for every
+    record preceding it. Looking up an unknown offset yields None (or the
+    supplied default) rather than raising KeyError.
+    '''
+
+    def __init__(self, records, codec):
+        '''
+        :param records: Iterable of raw CNCX records (byte strings)
+        :param codec: Name of the encoding used to decode each string
+        '''
+        self.records = OrderedDict()
+        record_offset = 0
+        for raw in records:
+            pos = 0
+            while pos < len(raw):
+                # Every entry is a variable width length followed by that
+                # many bytes of encoded text
+                length, consumed = decint(raw[pos:])
+                if length > 0:
+                    key = pos + record_offset
+                    body = raw[pos+consumed:pos+consumed+length]
+                    try:
+                        self.records[key] = body.decode(codec)
+                    except Exception:
+                        # Catch Exception rather than everything so that
+                        # KeyboardInterrupt/SystemExit still propagate.
+                        # The rest of the record cannot be trusted: store a
+                        # hex dump of it and move on to the next record.
+                        r = format_bytes(raw[pos:])
+                        print ('CNCX entry at offset %d has unknown format %s'%(
+                            key, r))
+                        self.records[key] = r
+                        break
+                pos += consumed+length
+            record_offset += 0x10000
+
+    def __getitem__(self, offset):
+        # Deliberately lenient: a missing offset gives None, not KeyError
+        return self.records.get(offset)
+
+    def get(self, offset, default=None):
+        return self.records.get(offset, default)
+# }}}
+
+def parse_tag_section(data):
+    '''
+    Parse a TAGX section.
+
+    Returns ``(control_byte_count, tags)`` where tags is a list of 4-tuples
+    of byte values, one per 4 byte table entry found between offset 12 and
+    the first entry offset stored in the section. Raises InvalidFile if the
+    TAGX signature is missing.
+    '''
+    check_signature(data, b'TAGX')
+
+    # Two big-endian unsigned longs follow the signature
+    first_entry_offset, control_byte_count = struct.unpack_from(
+            b'>LL', data, 0x04)
+
+    # The tag table starts after the 12 bytes read above
+    tags = [
+        tuple(ord(data[base + k]) for k in range(4))
+        for base in xrange(12, first_entry_offset, 4)
+    ]
+    return control_byte_count, tags
+
+def get_tag_map(control_byte_count, tags, data, start, end):
+    '''
+    Decode the tag values of a single index entry.
+
+    :param control_byte_count: Number of control bytes at data[start:]
+    :param tags: Iterable of (tag, values_per_entry, mask, end_flag) tuples,
+                 as returned by parse_tag_section
+    :param data: Raw bytes of the record containing the entry
+    :param start: Offset in data of the first control byte
+    :param end: Offset at which the entry ends, or None to skip the check
+                for unprocessed bytes
+    :return: dict mapping tag -> list of values decoded with decint
+
+    Problems are reported with print(), nothing is raised for them.
+    '''
+    ptags = []
+    ans = {}
+    control_byte_index = 0
+    # The variable width values start right after the control bytes
+    data_start = start + control_byte_count
+
+    # First pass: use the control bytes to work out, for every tag present,
+    # either how many values it has or how many bytes its values occupy
+    for tag, values_per_entry, mask, end_flag in tags:
+        if end_flag == 0x01:
+            # End flag entries carry no values, they only switch to the
+            # next control byte
+            control_byte_index += 1
+            continue
+        value = ord(data[start + control_byte_index]) & mask
+        if value != 0:
+            if value == mask:
+                if count_set_bits(mask) > 1:
+                    # If all bits of masked value are set and the mask has more than one bit, a variable width value
+                    # will follow after the control bytes which defines the length of bytes (NOT the value count!)
+                    # which will contain the corresponding variable width values.
+                    value, consumed = decint(data[data_start:])
+                    data_start += consumed
+                    ptags.append((tag, None, value, values_per_entry))
+                else:
+                    ptags.append((tag, 1, None, values_per_entry))
+            else:
+                # Shift bits to get the masked value.
+                while mask & 0x01 == 0:
+                    mask = mask >> 1
+                    value = value >> 1
+                ptags.append((tag, value, None, values_per_entry))
+    # Second pass: read the values themselves, in the order found above.
+    # Exactly one of value_count and value_bytes is None for each tag.
+    for tag, value_count, value_bytes, values_per_entry in ptags:
+        values = []
+        if value_count != None:
+            # Read value_count * values_per_entry variable width values.
+            for _ in xrange(value_count*values_per_entry):
+                byts, consumed = decint(data[data_start:])
+                data_start += consumed
+                values.append(byts)
+        else:
+            # Convert value_bytes to variable width values.
+            total_consumed = 0
+            while total_consumed < value_bytes:
+                # Does this work for values_per_entry != 1?
+                byts, consumed = decint(data[data_start:])
+                data_start += consumed
+                total_consumed += consumed
+                values.append(byts)
+            if total_consumed != value_bytes:
+                print ("Error: Should consume %s bytes, but consumed %s" %
+                        (value_bytes, total_consumed))
+        ans[tag] = values
+    # Test that all bytes have been processed if end is given.
+    if end is not None and data_start < end:
+        # The last entry might have some zero padding bytes, so complain only if non zero bytes are left.
+        rest = data[data_start:end]
+        if rest.replace(b'\0', b''):
+            print ("Warning: There are unprocessed index bytes left: %s" %
+                    format_bytes(rest))
+
+    return ans
+
+def read_index(sections, idx, codec):
+    '''
+    Read the index whose header INDX record is sections[idx].
+
+    :param sections: Sequence of section tuples; only element 0 of each
+                     tuple (the raw data) is used
+    :param idx: Position in sections of the index header record
+    :param codec: Encoding used to decode the CNCX strings
+    :return: ``(table, cncx)``. table is an OrderedDict mapping the raw text
+             of every index entry to its tag map (see get_tag_map), cncx is
+             a CNCX instance, empty if the header declares no CNCX records.
+
+    Raises InvalidFile if a record lacks the INDX or TAGX signature.
+    '''
+    table, cncx = OrderedDict(), CNCX([], codec)
+
+    data = sections[idx][0]
+
+    indx_header = parse_indx_header(data)
+    indx_count = indx_header['count']
+
+    if indx_header['ncncx'] > 0:
+        # The CNCX records come after the header record and the indx_count
+        # data records that follow it
+        off = idx + indx_count + 1
+        cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
+        cncx = CNCX(cncx_records, codec)
+
+    # In the header record the TAGX section begins at offset 'len'
+    tag_section_start = indx_header['len']
+    control_byte_count, tags = parse_tag_section(data[tag_section_start:])
+
+    for i in xrange(idx + 1, idx + 1 + indx_count):
+        data = sections[i][0]
+        header = parse_indx_header(data)
+        idxt_pos = header['start']
+        entry_count = header['count']
+
+        # loop through to build up the IDXT position starts
+        idx_positions= []
+        for j in xrange(entry_count):
+            # Big-endian unsigned shorts, starting 4 bytes past idxt_pos
+            pos, = struct.unpack_from(b'>H', data, idxt_pos + 4 + (2 * j))
+            idx_positions.append(pos)
+        # The last entry ends before the IDXT tag (but there might be zero fill
+        # bytes we need to ignore!)
+        idx_positions.append(idxt_pos)
+
+        # For each entry in the IDXT build up the tag map and any associated
+        # text
+        for j in xrange(entry_count):
+            start, end = idx_positions[j:j+2]
+            # An entry is a length byte, that many bytes of text, then the
+            # control bytes and tag values
+            text_length = ord(data[start])
+            text = data[start+1:start+1+text_length]
+            tag_map = get_tag_map(control_byte_count, tags, data,
+                    start+1+text_length, end)
+            # NOTE(review): entries with identical text replace earlier ones
+            table[text] = tag_map
+
+    return table, cncx
+
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@ -0,0 +1,309 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, os
+
+def update_internal_links(mobi8_reader):
+    '''
+    Resolve the position based internal links in the parts of a KF8 book.
+
+    :param mobi8_reader: Object providing ``parts`` (byte strings),
+        ``header.codec`` and ``get_id_tag_by_pos_fid(posfid, offset)``, which
+        returns a ``(filename, idtag)`` pair
+    :return: New list with every part decoded to unicode and every
+        ``kindle:pos:fid`` reference inside an <a ... href=...> tag replaced
+        by ``"filename#idtag"`` (``"filename"`` when idtag is empty)
+    '''
+    # need to update all links that are internal which
+    # are based on positions within the xhtml files **BEFORE**
+    # cutting and pasting any pieces into the xhtml text files
+
+    #   kindle:pos:fid:XXXX:off:YYYYYYYYYY  (used for internal link within xhtml)
+    #       XXXX is the offset in records into divtbl
+    #       YYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position
+
+    mr = mobi8_reader
+
+    # pos:fid pattern
+    posfid_pattern = re.compile(br'''(<a.*?href=.*?>)''', re.IGNORECASE)
+    # Base 32 digits are 0-9 and A-V. A '|' inside [] is a literal character,
+    # not an alternation, so it must not be part of the class.
+    posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9A-V]+):off:([0-9A-V]+).*?["']''')
+
+    def resolve(match):
+        filename, idtag = mr.get_id_tag_by_pos_fid(match.group(1),
+                match.group(2))
+        target = filename.encode(mr.header.codec)
+        if idtag:
+            target += b'#' + idtag
+        # The pattern consumes the quotes around the attribute value, so the
+        # replacement has to supply them again
+        return b'"' + target.replace(b'"', b'&quot;') + b'"'
+
+    parts = []
+    for part in mr.parts:
+        srcpieces = posfid_pattern.split(part)
+        # Splitting on a capturing group puts the tags at the odd indices
+        for j in range(1, len(srcpieces), 2):
+            # A function replacement is inserted verbatim, backslashes in
+            # the file name are not interpreted as group references
+            srcpieces[j] = posfid_index_pattern.sub(resolve, srcpieces[j])
+        part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
+        parts.append(part)
+
+    # All parts are now unicode and have no internal links
+    return parts
+
+def _strip_attribute(parts, tag_pattern, attr_pattern):
+    # Delete every attr_pattern match from the tags captured by tag_pattern,
+    # updating parts in place
+    for i, part in enumerate(parts):
+        srcpieces = tag_pattern.split(part)
+        # Only the odd indices hold captured tags. The even ones are the
+        # text in between and must be left alone, even when that text
+        # itself starts with some other tag.
+        for j in range(1, len(srcpieces), 2):
+            srcpieces[j] = attr_pattern.sub('', srcpieces[j])
+        parts[i] = ''.join(srcpieces)
+
+def remove_kindlegen_markup(parts):
+    '''
+    Remove the ``aid`` and ``data-AmznPageBreak`` attributes that Kindlegen
+    adds to tags.
+
+    :param parts: List of unicode strings, modified in place
+    '''
+
+    # we can safely remove all of the Kindlegen generated aid tags
+    find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\said\s*=[^>]*>)''',
+            re.IGNORECASE)
+    within_tag_aid_position_pattern = re.compile(r'''\said\s*=['"][^'"]*['"]''')
+    _strip_attribute(parts, find_tag_with_aid_pattern,
+            within_tag_aid_position_pattern)
+
+    # we can safely remove all of the Kindlegen generated data-AmznPageBreak tags
+    find_tag_with_AmznPageBreak_pattern = re.compile(
+            r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
+    within_tag_AmznPageBreak_position_pattern = re.compile(
+            r'''\sdata-AmznPageBreak=['"][^'"]*['"]''')
+    _strip_attribute(parts, find_tag_with_AmznPageBreak_pattern,
+            within_tag_AmznPageBreak_position_pattern)
+
+def update_flow_links(mobi8_reader, resource_map, log):
+    '''
+    Resolve the kindle:embed and kindle:flow references inside the flows.
+
+    :param mobi8_reader: Object providing ``flows``, ``flowinfo`` (entries
+        with ``format``, ``dir`` and ``fname``) and ``header.codec``
+    :param resource_map: Sequence of hrefs; embed number N is looked up at
+        index N-1. Entries that are None/empty count as unrecognized.
+    :param log: Logger, its warn() is used for unrecognized references
+    :return: New list of flows. Flows that are None are passed through,
+        all others are unicode with their references rewritten.
+    '''
+    #   kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
+    #   kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
+    #   kindle:embed:XXXX   (used for fonts)
+
+    mr = mobi8_reader
+    flows = []
+
+    # NOTE(review): [img\s|image\s] is a character class, so this matches
+    # any tag whose first character is one of those listed; the
+    # startswith('<im') test below is what actually selects the tags.
+    img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
+    img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''', re.IGNORECASE)
+
+    tag_pattern = re.compile(r'''(<[^>]*>)''')
+    flow_pattern = re.compile(r'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
+
+    url_pattern = re.compile(r'''(url\(.*?\))''', re.IGNORECASE)
+    url_img_index_pattern = re.compile(r'''kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*''', re.IGNORECASE)
+    font_index_pattern = re.compile(r'''kindle:embed:([0-9|A-V]+)''', re.IGNORECASE)
+    url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
+
+    for flow in mr.flows:
+        if flow is None: # 0th flow is None
+            flows.append(flow)
+            continue
+
+        if not isinstance(flow, unicode):
+            flow = flow.decode(mr.header.codec)
+
+        # links to raster image files from image tags
+        # image_pattern
+        srcpieces = img_pattern.split(flow)
+        for j in range(1, len(srcpieces), 2):
+            tag = srcpieces[j]
+            if tag.startswith('<im'):
+                for m in img_index_pattern.finditer(tag):
+                    # Reference numbers are base 32 and start at 1
+                    num = int(m.group(1), 32)
+                    href = resource_map[num-1]
+                    if href:
+                        replacement = '"%s"'%('../'+ href)
+                        tag = img_index_pattern.sub(replacement, tag, 1)
+                    else:
+                        log.warn('Referenced image %s was not recognized '
+                                'as a valid image in %s' % (num, tag))
+                srcpieces[j] = tag
+        flow = "".join(srcpieces)
+
+        # replacements inside css url():
+        srcpieces = url_pattern.split(flow)
+        for j in range(1, len(srcpieces), 2):
+            tag = srcpieces[j]
+
+            # process links to raster image files
+            for m in url_img_index_pattern.finditer(tag):
+                num = int(m.group(1), 32)
+                href = resource_map[num-1]
+                if href:
+                    replacement = '"%s"'%('../'+ href)
+                    tag = url_img_index_pattern.sub(replacement, tag, 1)
+                else:
+                    log.warn('Referenced image %s was not recognized as a '
+                    'valid image in %s' % (num, tag))
+
+            # process links to fonts
+            for m in font_index_pattern.finditer(tag):
+                num = int(m.group(1), 32)
+                href = resource_map[num-1]
+                if href is None:
+                    log.warn('Referenced font %s was not recognized as a '
+                    'valid font in %s' % (num, tag))
+                else:
+                    replacement = '"%s"'%('../'+ href)
+                    # hrefs ending in .failed get a failed- prefix instead
+                    # of the ../ prefix
+                    if href.endswith('.failed'):
+                        replacement = '"%s"'%('failed-'+href)
+                    tag = font_index_pattern.sub(replacement, tag, 1)
+
+            # process links to other css pieces
+            for m in url_css_index_pattern.finditer(tag):
+                num = int(m.group(1), 32)
+                fi = mr.flowinfo[num]
+                replacement = '"../' + fi.dir + '/' + fi.fname + '"'
+                tag = url_css_index_pattern.sub(replacement, tag, 1)
+
+            srcpieces[j] = tag
+        flow = "".join(srcpieces)
+
+        # flow pattern not inside url()
+        srcpieces = re.split(tag_pattern, flow)
+        for j in range(1, len(srcpieces), 2):
+            tag = srcpieces[j]
+            if tag.startswith('<'):
+                for m in re.finditer(flow_pattern, tag):
+                    num = int(m.group(1), 32)
+                    fi = mr.flowinfo[num]
+                    if fi.format == 'inline':
+                        # The whole tag is replaced by the referenced flow,
+                        # taken from mr.flows as is (i.e. not the version
+                        # rewritten by this function)
+                        flowtext = mr.flows[num]
+                        tag = flowtext
+                    else:
+                        replacement = '"../' + fi.dir + '/' + fi.fname + '"'
+                        tag = flow_pattern.sub(replacement, tag, 1)
+                srcpieces[j] = tag
+        flow = "".join(srcpieces)
+
+        flows.append(flow)
+
+    # All flows are now unicode and have links resolved
+    return flows
+
+def insert_flows_into_markup(parts, flows, mobi8_reader):
+    '''
+    Resolve the kindle:flow references found in the tags of every part.
+
+    A tag referring to an inline flow is replaced by the text of that flow,
+    any other reference becomes a quoted ``../dir/fname`` link built from the
+    flowinfo entry.
+
+    :param parts: List of unicode strings, modified in place
+    :param flows: Flows indexed by flow number
+    :param mobi8_reader: Object providing ``flowinfo``
+    '''
+    reader = mobi8_reader
+
+    # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
+    split_on_tags = re.compile(r'''(<[^>]*>)''').split
+    flow_ref = re.compile(r'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
+
+    def resolve(tag):
+        resolved = tag
+        for match in flow_ref.finditer(tag):
+            flow_index = int(match.group(1), 32)
+            info = reader.flowinfo[flow_index]
+            if info.format == 'inline':
+                resolved = flows[flow_index]
+                continue
+            target = '"../' + info.dir + '/' + info.fname + '"'
+            resolved = flow_ref.sub(target, resolved, 1)
+        return resolved
+
+    for idx, markup in enumerate(parts):
+        pieces = split_on_tags(markup)
+        # Tags sit at the odd indices of the split result
+        for k in range(1, len(pieces), 2):
+            if pieces[k].startswith('<'):
+                pieces[k] = resolve(pieces[k])
+        # store away modified version
+        parts[idx] = "".join(pieces)
+
+def insert_images_into_markup(parts, resource_map, log):
+    '''
+    Point the kindle:embed references in <img>/<image> tags at the extracted
+    image files.
+
+    :param parts: List of unicode strings, modified in place
+    :param resource_map: Sequence of hrefs; embed number N (base 32,
+        starting at 1) is looked up at index N-1
+    :param log: Logger; warn() is called for every reference that is out of
+        range or has no href. Such references are left unchanged.
+    '''
+    # Handle any embedded raster images links in the xhtml text
+    # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
+    # (?:img|image) is an alternation; the character class used previously
+    # matched on the first letter of the tag name only
+    img_pattern = re.compile(r'''(<(?:img|image)\s[^>]*>)''', re.IGNORECASE)
+    # Base 32 digits are 0-9 and A-V ('|' inside [] would be a literal)
+    img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9A-V]+)[^'"]*['"]''')
+
+    for i, part in enumerate(parts):
+        # links to raster image files
+        # image_pattern
+        srcpieces = img_pattern.split(part)
+        for j in range(1, len(srcpieces), 2):
+            tag = srcpieces[j]
+
+            def resolve(m, tag=tag):
+                num = int(m.group(1), 32)
+                # Guard the lookup: 0 would silently wrap around to the
+                # last resource and too large numbers would raise
+                href = (resource_map[num-1]
+                        if 0 < num <= len(resource_map) else None)
+                if href:
+                    return '"%s"'%('../' + href)
+                log.warn('Referenced image %s was not recognized as '
+                        'a valid image in %s' % (num, tag))
+                return m.group(0)
+
+            # A single pass with a callback rewrites exactly the reference
+            # that was looked up. Substituting "the first match" repeatedly
+            # hits the wrong one once an earlier reference is skipped.
+            srcpieces[j] = img_index_pattern.sub(resolve, tag)
+        # store away modified version
+        parts[i] = "".join(srcpieces)
+
+def upshift_markup(parts):
+    '''
+    Restore the camel case of the ``preserveAspectRatio`` and ``viewBox``
+    attribute names in opening <svg> tags.
+
+    :param parts: List of unicode strings, modified in place
+    '''
+    svg_open_tag = re.compile(r'''(<(?:svg)[^>]*>)''', re.IGNORECASE)
+    camel_case = (
+        ('preserveaspectratio', 'preserveAspectRatio'),
+        ('viewbox', 'viewBox'),
+    )
+
+    for idx, markup in enumerate(parts):
+        pieces = svg_open_tag.split(markup)
+        # Captured tags are at the odd indices
+        for k in range(1, len(pieces), 2):
+            if pieces[k][:4].lower() != '<svg':
+                continue
+            for lowered, proper in camel_case:
+                pieces[k] = pieces[k].replace(lowered, proper)
+        # store away modified version
+        parts[idx] = "".join(pieces)
+
+def expand_mobi8_markup(mobi8_reader, resource_map, log):
+    '''
+    Resolve all kindle: references in the parts and flows of a KF8 book and
+    write the results to disk, relative to the current working directory.
+
+    The order of the steps below matters, see the comments on each of them.
+
+    :param mobi8_reader: Reader object; its ``parts`` and ``flows``
+        attributes are replaced by the processed versions and its
+        ``partinfo``/``flowinfo`` entries decide where things are written
+    :param resource_map: Passed on to update_flow_links and
+        insert_images_into_markup
+    :param log: Logger used for warnings
+    :return: List with the names of the part files written, in order
+
+    os.mkdir('text') raises if that directory already exists.
+    '''
+    # First update all internal links that are based on offsets
+    parts = update_internal_links(mobi8_reader)
+
+    # Remove pointless markup inserted by kindlegen
+    remove_kindlegen_markup(parts)
+
+    # Handle substitutions for the flows pieces first as they may
+    # be inlined into the xhtml text
+    flows = update_flow_links(mobi8_reader, resource_map, log)
+
+    # Insert inline flows into the markup
+    insert_flows_into_markup(parts, flows, mobi8_reader)
+
+    # Insert raster images into markup
+    insert_images_into_markup(parts, resource_map, log)
+
+    # Perform general markup cleanups
+    upshift_markup(parts)
+
+    # Update the parts and flows stored in the reader
+    mobi8_reader.parts = parts
+    mobi8_reader.flows = flows
+
+    # write out the parts and file flows
+    os.mkdir('text') # directory containing all parts
+    spine = []
+    for i, part in enumerate(parts):
+        pi = mobi8_reader.partinfo[i]
+        # NOTE(review): assumes pi.type is 'text', the only directory
+        # created above - confirm against the code that builds partinfo
+        with open(os.path.join(pi.type, pi.filename), 'wb') as f:
+            f.write(part.encode('utf-8'))
+            spine.append(f.name)
+
+    for i, flow in enumerate(flows):
+        fi = mobi8_reader.flowinfo[i]
+        # Only flows marked as 'file' are written out
+        if fi.format == 'file':
+            if not os.path.exists(fi.dir):
+                os.mkdir(fi.dir)
+            with open(os.path.join(fi.dir, fi.fname), 'wb') as f:
+                f.write(flow.encode('utf-8'))
+
+    return spine
+
--- a/src/calibre/ebooks/mobi/reader/mobi6.py
+++ b/src/calibre/ebooks/mobi/reader/mobi6.py
@ -1,10 +1,12 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Read data from .mobi files
-'''
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (absolute_import, print_function)

-import shutil, os, re, struct, textwrap, cStringIO, sys
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import shutil, os, re, struct, textwrap, cStringIO

 try:
    from PIL import Image as PILImage
@ -14,235 +16,22 @@ except ImportError:

 from lxml import html, etree

-from calibre import xml_entity_to_unicode, CurrentDir, entity_to_unicode, \
-    replace_entities
+from calibre import (xml_entity_to_unicode, entity_to_unicode)
 from calibre.utils.filenames import ascii_filename
-from calibre.utils.date import parse_date
 from calibre.utils.cleantext import clean_ascii_chars
-from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks import DRMError, unit_convert
 from calibre.ebooks.chardet import ENCODING_PATS
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
-from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
 from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.mobi.reader.headers import BookHeader

 class TopazError(ValueError):
    pass

-class EXTHHeader(object):
-
-    def __init__(self, raw, codec, title):
-        self.doctype = raw[:4]
-        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
-        raw = raw[12:]
-        pos = 0
-        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
-        self.has_fake_cover = True
-        left = self.num_items
-
-        while left > 0:
-            left -= 1
-            id, size = struct.unpack('>LL', raw[pos:pos + 8])
-            content = raw[pos + 8:pos + size]
-            pos += size
-            if id >= 100 and id < 200:
-                self.process_metadata(id, content, codec)
-            elif id == 203:
-                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
-            elif id == 201:
-                co, = struct.unpack('>L', content)
-                if co < 1e7:
-                    self.cover_offset = co
-            elif id == 202:
-                self.thumbnail_offset, = struct.unpack('>L', content)
-            elif id == 501:
-                # cdetype
-                pass
-            elif id == 502:
-                # last update time
-                pass
-            elif id == 503: # Long title
-                # Amazon seems to regard this as the definitive book title
-                # rather than the title from the PDB header. In fact when
-                # sending MOBI files through Amazon's email service if the
-                # title contains non ASCII chars or non filename safe chars
-                # they are messed up in the PDB header
-                try:
-                    title = content.decode(codec)
-                except:
-                    pass
-            #else:
-            #    print 'unknown record', id, repr(content)
-        if title:
-            self.mi.title = replace_entities(title)
-
-    def process_metadata(self, id, content, codec):
-        if id == 100:
-            if self.mi.authors == [_('Unknown')]:
-                self.mi.authors = []
-            au = content.decode(codec, 'ignore').strip()
-            self.mi.authors.append(au)
-            if re.match(r'\S+?\s*,\s+\S+', au.strip()):
-                self.mi.author_sort = au.strip()
-        elif id == 101:
-            self.mi.publisher = content.decode(codec, 'ignore').strip()
-        elif id == 103:
-            self.mi.comments  = content.decode(codec, 'ignore')
-        elif id == 104:
-            self.mi.isbn      = content.decode(codec, 'ignore').strip().replace('-', '')
-        elif id == 105:
-            if not self.mi.tags:
-                self.mi.tags = []
-            self.mi.tags.extend([x.strip() for x in content.decode(codec,
-                'ignore').split(';')])
-            self.mi.tags = list(set(self.mi.tags))
-        elif id == 106:
-            try:
-                self.mi.pubdate = parse_date(content, as_utc=False)
-            except:
-                pass
-        elif id == 108:
-            pass # Producer
-        elif id == 113:
-            pass # ASIN or UUID
-        #else:
-        #    print 'unhandled metadata record', id, repr(content)
-
-
-class BookHeader(object):
-
-    def __init__(self, raw, ident, user_encoding, log, try_extra_data_fix=False):
-        self.log = log
-        self.compression_type = raw[:2]
-        self.records, self.records_size = struct.unpack('>HH', raw[8:12])
-        self.encryption_type, = struct.unpack('>H', raw[12:14])
-        if ident == 'TEXTREAD':
-            self.codepage = 1252
-        if len(raw) <= 16:
-            self.codec = 'cp1252'
-            self.extra_flags = 0
-            self.title = _('Unknown')
-            self.language = 'ENGLISH'
-            self.sublanguage = 'NEUTRAL'
-            self.exth_flag, self.exth = 0, None
-            self.ancient = True
-            self.first_image_index = -1
-            self.mobi_version = 1
-        else:
-            self.ancient = False
-            self.doctype = raw[16:20]
-            self.length, self.type, self.codepage, self.unique_id, \
-                self.version = struct.unpack('>LLLLL', raw[20:40])
-
-            try:
-                self.codec = {
-                    1252: 'cp1252',
-                    65001: 'utf-8',
-                    }[self.codepage]
-            except (IndexError, KeyError):
-                self.codec = 'cp1252' if not user_encoding else user_encoding
-                log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
-                    self.codec))
-            # There exists some broken DRM removal tool that removes DRM but
-            # leaves the DRM fields in the header yielding a header size of
-            # 0xF8. The actual value of max_header_length should be 0xE8 but
-            # it's changed to accommodate this silly tool. Hopefully that will
-            # not break anything else.
-            max_header_length = 0xF8
-
-            if (ident == 'TEXTREAD' or self.length < 0xE4 or
-                    self.length > max_header_length or
-                    (try_extra_data_fix and self.length == 0xE4)):
-                self.extra_flags = 0
-            else:
-                self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4])
-
-            if self.compression_type == 'DH':
-                self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
-
-            toff, tlen = struct.unpack('>II', raw[0x54:0x5c])
-            tend = toff + tlen
-            self.title = raw[toff:tend] if tend < len(raw) else _('Unknown')
-            langcode  = struct.unpack('!L', raw[0x5C:0x60])[0]
-            langid    = langcode & 0xFF
-            sublangid = (langcode >> 10) & 0xFF
-            self.language = main_language.get(langid, 'ENGLISH')
-            self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
-            self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
-            self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c + 4])[0]
-
-            self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
-            self.exth = None
-            if not isinstance(self.title, unicode):
-                self.title = self.title.decode(self.codec, 'replace')
-            if self.exth_flag & 0x40:
-                try:
-                    self.exth = EXTHHeader(raw[16 + self.length:], self.codec, self.title)
-                    self.exth.mi.uid = self.unique_id
-                    try:
-                        self.exth.mi.language = mobi2iana(langid, sublangid)
-                    except:
-                        self.log.exception('Unknown language code')
-                except:
-                    self.log.exception('Invalid EXTH header')
-                    self.exth_flag = 0
-
-
-class MetadataHeader(BookHeader):
-    def __init__(self, stream, log):
-        self.stream = stream
-        self.ident = self.identity()
-        self.num_sections = self.section_count()
-        if self.num_sections >= 2:
-            header = self.header()
-            BookHeader.__init__(self, header, self.ident, None, log)
-        else:
-            self.exth = None
-
-    def identity(self):
-        self.stream.seek(60)
-        ident = self.stream.read(8).upper()
-        if ident not in ['BOOKMOBI', 'TEXTREAD']:
-            raise MobiError('Unknown book type: %s' % ident)
-        return ident
-
-    def section_count(self):
-        self.stream.seek(76)
-        return struct.unpack('>H', self.stream.read(2))[0]
-
-    def section_offset(self, number):
-        self.stream.seek(78 + number * 8)
-        return struct.unpack('>LBBBB', self.stream.read(8))[0]
-
-    def header(self):
-        section_headers = []
-        # First section with the metadata
-        section_headers.append(self.section_offset(0))
-        # Second section used to get the lengh of the first
-        section_headers.append(self.section_offset(1))
-
-        end_off = section_headers[1]
-        off = section_headers[0]
-        self.stream.seek(off)
-        return self.stream.read(end_off - off)
-
-    def section_data(self, number):
-        start = self.section_offset(number)
-        if number == self.num_sections -1:
-            end = os.stat(self.stream.name).st_size
-        else:
-            end = self.section_offset(number + 1)
-        self.stream.seek(start)
-        try:
-            return self.stream.read(end - start)
-        except OverflowError:
-            return self.stream.read(os.stat(self.stream.name).st_size - start)
-
-
 class MobiReader(object):
    PAGE_BREAK_PAT = re.compile(
        r'<\s*/{0,1}\s*mbp:pagebreak((?:\s+[^/>]*){0,1})/{0,1}\s*>\s*(?:<\s*/{0,1}\s*mbp:pagebreak\s*/{0,1}\s*>)*',
@ -312,15 +101,47 @@ class MobiReader(object):
            self.sections.append((section(i), self.section_headers[i]))


-        self.book_header = BookHeader(self.sections[0][0], self.ident,
+        self.book_header = bh = BookHeader(self.sections[0][0], self.ident,
            user_encoding, self.log, try_extra_data_fix=try_extra_data_fix)
        self.name = self.name.decode(self.book_header.codec, 'replace')
+        self.kf8_type = None
+        is_kf8 = self.book_header.mobi_version == 8
+        if is_kf8:
+            self.kf8_type = 'standalone'
+        else: # Check for joint mobi 6 and kf 8 file
+            KF8_BOUNDARY = b'BOUNDARY'
+            for i, x in enumerate(self.sections[:-1]):
+                sec = x[0]
+                if (len(sec) == len(KF8_BOUNDARY) and sec ==
+                        KF8_BOUNDARY):
+                    try:
+                        self.book_header = BookHeader(self.sections[i+1][0],
+                                self.ident, user_encoding, self.log)
+                        # The following are only correct in the Mobi 6
+                        # header not the Mobi 8 header
+                        for x in ('first_image_index',):
+                            setattr(self.book_header, x, getattr(bh, x))
+                        if hasattr(self.book_header, 'huff_offset'):
+                            self.book_header.huff_offset += i + 1
+                        self.kf8_type = 'joint'
+                        self.kf8_boundary = i
+                    except:
+                        self.book_header = bh
+                    break
+
+    def check_for_drm(self):
+        if self.book_header.encryption_type != 0:
+            try:
+                name = self.book_header.exth.mi.title
+            except:
+                name = self.name
+            if not name:
+                name = self.name
+            raise DRMError(name)

    def extract_content(self, output_dir, parse_cache):
        output_dir = os.path.abspath(output_dir)
-        if self.book_header.encryption_type != 0:
-            raise DRMError(self.name)
-
+        self.check_for_drm()
        processed_records = self.extract_text()
        if self.debug is not None:
            parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
@ -916,11 +737,12 @@ class MobiReader(object):
        trail_size = self.sizeof_trailing_entries(data)
        return data[:len(data)-trail_size]

-    def extract_text(self):
+    def extract_text(self, offset=1):
        self.log.debug('Extracting text...')
-        text_sections = [self.text_section(i) for i in range(1,
-            min(self.book_header.records + 1, len(self.sections)))]
-        processed_records = list(range(0, self.book_header.records + 1))
+        text_sections = [self.text_section(i) for i in xrange(offset,
+            min(self.book_header.records + offset, len(self.sections)))]
+        processed_records = list(range(offset-1, self.book_header.records +
+            offset))

        self.mobi_html = ''

@ -1027,63 +849,6 @@ class MobiReader(object):
            self.image_names.append(os.path.basename(path))
            im.save(open(path, 'wb'), format='JPEG')

-def get_metadata(stream):
-    stream.seek(0)
-    try:
-        raw = stream.read(3)
-    except:
-        raw = ''
-    stream.seek(0)
-    if raw == 'TPZ':
-        from calibre.ebooks.metadata.topaz import get_metadata
-        return get_metadata(stream)
-    from calibre.utils.logging import Log
-    log = Log()
-    try:
-        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
-    except:
-        mi = MetaInformation(_('Unknown'), [_('Unknown')])
-    mh = MetadataHeader(stream, log)
-    if mh.title and mh.title != _('Unknown'):
-        mi.title = mh.title
-
-    if mh.exth is not None:
-        if mh.exth.mi is not None:
-            mi = mh.exth.mi
-    else:
-        size = sys.maxint
-        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
-            pos = stream.tell()
-            stream.seek(0, 2)
-            size = stream.tell()
-            stream.seek(pos)
-        if size < 4*1024*1024:
-            with TemporaryDirectory('_mobi_meta_reader') as tdir:
-                with CurrentDir(tdir):
-                    mr = MobiReader(stream, log)
-                    parse_cache = {}
-                    mr.extract_content(tdir, parse_cache)
-                    if mr.embedded_mi is not None:
-                        mi = mr.embedded_mi
-    if hasattr(mh.exth, 'cover_offset'):
-        cover_index = mh.first_image_index + mh.exth.cover_offset
-        data  = mh.section_data(int(cover_index))
-    else:
-        try:
-            data  = mh.section_data(mh.first_image_index)
-        except:
-            data = ''
-    buf = cStringIO.StringIO(data)
-    try:
-        im = PILImage.open(buf)
-    except:
-        log.exception('Failed to read MOBI cover')
-    else:
-        obuf = cStringIO.StringIO()
-        im.convert('RGB').save(obuf, format='JPEG')
-        mi.cover_data = ('jpg', obuf.getvalue())
-    return mi
-
 def test_mbp_regex():
    for raw, m in {
        '<mbp:pagebreak></mbp:pagebreak>':'',
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@ -0,0 +1,388 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import struct, re, os, imghdr
+from collections import namedtuple
+from itertools import repeat
+
+from calibre.ebooks.mobi.reader.headers import NULL_INDEX
+from calibre.ebooks.mobi.reader.index import read_index
+from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
+from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
+from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
+from calibre.ebooks.mobi.utils import read_font_record
+
+# One reconstructed text file, as built by Mobi8Reader.build_parts(): the
+# skeleton number, the flow type ('text'), the output file name, the start and
+# end offsets in the raw text flow and the aid text of its first div
+Part = namedtuple('Part',
+    'num type filename start end aid')
+
+# One entry of the div index, as read by Mobi8Reader.read_indices()
+Elem = namedtuple('Elem',
+    'insert_pos toc_text file_number sequence_number start_pos '
+    'length')
+
+# How a non-text flow (css or svg) is emitted: its type, whether it is
+# inlined or written to a file, and the directory and name of that file
+FlowInfo = namedtuple('FlowInfo',
+        'type format dir fname')
+
+class Mobi8Reader(object):
+
+    def __init__(self, mobi6_reader, log):
+        # The MOBI 6 reader supplies the sections, the raw markup and the
+        # book header that every other method of this class works from
+        self.mobi6_reader = mobi6_reader
+        self.log = log
+        self.header = mobi6_reader.book_header
+
+    def __call__(self):
+        '''
+        Extract the KF8 book into the current working directory.
+
+        Raises DRMError (via the MOBI 6 reader) if the book is encrypted.
+        Returns the name of the OPF file that was written.
+        '''
+        self.mobi6_reader.check_for_drm()
+        # Standalone KF8: text starts at section 1 and resources may run to
+        # the end of the file
+        offset = 1
+        res_end = len(self.mobi6_reader.sections)
+        if self.mobi6_reader.kf8_type == 'joint':
+            # Joint file: the KF8 header is the section after the boundary
+            # record, so the KF8 text starts two sections after the boundary
+            # and the resources stop at the boundary
+            offset = self.mobi6_reader.kf8_boundary + 2
+            res_end = self.mobi6_reader.kf8_boundary
+
+        self.processed_records = self.mobi6_reader.extract_text(offset=offset)
+        self.raw_ml = self.mobi6_reader.mobi_html
+        # The raw markup is always dumped for debugging; write_opf() keeps
+        # this file out of the manifest
+        with open('debug-raw.html', 'wb') as f:
+            f.write(self.raw_ml)
+
+        # Section list re-based so that index 0 is the KF8 header
+        self.kf8_sections = self.mobi6_reader.sections[offset-1:]
+        first_resource_index = self.header.first_image_index
+        if first_resource_index in {-1, NULL_INDEX}:
+            # No image index recorded, start right after the text records
+            first_resource_index = self.header.records + 1
+        self.resource_sections = \
+                self.mobi6_reader.sections[first_resource_index:res_end]
+        self.cover_offset = getattr(self.header.exth, 'cover_offset', None)
+
+        # The indices and parts must exist before links can be resolved
+        self.read_indices()
+        self.build_parts()
+        guide = self.create_guide()
+        ncx = self.create_ncx()
+        resource_map = self.extract_resources()
+        spine = self.expand_text(resource_map)
+        return self.write_opf(guide, ncx, spine, resource_map)
+
+    def read_indices(self):
+        '''
+        Read the FDST, skeleton, div and guide index records named by the
+        KF8 header into ``self.flow_table``, ``self.files``, ``self.elems``
+        and ``self.guide``. An index whose header field is NULL_INDEX is left
+        at its empty default.
+        '''
+        hdr = self.header
+
+        # Flow table: the start offset of every flow, ended by NULL_INDEX
+        self.flow_table = (0, NULL_INDEX)
+        if hdr.fdstidx != NULL_INDEX:
+            fdst = self.kf8_sections[hdr.fdstidx][0]
+            if fdst[:4] != b'FDST':
+                raise ValueError('KF8 does not have a valid FDST record')
+            count, = struct.unpack_from(b'>L', fdst, 0x08)
+            # Two 32 bit values per flow begin at 0x0c, only the first of
+            # each pair is kept
+            values = struct.unpack_from(b'>%dL' % (count*2), fdst, 0x0c)
+            self.flow_table = values[::2] + (NULL_INDEX,)
+
+        # Skeleton index: one entry per output file
+        self.files = []
+        if hdr.skelidx != NULL_INDEX:
+            skel_table = read_index(self.kf8_sections, hdr.skelidx,
+                    hdr.codec)[0]
+            File = namedtuple('File',
+                'file_number name divtbl_count start_position length')
+
+            for num, (name, tags) in enumerate(skel_table.iteritems()):
+                self.files.append(File(num, name, tags[1][0], tags[6][0],
+                    tags[6][1]))
+
+        # Div index: the pieces that get inserted into the skeletons
+        self.elems = []
+        if hdr.dividx != NULL_INDEX:
+            div_table, cncx = read_index(self.kf8_sections, hdr.dividx,
+                    hdr.codec)
+            for key, tags in div_table.iteritems():
+                self.elems.append(Elem(int(key), cncx[tags[2][0]],
+                    tags[3][0], tags[4][0], tags[6][0], tags[6][1]))
+
+        # Guide index: reference type, title and div/frag number
+        self.guide = []
+        if hdr.othidx != NULL_INDEX:
+            guide_table, cncx = read_index(self.kf8_sections, hdr.othidx,
+                    hdr.codec)
+            Item = namedtuple('Item',
+                'type title div_frag_num')
+
+            for ref_type, tags in guide_table.iteritems():
+                title = cncx[tags[1][0]]
+                fileno = None
+                # Tag 6 takes precedence over tag 3 when both are present
+                for tag in (3, 6):
+                    if tag in tags:
+                        fileno = tags[tag][0]
+                self.guide.append(Item(ref_type.decode(hdr.codec), title,
+                    fileno))
+
+    def build_parts(self):
+        '''
+        Split the raw markup into flows and rebuild the text files.
+
+        Fills ``self.flows``/``self.flowinfo`` (one entry per flow, entry 0
+        being a placeholder for the text flow) and ``self.parts``/
+        ``self.partinfo`` (one entry per skeleton in ``self.files``).
+        Requires read_indices() to have been called.
+        '''
+        raw_ml = self.mobi6_reader.mobi_html
+        self.flows = []
+        self.flowinfo = []
+
+        # now split the raw_ml into its flow pieces
+        for j in xrange(0, len(self.flow_table)-1):
+            start = self.flow_table[j]
+            end = self.flow_table[j+1]
+            if end == NULL_INDEX:
+                # The last flow runs to the end of the markup
+                end = len(raw_ml)
+            self.flows.append(raw_ml[start:end])
+
+        # the first piece represents the xhtml text
+        text = self.flows[0]
+        self.flows[0] = b''
+
+        # walk the <skeleton> and <div> tables to build original source xhtml
+        # files *without* destroying any file position information needed for
+        # later href processing and create final list of file separation start:
+        # stop points and etc in partinfo
+        self.parts = []
+        self.partinfo = []
+        divptr = 0
+        baseptr = 0
+        for skelnum, skelname, divcnt, skelpos, skellen in self.files:
+            # The divs of a skeleton are stored right after the skeleton
+            baseptr = skelpos + skellen
+            skeleton = text[skelpos:baseptr]
+            for i in xrange(divcnt):
+                insertpos, idtext, filenum, seqnum, startpos, length = \
+                                    self.elems[divptr]
+                if i == 0:
+                    # NOTE(review): presumably strips a fixed width wrapper
+                    # around the aid in the index text - TODO confirm
+                    aidtext = idtext[12:-2]
+                    filename = 'part%04d.html' % filenum
+                part = text[baseptr:baseptr + length]
+                # Insert positions are absolute, make them skeleton relative
+                insertpos = insertpos - skelpos
+                skeleton = skeleton[0:insertpos] + part + skeleton[insertpos:]
+                baseptr = baseptr + length
+                divptr += 1
+            self.parts.append(skeleton)
+            # NOTE(review): filename and aidtext are only assigned inside the
+            # loop above, so a skeleton with divcnt == 0 reuses the previous
+            # values (or raises NameError if it is the first) - confirm
+            self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
+                baseptr, aidtext))
+
+        # The primary css style sheet is typically stored next followed by any
+        # snippets of code that were previously inlined in the
+        # original xhtml but have been stripped out and placed here.
+        # This can include local CDATA snippets and svg sections.
+
+        # The problem is that for most browsers and ereaders, you can not
+        # use <img src="imageXXXX.svg" /> to import any svg image that itself
+        # properly uses an <image/> tag to import some raster image - it
+        # should work according to the spec but does not for almost all browsers
+        # and ereaders and causes epub validation issues because those raster
+        # images are in manifest but not in xhtml text - since they are only
+        # referenced from an svg image
+
+        # So we need to check the remaining flow pieces to see if they are css
+        # or svg images.  if svg images, we must check if they have an <image/>
+        # and if so inline them into the xhtml text pieces.
+
+        # there may be other sorts of pieces stored here but until we see one
+        # in the wild to reverse engineer we won't be able to tell
+
+        # Placeholder so that flowinfo[j] describes flows[j]
+        self.flowinfo.append(FlowInfo(None, None, None, None))
+        svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE)
+        image_tag_pattern = re.compile(br'''(<image[^>]*>)''', re.IGNORECASE)
+        for j in xrange(1, len(self.flows)):
+            flowpart = self.flows[j]
+            nstr = '%04d' % j
+            m = svg_tag_pattern.search(flowpart)
+            if m != None:
+                # svg
+                typ = 'svg'
+                start = m.start()
+                m2 = image_tag_pattern.search(flowpart)
+                if m2 != None:
+                    format = 'inline'
+                    dir = None
+                    fname = None
+                    # strip off anything before <svg if inlining
+                    flowpart = flowpart[start:]
+                else:
+                    format = 'file'
+                    dir = "images"
+                    fname = 'svgimg' + nstr + '.svg'
+            else:
+                # search for CDATA and if exists inline it
+                if flowpart.find('[CDATA[') >= 0:
+                    typ = 'css'
+                    flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
+                    format = 'inline'
+                    dir = None
+                    fname = None
+                else:
+                    # css - assume as standalone css file
+                    typ = 'css'
+                    format = 'file'
+                    dir = "styles"
+                    fname = nstr + '.css'
+
+            self.flows[j] = flowpart
+            self.flowinfo.append(FlowInfo(typ, format, dir, fname))
+
+    def get_file_info(self, pos):
+        ''' Return the Part whose half open range [start, end) in the raw
+        markup contains pos. If no part contains pos, return a Part with
+        every field set to None. '''
+        for candidate in self.partinfo:
+            if candidate.start <= pos < candidate.end:
+                return candidate
+        return Part(*repeat(None, len(Part._fields)))
+
+    def get_id_tag_by_pos_fid(self, posfid, offset):
+        '''
+        Resolve the two parts of a kindle:pos:fid link to ``(filename, id)``.
+
+        :param posfid: base 32 string, the row of the target in self.elems
+        :param offset: base 32 string, added to that row's insert position
+        '''
+        row, delta = int(posfid, 32), int(offset, 32)
+        # The first field of an element is its insert position in the markup
+        target = self.elems[row][0] + delta
+        # An "id=" must already exist in the original xhtml, otherwise the
+        # link could never have worked. Amazon adds its own "aid=" attributes
+        # on top of that, so the id is found by searching the destination
+        # file for the closest "id=" at the target position.
+        return self.get_file_info(target).filename, self.get_id_tag(target)
+
+    def get_id_tag(self, pos):
+        '''
+        Return the value of the id attribute that best identifies pos.
+
+        The file containing pos is searched for tags carrying an ``id``
+        attribute. The result is the id of the last such tag that starts at
+        or before pos, the first id in the file if pos precedes all of them,
+        or ``b''`` if the file has no ids at all (the link then points to
+        the top of the file).
+
+        :raises ValueError: if no file contains pos
+        '''
+        fi = self.get_file_info(pos)
+        if fi.num is None and fi.start is None:
+            raise ValueError('No file contains pos: %d'%pos)
+        textblock = self.parts[fi.num]
+        npos = pos - fi.start
+        # If npos is inside a tag, move it just past that tag's closing '>'.
+        # find() returns -1 when nothing is found, so a missing '>' must not
+        # be treated as one that comes before the next '<', and a missing
+        # '<' must not hide a '>' that is present.
+        pgt = textblock.find(b'>', npos)
+        plt = textblock.find(b'<', npos)
+        if pgt != -1 and (plt == -1 or pgt < plt):
+            npos = pgt + 1
+        # find id links only inside of tags
+        #    inside any < > pair find all "id=' and return whatever is inside
+        #    the quotes
+        id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"][^>]*>''',
+                re.IGNORECASE)
+        id_map = [(m.start(), m.group(1)) for m in
+                id_pattern.finditer(textblock)]
+
+        if not id_map:
+            # Found no id in the textblock, link must be to top of file
+            return b''
+        # if npos is before first id= inside a tag, return the first
+        ans = id_map[0][1]
+        # otherwise keep the last id whose tag starts at or before npos, this
+        # includes the case of npos being exactly the start of the last tag
+        for start, idval in id_map:
+            if start > npos:
+                break
+            ans = idval
+        return ans
+
+    def create_guide(self):
+        '''
+        Build the OPF Guide from the guide index read by read_indices().
+
+        Every guide entry is resolved to ``<type>/<filename>[#<id>]``. A
+        'start' reference is added when the EXTH header carries a start
+        offset that lies inside one of the text files.
+        '''
+        guide = Guide()
+        for ref_type, ref_title, fileno in self.guide:
+            if fileno is None:
+                # read_indices() found neither tag 3 nor tag 6 for this
+                # entry, so there is no element to point at
+                self.log.warn('Ignoring guide entry of type %s as it has no'
+                        ' target'%ref_type)
+                continue
+            elem = self.elems[fileno]
+            fi = self.get_file_info(elem.insert_pos)
+            idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
+            linktgt = fi.filename
+            if idtext:
+                linktgt += '#' + idtext
+            g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu())
+            g.title, g.type = ref_title, ref_type
+            guide.append(g)
+
+        so = self.header.exth.start_offset
+        if so not in {None, NULL_INDEX}:
+            fi = self.get_file_info(so)
+            if fi.filename is not None:
+                idtext = self.get_id_tag(so).decode(self.header.codec)
+                linktgt = fi.filename
+                if idtext:
+                    linktgt += '#' + idtext
+                g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu())
+                g.title, g.type = 'start', 'text'
+                guide.append(g)
+
+        return guide
+
+    def create_ncx(self):
+        '''
+        Read the NCX index and return the TOC built from it.
+
+        :raises ValueError: if an index entry points outside every file
+        '''
+        entries = read_ncx(self.kf8_sections, self.header.ncxidx,
+                self.header.codec)
+
+        # Every entry needs the file it lives in and the anchor inside it
+        for item in entries:
+            where = item['pos']
+            part = self.get_file_info(where)
+            if part.filename is None:
+                raise ValueError('Index entry has invalid pos: %d'%where)
+            anchor = self.get_id_tag(where).decode(self.header.codec)
+            item['href'] = '%s/%s'%(part.type, part.filename)
+            item['idtag'] = anchor
+
+        return build_toc(entries)
+
+    def extract_resources(self):
+        '''
+        Write every font and image resource section to the fonts/ and
+        images/ directories (created in the current directory).
+
+        Returns a list with one entry per resource section: the href of the
+        file that was written, or None for sections that are ignored.
+        '''
+        for dirname in ('fonts', 'images'):
+            os.mkdir(dirname)
+
+        # Record types that are not resources
+        ignored = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
+                b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}
+
+        resource_map = []
+        # File names are numbered from 1
+        for fname_idx, sec in enumerate(self.resource_sections, 1):
+            data = sec[0]
+            typ = data[:4]
+            href = None
+            if typ == b'FONT':
+                font = read_font_record(data)
+                href = "fonts/%05d.%s" % (fname_idx, font['ext'])
+                if font['err']:
+                    self.log.warn('Reading font record %d failed: %s'%(
+                        fname_idx, font['err']))
+                    if font['headers']:
+                        self.log.debug('Font record headers: %s'%font['headers'])
+                # Fall back to the undecoded record if decoding failed
+                payload = font['font_data'] or font['raw_data']
+                with open(href.replace('/', os.sep), 'wb') as f:
+                    f.write(payload)
+            elif typ not in ignored:
+                # Anything else is treated as an image
+                imgtype = imghdr.what(None, data) or 'unknown'
+                href = 'images/%05d.%s'%(fname_idx, imgtype)
+                with open(href.replace('/', os.sep), 'wb') as f:
+                    f.write(data)
+
+            resource_map.append(href)
+
+        return resource_map
+
+    def expand_text(self, resource_map):
+        # Delegates to expand_mobi8_markup(); __call__() uses the result as
+        # the spine passed to write_opf()
+        return expand_mobi8_markup(self, resource_map, self.log)
+
+    def write_opf(self, guide, toc, spine, resource_map):
+        '''
+        Write metadata.opf and toc.ncx into the current directory and return
+        the name of the OPF file.
+        '''
+        mi = self.header.exth.mi
+        cover = self.cover_offset
+        if cover is not None and cover < len(resource_map):
+            mi.cover = resource_map[cover]
+
+        opf = OPFCreator(os.getcwdu(), mi)
+        opf.guide = guide
+
+        # The debugging dump of the raw markup is not part of the book
+        skip_debug_dump = lambda path: (
+                os.path.basename(path) == 'debug-raw.html')
+
+        opf.create_manifest_from_files_in([os.getcwdu()],
+                exclude=skip_debug_dump)
+        opf.create_spine(spine)
+        opf.set_toc(toc)
+
+        with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
+            opf.render(of, ncx, 'toc.ncx')
+        return 'metadata.opf'
+
+
--- a/src/calibre/ebooks/mobi/reader/ncx.py
+++ b/src/calibre/ebooks/mobi/reader/ncx.py
@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.mobi.utils import to_base
+from calibre.ebooks.mobi.reader.headers import NULL_INDEX
+from calibre.ebooks.mobi.reader.index import read_index
+
+# Maps an index tag number to [name of the entry field it fills, position of
+# the value to use within that tag's list of values]. Used by read_ncx().
+tag_fieldname_map = {
+        1:  ['pos',0],
+        2:  ['len',0],
+        3:  ['noffs',0],
+        4:  ['hlvl',0],
+        5:  ['koffs',0],
+        6:  ['pos_fid',0],
+        21: ['parent',0],
+        22: ['child1',0],
+        23: ['childn',0]
+}
+
+def read_ncx(sections, index, codec):
+    '''
+    Read the NCX index record at ``index`` and return its entries as a list
+    of dicts, in table order. Each dict starts from fixed defaults and is
+    then filled from the tags listed in tag_fieldname_map. The list is empty
+    if index is NULL_INDEX.
+    '''
+    if index == NULL_INDEX:
+        return []
+
+    table, cncx = read_index(sections, index, codec)
+    index_entries = []
+
+    for num, (text, tag_map) in enumerate(table.iteritems()):
+        entry = {
+                'name': text,
+                'pos':  -1,
+                'len':  0,
+                'noffs': -1,
+                'text' : "Unknown Text",
+                'hlvl' : -1,
+                'kind' : "Unknown Kind",
+                'pos_fid' : None,
+                'parent' : -1,
+                'child1' : -1,
+                'childn' : -1,
+                'num'  : num
+        }
+
+        for tag, (fieldname, i) in tag_fieldname_map.items():
+            if tag not in tag_map:
+                continue
+            fieldvalue = tag_map[tag][i]
+            if tag == 6:
+                fieldvalue = to_base(fieldvalue, base=32)
+            entry[fieldname] = fieldvalue
+            # Tags 3 and 5 are offsets of strings in the CNCX
+            if tag == 3:
+                entry['text'] = cncx.get(fieldvalue, 'Unknown Text')
+            elif tag == 5:
+                entry['kind'] = cncx.get(fieldvalue, 'Unknown Kind')
+        index_entries.append(entry)
+
+    return index_entries
+
+def build_toc(index_entries):
+    '''
+    Build a TOC tree from the entries returned by read_ncx(), after each has
+    been given 'href' and 'idtag' keys. Entries are attached level by level
+    (lowest 'hlvl' first) to the node named by their 'parent' number, with
+    -1 meaning the root.
+    '''
+    root = TOC(base_path=os.getcwdu())
+
+    # Group the entries by heading level, keeping their original order
+    by_level = {}
+    for entry in index_entries:
+        by_level.setdefault(entry['hlvl'], []).append(entry)
+
+    nodes = {-1: root}
+    for level in sorted(by_level):
+        for entry in by_level[level]:
+            node = nodes[entry['parent']].add_item(entry['href'],
+                    entry['idtag'], entry['text'])
+            nodes[entry['num']] = node
+
+    # Set play orders in depth first order
+    for order, node in enumerate(root.flat()):
+        node.play_order = order
+
+    return root
+
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import struct
+import struct, string, imghdr, zlib
 from collections import OrderedDict

 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
@ -340,4 +340,149 @@ def detect_periodical(toc, log=None):
            return False
    return True

+def count_set_bits(num):
+    'Return the number of 1 bits in the binary form of the magnitude of num'
+    return bin(abs(num)).count('1')
+
+def to_base(num, base=32):
+    '''
+    Return the integer num written in the given base, using the digits 0-9
+    followed by A-Z, with a leading '-' for negative numbers.
+    '''
+    if num == 0:
+        return '0'
+    alphabet = string.digits + string.ascii_uppercase
+    negative = num < 0
+    remaining = -num if negative else num
+    out = []
+    # Digits come out least significant first
+    while remaining:
+        remaining, digit = divmod(remaining, base)
+        out.append(alphabet[digit])
+    if negative:
+        out.append('-')
+    return ''.join(reversed(out))
+
+def mobify_image(data):
+    '''Return data converted to GIF if it is a PNG image (the Kindle cannot
+    display some PNG images), and unchanged otherwise.'''
+    if imghdr.what(None, data) != 'png':
+        return data
+    img = Image()
+    img.load(data)
+    return img.export('gif')
+
+def read_zlib_header(header):
+    '''
+    Validate the two byte header of a zlib stream.
+
+    :param header: the first two (or more) bytes of the stream
+    :return: ``(wbits, None)`` if the header is acceptable, where wbits is
+             the base two logarithm of the window size, otherwise
+             ``(None, error_message)``
+    '''
+    header = bytearray(header)
+    if len(header) < 2:
+        return None, 'Bad zlib header, it must be at least two bytes long'
+    # See sec 2.2 of RFC 1950 for the zlib stream format
+    # http://www.ietf.org/rfc/rfc1950.txt
+    cmf, flg = header[0], header[1]
+    if (cmf*256 + flg)%31 != 0:
+        return None, 'Bad zlib header, FCHECK failed'
+
+    # CMF: bits 0-3 are the compression method, bits 4-7 the window info
+    cm = cmf & 0b1111
+    cinfo = cmf >> 4
+    if cm != 8:
+        return None, 'Unknown zlib compression method: %d'%cm
+    if cinfo > 7:
+        return None, 'Invalid CINFO field in zlib header: %d'%cinfo
+    # FLG: bits 0-4 are FCHECK, bit 5 is FDICT (preset dictionary)
+    fdict = (flg & 0b100000) >> 5
+    if fdict != 0:
+        return None, 'FDICT based zlib compression not supported'
+    wbits = cinfo + 8
+    return wbits, None
+
+
+def read_font_record(data, extent=1040): # {{{
+    '''
+    Return the font encoded in the MOBI FONT record represented by data.
+    The return value in a dict with fields raw_data, font_data, err, ext,
+    headers.
+
+    :param extent: The number of obfuscated bytes. So far I have only
+    encountered files with 1040 obfuscated bytes. If you encounter an
+    obfuscated record for which this function fails, try different extent
+    values (easily automated). None means all of the font data.
+
+    raw_data is the raw data in the font record
+    font_data is the decoded font_data or None if an error occurred
+    err is not None if some error occurred
+    ext is the font type (ttf for TrueType, dat for unknown and failed if an
+    error occurred)
+    headers is the list of decoded headers from the font record or None if
+    decoding failed
+
+    Malformed records never raise, they are reported through err.
+    '''
+    # Format:
+    # bytes  0 -  3:  'FONT'
+    # bytes  4 -  7:  Uncompressed size
+    # bytes  8 - 11:  flags
+    #                   bit 1 - zlib compression
+    #                   bit 2 - XOR obfuscated
+    # bytes 12 - 15:  offset to start of compressed data
+    # bytes 16 - 19:  length of XOR string
+    # bytes 20 - 23:  offset to start of XOR data
+    # The zlib compressed data begins with 2 bytes of header and
+    # has 4 bytes of checksum at the end
+    ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
+            'headers':None}
+
+    try:
+        usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
+                b'>LLLLL', data, 4)
+    except Exception:
+        ans['err'] = 'Failed to read font record header fields'
+        return ans
+    font_data = data[dstart:]
+    ans['headers'] = {'usize':usize, 'flags':bin(flags), 'xor_len':xor_len,
+            'xor_start':xor_start, 'dstart':dstart}
+
+    if flags & 0b10:
+        # De-obfuscate the data
+        key = bytearray(data[xor_start:xor_start+xor_len])
+        buf = bytearray(font_data)
+        extent = len(font_data) if extent is None else extent
+        extent = min(extent, len(font_data))
+
+        # The loop below indexes key with n % xor_len, so an empty key or
+        # one cut short by the end of the record cannot be used
+        if extent > 0 and (xor_len == 0 or len(key) < min(extent, xor_len)):
+            ans['err'] = 'Invalid XOR key in font record'
+            return ans
+
+        for n in xrange(extent):
+            buf[n] ^= key[n%xor_len] # XOR of buf and key
+
+        font_data = bytes(buf)
+
+    if flags & 0b1:
+        # ZLIB compressed data
+        if len(font_data) < 4:
+            # Not even room for the zlib header and the trailing checksum
+            ans['err'] = 'Compressed font data is too short'
+            return ans
+        wbits, err = read_zlib_header(font_data[:2])
+        if err is not None:
+            ans['err'] = err
+            return ans
+        adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4)
+        try:
+            # remove two bytes of zlib header and 4 bytes of trailing checksum
+            # negative wbits indicates no standard gzip header
+            font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
+        except Exception as e:
+            ans['err'] = 'Failed to zlib decompress font data (%s)'%e
+            return ans
+
+        if len(font_data) != usize:
+            ans['err'] = 'Uncompressed font size mismatch'
+            return ans
+
+        if False:
+            # For some reason these almost never match, probably Amazon has a
+            # buggy Adler32 implementation
+            sig = (zlib.adler32(font_data) & 0xffffffff)
+            if sig != adler32:
+                ans['err'] = ('Adler checksum did not match. Stored: %d '
+                        'Calculated: %d')%(adler32, sig)
+                return ans
+
+    ans['font_data'] = font_data
+    ans['ext'] = ('ttf' if font_data[:4] in {b'\0\1\0\0', b'true', b'ttcf'}
+                    else 'dat')
+
+    return ans
+# }}}
+
+
+

--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -18,9 +18,10 @@ from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
-from calibre.ebooks.mobi.utils import (rescale_image, encint,
+from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
        encode_trailing_data, align_block, detect_periodical)
 from calibre.ebooks.mobi.writer2.indexer import Indexer
+from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE

 EXTH_CODES = {
    'creator': 100,
@ -46,9 +47,6 @@ EXTH_CODES = {
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False

-MAX_THUMB_SIZE = 16 * 1024
-MAX_THUMB_DIMEN = (180, 240)
-
 class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

@ -181,7 +179,11 @@ class MobiWriter(object):
        for item in self.oeb.manifest.values():
            if item.media_type not in OEB_RASTER_IMAGES: continue
            try:
-                data = rescale_image(item.data)
+                data = item.data
+                if self.opts.mobi_keep_original_images:
+                    data = mobify_image(data)
+                else:
+                    data = rescale_image(data)
            except:
                oeb.logger.warn('Bad image file %r' % item.href)
                continue
@ -489,7 +491,8 @@ class MobiWriter(object):
            items = oeb.metadata[term]
            if term == 'creator':
                if self.prefer_author_sort:
-                    creators = [normalize(unicode(c.file_as or c)) for c in items]
+                    creators = [normalize(unicode(c.file_as or c)) for c in
+                            items][:1]
                else:
                    creators = [normalize(unicode(c)) for c in items]
                items = ['; '.join(creators)]
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -832,22 +832,8 @@ class Manifest(object):


        def _parse_css(self, data):
-            from cssutils.css import CSSRule
-            from cssutils import CSSParser, log
+            from cssutils import CSSParser, log, resolveImports
            log.setLevel(logging.WARN)
-            def get_style_rules_from_import(import_rule):
-                ans = []
-                if not import_rule.styleSheet:
-                    return ans
-                rules = import_rule.styleSheet.cssRules
-                for rule in rules:
-                    if rule.type == CSSRule.IMPORT_RULE:
-                        ans.extend(get_style_rules_from_import(rule))
-                    elif rule.type in (CSSRule.FONT_FACE_RULE,
-                            CSSRule.STYLE_RULE):
-                        ans.append(rule)
-                return ans
-
            self.oeb.log.debug('Parsing', self.href, '...')
            data = self.oeb.decode(data)
            data = self.oeb.css_preprocessor(data, add_namespace=True)
@ -855,19 +841,8 @@ class Manifest(object):
                               fetcher=self.override_css_fetch or self._fetch_css,
                               log=_css_logger)
            data = parser.parseString(data, href=self.href)
+            data = resolveImports(data)
            data.namespaces['h'] = XHTML_NS
-            import_rules = list(data.cssRules.rulesOfType(CSSRule.IMPORT_RULE))
-            rules_to_append = []
-            insert_index = None
-            for r in data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
-                insert_index = data.cssRules.index(r)
-                break
-            for rule in import_rules:
-                rules_to_append.extend(get_style_rules_from_import(rule))
-            for r in reversed(rules_to_append):
-                data.insertRule(r, index=insert_index)
-            for rule in import_rules:
-                data.deleteRule(rule)
            return data

        def _fetch_css(self, path):
@ -880,7 +855,8 @@ class Manifest(object):
                self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
                return (None, None)
            data = item.data.cssText
-            return ('utf-8', data)
+            enc = None if isinstance(data, unicode) else 'utf-8'
+            return (enc, data)

        # }}}

@ -1487,9 +1463,17 @@ class TOC(object):
        except ValueError:
            return 1

-    def __str__(self):
-        return 'TOC: %s --> %s'%(self.title, self.href)
+    def get_lines(self, lvl=0):
+        '''Outline lines for this node (lvl tabs deep) and all descendants.'''
+        lines = [u'\t'*lvl + u'TOC: %s --> %s'%(self.title, self.href)]
+        lines += [l for kid in self for l in kid.get_lines(lvl+1)]
+        return lines

+    def __str__(self):  # byte string: the get_lines() outline, each line UTF-8 encoded
+        return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
+
+    def __unicode__(self):  # unicode string: the get_lines() outline joined by newlines
+        return u'\n'.join(self.get_lines())

    def to_opf1(self, tour):
        for node in self.nodes:
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -318,6 +318,12 @@ class OEBReader(object):
                continue
            item = manifest.ids[idref]
            spine.add(item, elem.get('linear'))
+        for item in list(spine):  # snapshot: spine.remove() below must not skip entries
+            if item.media_type.lower() not in OEB_DOCS:
+                if not hasattr(item.data, 'xpath'):
+                    self.oeb.log.warn('The item %s is not a XML document.'
+                            ' Removing it from spine.'%item.href)
+                    spine.remove(item)
        if len(spine) == 0:
            raise OEBError("Spine is empty")
        self._spine_add_extra()
--- a/src/calibre/ebooks/oeb/transforms/htmltoc.py
+++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py
@ -8,7 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'

 from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
 from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
-from calibre.ebooks.oeb.base import element
+from calibre.ebooks.oeb.base import element, XPath

 __all__ = ['HTMLTOCAdder']

@ -62,18 +62,24 @@ class HTMLTOCAdder(object):
        return cls(title=opts.toc_title)

    def __call__(self, oeb, context):
+        has_toc = getattr(getattr(oeb, 'toc', False), 'nodes', False)
+
        if 'toc' in oeb.guide:
            # Ensure toc pointed to in <guide> is in spine
            from calibre.ebooks.oeb.base import urlnormalize
            href = urlnormalize(oeb.guide['toc'].href)
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
-                if oeb.spine.index(item) < 0:
-                    oeb.spine.add(item, linear=False)
-                return
+                if (hasattr(item.data, 'xpath') and
+                    XPath('//h:a[@href]')(item.data)):
+                    if oeb.spine.index(item) < 0:
+                        oeb.spine.add(item, linear=False)
+                    return
+                elif has_toc:
+                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')
-        if not getattr(getattr(oeb, 'toc', False), 'nodes', False):
+        if not has_toc:
            return
        oeb.logger.info('Generating in-line TOC...')
        title = self.title or oeb.translate(DEFAULT_TITLE)
--- a/src/calibre/ebooks/oeb/transforms/rescale.py
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@ -36,7 +36,9 @@ class RescaleImages(object):
                    ext = 'JPEG'

                raw = item.data
-                if not raw: continue
+                if hasattr(raw, 'xpath') or not raw:
+                    # Probably an svg image
+                    continue
                try:
                    img = Image()
                    img.load(raw)
--- a/src/calibre/ebooks/txt/textileml.py
+++ b/src/calibre/ebooks/txt/textileml.py
@ -106,7 +106,7 @@ class TextileMLizer(OEB2HTML):
        #correct blockcode paras
        text = re.sub(r'\npre\.\n?\nbc\.', r'\nbc.', text)
        #correct blockquote paras
-        text = re.sub(r'\nbq\.\n?\np.*\. ', r'\nbq. ', text)
+        text = re.sub(r'\nbq\.\n?\np.*?\. ', r'\nbq. ', text)

        #reduce blank lines
        text = re.sub(r'\n{3}', r'\n\np. \n\n', text)
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -104,6 +104,7 @@ gprefs.defaults['worker_max_time'] = 0
 gprefs.defaults['show_files_after_save'] = True
 gprefs.defaults['auto_add_path'] = None
 gprefs.defaults['auto_add_check_for_duplicates'] = False
+gprefs.defaults['blocked_auto_formats'] = []
 # }}}

 NONE = QVariant() #: Null value to return from the data function of item models
@ -806,6 +807,23 @@ def is_gui_thread():
    global gui_thread
    return gui_thread is QThread.currentThread()

+_rating_font = None  # family name cached by rating_font(); None until the first call
+def rating_font():
+    '''Return the font family name used for rating stars, resolving it once.'''
+    global _rating_font
+    if _rating_font is None:
+        from PyQt4.Qt import QFontDatabase
+        # Fallback family, kept if the bundled font cannot be registered
+        _rating_font = 'Arial Unicode MS' if iswindows else 'sans-serif'
+        fontid = QFontDatabase.addApplicationFont(
+                P('fonts/calibreSymbols.otf'))
+        if fontid > -1:  # addApplicationFont() reports failure as -1
+            try:
+                _rating_font = unicode(list(
+                    QFontDatabase.applicationFontFamilies(fontid))[0])
+            except Exception:  # best effort: keep the fallback, but let Ctrl-C/exit through
+                pass
+    return _rating_font

 def find_forms(srcdir):
    base = os.path.join(srcdir, 'calibre', 'gui2')
--- a/src/calibre/gui2/auto_add.py
+++ b/src/calibre/gui2/auto_add.py
@ -17,6 +17,8 @@ from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.gui2 import question_dialog, gprefs

+AUTO_ADDED = frozenset(BOOK_EXTENSIONS) - {'pdr', 'mbp', 'tan'}
+
 class Worker(Thread):

    def __init__(self, path, callback):
@ -26,7 +28,7 @@ class Worker(Thread):
        self.wake_up = Event()
        self.path, self.callback = path, callback
        self.staging = set()
-        self.be = frozenset(BOOK_EXTENSIONS) - {'pdr', 'mbp', 'tan'}
+        self.allowed = AUTO_ADDED - frozenset(gprefs['blocked_auto_formats'])

    def run(self):
        self.tdir = PersistentTemporaryDirectory('_auto_adder')
@ -56,7 +58,7 @@ class Worker(Thread):
                    # Must have read and write permissions
                    and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK)
                    # Must be a known ebook file type
-                    and os.path.splitext(x)[1][1:].lower() in self.be
+                    and os.path.splitext(x)[1][1:].lower() in self.allowed
                ]
        data = {}
        # Give any in progress copies time to complete
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -20,11 +20,12 @@ from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
 from calibre.constants import filesystem_encoding
 from calibre.library.comments import comments_to_html
 from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data,
-        gprefs)
+        gprefs, rating_font)
 from calibre.utils.icu import sort_key
 from calibre.utils.formatter import EvalFormatter
 from calibre.utils.date import is_date_undefined
 from calibre.utils.localization import calibre_langcode_to_name
+from calibre.utils.config import tweaks

 def render_html(mi, css, vertical, widget, all_fields=False): # {{{
    table = render_data(mi, all_fields=all_fields,
@ -39,7 +40,7 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
        return ans

    fi = QFontInfo(QApplication.font(widget))
-    f = fi.pixelSize()+1
+    f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
    fam = unicode(fi.family()).strip().replace('"', '')
    if not fam:
        fam = 'sans-serif'
@ -116,6 +117,14 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
                val = force_unicode(val)
                ans.append((field,
                    u'<td class="comments" colspan="2">%s</td>'%comments_to_html(val)))
+        elif metadata['datatype'] == 'rating':
+            val = getattr(mi, field)
+            if val:
+                val = val/2.0
+                ans.append((field,
+                    u'<td class="title">%s</td><td class="rating" '
+                    'style=\'font-family:"%s"\'>%s</td>'%(
+                        name, rating_font(), u'\u2605'*int(val))))
        elif metadata['datatype'] == 'composite' and \
                            metadata['display'].get('contains_html', False):
            val = getattr(mi, field)
--- a/src/calibre/gui2/comments_editor.py
+++ b/src/calibre/gui2/comments_editor.py
@ -19,6 +19,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre import xml_replace_entities
 from calibre.gui2 import open_url
 from calibre.utils.soupparser import fromstring
+from calibre.utils.config import tweaks

 class PageAction(QAction): # {{{

@ -252,7 +253,7 @@ class EditorWidget(QWebView): # {{{
        def fset(self, val):
            self.setHtml(val)
            fi = QFontInfo(QApplication.font(self))
-            f  = fi.pixelSize()+1
+            f  = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
            fam = unicode(fi.family()).strip().replace('"', '')
            if not fam:
                fam = 'sans-serif'
--- a/src/calibre/gui2/convert/mobi_output.py
+++ b/src/calibre/gui2/convert/mobi_output.py
@ -21,7 +21,8 @@ class PluginWidget(Widget, Ui_Form):

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
-                ['prefer_author_sort', 'rescale_images', 'toc_title',
+                ['prefer_author_sort', 'toc_title',
+                    'mobi_keep_original_images',
                    'mobi_ignore_margins', 'mobi_toc_at_start',
                'dont_compress', 'no_inline_toc', 'share_not_sync',
                'personal_doc']#, 'mobi_navpoints_only_deepest']
--- a/src/calibre/gui2/convert/mobi_output.ui
+++ b/src/calibre/gui2/convert/mobi_output.ui
@ -6,7 +6,7 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>521</width>
+    <width>588</width>
    <height>342</height>
   </rect>
  </property>
@ -14,47 +14,6 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="1" column="0">
-    <widget class="QLabel" name="label">
-     <property name="text">
-      <string>&amp;Title for Table of Contents:</string>
-     </property>
-     <property name="buddy">
-      <cstring>opt_toc_title</cstring>
-     </property>
-    </widget>
-   </item>
-   <item row="1" column="1">
-    <widget class="QLineEdit" name="opt_toc_title"/>
-   </item>
-   <item row="4" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_rescale_images">
-     <property name="text">
-      <string>Rescale images for &amp;Palm devices</string>
-     </property>
-    </widget>
-   </item>
-   <item row="5" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_prefer_author_sort">
-     <property name="text">
-      <string>Use author &amp;sort for author</string>
-     </property>
-    </widget>
-   </item>
-   <item row="6" column="0">
-    <widget class="QCheckBox" name="opt_dont_compress">
-     <property name="text">
-      <string>Disable compression of the file contents</string>
-     </property>
-    </widget>
-   </item>
-   <item row="0" column="0">
-    <widget class="QCheckBox" name="opt_no_inline_toc">
-     <property name="text">
-      <string>Do not add Table of Contents to book</string>
-     </property>
-    </widget>
-   </item>
   <item row="8" column="0" colspan="2">
    <widget class="QGroupBox" name="groupBox">
     <property name="title">
@ -125,6 +84,47 @@
     </property>
    </widget>
   </item>
+   <item row="4" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_prefer_author_sort">
+     <property name="text">
+      <string>Use author &amp;sort for author</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>&amp;Title for Table of Contents:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_toc_title</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QLineEdit" name="opt_toc_title"/>
+   </item>
+   <item row="6" column="0">
+    <widget class="QCheckBox" name="opt_dont_compress">
+     <property name="text">
+      <string>Disable compression of the file contents</string>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="0">
+    <widget class="QCheckBox" name="opt_no_inline_toc">
+     <property name="text">
+      <string>Do not add Table of Contents to book</string>
+     </property>
+    </widget>
+   </item>
+   <item row="5" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_mobi_keep_original_images">
+     <property name="text">
+      <string>Do not convert all images to &amp;JPEG (may result in images not working in older viewers)</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/cover_flow.py
+++ b/src/calibre/gui2/cover_flow.py
@ -10,10 +10,11 @@ Module to implement the Cover Flow feature
 import sys, os, time

 from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize, QAction,
-        QStackedLayout, QLabel, QByteArray, pyqtSignal, QKeySequence)
+        QStackedLayout, QLabel, QByteArray, pyqtSignal, QKeySequence, QFont)

 from calibre import plugins
-from calibre.gui2 import config, available_height, available_width, gprefs
+from calibre.gui2 import (config, available_height, available_width, gprefs,
+        rating_font)

 pictureflow, pictureflowerror = plugins['pictureflow']

@ -102,6 +103,8 @@ if pictureflow is not None:
                    type=Qt.QueuedConnection)
            self.context_menu = None
            self.setContextMenuPolicy(Qt.DefaultContextMenu)
+            if hasattr(self, 'setSubtitleFont'):
+                self.setSubtitleFont(QFont(rating_font()))

        def set_context_menu(self, cm):
            self.context_menu = cm
--- a/src/calibre/gui2/dialogs/message_box.py
+++ b/src/calibre/gui2/dialogs/message_box.py
@ -5,11 +5,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import sys

 from PyQt4.Qt import (QDialog, QIcon, QApplication, QSize, QKeySequence,
-    QAction, Qt, QTextBrowser, QDialogButtonBox, QVBoxLayout)
+    QAction, Qt, QTextBrowser, QDialogButtonBox, QVBoxLayout, QGridLayout,
+    QLabel, QPlainTextEdit, QTextDocument)

-from calibre.constants import __version__
+from calibre.constants import __version__, isfrozen
 from calibre.gui2.dialogs.message_box_ui import Ui_Dialog

 class MessageBox(QDialog, Ui_Dialog): # {{{
@ -170,7 +172,7 @@ class ProceedNotification(MessageBox): # {{{
        :param payload: Arbitrary object, passed to callback
        :param html_log: An HTML or plain text log
        :param log_viewer_title: The title for the log viewer window
-        :param title: The title fo rthis popup
+        :param title: The title for this popup
        :param msg: The msg to display
        :param det_msg: Detailed message
        '''
@ -180,7 +182,6 @@ class ProceedNotification(MessageBox): # {{{
        self.payload = payload
        self.html_log = html_log
        self.log_viewer_title = log_viewer_title
-        self.finished.connect(self.do_proceed)

        self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
        self.vlb.setIcon(QIcon(I('debug.png')))
@ -203,9 +204,13 @@ class ProceedNotification(MessageBox): # {{{
        # Ensure this notification is garbage collected
        self.callback = self.cancel_callback = self.payload = None
        self.setParent(None)
-        self.finished.disconnect()
        self.vlb.clicked.disconnect()
        _proceed_memory.remove(self)
+
+    def done(self, r):  # run do_proceed(r) first, then let MessageBox.done() finish the dialog
+        self.do_proceed(r)
+        return MessageBox.done(self, r)
+
 # }}}

 class ErrorNotification(MessageBox): # {{{
@ -248,9 +253,96 @@ class ErrorNotification(MessageBox): # {{{
        _proceed_memory.remove(self)
 # }}}

+class JobError(QDialog): # {{{
+    '''Non-modal error dialog: icon, rich text message, hideable details and a button box.'''
+    WIDTH = 600
+
+    def __init__(self, gui):
+        QDialog.__init__(self, gui)
+        self.setAttribute(Qt.WA_DeleteOnClose, False)
+        self.gui = gui
+        self.queue = []  # not used elsewhere in this class; presumably pending errors - TODO confirm
+
+        self._layout = l = QGridLayout()
+        self.setLayout(l)
+        self.icon = QIcon(I('dialog_error.png'))
+        self.setWindowIcon(self.icon)
+        self.icon_label = QLabel()
+        self.icon_label.setPixmap(self.icon.pixmap(128, 128))
+        self.icon_label.setMaximumSize(QSize(128, 128))
+        self.msg_label = QLabel('<p>&nbsp;')
+        self.msg_label.setWordWrap(True)
+        self.msg_label.setTextFormat(Qt.RichText)
+        self.det_msg = QPlainTextEdit(self)
+        self.det_msg.setVisible(False)  # details start hidden; toggle_det_msg() reveals them
+
+        self.bb = QDialogButtonBox(QDialogButtonBox.Close, parent=self)
+        self.bb.accepted.connect(self.accept)
+        self.bb.rejected.connect(self.reject)
+        self.ctc_button = self.bb.addButton(_('&Copy to clipboard'),
+                self.bb.ActionRole)
+        self.ctc_button.clicked.connect(self.copy_to_clipboard)
+        self.show_det_msg = _('Show &details')
+        self.hide_det_msg = _('Hide &details')
+        self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)
+        self.det_msg_toggle.clicked.connect(self.toggle_det_msg)
+        self.det_msg_toggle.setToolTip(
+                _('Show detailed information about this error'))
+
+        l.addWidget(self.icon_label, 0, 0, 1, 1)
+        l.addWidget(self.msg_label,  0, 1, 1, 1, Qt.AlignLeft|Qt.AlignTop)
+        l.addWidget(self.det_msg, 1, 0, 1, 2)  # details span both columns below icon and message
+
+        l.addWidget(self.bb, 2, 0, 1, 2, Qt.AlignRight|Qt.AlignBottom)
+
+        self.setModal(False)
+        self.base_height = max(200, self.sizeHint().height() + 20)  # height used while details are hidden
+        self.do_resize()
+
+    def copy_to_clipboard(self, *args):  # put version, platform, title, message and details on the clipboard
+        d = QTextDocument()
+        d.setHtml(self.msg_label.text())  # lets toPlainText() strip the label's rich text markup
+        QApplication.clipboard().setText(
+                u'calibre, version %s (%s, isfrozen: %s)\n%s: %s\n\n%s' %
+                (__version__, sys.platform, isfrozen,
+                    unicode(self.windowTitle()), unicode(d.toPlainText()),
+                    unicode(self.det_msg.toPlainText())))
+        if hasattr(self, 'ctc_button'):
+            self.ctc_button.setText(_('Copied'))
+
+    def toggle_det_msg(self, *args):  # show or hide the details pane and relabel the toggle button
+        vis = unicode(self.det_msg_toggle.text()) == self.hide_det_msg  # True while the button reads "Hide"
+        self.det_msg_toggle.setText(self.show_det_msg if vis else
+                self.hide_det_msg)
+        self.det_msg.setVisible(not vis)
+        self.do_resize()
+
+    def do_resize(self):  # fixed WIDTH; height depends on whether the details pane is visible
+        h = self.base_height
+        if self.det_msg.isVisible():
+            h += 250  # extra room for the details pane
+        self.resize(QSize(self.WIDTH, h))
+
+    def showEvent(self, ev):  # give keyboard focus to the Close button whenever the dialog is shown
+        ret = QDialog.showEvent(self, ev)
+        self.bb.button(self.bb.Close).setFocus(Qt.OtherFocusReason)
+        return ret
+
+# }}}
+
 if __name__ == '__main__':
    app = QApplication([])
-    from calibre.gui2 import question_dialog
-    print question_dialog(None, 'title', 'msg <a href="http://google.com">goog</a> ',
-            det_msg='det '*1000,
-            show_copy_button=True)
+    from calibre.gui2.preferences import init_gui
+    gui = init_gui()
+    d = JobError(gui)
+    d.show()
+    app.exec_()
+    gui.shutdown()
+
+# if __name__ == '__main__':
+#     app = QApplication([])
+#     from calibre.gui2 import question_dialog
+#     print question_dialog(None, 'title', 'msg <a href="http://google.com">goog</a> ',
+#             det_msg='det '*1000,
+#             show_copy_button=True)
+
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@ -5,16 +5,14 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from math import cos, sin, pi
+import sys

-from PyQt4.Qt import (QColor, Qt, QModelIndex, QSize, QApplication,
-                     QPainterPath, QLinearGradient, QBrush,
-                     QPen, QStyle, QPainter, QStyleOptionViewItemV4,
-                     QIcon,  QDoubleSpinBox, QVariant, QSpinBox,
-                     QStyledItemDelegate, QComboBox, QTextDocument,
-                     QAbstractTextDocumentLayout)
+from PyQt4.Qt import (Qt, QApplication, QStyle, QIcon,  QDoubleSpinBox,
+        QVariant, QSpinBox, QStyledItemDelegate, QComboBox, QTextDocument,
+        QAbstractTextDocumentLayout, QFont, QFontInfo)

-from calibre.gui2 import UNDEFINED_QDATETIME, error_dialog
+from calibre.gui2 import UNDEFINED_QDATETIME, error_dialog, rating_font
+from calibre.constants import iswindows
 from calibre.gui2.widgets import EnLineEdit
 from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
 from calibre.utils.date import now, format_date, qt_to_dt
@ -27,81 +25,39 @@ from calibre.gui2.languages import LanguagesEdit


 class RatingDelegate(QStyledItemDelegate): # {{{
-    COLOR    = QColor("blue")
-    SIZE     = 16

-    def __init__(self, parent):
-        QStyledItemDelegate.__init__(self, parent)
-        self._parent = parent
-        self.dummy = QModelIndex()
-        self.star_path = QPainterPath()
-        self.star_path.moveTo(90, 50)
-        for i in range(1, 5):
-            self.star_path.lineTo(50 + 40 * cos(0.8 * i * pi), \
-                                  50 + 40 * sin(0.8 * i * pi))
-        self.star_path.closeSubpath()
-        self.star_path.setFillRule(Qt.WindingFill)
-        self.gradient = QLinearGradient(0, 0, 0, 100)
-        self.factor = self.SIZE/100.
-
-    def sizeHint(self, option, index):
-        #num = index.model().data(index, Qt.DisplayRole).toInt()[0]
-        return QSize(5*(self.SIZE), self.SIZE+4)
-
-    def paint(self, painter, option, index):
-        style = self._parent.style()
-        option = QStyleOptionViewItemV4(option)
-        self.initStyleOption(option, index)
-        option.text = u''
-        num = index.model().data(index, Qt.DisplayRole).toInt()[0]
-        def draw_star():
-            painter.save()
-            painter.scale(self.factor, self.factor)
-            painter.translate(50.0, 50.0)
-            painter.rotate(-20)
-            painter.translate(-50.0, -50.0)
-            painter.drawPath(self.star_path)
-            painter.restore()
-
-        painter.save()
-        if hasattr(QStyle, 'CE_ItemViewItem'):
-            style.drawControl(QStyle.CE_ItemViewItem, option,
-                    painter, self._parent)
-        elif option.state & QStyle.State_Selected:
-            painter.fillRect(option.rect, option.palette.highlight())
-        else:
-            painter.fillRect(option.rect, option.backgroundBrush)
-
-        try:
-            painter.setRenderHint(QPainter.Antialiasing)
-            painter.setClipRect(option.rect)
-            y = option.rect.center().y()-self.SIZE/2.
-            x = option.rect.left()
-            color = index.data(Qt.ForegroundRole)
-            if color.isNull() or not color.isValid():
-                color = self.COLOR
-            else:
-                color = QColor(color)
-            painter.setPen(QPen(color,  1, Qt.SolidLine, Qt.RoundCap, Qt.RoundJoin))
-            self.gradient.setColorAt(0.0, color)
-            self.gradient.setColorAt(1.0, color)
-            painter.setBrush(QBrush(self.gradient))
-            painter.translate(x, y)
-            i = 0
-            while i < num:
-                draw_star()
-                painter.translate(self.SIZE, 0)
-                i += 1
-        except:
-            import traceback
-            traceback.print_exc()
-        painter.restore()
+    def __init__(self, *args, **kwargs):  # prepare the font and elide mode used by sizeHint()/paint()
+        QStyledItemDelegate.__init__(self, *args, **kwargs)
+        self.rf = QFont(rating_font())  # font assigned to the style option before sizing/painting
+        self.em = Qt.ElideMiddle  # elide mode assigned to the style option alongside the font
+        delta = 0
+        if iswindows and sys.getwindowsversion().major >= 6:
+            delta = 2  # larger point size on Windows major version 6 and later
+        self.rf.setPointSize(QFontInfo(QApplication.font()).pointSize()+delta)

    def createEditor(self, parent, option, index):
        sb = QStyledItemDelegate.createEditor(self, parent, option, index)
        sb.setMinimum(0)
        sb.setMaximum(5)
+        sb.setSuffix(' ' + _('stars'))
        return sb
+
+    def displayText(self, value, locale):
+        '''Render the integer held by value as that many star characters.'''
+        stars = value.toInt()[0]
+        stars = stars if 0 <= stars <= 5 else 0
+        return stars * u'\u2605'
+
+    def sizeHint(self, option, index):  # size the cell using the rating font, not the view font
+        option.font = self.rf
+        option.textElideMode = self.em
+        return QStyledItemDelegate.sizeHint(self, option, index)
+
+    def paint(self, painter, option, index):  # default painting, with the rating font and elide mode
+        option.font = self.rf
+        option.textElideMode = self.em
+        return QStyledItemDelegate.paint(self, painter, option, index)
+
 # }}}

 class DateDelegate(QStyledItemDelegate): # {{{
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -588,8 +588,8 @@ class BooksModel(QAbstractTableModel): # {{{

        def rating_type(r, idx=-1):
            r = self.db.data[r][idx]
-            r = r/2 if r else 0
-            return QVariant(r)
+            r = r/2.0 if r else 0
+            return QVariant(int(r))

        def datetime_type(r, idx=-1):
            val = self.db.data[r][idx]
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -200,10 +200,10 @@ class BooksView(QTableView): # {{{
                ac = a if self._model.sorted_on[1] else d
                ac.setCheckable(True)
                ac.setChecked(True)
-            if col not in ('ondevice', 'rating', 'inlibrary') and \
+            if col not in ('ondevice', 'inlibrary') and \
                    (not self.model().is_custom_column(col) or \
                    self.model().custom_columns[col]['datatype'] not in ('bool',
-                        'rating')):
+                        )):
                m = self.column_header_context_menu.addMenu(
                        _('Change text alignment for %s') % name)
                al = self._model.alignment_map.get(col, 'left')
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@ -27,11 +27,13 @@ from calibre.utils.logging import GUILog as Log
 from calibre.ebooks.metadata.sources.identify import (identify,
        urls_from_identifiers)
 from calibre.ebooks.metadata.book.base import Metadata
-from calibre.gui2 import error_dialog, NONE
+from calibre.gui2 import error_dialog, NONE, rating_font
 from calibre.utils.date import (utcnow, fromordinal, format_date,
        UNDEFINED_DATE, as_utc)
 from calibre.library.comments import comments_to_html
 from calibre import force_unicode
+from calibre.utils.config import tweaks
+
 # }}}

 class RichTextDelegate(QStyledItemDelegate): # {{{
@ -254,6 +256,7 @@ class ResultsView(QTableView): # {{{
        return ret

    def show_details(self, index):
+        f = rating_font()
        book = self.model().data(index, Qt.UserRole)
        parts = [
            '<center>',
@ -265,7 +268,8 @@ class ResultsView(QTableView): # {{{
            if series[1]:
                parts.append('<div>%s: %s</div>'%series)
        if not book.is_null('rating'):
-            parts.append('<div>%s</div>'%('\u2605'*int(book.rating)))
+            style = 'style=\'font-family:"%s"\''%f
+            parts.append('<div %s>%s</div>'%(style, '\u2605'*int(book.rating)))
        parts.append('</center>')
        if book.identifiers:
            urls = urls_from_identifiers(book.identifiers)
@ -324,7 +328,7 @@ class Comments(QWebView): # {{{
            return ans

        fi = QFontInfo(QApplication.font(self.parent()))
-        f = fi.pixelSize()+1
+        f = fi.pixelSize()+1+int(tweaks['change_book_details_font_size_by'])
        fam = unicode(fi.family()).strip().replace('"', '')
        if not fam:
            fam = 'sans-serif'
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@ -364,6 +364,8 @@ public:
  QTime  previousPosTimestamp;
  int    pixelDistanceMoved;
  int    pixelsToMovePerSlide;
+  QFont subtitleFont;
+
  void setImages(FlowImages *images);
  void dataChanged();
  
@ -422,6 +424,7 @@ PictureFlowPrivate::PictureFlowPrivate(PictureFlow* w, int queueLength_)
  step = 0;
  target = 0;
  fade = 256;
+  subtitleFont = QFont();

  triggerTimer.setSingleShot(true);
  triggerTimer.setInterval(0);
@ -674,12 +677,17 @@ void PictureFlowPrivate::render_text(QPainter *painter, int index) {
    caption = slideImages->caption(index);
    subtitle = slideImages->subtitle(index);
    buffer_width = buffer.width(); buffer_height = buffer.height();
+    subtitleFont.setPixelSize(fontSize);

    brect = painter->boundingRect(QRect(0, 0, buffer_width, fontSize), TEXT_FLAGS, caption);
+    painter->save();
+    painter->setFont(subtitleFont);
    brect2 = painter->boundingRect(QRect(0, 0, buffer_width, fontSize), TEXT_FLAGS, subtitle);
+    painter->restore();

    // So that if there is no subtitle, the caption is not flush with the bottom
    if (brect2.height() < fontSize) brect2.setHeight(fontSize);
+    brect2.setHeight(brect2.height()+5); // A bit of buffer

    // So that the text does not occupy more than the lower half of the buffer
    if (brect.height() > ((int)(buffer.height()/3.0)) - fontSize*2)
@ -691,7 +699,11 @@ void PictureFlowPrivate::render_text(QPainter *painter, int index) {
    painter->drawText(brect, TEXT_FLAGS, caption);
    
    brect2.moveTop(buffer_height - brect2.height());
+
+    painter->save();
+    painter->setFont(subtitleFont);
    painter->drawText(brect2, TEXT_FLAGS, slideImages->subtitle(index));
+    painter->restore();
 }

 // Render the slides. Updates only the offscreen buffer.
@ -1168,6 +1180,17 @@ void PictureFlow::setSlideSize(QSize size)
  d->setSlideSize(size);
 }

+void PictureFlow::setSubtitleFont(QFont font)  // store the subtitle font on d (presumably the PictureFlowPrivate)
+{
+  d->subtitleFont = font;
+}
+
+QFont PictureFlow::subtitleFont() const  // return a copy of the subtitle font stored on d
+{
+  return d->subtitleFont;
+}
+
+
 QImage PictureFlow::slide(int index) const
 {
  return d->slide(index);
--- a/src/calibre/gui2/pictureflow/pictureflow.h
+++ b/src/calibre/gui2/pictureflow/pictureflow.h
@ -92,6 +92,7 @@ Q_OBJECT

  Q_PROPERTY(int currentSlide READ currentSlide WRITE setCurrentSlide)
  Q_PROPERTY(QSize slideSize READ slideSize WRITE setSlideSize)
+  Q_PROPERTY(QFont subtitleFont READ subtitleFont WRITE setSubtitleFont)

 public:
  /*!
@ -120,6 +121,17 @@ public:
  */  
  void setSlideSize(QSize size);

+  /*!
+    Returns the font used to render subtitles
+  */  
+  QFont subtitleFont() const;
+
+  /*!
+    Sets the font used to render subtitles
+  */  
+  void setSubtitleFont(QFont font);
+
+
  /*!
    Clears any caches held to free up memory
  */
--- a/src/calibre/gui2/pictureflow/pictureflow.sip
+++ b/src/calibre/gui2/pictureflow/pictureflow.sip
@ -41,6 +41,10 @@ public :

  void setSlideSize(QSize size);

+  QFont subtitleFont() const;
+
+  void setSubtitleFont(QFont font);
+
  void clearCaches();

  virtual QImage slide(int index) const;
--- a/src/calibre/gui2/preferences/adding.py
+++ b/src/calibre/gui2/preferences/adding.py
@ -7,11 +7,14 @@ __docformat__ = 'restructuredtext en'

 import os

+from PyQt4.Qt import Qt
+
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
    CommaSeparatedList, AbortCommit
 from calibre.gui2.preferences.adding_ui import Ui_Form
 from calibre.utils.config import prefs
 from calibre.gui2.widgets import FilenamePattern
+from calibre.gui2.auto_add import AUTO_ADDED
 from calibre.gui2 import gprefs, choose_dir, error_dialog, question_dialog

 class ConfigWidget(ConfigWidgetBase, Ui_Form):
@ -38,6 +41,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.metadata_box.layout().insertWidget(0, self.filename_pattern)
        self.filename_pattern.changed_signal.connect(self.changed_signal.emit)
        self.auto_add_browse_button.clicked.connect(self.choose_aa_path)
+        for signal in ('Activated', 'Changed', 'DoubleClicked', 'Clicked'):
+            signal = getattr(self.opt_blocked_auto_formats, 'item'+signal)
+            signal.connect(self.blocked_auto_formats_changed)

    def choose_aa_path(self):
        path = choose_dir(self, 'auto add path choose',
@ -50,11 +56,47 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.filename_pattern.blockSignals(True)
        self.filename_pattern.initialize()
        self.filename_pattern.blockSignals(False)
+        self.init_blocked_auto_formats()
        self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked())

+    # Blocked auto formats {{{
+    def blocked_auto_formats_changed(self, *args):
+        '''
+        Slot for the item* signals of opt_blocked_auto_formats.
+
+        Emits changed_signal only if the set of currently checked formats
+        differs from the value stored in gprefs['blocked_auto_formats'].
+        Any arguments passed by the signal are ignored.
+        '''
+        fmts = self.current_blocked_auto_formats
+        old = gprefs['blocked_auto_formats']
+        # Compare as sets: ordering of the format lists is irrelevant
+        if set(fmts) != set(old):
+            self.changed_signal.emit()
+
+    def init_blocked_auto_formats(self, defaults=False):
+        '''
+        Rebuild the checkable list of file extensions shown in
+        opt_blocked_auto_formats.
+
+        One item is added per extension in AUTO_ADDED, in sorted order. An
+        item is checked if its extension is in the blocked list, which is
+        read from gprefs.defaults when ``defaults`` is True and from the
+        current gprefs value otherwise.
+        '''
+        if defaults:
+            fmts = gprefs.defaults['blocked_auto_formats']
+        else:
+            fmts = gprefs['blocked_auto_formats']
+        viewer = self.opt_blocked_auto_formats
+        # Silence the widget while it is repopulated, so the item* signals
+        # connected to blocked_auto_formats_changed do not fire
+        viewer.blockSignals(True)
+        exts = set(AUTO_ADDED)
+        viewer.clear()
+        for ext in sorted(exts):
+            viewer.addItem(ext)
+            # The item just added is the last row of the list
+            item = viewer.item(viewer.count()-1)
+            # Enabled and checkable only (no other item flags are set)
+            item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable)
+            item.setCheckState(Qt.Checked if
+                    ext in fmts else Qt.Unchecked)
+        viewer.blockSignals(False)
+
+    @property
+    def current_blocked_auto_formats(self):
+        '''
+        The extensions whose items are currently checked in
+        opt_blocked_auto_formats, as a list of unicode strings in the order
+        they appear in the widget.
+        '''
+        fmts = []
+        viewer = self.opt_blocked_auto_formats
+        for i in range(viewer.count()):
+            if viewer.item(i).checkState() == Qt.Checked:
+                fmts.append(unicode(viewer.item(i).text()))
+        return fmts
+    # }}}
+
    def restore_defaults(self):
        ConfigWidgetBase.restore_defaults(self)
        self.filename_pattern.initialize(defaults=True)
+        self.init_blocked_auto_formats(defaults=True)

    def commit(self):
        path = unicode(self.opt_auto_add_path.text()).strip()
@ -80,7 +122,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                    return
        pattern = self.filename_pattern.commit()
        prefs['filename_pattern'] = pattern
-        return ConfigWidgetBase.commit(self)
+        fmts = self.current_blocked_auto_formats
+        old = gprefs['blocked_auto_formats']
+        changed = set(fmts) != set(old)
+        if changed:
+            gprefs['blocked_auto_formats'] = self.current_blocked_auto_formats
+        ret = ConfigWidgetBase.commit(self)
+        return changed or ret

 if __name__ == '__main__':
    from PyQt4.Qt import QApplication
--- a/src/calibre/gui2/preferences/adding.ui
+++ b/src/calibre/gui2/preferences/adding.ui
@ -150,8 +150,8 @@ Author matching is exact.</string>
      <attribute name="title">
       <string>&amp;Automatic Adding</string>
      </attribute>
-      <layout class="QVBoxLayout" name="verticalLayout_2">
-       <item>
+      <layout class="QGridLayout" name="gridLayout_3">
+       <item row="0" column="0" colspan="2">
        <widget class="QLabel" name="label">
         <property name="text">
          <string>Specify a folder. Any files you put into this folder will be automatically added to calibre (restart required).</string>
@ -161,7 +161,46 @@ Author matching is exact.</string>
         </property>
        </widget>
       </item>
-       <item>
+       <item row="2" column="0" colspan="2">
+        <widget class="QLabel" name="label_2">
+         <property name="text">
+          <string>&lt;b&gt;WARNING:&lt;/b&gt; Files in the above folder will be deleted after being added to calibre.</string>
+         </property>
+        </widget>
+       </item>
+       <item row="4" column="0">
+        <widget class="QGroupBox" name="groupBox">
+         <property name="title">
+          <string>Ignore files with the following extensions when automatically adding </string>
+         </property>
+         <layout class="QHBoxLayout" name="horizontalLayout_3">
+          <item>
+           <widget class="QListWidget" name="opt_blocked_auto_formats">
+            <property name="alternatingRowColors">
+             <bool>true</bool>
+            </property>
+            <property name="selectionMode">
+             <enum>QAbstractItemView::NoSelection</enum>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </widget>
+       </item>
+       <item row="4" column="1">
+        <spacer name="horizontalSpacer_2">
+         <property name="orientation">
+          <enum>Qt::Horizontal</enum>
+         </property>
+         <property name="sizeHint" stdset="0">
+          <size>
+           <width>272</width>
+           <height>20</height>
+          </size>
+         </property>
+        </spacer>
+       </item>
+       <item row="1" column="0" colspan="2">
        <layout class="QHBoxLayout" name="horizontalLayout_2">
         <item>
          <widget class="QLineEdit" name="opt_auto_add_path">
@ -179,21 +218,14 @@ Author matching is exact.</string>
            <string>...</string>
           </property>
           <property name="icon">
-            <iconset resource="../../../work/calibre/resources/images.qrc">
+            <iconset resource="../../../../resources/images.qrc">
             <normaloff>:/images/document_open.png</normaloff>:/images/document_open.png</iconset>
           </property>
          </widget>
         </item>
        </layout>
       </item>
-       <item>
-        <widget class="QLabel" name="label_2">
-         <property name="text">
-          <string>&lt;b&gt;WARNING:&lt;/b&gt; Files in the above folder will be deleted after being added to calibre.</string>
-         </property>
-        </widget>
-       </item>
-       <item>
+       <item row="3" column="0" colspan="2">
        <widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
         <property name="toolTip">
          <string>If set, this option will causes calibre to check if a file
@ -206,19 +238,6 @@ Author matching is exact.</string>
         </property>
        </widget>
       </item>
-       <item>
-        <spacer name="verticalSpacer_2">
-         <property name="orientation">
-          <enum>Qt::Vertical</enum>
-         </property>
-         <property name="sizeHint" stdset="0">
-          <size>
-           <width>20</width>
-           <height>40</height>
-          </size>
-         </property>
-        </spacer>
-       </item>
      </layout>
     </widget>
    </widget>
@ -226,7 +245,7 @@ Author matching is exact.</string>
  </layout>
 </widget>
 <resources>
-  <include location="../../../work/calibre/resources/images.qrc"/>
+  <include location="../../../../resources/images.qrc"/>
 </resources>
 <connections>
  <connection>
--- a/src/calibre/gui2/viewer/config.ui
+++ b/src/calibre/gui2/viewer/config.ui
@ -170,7 +170,7 @@
         <item row="8" column="0" colspan="2">
          <widget class="QCheckBox" name="opt_remember_window_size">
           <property name="text">
-            <string>Remember last used &amp;window size</string>
+            <string>Remember last used &amp;window size and layout</string>
           </property>
          </widget>
         </item>
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -689,7 +689,6 @@ class DocumentView(QWebView): # {{{
            self.manager.load_started()
        self.loading_url = QUrl.fromLocalFile(path)
        if has_svg:
-            prints('Rendering as XHTML...')
            self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
        else:
            self.setHtml(html, self.loading_url)
--- a/src/calibre/gui2/viewer/javascript.py
+++ b/src/calibre/gui2/viewer/javascript.py
@ -36,6 +36,15 @@ class JavaScriptLoader(object):

    def __init__(self, dynamic_coffeescript=False):
        self._dynamic_coffeescript = dynamic_coffeescript
+        if self._dynamic_coffeescript:
+            try:
+                from calibre.utils.serve_coffee import compile_coffeescript
+                compile_coffeescript
+            except:
+                self._dynamic_coffeescript = False
+                print ('WARNING: Failed to load serve_coffee, not compiling '
+                        'coffeescript dynamically.')
+
        self._cache = {}
        self._hp_cache = {}

--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -16,7 +16,8 @@ from PyQt4.Qt import (QWizard, QWizardPage, QPixmap, Qt, QAbstractListModel,
 from calibre import __appname__, patheq
 from calibre.library.database2 import LibraryDatabase2
 from calibre.library.move import MoveLibrary
-from calibre.constants import filesystem_encoding, iswindows, plugins
+from calibre.constants import (filesystem_encoding, iswindows, plugins,
+        isportable)
 from calibre.gui2.wizard.send_email import smtp_prefs
 from calibre.gui2.wizard.device_ui import Ui_WizardPage as DeviceUI
 from calibre.gui2.wizard.library_ui import Ui_WizardPage as LibraryUI
@ -769,6 +770,10 @@ class LibraryPage(QWizardPage, LibraryUI):
                    traceback.print_exc()
                    lp = os.path.expanduser(u'~')
        self.location.setText(lp)
+        # Hide the library location settings if we are a portable install
+        for x in ('location', 'button_change', 'libloc_label1',
+                'libloc_label2'):
+            getattr(self, x).setVisible(not isportable)

    def isComplete(self):
        try:
--- a/src/calibre/gui2/wizard/library.ui
+++ b/src/calibre/gui2/wizard/library.ui
@ -34,7 +34,7 @@
    <widget class="QComboBox" name="language"/>
   </item>
   <item row="2" column="0" colspan="3">
-    <widget class="QLabel" name="label">
+    <widget class="QLabel" name="libloc_label1">
     <property name="text">
      <string>&lt;p&gt;Choose a location for your books. When you add books to calibre, they will be copied here. Use an &lt;b&gt;empty folder&lt;/b&gt; for a new calibre library:</string>
     </property>
@ -58,7 +58,7 @@
    </widget>
   </item>
   <item row="4" column="0" colspan="3">
-    <widget class="QLabel" name="label_2">
+    <widget class="QLabel" name="libloc_label2">
     <property name="text">
      <string>If you have an existing calibre library, it will be copied to the new location. If a calibre library already exists at the new location, calibre will switch to using it.</string>
     </property>
--- a/src/calibre/library/catalogs/csv_xml.py
+++ b/src/calibre/library/catalogs/csv_xml.py
@ -93,6 +93,8 @@ class CSV_XML(CatalogPlugin):
            for entry in data:
                entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

+        fm = {x:db.field_metadata.get(x, {}) for x in fields}
+
        if self.fmt == 'csv':
            outfile = codecs.open(path_to_output, 'w', 'utf8')

@ -131,6 +133,8 @@ class CSV_XML(CatalogPlugin):
                    elif field == 'comments':
                        item = item.replace(u'\r\n',u' ')
                        item = item.replace(u'\n',u' ')
+                    elif fm.get(field, {}).get('datatype', None) == 'rating' and item:
+                        item = u'%.2g'%(item/2.0)

                    # Convert HTML to markdown text
                    if type(item) is unicode:
@ -168,6 +172,9 @@ class CSV_XML(CatalogPlugin):
                        if not val:
                            continue
                        if not isinstance(val, (str, unicode)):
+                            if (fm.get(field, {}).get('datatype', None) ==
+                                    'rating' and val):
+                                val = u'%.2g'%(val/2.0)
                            val = unicode(val)
                        item = getattr(E, field)(val)
                        record.append(item)
--- a/src/calibre/library/coloring.py
+++ b/src/calibre/library/coloring.py
@ -117,7 +117,10 @@ class Rule(object): # {{{
                'lt': ('1', '', ''),
                'gt': ('', '', '1')
        }[action]
-        return "cmp(raw_field('%s'), %s, '%s', '%s', '%s')" % (col, val, lt, eq, gt)
+        if col == 'size':
+            return "cmp(booksize(), %s, '%s', '%s', '%s')" % (val, lt, eq, gt)
+        else:
+            return "cmp(raw_field('%s'), %s, '%s', '%s', '%s')" % (col, val, lt, eq, gt)

    def rating_condition(self, col, action, val):
        lt, eq, gt = {
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -235,6 +235,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        defs['gui_restriction'] = defs['cs_restriction'] = ''
        defs['categories_using_hierarchy'] = []
        defs['column_color_rules'] = []
+        defs['grouped_search_make_user_categories'] = []

        # Migrate the bool tristate tweak
        defs['bools_are_tristate'] = \
--- a/src/calibre/library/field_metadata.py
+++ b/src/calibre/library/field_metadata.py
@ -173,7 +173,7 @@ class FieldMetadata(dict):
                           'datatype':'rating',
                           'is_multiple':{},
                           'kind':'field',
-                           'name':_('Ratings'),
+                           'name':_('Rating'),
                           'search_terms':['rating'],
                           'is_custom':False,
                           'is_category':True,
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -29,6 +29,7 @@ It can convert every input format in the following list, to every output format.
    PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers.
    PDB is also a generic format. |app| supports eReder, Plucker, PML and zTxt PDB files.
    DJVU support is only for converting DJVU files that contain embedded text. These are typically generated by OCR software.
+    MOBI books can be of two types: Mobi6 and KF8. |app| currently fully supports Mobi6 and supports conversion from, but not to, KF8.

 .. _best-source-formats:

--- a/src/calibre/manual/template_lang.rst
+++ b/src/calibre/manual/template_lang.rst
@ -57,7 +57,7 @@ For example, assume you want to use the template::

        {series} - {series_index} - {title}

-If the book has no series, the answer will be ``- - title``. Many people would rather the result be simply ``title``, without the hyphens. To do this, use the extended syntax ``{field:|prefix_text|suffix_text}``. When you use this syntax, if field has the value SERIES then the result will be ``prefix_textSERIESsuffix_text``. If field has no value, then the result will be the empty string (nothing); the prefix and suffix are ignored. The prefix and suffix can contain blanks.
+If the book has no series, the answer will be ``- - title``. Many people would rather the result be simply ``title``, without the hyphens. To do this, use the extended syntax ``{field:|prefix_text|suffix_text}``. When you use this syntax, if field has the value SERIES then the result will be ``prefix_textSERIESsuffix_text``. If field has no value, then the result will be the empty string (nothing); the prefix and suffix are ignored. The prefix and suffix can contain blanks. **Do not use subtemplates (`{ ... }`) or functions (see below) as the prefix or the suffix.**

 Using this syntax, we can solve the above series problem with the template::

@ -65,7 +65,7 @@ Using this syntax, we can solve the above series problem with the template::

 The hyphens will be included only if the book has a series index, which it will have only if it has a series.

-Notes: you must include the : character if you want to use a prefix or a suffix. You must either use no \| characters or both of them; using one, as in ``{field:| - }``, is not allowed. It is OK not to provide any text for one side or the other, such as in ``{series:|| - }``. Using ``{title:||}`` is the same as using ``{title}``.
+Notes: you must include the : character if you want to use a prefix or a suffix. You must either use no \| characters or both of them; using one, as in ``{field:| - }``, is not allowed. It is OK not to provide any text for one side or the other, such as in ``{series:|| - }``. Using ``{title:||}`` is the same as using ``{title}``. 

 Second: formatting. Suppose you wanted to ensure that the series_index is always formatted as three digits with leading zeros. This would do the trick::

@ -112,7 +112,7 @@ Functions are always applied before format specifications. See further down for

 The syntax for using functions is ``{field:function(arguments)}``, or ``{field:function(arguments)|prefix|suffix}``. Arguments are separated by commas. Commas inside arguments must be preceeded by a backslash ( '\\' ). The last (or only) argument cannot contain a closing parenthesis ( ')' ). Functions return the value of the field used in the template, suitably modified.

-If you have programming experience, please note that the syntax in this mode (single function) is not what you might expect. Strings are not quoted. Spaces are significant. All arguments must be constants; there is no sub-evaluation. **Do not use subtemplates (`{ ... }`) as function arguments.** Instead, use :ref:`template program mode <template_mode>` and :ref:`general program mode <general_mode>`.
+Important: If you have programming experience, please note that the syntax in this mode (single function) is not what you might expect. Strings are not quoted. Spaces are significant. All arguments must be constants; there is no sub-evaluation. **Do not use subtemplates (`{ ... }`) as function arguments.** Instead, use :ref:`template program mode <template_mode>` and :ref:`general program mode <general_mode>`.

 Many functions use regular expressions. In all cases, regular expression matching is case-insensitive.

--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/Show More
+++ b/Show More