This commit is contained in:
Sengian 2012-05-12 12:16:42 +02:00
commit 16a9c42edc
126 changed files with 47367 additions and 36095 deletions

View File

@ -2,6 +2,7 @@
.check-cache.pickle
src/calibre/plugins
resources/images.qrc
resources/compiled_coffeescript.zip
src/calibre/ebooks/oeb/display/test/*.js
src/calibre/manual/.build/
src/calibre/manual/cli/
@ -16,7 +17,6 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
resources/display/*.js
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/

View File

@ -19,6 +19,67 @@
# new recipes:
# - title:
- version: 0.8.51
date: 2012-05-11
new features:
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
tickets: [994514]
- title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
- title: "Driver for Motorola XT875 and Pandigital SuperNova"
tickets: [996890]
- title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
tickets: [994811]
- title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
- title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
tickets: [994838]
bug fixes:
- title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
tickets: [996102]
- title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
- title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
tickets: [997034]
- title: "Fix download of news in AZW3 format not working"
tickets: [996439]
- title: "Pocketbook driver: Update for new PB 611 firmware."
tickets: [903079]
- title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
tickets: [994939]
- title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
tickets: [994861]
- title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
improved recipes:
- Mainichi news
- derStandard
- Endgadget Japan
new recipes:
- title: Mainichi English
author: Hiroshi Miura
- title: The Grid TO
author: Yusuf W
- title: National Geographic (Italy)
author: faber1971
- title: Rebelion
author: Marc Busque
- version: 0.8.50
date: 2012-05-04

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from time import strftime
class DerStandardRecipe(BasicNewsRecipe):
title = u'derStandard'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
description = u'Nachrichten aus Österreich'
publisher ='derStandard.at'
category = 'news, politics, nachrichten, Austria'
@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
for t in soup.findAll(['ul', 'li']):
t.name = 'div'
return soup
def get_cover_url(self):
highResolution = True
date = strftime("%Y/%Y%m%d")
# it is also possible for the past
#date = '2012/20120503'
urlP1 = 'http://epaper.derstandarddigital.at/'
urlP2 = 'data_ep/STAN/' + date
urlP3 = '/V.B1/'
urlP4 = 'paper.htm'
urlHTML = urlP1 + urlP2 + urlP3 + urlP4
br = self.clone_browser(self.browser)
htmlF = br.open_novisit(urlHTML)
htmlC = htmlF.read()
# URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
# consists of part2 + part3 + 'pages/' + code
# 'pages/' has length 6, code has lenght 36
index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
code = htmlC[index:index + 36]
# URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
# URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
urlPic = urlP1 + urlP2 + '/pagejpg/' + code
if highResolution:
urlPic = urlPic + '_b'
urlPic = urlPic + '.png'
return urlPic

View File

@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
no_stylesheets = True
language = 'ja'
encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
index = 'http://japanese.engadget.com/'
remove_javascript = True
remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
remove_tags_after = dict(name='div', attrs={'class':'post_body'})
def parse_index(self):
feeds = []
newsarticles = []
soup = self.index_to_soup(self.index)
for topstories in soup.findAll('div',attrs={'class':'post_content'}):
itt = topstories.find('h4')
itema = itt.find('a',href=True)
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Latest Posts', newsarticles))
return feeds
remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

82
recipes/folha.recipe Normal file
View File

@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Folha_de_s_paulo(BasicNewsRecipe):
title = u'Folha de São Paulo - portal'
__author__ = 'Darko Miletic'
description = 'Um Jornala a servicao do Brasil'
publisher = 'Folhapress'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'pt_BR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
keep_only_tags = [dict(attrs={'id':'articleNew'})]
feeds = [
(u'Poder' , u'http://feeds.folha.uol.com.br/poder/rss091.xml' )
,(u'Mundo' , u'http://feeds.folha.uol.com.br/mundo/rss091.xml' )
,(u'Mercado' , u'http://feeds.folha.uol.com.br/mercado/rss091.xml' )
,(u'Cotidiano' , u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml' )
,(u'Esporte' , u'http://feeds.folha.uol.com.br/esporte/rss091.xml' )
,(u'Ilustrada' , u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml' )
,(u'F5' , u'http://feeds.folha.uol.com.br/f5/rss091.xml' )
,(u'Ciência' , u'http://feeds.folha.uol.com.br/ciencia/rss091.xml' )
,(u'Tec' , u'http://feeds.folha.uol.com.br/tec/rss091.xml' )
,(u'Ambiente' , u'http://feeds.folha.uol.com.br/ambiente/rss091.xml' )
,(u'Bichos' , u'http://feeds.folha.uol.com.br/bichos/rss091.xml' )
,(u'Celebridades' , u'http://feeds.folha.uol.com.br/celebridades/rss091.xml' )
,(u'Comida' , u'http://feeds.folha.uol.com.br/comida/rss091.xml' )
,(u'Equilibrio' , u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml' )
,(u'Folhateen' , u'http://feeds.folha.uol.com.br/folhateen/rss091.xml' )
,(u'Folhinha' , u'http://feeds.folha.uol.com.br/folhinha/rss091.xml' )
,(u'Ilustrissima' , u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml' )
,(u'Saber' , u'http://feeds.folha.uol.com.br/saber/rss091.xml' )
,(u'Turismo' , u'http://feeds.folha.uol.com.br/turismo/rss091.xml' )
,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
,(u'Publifolha' , u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml' )
,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml' )
]
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
curl = url.partition('/*')[2]
return curl
def print_version(self, url):
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
def get_cover_url(self):
soup = self.index_to_soup('http://www.folha.uol.com.br/')
cont = soup.find('div', attrs={'id':'newspaper'})
if cont:
ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
if ai:
return ai.img['src']
return None

View File

@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
language = 'pt_BR'
LANGHTM = 'pt-br'
ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1'

View File

@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
language = 'pt'
language = 'pt_BR'
no_stylesheets = True
max_articles_per_feed = 40
remove_javascript = True

79
recipes/grid_to.recipe Normal file
View File

@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGridTO(BasicNewsRecipe):
#: The title to use for the ebook
title = u'The Grid TO'
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
'accessible voice for Toronto.')
#: The author of this recipe
__author__ = u'Yusuf W'
#: The language that the news is in. Must be an ISO-639 code either
#: two or three characters long
language = 'en_CA'
#: Publication type
#: Set to newspaper, magazine or blog
publication_type = 'newspaper'
#: Convenient flag to disable loading of stylesheets for websites
#: that have overly complex stylesheets unsuitable for conversion
#: to ebooks formats
#: If True stylesheets are not downloaded and processed
no_stylesheets = True
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
remove_tags_before = dict(name='div', id='content')
remove_tags_after = dict(name='div', id='content')
remove_tags = [
dict(name='div', attrs={'class':'right-content pull-right'}),
dict(name='div', attrs={'class':'right-content'}),
dict(name='div', attrs={'class':'ftr-line'}),
dict(name='div', attrs={'class':'pull-right'}),
dict(name='div', id='comments'),
dict(name='div', id='tags')
]
#: Keep only the specified tags and their children.
#keep_only_tags = [dict(name='div', id='content')]
cover_margins = (0, 0, '#ffffff')
INDEX = 'http://www.thegridto.com'
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX)
cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
return cover_url
def parse_index(self):
# Get the latest issue
soup = self.index_to_soup(self.INDEX)
a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]
# Parse the index of the latest issue
self.INDEX = self.INDEX + a['href']
soup = self.index_to_soup(self.INDEX)
feeds = []
for section in ['city', 'life', 'culture']:
section_class = 'left-content article-listing ' + section + ' pull-left'
div = soup.find(attrs={'class': section_class})
articles = []
for tag in div.findAllNext(attrs={'class':'search-block'}):
a = tag.findAll('a', href=True)[1]
title = self.tag_to_string(a)
url = a['href']
articles.append({'title': title, 'url': url, 'description':'', 'date':''})
feeds.append((section, articles))
return feeds

BIN
recipes/icons/folha.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
top_url = 'http://www.jiji.com/'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_before = dict(id="article-area")
remove_tags_after = dict(id="ad_google")
def get_cover_url(self):

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'
'''
http://www.repubblica.it/
@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaRepubblica(BasicNewsRecipe):
title = 'La Repubblica'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 5
oldest_article = 1
encoding = 'utf8'
use_embedded_content = False
no_stylesheets = True
@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(attrs={'class':'articolo'}),
dict(attrs={'class':'body-text'}),
dict(name='p', attrs={'class':'disclaimer clearfix'}),
dict(name='div', attrs={'id':'main'}),
dict(attrs={'id':'contA'})
]
@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
dict(name='div', attrs={'class':'generalbox'}),
dict(name='ul', attrs={'id':'hystory'})
@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
(u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
(u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
(u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
(u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
(u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
(u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
]
def preprocess_html(self, soup):

View File

@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
index = 'http://mainichi.jp/select/'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('latest', newsarticles))
return feeds

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiEnglishNews(BasicNewsRecipe):
title = u'The Mainichi'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 40
description = 'Japanese traditional newspaper Mainichi news in English'
publisher = 'Mainichi News'
category = 'news, japan'
language = 'en_JP'
index = 'http://mainichi.jp/english/english/index.html'
remove_javascript = True
masthead_url = 'http://mainichi.jp/english/images/themainichi.png'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
for section in soup.findAll('section'):
newsarticles = []
section_name = 'news'
hds = section.find('div', attrs={'class':'CategoryHead clr'})
if hds:
section_item = hds.find('h1')
if section_item:
section_name = section_item.find('a').string
items = section.find('ul', attrs={'class':'MaiLink'})
for item in items.findAll('li'):
if item:
itema = item.find('a')
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append((section_name, newsarticles))
return feeds

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 100
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
publisher = 'Mainichi Daily News'
category = 'news, Japan, IT, Electronics'
language = 'ja'
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds

View File

@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyScienceNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(Science)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Japanese traditional newspaper Mainichi Daily News - science'
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
index = 'http://mainichi.jp/select/science'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Science', newsarticles))
return feeds

View File

@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'National Geographic'
__author__ = 'faber1971'
description = 'Science magazine'
language = 'it'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [dict(name='div',attrs={'class':'banner-abbonamenti'})]
feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]

View File

@ -1,5 +1,5 @@
"""
Pocket Calibre Recipe v1.0
Pocket Calibre Recipe v1.1
"""
__license__ = 'GPL v3'
__copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul', attrs={'id':'list'})
if ritem is None:
self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
continue
for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1:
break
@ -94,7 +97,7 @@ class Pocket(BasicNewsRecipe):
self.readList.append(readLink)
totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
raise Exception("Not enough articles in Pocket! Change minimum_articles or add more articles.")
return totalfeeds
def mark_as_read(self, markList):

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-28 10:42+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n"
"X-Generator: Launchpad (build 15149)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: ca\n"
#. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"
#. name for ica
msgid "Ede Ica"
msgstr ""
msgstr "Ede Ica"
#. name for ich
msgid "Etkywan"
msgstr ""
msgstr "Etkywan"
#. name for icl
msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
#. name for ida
msgid "Idakho-Isukha-Tiriki"
msgstr ""
msgstr "Idakho-Isukha-Tiriki"
#. name for idb
msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
#. name for idc
msgid "Idon"
msgstr ""
msgstr "Idon"
#. name for idd
msgid "Ede Idaca"
msgstr ""
msgstr "Ede Idaca"
#. name for ide
msgid "Idere"
msgstr ""
msgstr "Idere"
#. name for idi
msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""
#. name for ido
msgid "Ido"
msgstr ""
msgstr "ido"
#. name for idr
msgid "Indri"
msgstr ""
msgstr "Indri"
#. name for ids
msgid "Idesa"
msgstr ""
msgstr "Idesa"
#. name for idt
msgid "Idaté"
msgstr ""
msgstr "Idaté"
#. name for idu
msgid "Idoma"
msgstr ""
msgstr "Idoma"
#. name for ifa
msgid "Ifugao; Amganad"
msgstr ""
msgstr "Ifugao; Amganad"
#. name for ifb
msgid "Ifugao; Batad"
msgstr ""
msgstr "Ifugao; Batad"
#. name for ife
msgid "Ifè"
msgstr ""
msgstr "Ifè"
#. name for iff
msgid "Ifo"
msgstr ""
msgstr "Ifo"
#. name for ifk
msgid "Ifugao; Tuwali"
msgstr ""
msgstr "Ifugao; Tuwali"
#. name for ifm
msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
#. name for ifu
msgid "Ifugao; Mayoyao"
msgstr ""
msgstr "Ifugao; Mayoyao"
#. name for ify
msgid "Kallahan; Keley-I"
msgstr ""
msgstr "Kallahan; Keley-I"
#. name for igb
msgid "Ebira"
msgstr ""
msgstr "Ebira"
#. name for ige
msgid "Igede"

View File

@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
"PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
"X-Generator: Launchpad (build 15008)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: sr\n"
#. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""
#. name for deu
msgid "German"
msgstr "немачки"
msgstr "Немачки"
#. name for dev
msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"
#. name for glg
msgid "Galician"
msgstr ""
msgstr "Галицијски"
#. name for glh
msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""
#. name for gmh
msgid "German; Middle High (ca. 1050-1500)"
msgstr ""
msgstr "Немачки; средње високи (ca. 1050-1500)"
#. name for gml
msgid "German; Middle Low"
msgstr ""
msgstr "Немачки; средње низак"
#. name for gmm
msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""
#. name for gsg
msgid "German Sign Language"
msgstr ""
msgstr "Немачки језик"
#. name for gsl
msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""
#. name for gsw
msgid "German; Swiss"
msgstr ""
msgstr "Немачки ; Швајцарска"
#. name for gta
msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""
#. name for nds
msgid "German; Low"
msgstr ""
msgstr "Немачки; низак"
#. name for ndt
msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""
#. name for nno
msgid "Norwegian Nynorsk"
msgstr "норвешки модерни"
msgstr "Норвешки модерни"
#. name for nnp
msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""
#. name for nob
msgid "Norwegian Bokmål"
msgstr ""
msgstr "Норвешки (књижевни)"
#. name for noc
msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""
#. name for nor
msgid "Norwegian"
msgstr "норвешки"
msgstr "Норвешки"
#. name for nos
msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""
#. name for nsl
msgid "Norwegian Sign Language"
msgstr ""
msgstr "Норвешки језик"
#. name for nsm
msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""
#. name for pdc
msgid "German; Pennsylvania"
msgstr ""
msgstr "Немачки ; Пенсилванија"
#. name for pdi
msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""
#. name for rmg
msgid "Norwegian; Traveller"
msgstr ""
msgstr "Норвешки; путнички"
#. name for rmh
msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""
#. name for sgg
msgid "Swiss-German Sign Language"
msgstr ""
msgstr "Швајцарско-Немачки језик"
#. name for sgh
msgid "Shughni"

View File

@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript'
COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
COFFEE_DIRS = ('ebooks/oeb/display',)
def add_options(self, parser):
parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,47 +47,67 @@ class Coffee(Command): # {{{
except KeyboardInterrupt:
pass
def show_js(self, jsfile):
def show_js(self, raw):
from pygments.lexers import JavascriptLexer
from pygments.formatters import TerminalFormatter
from pygments import highlight
with open(jsfile, 'rb') as f:
raw = f.read()
print highlight(raw, JavascriptLexer(), TerminalFormatter())
def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
if self.newer(js, x):
src_files = {}
for src in self.COFFEE_DIRS:
for f in glob.glob(self.j(self.SRC, __appname__, src,
'*.coffee')):
bn = os.path.basename(f).rpartition('.')[0]
arcname = src.replace('/', '.') + '.' + bn + '.js'
src_files[arcname] = (f, os.stat(f).st_mtime)
existing = {}
dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(dest):
with zipfile.ZipFile(dest, 'r') as zf:
for info in zf.infolist():
mtime = time.mktime(info.date_time + (0, 0, -1))
arcname = info.filename
if (arcname in src_files and src_files[arcname][1] <
mtime):
existing[arcname] = (zf.read(info), info)
todo = set(src_files) - set(existing)
updated = {}
for arcname in todo:
name = arcname.rpartition('.')[0]
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x)))
timestamp else '', name))
src = src_files[arcname][0]
try:
cs = subprocess.check_output(self.compiler +
[x]).decode('utf-8')
js = subprocess.check_output(self.compiler +
[src]).decode('utf-8')
except Exception as e:
print ('\n\tCompilation of %s failed'%os.path.basename(x))
print ('\n\tCompilation of %s failed'%name)
print (e)
if ignore_errors:
with open(js, 'wb') as f:
f.write('# Compilation from coffeescript failed')
js = u'# Compilation from coffeescript failed'
else:
raise SystemExit(1)
else:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
zi = zipfile.ZipInfo()
zi.filename = arcname
zi.date_time = time.localtime()[:6]
updated[arcname] = (js.encode('utf-8'), zi)
if updated:
with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
for raw, zi in updated.itervalues():
zf.writestr(zi, raw)
for raw, zi in existing.itervalues():
zf.writestr(zi, raw)
def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
x = x.rpartition('.')[0] + '.js'
x = self.j(dest, os.path.basename(x))
x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(x):
os.remove(x)
# }}}

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 50)
numeric_version = (0, 8, 51)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -57,6 +57,7 @@ class ANDROID(USBMS):
0x4316 : [0x216],
0x42d6 : [0x216],
0x42d7 : [0x216],
0x42f7 : [0x216],
},
# Freescale
0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE']
'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,7 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
name = 'PocketBook 360 Device Interface'
gui_name = 'PocketBook 360'
VENDOR_ID = [0x1f85, 0x525]
PRODUCT_ID = [0x1688, 0xa4a5]
BCD = [0x110]
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

View File

@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'
author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw', 'azw3'])
file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])
def convert(self, stream, options, file_ext, log,
accelerators):

View File

@ -306,6 +306,11 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[184:192]
if self.file_version < 8:
self.first_text_record, self.last_text_record = \
struct.unpack_from(b'>HH', self.raw, 192)
self.fdst_count = struct.unpack_from(b'>L', self.raw, 196)
else:
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
if self.fdst_count <= 1:
@ -409,6 +414,10 @@ class MOBIHeader(object): # {{{
a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
a('Unknown4: %r'%self.unknown4)
if hasattr(self, 'first_text_record'):
a('First content record: %d'%self.first_text_record)
a('Last content record: %d'%self.last_text_record)
else:
r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number')

View File

@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
continue
if not isinstance(flow, unicode):
try:
flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:
log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
flow = flow.decode(mr.header.codec, 'replace')
# links to raster image files from image tags
# image_pattern

View File

@ -207,9 +207,9 @@ class Mobi8Reader(object):
fname = 'svgimg' + nstr + '.svg'
else:
# search for CDATA and if exists inline it
if flowpart.find('[CDATA[') >= 0:
if flowpart.find(b'[CDATA[') >= 0:
typ = 'css'
flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
format = 'inline'
dir = None
fname = None

View File

@ -382,6 +382,7 @@ class MobiWriter(object):
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
resource_record_count = len(self.records) - old
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose)
flis_number = len(self.records)
@ -406,7 +407,7 @@ class MobiWriter(object):
# header
header_fields['first_resource_record'] = first_image_record
header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
header_fields['fdst_record'] = NULL_INDEX
header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number

View File

@ -314,9 +314,9 @@ class KF8Writer(object):
return
# Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants()
fl = toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href,
entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')),
'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)

View File

@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
unknown2 = zeroes(8)
# 192: FDST
# In MOBI 6 the fdst record is instead two two byte fields storing the
# index of the first and last content records
fdst_record = DYN
fdst_count = DYN

View File

@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
# Drill down into iframes, etc.
while true
target = cdoc.elementFromPoint x, y
if not target or target.localName == 'html'
log("No element at (#{ x }, #{ y })")
if not target or target.localName in ['html', 'body']
# We ignore both html and body even though body could
# have text nodes under it as performance is very poor if body
# has large margins/padding (for e.g. in fullscreen mode)
# A possible solution for this is to wrap all text node
# children of body in <span> but that is seriously ugly and
# might have side effects. Lets do this only if there are lots of
# books in the wild that actually have text children of body,
# and even in this case it might be better to change the input
# plugin to prevent this from happening.
# log("No element at (#{ x }, #{ y })")
return null
name = target.localName

View File

@ -0,0 +1,76 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
# Return the total scrollable height of the document. Browsers disagree on
# which of the height properties reflects the true document height, so take
# the maximum of all of them; return 0 if the document is not ready yet.
body_height = () ->
    db = document.body
    dde = document.documentElement
    if db? and dde?
        return Math.max(db.scrollHeight, dde.scrollHeight, db.offsetHeight,
            dde.offsetHeight, db.clientHeight, dde.clientHeight)
    return 0
# Absolute (document co-ordinate system) vertical position of elem, computed
# by summing offsetTop while walking up the offsetParent chain.
abstop = (elem) ->
    ans = elem.offsetTop
    while elem.offsetParent
        elem = elem.offsetParent
        ans += elem.offsetTop
    return ans
class BookIndexing
    ###
    This class is a namespace to expose indexing functions via the
    window.book_indexing object. The most important functions are:

    anchor_positions(): Get the absolute (document co-ordinate system) position
    for elements with the specified id/name attributes.
    ###

    constructor: () ->
        # Map of anchor name -> absolute position from the last computation,
        # plus the body height at that time (positions are invalidated when
        # the document reflows and its height changes).
        this.cache = {}
        this.body_height_at_last_check = null

    cache_valid: (anchors) ->
        # The cache is valid only if it contains exactly the requested set of
        # anchors — no more, no less.
        for a in anchors
            if not Object.prototype.hasOwnProperty.call(this.cache, a)
                return false
        for p of this.cache
            if Object.prototype.hasOwnProperty.call(this.cache, p) and p not in anchors
                return false
        return true

    anchor_positions: (anchors, use_cache=false) ->
        # Return a map of anchor name -> absolute document position. When
        # use_cache is true, reuse the previous answer as long as the document
        # height is unchanged and the same anchors are being asked for.
        if use_cache and body_height() == this.body_height_at_last_check and this.cache_valid(anchors)
            return this.cache

        ans = {}
        for anchor in anchors
            elem = document.getElementById(anchor)
            if elem == null
                # Look for an <a name="anchor"> element
                try
                    result = document.evaluate(
                        ".//*[local-name() = 'a' and @name='#{ anchor }']",
                        document.body, null,
                        XPathResult.FIRST_ORDERED_NODE_TYPE, null)
                    elem = result.singleNodeValue
                catch error
                    # The anchor had a ' or other invalid char
                    elem = null
            if elem == null
                # Anchor not found: report a position past the end of the
                # document so it sorts after every real anchor
                pos = body_height() + 10000
            else
                pos = abstop(elem)
            ans[anchor] = pos
        this.cache = ans
        this.body_height_at_last_check = body_height()
        return ans

if window?
    window.book_indexing = new BookIndexing()

View File

@ -1,383 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from cStringIO import StringIO
from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import (guess_type, prints, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def character_count(html):
'''
Return the number of "significant" text characters in a HTML string.
'''
count = 0
strip_space = re.compile(r'\s+')
for match in re.finditer(r'>[^<]+<', html):
count += len(strip_space.sub(' ', match.group()))-2
return count
class UnsupportedFormatError(Exception):
def __init__(self, fmt):
Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
class SpineItem(unicode):
def __new__(cls, path, mime_type=None):
ppath = path.partition('#')[0]
if not os.path.exists(path) and os.path.exists(ppath):
path = ppath
obj = super(SpineItem, cls).__new__(cls, path)
raw = open(path, 'rb').read()
raw, obj.encoding = xml_to_unicode(raw)
obj.character_count = character_count(raw)
obj.start_page = -1
obj.pages = -1
obj.max_page = -1
if mime_type is None:
mime_type = guess_type(obj)[0]
obj.mime_type = mime_type
return obj
class FakeOpts(object):
verbose = 0
breadth_first = False
max_levels = 5
input_encoding = None
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
def write_oebbook(oeb, path):
from calibre.ebooks.oeb.writer import OEBWriter
from calibre import walk
w = OEBWriter()
w(oeb, path)
for f in walk(path):
if f.endswith('.opf'):
return f
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
def __init__(self, pathtoebook, log=None):
self.log = log
if log is None:
self.log = Log()
pathtoebook = pathtoebook.strip()
self.pathtoebook = os.path.abspath(pathtoebook)
self.config = DynamicConfig(name='iterator')
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
self.ebook_ext = ext.replace('original_', '')
def search(self, text, index, backwards=False):
text = prepare_string_for_xml(text.lower())
pmap = [(i, path) for i, path in enumerate(self.spine)]
if backwards:
pmap.reverse()
for i, path in pmap:
if (backwards and i < index) or (not backwards and i > index):
with open(path, 'rb') as f:
raw = f.read().decode(path.encoding)
try:
raw = xml_replace_entities(raw)
except:
pass
if text in raw.lower():
return i
def find_missing_css_files(self):
for x in os.walk(os.path.dirname(self.pathtoopf)):
for f in x[-1]:
if f.endswith('.css'):
yield os.path.join(x[0], f)
def find_declared_css_files(self):
for item in self.opf.manifest:
if item.mime_type and 'css' in item.mime_type.lower():
yield item.path
def find_embedded_fonts(self):
'''
This will become unnecessary once Qt WebKit supports the @font-face rule.
'''
css_files = set(self.find_declared_css_files())
if not css_files:
css_files = set(self.find_missing_css_files())
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
if url:
path = url.group(1).split('/')
path = os.path.join(os.path.dirname(csspath), *path)
if not os.access(path, os.R_OK):
continue
id = QFontDatabase.addApplicationFont(path)
if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
if family:
family = family.group(1)
specified_families = [x.strip().replace('"',
'').replace("'", '') for x in family.split(',')]
aliasing_ok = False
for f in specified_families:
bad_map[f] = families[0]
if not aliasing_ok and f in families:
aliasing_ok = True
if not aliasing_ok:
prints('WARNING: Family aliasing not fully supported.')
prints('\tDeclared family: %r not in actual families: %r'
% (family, families))
else:
prints('Loaded embedded font:', repr(family))
if bad_map:
def prepend_embedded_font(match):
for bad, good in bad_map.items():
if bad in match.group(1):
prints('Substituting font family: %s -> %s'%(bad, good))
return match.group().replace(bad, '"%s"'%good)
from calibre.ebooks.chardet import force_encoding
for csspath in css_files:
with open(csspath, 'r+b') as f:
css = f.read()
enc = force_encoding(css, False)
css = css.decode(enc, 'replace')
ncss = font_family_pat.sub(prepend_embedded_font, css)
if ncss != css:
f.seek(0)
f.truncate()
f.write(ncss.encode(enc))
def __enter__(self, processed=False, only_input_plugin=False):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
with plumber.input_plugin:
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if not only_input_plugin:
if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
not hasattr(self.pathtoopf, 'manifest'):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
for i in ordered:
spath = i.path
mt = None
if i.idref is not None:
mt = self.opf.manifest.type_for_id(i.idref)
if mt is None:
mt = guess_type(spath)[0]
try:
self.spine.append(SpineItem(spath, mime_type=mt))
except:
self.log.warn('Missing spine item:', repr(spath))
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
open(cfile, 'wb').write(chtml)
self.spine[0:0] = [SpineItem(cfile,
mime_type='application/xhtml+xml')]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
try:
self.spine.append(SpineItem(self.opf.path_to_html_toc))
except:
import traceback
traceback.print_exc()
sizes = [i.character_count for i in self.spine]
self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
for p, s in zip(self.pages, self.spine):
s.pages = p
start = 1
for s in self.spine:
s.start_page = start
start += s.pages
s.max_page = s.start_page + s.pages - 1
self.toc = self.opf.toc
self.read_bookmarks()
return self
def parse_bookmarks(self, raw):
for line in raw.splitlines():
bm = None
if line.count('^') > 0:
tokens = line.rpartition('^')
title, ref = tokens[0], tokens[2]
try:
spine, _, pos = ref.partition('#')
spine = int(spine.strip())
except:
continue
bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
elif BM_FIELD_SEP in line:
try:
title, spine, pos = line.strip().split(BM_FIELD_SEP)
spine = int(spine)
except:
continue
# Unescape from serialization
pos = pos.replace(BM_LEGACY_ESC, u'^')
# Check for pos being a scroll fraction
try:
pos = float(pos)
except:
pass
bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
if bm:
self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks):
dat = []
for bm in bookmarks:
if bm['type'] == 'legacy':
rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
else:
pos = bm['pos']
if isinstance(pos, (int, float)):
pos = unicode(pos)
else:
pos = pos.replace(u'^', BM_LEGACY_ESC)
rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
dat.append(rec)
return (u'\n'.join(dat) +u'\n')
def read_bookmarks(self):
self.bookmarks = []
bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
raw = ''
if os.path.exists(bmfile):
with open(bmfile, 'rb') as f:
raw = f.read()
else:
saved = self.config['bookmarks_'+self.pathtoebook]
if saved:
raw = saved
if not isinstance(raw, unicode):
raw = raw.decode('utf-8')
self.parse_bookmarks(raw)
def save_bookmarks(self, bookmarks=None):
if bookmarks is None:
bookmarks = self.bookmarks
dat = self.serialize_bookmarks(bookmarks)
if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
os.access(self.pathtoebook, os.R_OK):
try:
zf = open(self.pathtoebook, 'r+b')
except IOError:
return
safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
StringIO(dat.encode('utf-8')),
add_missing=True)
else:
self.config['bookmarks_'+self.pathtoebook] = dat
def add_bookmark(self, bm):
self.bookmarks = [x for x in self.bookmarks if x['title'] !=
bm['title']]
self.bookmarks.append(bm)
self.save_bookmarks()
def set_bookmarks(self, bookmarks):
self.bookmarks = bookmarks
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)
def get_preprocess_html(path_to_ebook, output):
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
iterator = EbookIterator(path_to_ebook)
iterator.__enter__(only_input_plugin=True)
preprocessor = HTMLPreProcessor(None, False)
with open(output, 'wb') as out:
for path in iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
out.write(html.encode('utf-8'))
out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re
from calibre.customize.ui import available_input_formats
def is_supported(path):
    '''Return True if the file at *path* has an extension for which an input
    format plugin is available.'''
    extension = os.path.splitext(path)[1].replace('.', '').lower()
    # Normalize the htm/xhtm/xhtml/html family to plain 'html'
    normalized = re.sub(r'(x{0,1})htm(l{0,1})', 'html', extension)
    return normalized in available_input_formats()
class UnsupportedFormatError(Exception):

    '''Raised when an ebook is in a format that no input plugin supports.'''

    def __init__(self, fmt):
        # *fmt* is the format/extension name; it is upper-cased for display
        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
def EbookIterator(*args, **kwargs):
    '''Factory preserved for backwards compatibility.

    The implementation now lives in calibre.ebooks.oeb.iterator.book; this
    wrapper just instantiates it, importing lazily at call time. The alias
    avoids shadowing the real class name inside this function.
    '''
    from calibre.ebooks.oeb.iterator.book import EbookIterator as _RealIterator
    return _RealIterator(*args, **kwargs)
def get_preprocess_html(path_to_ebook, output):
    '''Write the preprocessed HTML of every spine item of the book at
    *path_to_ebook* into the file *output* (UTF-8 encoded), separating the
    items with a line of dashes. Useful for debugging the preprocess step.'''
    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
    iterator = EbookIterator(path_to_ebook)
    # Only the input plugin is run; character counts and anchor maps are not
    # needed for preprocessing output.
    # NOTE(review): __exit__ is never called, so the iterator's temporary
    # directory is not cleaned up here — confirm this is intentional.
    iterator.__enter__(only_input_plugin=True, run_char_count=False,
            read_anchor_map=False)
    preprocessor = HTMLPreProcessor(None, False)
    with open(output, 'wb') as out:
        for path in iterator.spine:
            with open(path, 'rb') as f:
                html = f.read().decode('utf-8', 'replace')
                html = preprocessor(html, get_preprocess_html=True)
                out.write(html.encode('utf-8'))
                # Separator between spine items
                out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,187 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from functools import partial
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import default_log
from calibre import (guess_type, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
class FakeOpts(object):
    '''Minimal stand-in for a parsed conversion-options object, for code
    paths where real command line options are not available.'''
    verbose = 0            # no verbose logging
    breadth_first = False  # traversal order flag
    max_levels = 5         # maximum recursion depth
    input_encoding = None  # autodetect input encoding
def write_oebbook(oeb, path):
    '''Serialize the OEB book *oeb* into the directory *path* and return the
    path of the OPF file that was written, or None if no OPF is found.'''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    # The writer produces an OPF somewhere under path; locate it
    return next((name for name in walk(path) if name.endswith('.opf')), None)
class EbookIterator(BookmarksMixin):

    '''Convert an ebook into an exploded OEB book in a temporary directory
    and iterate over its spine. Adds page-count estimates and ToC indexing
    data to each spine item; bookmark handling comes from BookmarksMixin.
    Use as a context manager: the conversion runs in __enter__.'''

    # Nominal number of characters per "page", used for page estimates
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None):
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalize the htm/xhtm/xhtml/html family to plain 'html'
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''Return the index of the first spine item after (before, if
        *backwards*) position *index* whose text contains *text*, or None.'''
        text = prepare_string_for_xml(text.lower())
        pmap = [(i, path) for i, path in enumerate(self.spine)]
        if backwards:
            pmap.reverse()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                try:
                    raw = xml_replace_entities(raw)
                except:
                    pass
                if text in raw.lower():
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
            run_char_count=True, read_anchor_map=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc. '''
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf,
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            # NOTE(review): `and` binds tighter than `or` here, so the
            # manifest check only applies to the pdb/pdf/rb branch — confirm
            # that is the intended grouping.
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                    and not hasattr(self.pathtoopf, 'manifest')):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                        plumber.opts)

        if hasattr(self.pathtoopf, 'manifest'):
            # Still an in-memory OEB book: write it out and keep the OPF path
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        # Linear spine items first, non-linear ones at the end
        ordered = [i for i in self.opf.spine if i.is_linear] + \
            [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                run_char_count=run_char_count)
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
            except:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                'azw', 'azw3'}:
            # These formats have no titlepage in the spine; synthesize one
            # from the cover image and prepend it
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except:
                import traceback
                traceback.print_exc()

        # Estimate page numbers from character counts and assign each spine
        # item its start/max page
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()
        return self

    def __exit__(self, *args):
        # Clean up the temporary directory and any synthesized files
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except:
                pass

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from io import BytesIO
from calibre.utils.zipfile import safe_replace
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
class BookmarksMixin(object):

    '''Mixin that loads, saves and (de)serializes reading-position bookmarks.

    For EPUB files bookmarks are stored inside the file itself, in
    META-INF/calibre_bookmarks.txt; for other formats they go into the
    dynamic config keyed by the book's path. The host class must provide
    self.base, self.pathtoebook and self.config.
    '''

    def parse_bookmarks(self, raw):
        '''Parse serialized bookmark data in *raw* and append the resulting
        bookmark dicts to self.bookmarks. Malformed lines are skipped.'''
        for line in raw.splitlines():
            bm = None
            if line.count('^') > 0:
                # Legacy format: title^spine#pos
                tokens = line.rpartition('^')
                title, ref = tokens[0], tokens[2]
                try:
                    spine, _, pos = ref.partition('#')
                    spine = int(spine.strip())
                except:
                    continue
                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
            elif BM_FIELD_SEP in line:
                # Current format: title, spine index and position joined by
                # the BM_FIELD_SEP marker
                try:
                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
                    spine = int(spine)
                except:
                    continue
                # Unescape from serialization
                pos = pos.replace(BM_LEGACY_ESC, u'^')
                # Check for pos being a scroll fraction
                try:
                    pos = float(pos)
                except:
                    pass
                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}

            if bm:
                self.bookmarks.append(bm)

    def serialize_bookmarks(self, bookmarks):
        '''Return *bookmarks* (a list of bookmark dicts) serialized as a
        unicode string, one bookmark per line.'''
        dat = []
        for bm in bookmarks:
            if bm['type'] == 'legacy':
                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
            else:
                pos = bm['pos']
                if isinstance(pos, (int, float)):
                    pos = unicode(pos)
                else:
                    # Escape the legacy '^' separator before serializing
                    pos = pos.replace(u'^', BM_LEGACY_ESC)
                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
            dat.append(rec)
        return (u'\n'.join(dat) +u'\n')

    def read_bookmarks(self):
        '''Populate self.bookmarks, preferring bookmarks stored inside the
        book (EPUB) over those saved in the config.'''
        self.bookmarks = []
        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
        raw = ''
        if os.path.exists(bmfile):
            with open(bmfile, 'rb') as f:
                raw = f.read()
        else:
            saved = self.config['bookmarks_'+self.pathtoebook]
            if saved:
                raw = saved
        if not isinstance(raw, unicode):
            raw = raw.decode('utf-8')
        self.parse_bookmarks(raw)

    def save_bookmarks(self, bookmarks=None):
        '''Persist *bookmarks* (defaults to self.bookmarks): into the EPUB
        file itself when possible, otherwise into the config.'''
        if bookmarks is None:
            bookmarks = self.bookmarks
        dat = self.serialize_bookmarks(bookmarks)
        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
                os.access(self.pathtoebook, os.R_OK):
            try:
                zf = open(self.pathtoebook, 'r+b')
            except IOError:
                # Book file not writable; silently give up
                return
            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
                    BytesIO(dat.encode('utf-8')),
                    add_missing=True)
        else:
            self.config['bookmarks_'+self.pathtoebook] = dat

    def add_bookmark(self, bm):
        '''Add *bm*, replacing any existing bookmark with the same title,
        and persist the result.'''
        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
                bm['title']]
        self.bookmarks.append(bm)
        self.save_bookmarks()

    def set_bookmarks(self, bookmarks):
        # Replace the in-memory bookmark list without persisting it
        self.bookmarks = bookmarks

View File

@ -0,0 +1,120 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os
from functools import partial
from operator import attrgetter
from collections import namedtuple
from calibre import guess_type
from calibre.ebooks.chardet import xml_to_unicode
def character_count(html):
    '''Return the number of "significant" text characters in a HTML string.

    Each text run between tags (a ``>...<`` span) is counted with internal
    whitespace runs collapsed to single spaces; the enclosing ``>`` and
    ``<`` are excluded from the count.
    '''
    collapse_ws = re.compile(r'\s+')
    return sum(len(collapse_ws.sub(' ', run.group())) - 2
               for run in re.finditer(r'>[^<]+<', html))
def anchor_map(html):
    '''Return a map of all anchor names (id/name attribute values) to their
    offsets in *html*. For duplicate names, the first occurrence wins.'''
    ans = {}
    for match in re.finditer(
            r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
        # The anchor name is the captured group; group(0) would be the whole
        # attribute text (e.g. 'id="foo"') and could never match a real
        # anchor lookup.
        anchor = match.group(1)
        # Keep the offset of the first occurrence of this anchor
        ans[anchor] = ans.get(anchor, match.start())
    return ans
class SpineItem(unicode):

    '''A spine entry: a unicode subclass whose value is the path to the HTML
    file, with indexing/paging metadata attached as attributes.'''

    def __new__(cls, path, mime_type=None, read_anchor_map=True,
            run_char_count=True):
        # If path carries a #fragment and the bare file exists, drop the
        # fragment
        ppath = path.partition('#')[0]
        if not os.path.exists(path) and os.path.exists(ppath):
            path = ppath
        obj = super(SpineItem, cls).__new__(cls, path)
        with open(path, 'rb') as f:
            raw = f.read()
        raw, obj.encoding = xml_to_unicode(raw)
        # Both scans are optional as they are relatively expensive; 10000 is
        # a placeholder count used when accurate page estimates are not needed
        obj.character_count = character_count(raw) if run_char_count else 10000
        obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
        # Page numbers are filled in later by the iterator
        obj.start_page = -1
        obj.pages = -1
        obj.max_page = -1
        obj.index_entries = []
        if mime_type is None:
            mime_type = guess_type(obj)[0]
        obj.mime_type = mime_type
        return obj
class IndexEntry(object):

    '''A flattened Table of Contents entry, locating where the entry starts
    (and, after find_end() is called, where it ends) within the spine.'''

    def __init__(self, spine, toc_entry, num):
        self.num = num
        self.text = toc_entry.text or _('Unknown')
        self.key = toc_entry.abspath
        self.anchor = self.start_anchor = toc_entry.fragment or None
        try:
            self.spine_pos = spine.index(self.key)
        except ValueError:
            # The ToC entry points at a file that is not in the spine
            self.spine_pos = -1
        self.anchor_pos = 0
        if self.spine_pos > -1:
            self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
                    0)
        # depth = number of ancestors of this entry in the ToC tree
        self.depth = 0
        p = toc_entry.parent
        while p is not None:
            self.depth += 1
            p = p.parent
        self.sort_key = (self.spine_pos, self.anchor_pos)
        self.spine_count = len(spine)

    def find_end(self, all_entries):
        '''Determine where this entry's content ends: at the first entry of
        equal or lesser depth that starts after this one, or at the end of
        the spine if there is none. *all_entries* must be sorted by
        sort_key.'''
        potential_enders = [i for i in all_entries if
                i.depth <= self.depth and
                (
                    (i.spine_pos == self.spine_pos and i.anchor_pos >
                        self.anchor_pos)
                    or
                    i.spine_pos > self.spine_pos
                )]
        if potential_enders:
            # potential_enders is sorted by (spine_pos, anchor_pos)
            end = potential_enders[0]
            self.end_spine_pos = end.spine_pos
            self.end_anchor = end.anchor
        else:
            # Nothing follows: this entry runs to the end of the book
            self.end_spine_pos = self.spine_count - 1
            self.end_anchor = None
def create_indexing_data(spine, toc):
    '''Flatten *toc* into IndexEntry objects and attach to every SpineItem in
    *spine* the entries (with their start/end anchors) that touch it, as
    (entry, start_anchor, end_anchor) namedtuples in its index_entries.
    Does nothing if *toc* is empty.'''
    if not toc: return
    f = partial(IndexEntry, spine)
    # Number entries 0-based, excluding the root node itself (flat() yields
    # the root first, hence the i-1)
    index_entries = list(map(f,
        (t for t in toc.flat() if t is not toc),
        (i-1 for i, t in enumerate(toc.flat()) if t is not toc)
        ))
    index_entries.sort(key=attrgetter('sort_key'))
    # find_end mutates each entry in place; use a plain loop rather than a
    # throwaway list comprehension
    for entry in index_entries:
        entry.find_end(index_entries)

    ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')

    for spine_pos, spine_item in enumerate(spine):
        for i in index_entries:
            if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
                continue # Does not touch this file
            start = i.anchor if i.spine_pos == spine_pos else None
            end = i.end_anchor if i.spine_pos == spine_pos else None
            spine_item.index_entries.append(ie(i, start, end))

View File

@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
# Remove any encoding-specifying <meta/> elements
for meta in META_XP(data):
meta.getparent().remove(meta)
etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type',
'content': '%s; charset=utf-8' % XHTML_NS})
meta = etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type'})
meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
# attribute
# Ensure has a <body/>
if not xpath(data, '/h:html/h:body'):
body = xpath(data, '//h:body')

View File

@ -347,6 +347,10 @@ class Stylizer(object):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule):
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule)
return results

View File

@ -137,8 +137,9 @@ def _config(): # {{{
c.add_opt('LRF_ebook_viewer_options', default=None,
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
help=_('Formats that are viewed using the internal viewer'))
'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
'SNB', 'HTMLZ'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))
c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

View File

@ -357,7 +357,9 @@ class MetadataSingleDialogBase(ResizableDialog):
old_tags = self.tags.current_val
tags = mi.tags if mi.tags else []
if old_tags and merge_tags:
tags += old_tags
ltags, lotags = {t.lower() for t in tags}, {t.lower() for t in
old_tags}
tags = [t for t in tags if t.lower() in ltags-lotags] + old_tags
self.tags.current_val = tags
if not mi.is_null('identifiers'):
current = self.identifiers.current_val
@ -463,7 +465,12 @@ class MetadataSingleDialogBase(ResizableDialog):
ResizableDialog.reject(self)
def save_state(self):
try:
gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry())
except:
# Weird failure, see https://bugs.launchpad.net/bugs/995271
import traceback
traceback.print_exc()
# Dialog use methods {{{
def start(self, row_list, current_row, view_slot=None,

View File

@ -955,7 +955,7 @@ class FullFetch(QDialog): # {{{
# QWebView. Seems to only happen on windows, but keep it for all
# platforms just in case.
self.identify_widget.comments_view.setMaximumHeight(500)
self.resize(850, 550)
self.resize(850, 600)
self.finished.connect(self.cleanup)
@ -1034,7 +1034,7 @@ class CoverFetch(QDialog): # {{{
self.covers_widget.chosen.connect(self.accept)
l.addWidget(self.covers_widget)
self.resize(850, 550)
self.resize(850, 600)
self.finished.connect(self.cleanup)

View File

@ -11,6 +11,7 @@ import os, zipfile
import calibre
from calibre.utils.localization import lang_as_iso639_1
from calibre.utils.resources import compiled_coffeescript
class JavaScriptLoader(object):
@ -27,7 +28,7 @@ class JavaScriptLoader(object):
}.iteritems()}
CS = {
'cfi':('ebooks/oeb/display/cfi.coffee', 'display/cfi.js'),
'cfi':'ebooks.oeb.display.cfi',
}
ORDER = ('jquery', 'jquery_scrollTo', 'bookmarks', 'referencing', 'images',
@ -59,21 +60,9 @@ class JavaScriptLoader(object):
ans = P(src, data=True,
allow_user_override=False).decode('utf-8')
else:
f = getattr(calibre, '__file__', None)
if self._dynamic_coffeescript and f and os.path.exists(f):
src = src[0]
src = os.path.join(os.path.dirname(f), *(src.split('/')))
from calibre.utils.serve_coffee import compile_coffeescript
with open(src, 'rb') as f:
cs, errors = compile_coffeescript(f.read(), src)
if errors:
for line in errors:
print (line)
raise Exception('Failed to compile coffeescript'
': %s'%src)
ans = cs
else:
ans = P(src[1], data=True, allow_user_override=False)
dynamic = (self._dynamic_coffeescript and
os.path.exists(calibre.__file__))
ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8')
self._cache[name] = ans
return ans

View File

@ -1,24 +1,24 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import traceback, os, sys, functools, collections, re
import traceback, os, sys, functools, collections, textwrap
from functools import partial
from threading import Thread
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray,
QSize, QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation,
QPainter, QBrush, QColor, QStandardItemModel, QPalette, QStandardItem,
QUrl, QRegExpValidator, QRegExp, QLineEdit, QToolButton, QMenu,
QInputDialog, QAction, QKeySequence)
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, QByteArray, QSize,
QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation, QPainter,
QBrush, QColor, pyqtSignal, QUrl, QRegExpValidator, QRegExp, QLineEdit,
QToolButton, QMenu, QInputDialog, QAction, QKeySequence, QModelIndex)
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
from calibre.gui2.viewer.printing import Printing
from calibre.gui2.viewer.bookmarkmanager import BookmarkManager
from calibre.gui2.viewer.toc import TOC
from calibre.gui2.widgets import ProgressIndicator
from calibre.gui2.main_window import MainWindow
from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
info_dialog, error_dialog, open_url, available_height
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.gui2 import (Application, ORG_NAME, APP_UID, choose_files,
info_dialog, error_dialog, open_url, available_height)
from calibre.ebooks.oeb.iterator.book import EbookIterator
from calibre.ebooks import DRMError
from calibre.constants import islinux, isbsd, isosx, filesystem_encoding
from calibre.utils.config import Config, StringConfig, JSONConfig
@ -31,31 +31,6 @@ from calibre.ptempfile import reset_base_dir
vprefs = JSONConfig('viewer')
class TOCItem(QStandardItem):
def __init__(self, toc):
text = toc.text
if text:
text = re.sub(r'\s', ' ', text)
QStandardItem.__init__(self, text if text else '')
self.abspath = toc.abspath
self.fragment = toc.fragment
for t in toc:
self.appendRow(TOCItem(t))
self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)
@classmethod
def type(cls):
return QStandardItem.UserType+10
class TOC(QStandardItemModel):
def __init__(self, toc):
QStandardItemModel.__init__(self)
for t in toc:
self.appendRow(TOCItem(t))
self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))
class Worker(Thread):
def run(self):
@ -142,31 +117,22 @@ class DoubleSpinBox(QDoubleSpinBox):
' [{0:.0%}]'.format(float(val)/self.maximum()))
self.blockSignals(False)
class HelpfulLineEdit(QLineEdit):
class Reference(QLineEdit):
HELP_TEXT = _('Go to...')
goto = pyqtSignal(object)
def __init__(self, *args):
QLineEdit.__init__(self, *args)
self.default_palette = QApplication.palette(self)
self.gray = QPalette(self.default_palette)
self.gray.setBrush(QPalette.Text, QBrush(QColor('gray')))
self.connect(self, SIGNAL('editingFinished()'),
lambda : self.emit(SIGNAL('goto(PyQt_PyObject)'), unicode(self.text())))
self.clear_to_help_mode()
self.setValidator(QRegExpValidator(QRegExp(r'\d+\.\d+'), self))
self.setToolTip(textwrap.fill('<p>'+_(
'Go to a reference. To get reference numbers, use the <i>reference '
'mode</i>, by clicking the reference mode button in the toolbar.')))
if hasattr(self, 'setPlaceholderText'):
self.setPlaceholderText(_('Go to...'))
self.editingFinished.connect(self.editing_finished)
def focusInEvent(self, ev):
self.setPalette(QApplication.palette(self))
if self.in_help_mode():
self.setText('')
return QLineEdit.focusInEvent(self, ev)
def in_help_mode(self):
return unicode(self.text()) == self.HELP_TEXT
def clear_to_help_mode(self):
self.setPalette(self.gray)
self.setText(self.HELP_TEXT)
def editing_finished(self):
self.goto.emit(unicode(self.text()))
class RecentAction(QAction):
@ -207,9 +173,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.pos.setMinimum(1.)
self.pos.setMinimumWidth(150)
self.tool_bar2.insertWidget(self.action_find_next, self.pos)
self.reference = HelpfulLineEdit()
self.reference.setValidator(QRegExpValidator(QRegExp(r'\d+\.\d+'), self.reference))
self.reference.setToolTip(_('Go to a reference. To get reference numbers, use the reference mode.'))
self.reference = Reference()
self.tool_bar2.insertSeparator(self.action_find_next)
self.tool_bar2.insertWidget(self.action_find_next, self.reference)
self.tool_bar2.insertSeparator(self.action_find_next)
@ -233,8 +197,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
if isosx:
qs += [Qt.CTRL+Qt.Key_W]
self.action_quit.setShortcuts(qs)
self.connect(self.action_quit, SIGNAL('triggered(bool)'),
lambda x:QApplication.instance().quit())
self.action_quit.triggered.connect(self.quit)
self.action_focus_search = QAction(self)
self.addAction(self.action_focus_search)
self.action_focus_search.setShortcuts([Qt.Key_Slash,
@ -247,42 +210,34 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.action_table_of_contents.setCheckable(True)
self.toc.setMinimumWidth(80)
self.action_reference_mode.setCheckable(True)
self.connect(self.action_reference_mode, SIGNAL('triggered(bool)'),
lambda x: self.view.reference_mode(x))
self.connect(self.action_metadata, SIGNAL('triggered(bool)'), lambda x:self.metadata.setVisible(x))
self.action_reference_mode.triggered[bool].connect(self.view.reference_mode)
self.action_metadata.triggered[bool].connect(self.metadata.setVisible)
self.action_table_of_contents.toggled[bool].connect(self.set_toc_visible)
self.connect(self.action_copy, SIGNAL('triggered(bool)'), self.copy)
self.action_copy.triggered[bool].connect(self.copy)
self.action_font_size_larger.triggered.connect(self.font_size_larger)
self.action_font_size_smaller.triggered.connect(self.font_size_smaller)
self.connect(self.action_open_ebook, SIGNAL('triggered(bool)'),
self.open_ebook)
self.connect(self.action_next_page, SIGNAL('triggered(bool)'),
lambda x:self.view.next_page())
self.connect(self.action_previous_page, SIGNAL('triggered(bool)'),
lambda x:self.view.previous_page())
self.connect(self.action_find_next, SIGNAL('triggered(bool)'),
lambda x:self.find(unicode(self.search.text()), repeat=True))
self.connect(self.action_find_previous, SIGNAL('triggered(bool)'),
lambda x:self.find(unicode(self.search.text()),
repeat=True, backwards=True))
self.connect(self.action_full_screen, SIGNAL('triggered(bool)'),
self.toggle_fullscreen)
self.action_open_ebook.triggered[bool].connect(self.open_ebook)
self.action_next_page.triggered.connect(self.view.next_page)
self.action_previous_page.triggered.connect(self.view.previous_page)
self.action_find_next.triggered.connect(self.find_next)
self.action_find_previous.triggered.connect(self.find_previous)
self.action_full_screen.triggered[bool].connect(self.toggle_fullscreen)
self.action_full_screen.setShortcuts([Qt.Key_F11, Qt.CTRL+Qt.SHIFT+Qt.Key_F])
self.action_full_screen.setToolTip(_('Toggle full screen (%s)') %
_(' or ').join([unicode(x.toString(x.NativeText)) for x in
self.action_full_screen.shortcuts()]))
self.connect(self.action_back, SIGNAL('triggered(bool)'), self.back)
self.connect(self.action_bookmark, SIGNAL('triggered(bool)'), self.bookmark)
self.connect(self.action_forward, SIGNAL('triggered(bool)'), self.forward)
self.connect(self.action_preferences, SIGNAL('triggered(bool)'), lambda x: self.view.config(self))
self.action_back.triggered[bool].connect(self.back)
self.action_forward.triggered[bool].connect(self.forward)
self.action_bookmark.triggered[bool].connect(self.bookmark)
self.action_preferences.triggered.connect(lambda :
self.view.config(self))
self.pos.editingFinished.connect(self.goto_page_num)
self.connect(self.vertical_scrollbar, SIGNAL('valueChanged(int)'),
lambda x: self.goto_page(x/100.))
self.vertical_scrollbar.valueChanged[int].connect(lambda
x:self.goto_page(x/100.))
self.search.search.connect(self.find)
self.search.focus_to_library.connect(lambda: self.view.setFocus(Qt.OtherFocusReason))
self.connect(self.toc, SIGNAL('clicked(QModelIndex)'), self.toc_clicked)
self.connect(self.reference, SIGNAL('goto(PyQt_PyObject)'), self.goto)
self.toc.clicked[QModelIndex].connect(self.toc_clicked)
self.reference.goto.connect(self.goto)
self.bookmarks_menu = QMenu()
self.action_bookmark.setMenu(self.bookmarks_menu)
@ -335,8 +290,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.print_menu.addAction(QIcon(I('print-preview.png')), _('Print Preview'))
self.action_print.setMenu(self.print_menu)
self.tool_bar.widgetForAction(self.action_print).setPopupMode(QToolButton.MenuButtonPopup)
self.connect(self.action_print, SIGNAL("triggered(bool)"), partial(self.print_book, preview=False))
self.connect(self.print_menu.actions()[0], SIGNAL("triggered(bool)"), partial(self.print_book, preview=True))
self.action_print.triggered.connect(self.print_book)
self.print_menu.actions()[0].triggered.connect(self.print_preview)
ca = self.view.copy_action
ca.setShortcut(QKeySequence.Copy)
self.addAction(ca)
@ -381,13 +336,22 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
m.addAction(RecentAction(path, m))
count += 1
def closeEvent(self, e):
def shutdown(self):
if self.isFullScreen():
self.action_full_screen.trigger()
e.ignore()
return
return False
self.save_state()
return True
def quit(self):
if self.shutdown():
QApplication.instance().quit()
def closeEvent(self, e):
if self.shutdown():
return MainWindow.closeEvent(self, e)
else:
e.ignore()
def toggle_toolbars(self):
for x in ('tool_bar', 'tool_bar2'):
@ -440,8 +404,11 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
c = config().parse()
return c.remember_current_page
def print_book(self, preview):
Printing(self.iterator.spine, preview)
def print_book(self):
Printing(self.iterator.spine, False)
def print_preview(self):
Printing(self.iterator.spine, True)
def toggle_fullscreen(self, x):
if self.isFullScreen():
@ -629,6 +596,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.pending_search_dir = 'backwards' if backwards else 'forwards'
self.load_path(self.iterator.spine[index])
def find_next(self):
self.find(unicode(self.search.text()), repeat=True)
def find_previous(self):
self.find(unicode(self.search.text()), repeat=True, backwards=True)
def do_search(self, text, backwards):
self.pending_search = None
self.pending_search_dir = None
@ -829,11 +802,13 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
if not title:
title = os.path.splitext(os.path.basename(pathtoebook))[0]
if self.iterator.toc:
self.toc_model = TOC(self.iterator.toc)
self.toc_model = TOC(self.iterator.spine, self.iterator.toc)
self.toc.setModel(self.toc_model)
if self.show_toc_on_open:
self.action_table_of_contents.setChecked(True)
else:
self.toc_model = TOC(self.iterator.spine)
self.toc.setModel(self.toc_model)
self.action_table_of_contents.setChecked(False)
if isbytestring(pathtoebook):
pathtoebook = force_unicode(pathtoebook, filesystem_encoding)

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from PyQt4.Qt import QStandardItem, QStandardItemModel, Qt
from calibre.ebooks.metadata.toc import TOC as MTOC
class TOCItem(QStandardItem):

    """A single entry in the Table of Contents tree, wrapping one node of a
    calibre TOC object. Child TOC nodes become child rows recursively."""

    def __init__(self, toc):
        # Collapse any run of whitespace in the label to single spaces so
        # multi-line titles display on one line in the tree view.
        label = toc.text
        if label:
            label = re.sub(r'\s', ' ', label)
        QStandardItem.__init__(self, label or '')
        # Location of the target document and the anchor within it
        self.abspath = toc.abspath
        self.fragment = toc.fragment
        # Recursively attach child entries
        for child in toc:
            self.appendRow(TOCItem(child))
        self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)

    @classmethod
    def type(cls):
        # Distinct item type id so these items can be told apart from
        # plain QStandardItems.
        return QStandardItem.UserType+10
class TOC(QStandardItemModel):

    """Qt item model exposing a book's Table of Contents to the viewer.

    ``spine`` is accepted for interface compatibility with callers;
    ``toc`` may be None, in which case an empty TOC is shown.
    """

    def __init__(self, spine, toc=None):
        QStandardItemModel.__init__(self)
        # Treat a missing TOC as an empty one so the model is always valid
        toc = MTOC() if toc is None else toc
        for entry in toc:
            self.appendRow(TOCItem(entry))
        self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More