merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-07-30 18:08:14 +08:00 · 2011-07-30 18:08:14 +08:00 · 76d48d0a98
commit 76d48d0a98
parent 94e0ca17a3 80e7b3e52e
149 changed files with 41307 additions and 32204 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,139 @@
 #  new recipes:
 #    - title: 
 - version: 0.8.12
  date: 2011-07-29
  new features:
    - title: "Content server: Return the correct last modified date when serving ebook files. Also allow getting of book metadata as /get/opf/<book_id>"
    - title: "Driver for the COBY MP977"
    - title: "Get Books: Remove epub bud store. Add Ozon.ru and e-knigni.net stores. Fix broken amazon UK and DE stores."
      tickets: [816091]
    - title: "Add a new tweak to Preferences->Tweaks that allows auto generation of series numbers when importing books with a series name, but no number"
      tickets: [815573]
  bug fixes:
    - title: "Fix a regression in 0.8.11 that broke calibre on linux systems that use a file system encoding that cannot support cyrillic characters"
      tickets: [815224]
    - title: "Fix long titles not wrapping in cover browser"
      tickets: [816595]
    - title: "When adding books, handle the case of files without read permission more gracefully."
      tickets: [814771]
    - title: "When changing metadata in EPUB files do not use the opf: namespace prefix on newly created elements. Apparently, FBReaderJ doesn't understand XML namespaces."
      tickets: [814722]
    - title: "Prevent metadata download from returning published dates earlier than 101 A.D."
    - title: "Fix a bug where dates before 101AD in the database could cause errors"
      tickets: [814964]
    - title: "Fix an error in the book details panel if the user sets the default author link to blank"
  improved recipes:
    - The Economist
    - Instapaper
    - Corren
  new recipes:
    - title: Counterpunch
      author: O. Emmerson
    - title: National Geographic (PL)
      author: Marcin Urban
    - title: Caros Amigos
      author: Pablo Aldama
    - title: Aksiyon Dergisi
      author: thomass
    - title: Dnevnik (MK) and +Info
      author: Darko Spasovski
    - title: Dagens Industri
      author: Jonas Svensson
 - version: 0.8.11
  date: 2011-07-22
  new features:
    - title: "When doing a conversion from some format to the same format, save the original file"
      description: "When calibre does a conversion from one format to the same format, for
        example, from EPUB to EPUB, the original file is saved as original_epub, so that in case the
        conversion is poor, you can change the settings and run it again. The original is automatically used
        every time you run a conversion with that format as input. If you want to disable this,
        there is a tweak that prevents calibre from saving the originals in Preferences->Tweaks. You can
        easily replace the converted version with the original in the Edit metadata dialog by right 
        clicking on the list of formats in the top right corner."
      type: major
    - title: "Conversion pipeline: Add an option to control the height of the blank lines inserted by calibre"
    - title: "Drivers for bq DaVinci, Samsung Galaxy ACE GT-S5830 and Medion e-reader"
    - title: "Get Books: Add stores Chitanka and Bookoteka. Remove epubbuy.de at store's request"
    - title: "Content server: Add a link at the bottom of the mobile interface to switch to the full interface."
      tickets: [812525]
    - title: "Update the kindle icon shown when a Kindle is connected to use a picture of the Kindle 3"
      tickets: [810852]
    - title: "MOBI Output: When converting epub documents that have a start element in their guide, use it to mark the starting position at which the MOBI file will be opened."
      tickets: [804755]
    - title: "News download: Add a default Accept header to all requests"
  bug fixes:
    - title: "Fix regression that broke loading translations from .po files in the working directory"
    - title: "Fix conversion dialog not allowing series numbers larger than 9999"
      tickets: [813281]
    - title: "Conversion pipeline: When adding/removing entries to the manifest, ignore unparseable URLs instead of erroring out on them"
    - title: "SD Card in Azbooka not being detected"
      tickets: [812750]
    - title: "Conversion pipeline: Strip out large blocks of contiguous space (more than 10000 contiguous blanks) as these slow down the conversion process and are almost always indicative of an error in the input document."
    - title: "ebook-convert: Abort if a keyboard interrupt is raised during parsing"
    - title: "Regex builder: Show a nicer error message when the user has the file open in another program on windows."
      tickets: [811641]
    - title: "When converting in the GUI, set all identifiers present in the book's metadata in the output file, if the output format supports them."
  improved recipes:
    - NBOnline
    - JBPress
    - Instapaper
    - Die Zeit
    - Wired (UK)
  new recipes:
    - title: Utrinski Vesnik
      author: Darko Spasovski
    - title: IDG.se
      author: zapt0
    - title: Los Andes
      author: Darko Miletic
    - title: De Luns a Venres
      author: Susana Sotelo Docío
    - title: "Nikkei News subscription version"
      author: Ado Nishimura
 - version: 0.8.10
  date: 2011-07-15
@ -669,7 +802,7 @@
 - version: 0.8.0
-  date: 2010-05-06
+  date: 2011-05-06
  new features:
    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
--- a/recipes/aksiyon_derigisi.recipe
+++ b/recipes/aksiyon_derigisi.recipe
@ -0,0 +1,53 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 class Aksiyon (BasicNewsRecipe):
    # Recipe for Aksiyon, assembled from the per-section RSS feeds listed in
    # `feeds`. Article URLs are rewritten by print_version() below.
    #
    # --- Catalogue metadata ---
    title = u'Aksiyon Dergisi'
    __author__ = u'thomass'
    description = 'Haftalık haber dergisi '
    publisher = 'Aksiyon'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'utf-8'
    # --- Feed limits ---
    oldest_article = 13
    max_articles_per_feed = 100
    remove_empty_feeds = True
    # --- Page clean-up ---
    no_stylesheets = True
    remove_attributes = ['width', 'height']
    remove_tags = [
        dict(name='img', attrs={'src': [
            'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp',
            '/aksiyon/images/template/green/baslik0.gif',
            'mobile/home.jpg',
        ]}),
    ]
    # --- Artwork ---
    # NOTE(review): this attribute is named cover_img_url (not cover_url) and
    # nothing in this class reads it - confirm which name was intended.
    cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    # --- Settings left disabled by the recipe author ---
    #delay                  = 1
    #use_embedded_content   = False
    #extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags    = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
    # --- Feeds: (section title, RSS URL), in output order ---
    feeds = [
        (u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
        (u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
        (u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
        (u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
        (u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
        (u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
        (u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
        (u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
        (u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
        (u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
        (u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
        (u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
        (u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
        (u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
        (u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
        (u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
        # NOTE(review): same sectionId (283) as the HAYAT BİLGİSİ feed on the
        # previous line - confirm the id intended for this section.
        (u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
    ]
    def print_version(self, url):
        # Swap the newsDetail_getNewsById.action prefix of an article URL for
        # the mobile_detailn.action prefix; other URLs are returned unchanged.
        article_prefix = 'http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&'
        print_prefix = 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?'
        return url.replace(article_prefix, print_prefix)
--- a/recipes/caros_amigos.recipe
+++ b/recipes/caros_amigos.recipe
@ -0,0 +1,17 @@
 __copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    # Single-feed recipe for Caros Amigos (language code pt_BR).
    __author__ = 'Pablo Aldama'
    title = u'Caros Amigos'
    language = 'pt_BR'
    # Feed limits.
    oldest_article = 20
    max_articles_per_feed = 100
    # (feed title, RSS URL)
    feeds = [
        (u'Caros Amigos', u'http://carosamigos.terra.com.br/index/index.php?format=feed&type=rss'),
    ]
    # Keep only the divs with class 'blog' / 'blogcontent' ...
    keep_only_tags = [
        dict(name='div', attrs={'class': ['blog']}),
        dict(name='div', attrs={'class': ['blogcontent']}),
    ]
    # ... and drop the div with class 'addtoany'.
    remove_tags = [
        dict(name='div', attrs={'class': 'addtoany'}),
    ]
--- a/recipes/corren2.recipe
+++ b/recipes/corren2.recipe
@ -1,39 +1,34 @@
 # -*- coding: utf-8 -*-
 __license__	= 'GPLv3'
 from calibre.web.feeds.news import BasicNewsRecipe
-class AdvancedUserRecipe1255797795(BasicNewsRecipe):
+class AdvancedUserRecipe1311446032(BasicNewsRecipe):
-    title          = u'Corren'
+    title                 = 'Corren'
    language = 'sv'
    __author__            = 'Jonas Svensson'
-    simultaneous_downloads = 1
+    description           = 'News from Sweden'
-    no_stylesheets = True
+    publisher             = 'Corren'
-    oldest_article = 7
+    category              = 'news, politics, Sweden'
    oldest_article        = 2
    delay                 = 1
    max_articles_per_feed = 100
-    remove_attributes = ['onload']
+    no_stylesheets        = True
-    timefmt = ''
+    use_embedded_content  = False
    encoding              = 'iso-8859-1'
    language              = 'sv'
    feeds = [
-                   (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'),
+              (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/')
-                   (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'),
+              ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/')
-                   (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'),
+              ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234')
-                   (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'),
+              ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230')
                   (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'),
                   (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
                   (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
                   (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
                   (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
                   (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/')
            ]
-    def print_version(self, url):
+    keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})]
-        url = url.replace("ekonomi/artikel.aspx", "Print.aspx")
+    remove_tags = [
-        url = url.replace("bostad/artikel.aspx", "Print.aspx")
+                     dict(name='ul',attrs={'class':'functions'})
-        url = url.replace("kultur/artikel.aspx", "Print.aspx")
+                     ,dict(name='a',attrs={'href':'javascript*'})
-        url = url.replace("motor/artikel.aspx", "Print.aspx")
+                     ,dict(name='div',attrs={'class':'box'})
-        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
+                     ,dict(name='div',attrs={'class':'functionsbottom'})
-        url = url.replace("sport/artikel.aspx", "Print.aspx")
+                  ]
        url = url.replace("asikter/artikel.aspx", "Print.aspx")
        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx")
        return url.replace("nyheter/artikel.aspx", "Print.aspx")
--- a/recipes/counterpunch.recipe
+++ b/recipes/counterpunch.recipe
@ -0,0 +1,40 @@
 import re
 from lxml.html import parse
 from calibre.web.feeds.news import BasicNewsRecipe
 class Counterpunch(BasicNewsRecipe):
    '''
    Parses counterpunch.com for articles.
    The front page is fetched and scraped directly; every matching
    paragraph becomes one article in a single 'Counterpunch' feed.
    '''
    title = 'Counterpunch'
    description = 'Daily political opinion from www.Counterpunch.com'
    language = 'en'
    __author__ = 'O. Emmerson'
    keep_only_tags = [dict(name='td', attrs={'width': '522'})]
    max_articles_per_feed = 10
    def parse_index(self):
        '''
        Build the feed list: one feed named 'Counterpunch' holding whatever
        parse_page() finds on the front page, or an empty list if it finds
        nothing.
        '''
        feeds = []
        title, url = 'Counterpunch', 'http://www.counterpunch.com'
        articles = self.parse_page(url)
        if articles:
            feeds.append((title, articles))
        return feeds
    def parse_page(self, url):
        '''
        Fetch `url` with lxml and return a list of
        {'title': ..., 'url': ...} dicts, one per article paragraph.
        Paragraphs whose text matches the boilerplate pattern are ignored;
        paragraphs that cannot be turned into an article are logged and
        skipped instead of being dropped silently.
        '''
        parsed_page = parse(url).getroot()
        # Raw string: the pattern is unchanged, but the backslashes are no
        # longer (invalid) Python string escapes.
        unwanted_text = re.compile(r'Website\ of\ the|I\ urge\ you|Subscribe\ now|DONATE|\@asis\.com|donation\ button|click\ over\ to\ our')
        articles = []
        for art in parsed_page.cssselect("html>body>table tr>td>p[class='style2']"):
            if unwanted_text.search(art.text_content()):
                continue
            try:
                # Look the anchor up once; it supplies both title and link.
                link = art.cssselect("a")[0]
                title = link.text
                # The paragraph's leading text is used as the author name;
                # it may be absent, in which case no " by ..." is appended.
                author = (art.text or '').strip()
                if author:
                    # Unicode template: a byte-string template raises
                    # UnicodeEncodeError on non-ASCII authors under Python 2.
                    title = title + u' by {0}'.format(author)
                art_url = 'http://www.counterpunch.com/' + link.attrib['href']
            except Exception as e:
                # Best effort: one malformed entry must not abort the whole
                # download, but leave a trace of what was skipped.
                self.log('Counterpunch: skipping unparseable entry: %r' % (e,))
                continue
            articles.append({'title': title, 'url': art_url})
        return articles
--- a/recipes/dagens_industri.recipe
+++ b/recipes/dagens_industri.recipe
@ -0,0 +1,32 @@
 # -*- coding: utf-8 -*-
 __license__	= 'GPLv3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1311450855(BasicNewsRecipe):
    # Single-feed recipe for Dagens Industri, fed from http://di.se/rss.
    #
    # --- Catalogue metadata ---
    title = u'Dagens Industri'
    __author__ = 'Jonas Svensson'
    description = 'Economy news from Sweden'
    publisher = 'DI'
    category = 'news, politics, Sweden'
    language = 'sv'
    encoding = 'utf-8'
    # --- Download settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    use_embedded_content = False
    no_stylesheets = True
    # (feed title, RSS URL)
    feeds = [
        (u'DI', u'http://di.se/rss'),
    ]
    # --- Page clean-up ---
    # Keep the headline <h1> (matched by its generated id) and the article body div.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}),
        dict(name='div', attrs={'id': 'articleBody'}),
    ]
    # Elements removed from the page, matched by class or id.
    remove_tags = [
        dict(name='div', attrs={'class': 'article-actions clear'}),
        dict(name='div', attrs={'class': 'article-action-popup'}),
        dict(name='div', attrs={'class': 'header'}),
        dict(name='div', attrs={'class': 'content clear'}),
        dict(name='div', attrs={'id': 'articleAdvertisementDiv'}),
        dict(name='ul', attrs={'class': 'action-list'}),
    ]
--- a/recipes/dnevnik_mk.recipe
+++ b/recipes/dnevnik_mk.recipe
@ -0,0 +1,98 @@
 #!/usr/bin/env  python
 __author__    = 'Darko Spasovski'
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
 '''
 dnevnik.com.mk
 '''
 import re
 import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre import browser
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Dnevnik(BasicNewsRecipe):
    # Recipe for dnevnik.com.mk. Sections and articles are scraped from the
    # site's archive page for today's date (see parse_index), and the cover
    # is looked up from the same page (see get_cover_url).
    INDEX                 = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title                 = 'Dnevnik - mk'
    description           = 'Daily Macedonian newspaper'
    masthead_url          = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language              = 'mk'
    publication_type      = 'newspaper'
    category              = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
    [
        ## Remove anything before the start of the article.
        (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),
        ## Remove anything after the end of the article.
        (r'<!--Article end.*?</body>', lambda match : '</body>'),
        ]
    ]
    extra_css = """
                    body{font-family: Arial,Helvetica,sans-serif}
                    .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
                """
    conversion_options = {
                          'comment'  : description,
                          'tags'     : category,
                          'language' : language,
                          'linearize_tables' : True
                        }
    # Lower-cased section title -> position of that section in the printed
    # edition. Built once here instead of on every get_weight() call.
    _NATURAL_ORDER = { u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
                       u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
                       u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
                       u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13 }
    def _archive_url(self):
        """Return the URL of the archive page for today's date (dd.mm.yyyy)."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        return self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum
    def parse_index(self):
        """
        Scrape today's archive page and return a list of
        (section title, [article dict, ...]) tuples sorted by get_weight().
        Sections whose title starts with 'online', sections without a
        readable title or container table, and sections without articles
        are left out.
        """
        soup = self.index_to_soup(self._archive_url())
        feeds = []
        for section in soup.findAll('td', attrs={'class':'WB_DNEVNIK_ArhivaFormTitle'}):
            firstChild = section.contents[0] if section.contents else None
            sectionTitle = getattr(firstChild, 'string', None)
            if not sectionTitle:
                # An empty/untitled cell would otherwise abort the whole index.
                self.log('Skipping archive section without a readable title.')
                continue
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # The article links live in the table that follows the table
            # preceding the section title cell.
            previousTable = section.findPrevious(name='table')
            containerTable = None
            if previousTable is not None:
                containerTable = previousTable.findNextSibling(name='table')
            if containerTable is None:
                self.log('No container table found - page layout may have been changed.')
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({'title': title, 'url':'http://www.dnevnik.com.mk/' + article['href'], 'description':'', 'date':''})
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=self.get_weight)
    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.
        `section` is a (title, articles) tuple as built by parse_index().
        """
        # section names not on the list go to the bottom
        return self._NATURAL_ORDER.get(section[0].string.lower(), 999)
    def get_cover_url(self):
        """
        Follow the 'WB_DNEVNIK_MoreLink' anchor on today's archive page and
        return the absolute URL of the first image after the
        'WB_DNEVNIK_Datum2' div, or '' when any of those elements is missing.
        """
        soup = self.index_to_soup(self._archive_url())
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is None:
            return ''
        raw = browser().open_novisit(self.INDEX + '/' + anchor['href']).read()
        cover_soup = BeautifulSoup(raw)
        dateDiv = cover_soup.find('div', attrs={'class':'WB_DNEVNIK_Datum2'})
        image = dateDiv.findNext('img') if dateDiv is not None else None
        if image is None:
            # Same outcome as a missing anchor: report no cover instead of
            # raising on None.
            self.log('No cover image found - page layout may have been changed.')
            return ''
        return self.INDEX + '/' + image['src']
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@ -6,10 +6,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 economist.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 from collections import OrderedDict
-import string, time, re
+import time, re
 class Economist(BasicNewsRecipe):
@ -22,10 +22,12 @@ class Economist(BasicNewsRecipe):
            ' perspective. Best downloaded on Friday mornings (GMT)')
    extra_css      = '.headline {font-size: x-large;} \n h2 { font-size: small;  } \n h1 { font-size: medium;  }'
    oldest_article = 7.0
-    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
+    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [
            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
-            dict(attrs={'class':['dblClkTrk', 'ec-article-info', 'share_inline_header']}),
+            dict(attrs={'class':['dblClkTrk', 'ec-article-info',
                'share_inline_header', 'related-items']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
@ -67,52 +69,54 @@ class Economist(BasicNewsRecipe):
            return self.economist_parse_index()
    def economist_parse_index(self):
-        soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
+        soup = self.index_to_soup(self.INDEX)
-                             convertEntities=BeautifulSoup.HTML_ENTITIES)
+        feeds = OrderedDict()
-        index_started = False
+        for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
-        feeds = {}
+            x}):
-        ans = []
+            h4 = section.find('h4')
-        key = None
+            if h4 is None:
        for tag in soup.findAll(['h1', 'h2']):
            text = ''.join(tag.findAll(text=True))
            if tag.name in ('h1', 'h2') and 'Classified ads' in text:
                break
            if tag.name == 'h1':
                if 'The world this week' in text or 'The world this year' in text:
                    index_started = True
                if not index_started:
                continue
-                text = string.capwords(text)
+            section_title = self.tag_to_string(h4).strip()
-                if text not in feeds.keys():
+            if not section_title:
                    feeds[text] = []
                if text not in ans:
                    ans.append(text)
                key = text
                continue
-            if key is None:
+            self.log('Found section: %s'%section_title)
            articles = []
            for h5 in section.findAll('h5'):
                article_title = self.tag_to_string(h5).strip()
                if not article_title:
                    continue
-            a = tag.find('a', href=True)
+                data = h5.findNextSibling(attrs={'class':'article'})
                if data is None: continue
                a = data.find('a', href=True)
                if a is None: continue
                url = a['href']
                if url.startswith('/'): url = 'http://www.economist.com'+url
                url += '/print'
                article_title += ': %s'%self.tag_to_string(a).strip()
                articles.append({'title':article_title, 'url':url,
                    'description':'', 'date':''})
            if not articles:
                # We have last or first section
                for art in section.findAll(attrs={'class':'article'}):
                    a = art.find('a', href=True)
                    if a is not None:
                        url = a['href']
-                id_ = re.search(r'story_id=(\d+)', url).group(1)
+                        if url.startswith('/'): url = 'http://www.economist.com'+url
-                url = 'http://www.economist.com/node/%s/print'%id_
+                        url += '/print'
-                if url.startswith('Printer'):
+                        title = self.tag_to_string(a)
-                    url = '/'+url
+                        if title:
-                if url.startswith('/'):
+                            articles.append({'title':title, 'url':url,
-                    url = 'http://www.economist.com' + url
+                            'description':'', 'date':''})
                try:
                   subtitle = tag.previousSibling.contents[0].contents[0]
                   text = subtitle + ': ' + text
                except:
                   pass
                article = dict(title=text,
                    url = url,
                    description='', content='', date='')
                feeds[key].append(article)
-        ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)]
+            if articles:
                feeds[section_title] = articles
        ans = [(key, val) for key, val in feeds.iteritems()]
        if not ans:
-            raise Exception('Could not find any articles. Has your subscription expired?')
+            raise Exception('Could not find any articles, either the '
                    'economist.com server is having trouble and you should '
                    'try later or the website format has changed and the '
                    'recipe needs to be updated.')
        return ans
    def eco_find_image_tables(self, soup):
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@ -16,11 +16,12 @@ class Economist(BasicNewsRecipe):
            ' Much slower than the print edition based version.')
    extra_css      = '.headline {font-size: x-large;} \n h2 { font-size: small;  } \n h1 { font-size: medium;  }'
    oldest_article = 7.0
-    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
+    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [
            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
            dict(attrs={'class':['dblClkTrk', 'ec-article-info',
-                'share_inline_header']}),
+                'share_inline_header', 'related-items']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
--- a/recipes/el_colombiano.recipe
+++ b/recipes/el_colombiano.recipe
@ -0,0 +1,58 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    # Recipe for El Colombiano (elcolombiano.com), built from the site's
    # per-section RSS feeds listed in `feeds`.
    title          = u'Periódico El Colombiano'
    language = 'es_CO'
    __author__  = 'BIGO-CAVA'
    cover_url     = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    remove_tags_before = dict(id='contenidoArt')
    # The original assigned remove_tags_after twice in a row; the first value
    # (dict(id='enviaTips')) was overwritten immediately and never took
    # effect, so only the effective assignment is kept.
    remove_tags_after  = dict(id='zonaPata')
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets        = True
    use_embedded_content  = False
    remove_empty_feeds    = True
    masthead_url          = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    publication_type      = 'newspaper'
    extra_css             = """
                               p{text-align: justify; font-size: 100%}
                               body{ text-align: left; font-size:100% }
                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                                 """
    # (section title, RSS URL), in output order.
    feeds          = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
                      (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),
                      (u'Colombia', u'http://www.elcolombiano.com/rss/Colombia.xml'),
                      (u'Economia', u'http://www.elcolombiano.com/rss/Economia.xml'),
                      (u'Internacional', u'http://www.elcolombiano.com/rss/Internacional.xml'),
                      (u'Politica', u'http://www.elcolombiano.com/rss/Politica.xml'),
                      (u'Cultura', u'http://www.elcolombiano.com/rss/Cultura.xml'),
                      (u'Entretenimiento', u'http://www.elcolombiano.com/rss/Farandula.xml'),
                      (u'Tecnologia', u'http://www.elcolombiano.com/rss/Tecnologia.xml'),
                      (u'Television', u'http://www.elcolombiano.com/rss/Television.xml'),
                      (u'Vida y Sociedad', u'http://www.elcolombiano.com/rss/Vida.xml'),
                      # Was '.../Turismo.xm'; every other feed URL here ends in '.xml'.
                      (u'Turismo', u'http://www.elcolombiano.com/rss/Turismo.xml'),
                      (u'Salud', u'http://www.elcolombiano.com/rss/Salud.xml'),
                      (u'Ciencia', u'http://www.elcolombiano.com/rss/Ciencia.xml')]
    # Divs removed from the page, matched by class or id. The original list
    # named the 'comentarios' class twice; the repeat is dropped.
    remove_tags = [dict(name='div', attrs={'class':'objetosRelacionados'}),
                   dict(name='div', attrs={'class':'notasRelacionadas contenedor'}),
                   dict(name='div', attrs={'class':'comentarios'}),
                   dict(name='div', attrs={'class':'mapaDelSitio'}),
                   dict(name='div', attrs={'class':'creditos'}),
                   dict(name='div', attrs={'class':'votos'}),
                   dict(name='div', attrs={'class':'divopt2'}),
                   dict(name='div', attrs={'class':'pestanasLateral'}),
                   dict(name='div', attrs={'class':'resumenSeccion'}),
                   dict(name='div', attrs={'class':'zonaComercial'}),
                   dict(name='div', attrs={'id':'zonaPata'})]
--- a/recipes/el_tiempo.recipe
+++ b/recipes/el_tiempo.recipe
@ -0,0 +1,53 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 class ColombiaElTiempo02(BasicNewsRecipe):
    # Recipe for the newspaper El Tiempo (eltiempo.com), language es_CO.
    # --- identification ---
    __author__            = 'BIGO-CAVA'
    title                 = u'Periódico el Tiempo'
    language              = 'es_CO'
    publication_type      = 'newspaper'
    # The same logo image is used for both the cover and the masthead.
    cover_url             = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url          = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    # --- download settings ---
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    remove_empty_feeds    = True
    remove_javascript     = True
    no_stylesheets        = True
    # --- page clean-up selectors ---
    keep_only_tags     = [dict(name='div', id='contenidoArt')]
    remove_tags_before = dict(id='fb-root')
    remove_tags_after  = [dict(name='div', attrs={'class':'modulo reporte'})]
    remove_tags        = [
        dict(name='div', attrs={'class':'social-media'}),
        dict(name='div', attrs={'class':'caja-facebook'}),
        dict(name='div', attrs={'class':'caja-twitter'}),
        dict(name='div', attrs={'class':'caja-buzz'}),
        dict(name='div', attrs={'class':'ico-mail2'}),
        dict(name='div', attrs={'id':'caja-instapaper'}),
        dict(name='div', attrs={'class':'modulo herramientas'}),
    ]
    # Stylesheet text supplied through extra_css.
    extra_css             = """
                               p{text-align: justify; font-size: 100%}
                               body{ text-align: left; font-size:100% }
                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                                 """
    # (section title, RSS URL) pairs, in the order they were originally listed.
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -12,7 +12,7 @@ from datetime import date
 class Guardian(BasicNewsRecipe):
-    title = u'The Guardian / The Observer'
+    title = u'The Guardian and The Observer'
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
    else:
--- a/recipes/icons/national_geographic_pl.png
+++ b/recipes/icons/national_geographic_pl.png
--- a/recipes/idg_se.recipe
+++ b/recipes/idg_se.recipe
@ -0,0 +1,33 @@
 __license__ = 'GPLv3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class IDGse(BasicNewsRecipe):
    # Recipe for IDG.se: reads the "Senaste nytt" feed and downloads the
    # print rendering of each linked article.
    title               = 'IDG'
    description = 'IDG.se'
    # ISO 639-1 code for Swedish is 'sv'; 'se' denotes Northern Sami.
    language = 'sv'
    __author__ = 'zapt0'
    oldest_article = 1
    max_articles_per_feed = 40
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    remove_javascript = True
    feeds          = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]
    def print_version(self,url):
        # Append the query string that selects the print rendering of the article.
        return url + '?articleRenderMode=print&m=print'
    def get_cover_url(self):
        # Returns a fixed logo image URL to be used as the cover.
        return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'
    # Keep the <h1> headline and the div carrying class 'divColumn1Article'.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class':['divColumn1Article']}),
    ]
    #remove ads
    remove_tags = [
        dict(name='div', attrs={'id':['preamble_ad']}),
        dict(name='ul', attrs={'class':['share']})
    ]
--- a/recipes/instapaper.recipe
+++ b/recipes/instapaper.recipe
@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
-            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class':'cornerControls'}):
@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    def populate_article_metadata(self, article, soup, first):
        article.title  = soup.find('title').contents[0].strip()
    def postprocess_html(self, soup, first_fetch):
        for link_tag in soup.findAll(attrs={"id" : "story"}):
            link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
        return soup
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@ -18,6 +18,7 @@ class IrishTimes(BasicNewsRecipe):
    oldest_article = 1.0
    max_articles_per_feed  = 100
    no_stylesheets = True
    simultaneous_downloads= 5
    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
@ -25,17 +26,17 @@ class IrishTimes(BasicNewsRecipe):
    feeds          = [
                      ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
-                      ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
+                      ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
-                      ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
+                      ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
-                      ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
+                      ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
-                      ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
+                      ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
-                      ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
+                      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
-                      ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
+                      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
-                      ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
+                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
                      ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
-                      ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
+                      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
-                      ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
+                      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
-                      ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
+                      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
                      ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
                      ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
                      ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@ -49,10 +50,16 @@ class IrishTimes(BasicNewsRecipe):
    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
-            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
            u = url.find('irishtimes')
            u = 'http://www.irishtimes.com' + url[u + 12:]
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace('0Bhtml/story01.htm', '_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u
    def get_article_url(self, article):
        return article.link
--- a/recipes/national_geographic_pl.recipe
+++ b/recipes/national_geographic_pl.recipe
@ -0,0 +1,52 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = 'Marcin Urban 2011'
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class recipeMagic(BasicNewsRecipe):
    # Recipe for National Geographic PL, built from the RSS feed of
    # national-geographic.pl; articles are fetched through print_version().
    title                  = 'National Geographic PL'
    __author__             = 'Marcin Urban 2011'
    description            = 'legenda wśród magazynów z historią sięgającą 120 lat'
    cover_url              = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    oldest_article         = 7
    max_articles_per_feed  = 100
    no_stylesheets         = True
    #delay                 = 1
    use_embedded_content   = False
    encoding               = 'utf8'
    publisher              = 'G+J Gruner+Jahr Polska'
    category               = 'news, PL,'
    language               = 'pl'
    publication_type       = 'newsportal'
    extra_css              = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
 	                    h1{text-align: center;}
        	 	           h2{font-size: medium; font-weight: bold;}
        	   	         .authordate {font-size: small; color: #696969;}
        		            p.lead {font-weight: bold; text-align: center;}
        		            .fot{font-size: x-small; color: #666666;} '''
    # Drop HTML comments from the downloaded markup before parsing.
    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Metadata values reuse the class attributes defined above.
    conversion_options = {
        'comments'        : description,
        'tags'            : category,
        'language'        : language,
        'publisher'       : publisher,
        'linearize_tables': True,
    }
    remove_tags = [
        dict(name='div', attrs={'class':'add_inf'}),
        dict(name='div', attrs={'class':'add_f'}),
    ]
    remove_attributes = ['width','height']
    feeds = [('National Geographic PL', 'http://www.national-geographic.pl/rss/')]
    def print_version(self, url):
        # Substitute the 'artykuly0Cpokaz' part of the URL with 'drukuj-artykul'.
        printable_url = url.replace('artykuly0Cpokaz', 'drukuj-artykul')
        return printable_url
--- a/recipes/plus_info.recipe
+++ b/recipes/plus_info.recipe
@ -0,0 +1,47 @@
 #!/usr/bin/env python
 __author__    = 'Darko Spasovski'
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
 '''
 www.plusinfo.mk
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class PlusInfo(BasicNewsRecipe):
    # Recipe for the Macedonian news portal +info (www.plusinfo.mk).
    # --- identification ---
    __author__ = 'Darko Spasovski'
    title                 = u'+info'
    description           = 'Macedonian news portal'
    category              = 'news, Macedonia'
    language              = 'mk'
    publication_type      = 'newsportal'
    INDEX                 = 'www.plusinfo.mk'
    masthead_url          = 'http://www.plusinfo.mk/style/images/logo.jpg'
    # --- download settings ---
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    remove_empty_feeds    = True
    remove_javascript     = True
    no_stylesheets        = True
    # --- page clean-up: keep div.vest, drop the two listed div classes ---
    keep_only_tags = [dict(name='div', attrs={'class': 'vest'})]
    remove_tags = [dict(name='div', attrs={'class':['komentari_holder', 'objava']})]
    # (section title, RSS URL) pairs, in their original order.
    feeds = [
        (u'Македонија', u'http://www.plusinfo.mk/rss/makedonija'),
        (u'Бизнис', u'http://www.plusinfo.mk/rss/biznis'),
        (u'Скопје', u'http://www.plusinfo.mk/rss/skopje'),
        (u'Култура', u'http://www.plusinfo.mk/rss/kultura'),
        (u'Свет', u'http://www.plusinfo.mk/rss/svet'),
        (u'Сцена', u'http://www.plusinfo.mk/rss/scena'),
        (u'Здравје', u'http://www.plusinfo.mk/rss/zdravje'),
        (u'Магазин', u'http://www.plusinfo.mk/rss/magazin'),
        (u'Спорт', u'http://www.plusinfo.mk/rss/sport'),
    ]
    # uncomment the following block if you want the print version (note: it lacks photos)
 #    def print_version(self,url):
 #        segments = url.split('/')
 #        printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5:])
 #        return printURL
--- a/recipes/portafolio.recipe
+++ b/recipes/portafolio.recipe
@ -0,0 +1,36 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1311799898(BasicNewsRecipe):
    # Recipe for the newspaper Portafolio (portafolio.co), language es_CO.
    # --- identification ---
    __author__            = 'BIGO-CAVA'
    title                 = u'Periódico Portafolio Colombia'
    language              = 'es_CO'
    publication_type      = 'newspaper'
    # The same logo image is used for both the cover and the masthead.
    cover_url             = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    masthead_url          = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    # --- download settings ---
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    remove_empty_feeds    = True
    remove_javascript     = True
    no_stylesheets        = True
    # --- page clean-up selectors ---
    keep_only_tags     = [dict(name='div', id='contenidoArt')]
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after  = [dict(name='div', attrs={'class':'articulo-mas'})]
    # Stylesheet text supplied through extra_css.
    extra_css             = """
                               p{text-align: justify; font-size: 100%}
                               body{ text-align: left; font-size:100% }
                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                                 """
    # (section title, feed URL) pairs, in their original order.
    feeds = [
        (u'Negocios', u'http://www.portafolio.co/negocios/feed'),
        (u'Economia', u'http://www.portafolio.co/economia/feed'),
        (u'Internacional', u'http://www.portafolio.co/internacional/feed'),
        (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'),
        (u'Opinion', u'http://www.portafolio.co/opinion/feed'),
        (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'),
        (u'Herramientas', u'http://www.portafolio.co/herramientas/feed'),
    ]
--- a/recipes/united_daily.recipe
+++ b/recipes/united_daily.recipe
@ -64,7 +64,7 @@ class UnitedDaily(BasicNewsRecipe):
    __author__ = 'Eddie Lau'
    __version__ = '1.1'
-    language = 'zh-TW'
+    language = 'zh_TW'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
--- a/recipes/utrinski.recipe
+++ b/recipes/utrinski.recipe
@ -0,0 +1,71 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
 '''
 utrinski.com.mk
 '''
 import re
 import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
 class UtrinskiVesnik(BasicNewsRecipe):
    # Recipe for the Macedonian daily Utrinski Vesnik. The table of contents
    # is scraped from the front page (INDEX) in parse_index() instead of
    # being read from RSS feeds.
    __author__            = 'Darko Spasovski'
    INDEX                 = 'http://www.utrinski.com.mk/'
    title                 = 'Utrinski Vesnik'
    description           = 'Daily Macedonian newspaper'
    masthead_url          = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
    language              = 'mk'
    remove_javascript     = True
    publication_type      = 'newspaper'
    category              = 'news, Macedonia'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    # Each (pattern, replacement) pair is compiled case-insensitively with DOTALL.
    preprocess_regexps    = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
    [
        ## Remove anything before the start of the article.
        (r'<body.*?Article start-->', lambda match: '<body>'),
        ## Remove anything after the end of the article.
        (r'<!--Article end.*?</body>', lambda match : '</body>'),
        ]
    ]
    extra_css             = """
                                body{font-family: Arial,Helvetica,sans-serif}
                                .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
                            """
    # The conversion option holding the description is named 'comments'
    # (plural); a 'comment' key would not match it.
    conversion_options = {
                          'comments' : description,
                          'tags'     : category,
                          'language' : language,
                          'linearize_tables' : True
                        }
    def parse_index(self):
        # Build the list of (section title, [article dict, ...]) tuples from
        # the front page. A section is introduced by an <a> with class
        # WB_UTRINSKIVESNIK_TOCTitleBig; its articles are taken from the
        # siblings that follow the second <table> preceding that link.
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_TOCTitleBig'}):
            sectionTitle = section.contents[0].string
            # findAllPrevious() returns a list; make sure index 1 exists
            # rather than letting an IndexError abort the whole download.
            previousTables = section.findAllPrevious('table')
            if len(previousTables) < 2: continue
            tocItemTable = previousTables[1]
            articles = []
            while True:
                tocItemTable = tocItemTable.nextSibling
                if tocItemTable is None: break
                article = tocItemTable.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_TocItem'})
                # The first sibling without a TOC item link ends the section.
                if len(article)==0: break
                title = self.tag_to_string(article[0], use_alt=True).strip()
                articles.append({'title': title, 'url': self.INDEX + article[0]['href'], 'description':'', 'date':''})
            if articles:
                feeds.append((sectionTitle, articles))
        return feeds
    def get_cover_url(self):
        # The cover URL is derived from today's date, formatted as dd_mm_YYYY.
        datum = datetime.datetime.today().strftime('%d_%m_%Y')
        return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -11,7 +11,7 @@ defaults.
 '''
 #: Auto increment series index
-# The algorithm used to assign a new book in an existing series a series number.
+# The algorithm used to assign a book added to an existing series a series number.
 # New series numbers assigned using this tweak are always integer values, except
 # if a constant non-integer is specified.
 # Possible values are:
@ -27,7 +27,19 @@ defaults.
 # series_index_auto_increment = 'next'
 # series_index_auto_increment = 'next_free'
 # series_index_auto_increment = 16.5
 #
 # Set the use_series_auto_increment_tweak_when_importing tweak to True to
 # use the above values when importing/adding books. If this tweak is set to
 # False (the default) then the series number will be set to 1 if it is not
 # explicitly set during the import. If set to True, then the
 # series index will be set according to the series_index_auto_increment setting.
 # Note that the use_series_auto_increment_tweak_when_importing tweak is used
 # only when a value is not provided during import. If the importing regular
 # expression produces a value for series_index, or if you are reading metadata
 # from books and the import plugin produces a value, then that value will
 # be used irrespective of the setting of the tweak.
 series_index_auto_increment = 'next'
 use_series_auto_increment_tweak_when_importing = False
 #: Add separator after completing an author name
 # Should the completion separator be append
--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@ -373,7 +373,7 @@ class Win32Freeze(Command, WixMixIn):
        src = self.j(self.src_root, 'setup', 'installer', 'windows',
                'portable.c')
        obj = self.j(self.obj_dir, self.b(src)+'.obj')
-        cflags  = '/c /EHsc /MT /W3 /Ox /nologo /D_UNICODE'.split()
+        cflags  = '/c /EHsc /MT /W3 /Ox /nologo /D_UNICODE /DUNICODE'.split()
        if self.newer(obj, [src]):
            self.info('Compiling', obj)
@ -386,6 +386,7 @@ class Win32Freeze(Command, WixMixIn):
            cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
                    '/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
                    '/RELEASE',
                    '/ENTRY:wWinMainCRTStartup',
                    '/OUT:'+exe, self.embed_resources(exe),
                    obj, 'User32.lib']
            self.run_builder(cmd)
--- a/setup/installer/windows/portable.c
+++ b/setup/installer/windows/portable.c
@ -2,15 +2,21 @@
 #define UNICODE
 #endif 
 #ifndef _UNICODE
 #define _UNICODE
 #endif 
 #include <windows.h>
 #include <tchar.h>
 #include <wchar.h>
 #include <stdio.h>
 #define BUFSIZE 4096
 void show_error(LPCTSTR msg) {
    MessageBeep(MB_ICONERROR);
-    MessageBox(NULL, msg, TEXT("Error"), MB_OK|MB_ICONERROR);
+    MessageBox(NULL, msg, _T("Error"), MB_OK|MB_ICONERROR);
 }
 void show_detailed_error(LPCTSTR preamble, LPCTSTR msg, int code) {
@ -20,7 +26,7 @@ void show_detailed_error(LPCTSTR preamble, LPCTSTR msg, int code) {
    _sntprintf_s(buf, 
        LocalSize(buf) / sizeof(TCHAR), _TRUNCATE,
-        TEXT("%s\r\n  %s (Error Code: %d)\r\n"), 
+        _T("%s\r\n  %s (Error Code: %d)\r\n"), 
        preamble, msg, code);
    show_error(buf);
@ -32,7 +38,7 @@ void show_last_error_crt(LPCTSTR preamble) {
    int err = 0;
    _get_errno(&err);
-    _wcserror_s(buf, BUFSIZE, err);
+    _tcserror_s(buf, BUFSIZE, err);
    show_detailed_error(preamble, buf, err);
 }
@ -57,7 +63,7 @@ void show_last_error(LPCTSTR preamble) {
 LPTSTR get_app_dir() {
    LPTSTR buf, buf2, buf3;
    DWORD sz;
-    TCHAR drive[4] = TEXT("\0\0\0");
+    TCHAR drive[4] = _T("\0\0\0");
    errno_t err;
    buf = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
@ -67,18 +73,18 @@ LPTSTR get_app_dir() {
    sz = GetModuleFileName(NULL, buf, BUFSIZE);
    if (sz == 0 || sz > BUFSIZE-1) {
-        show_error(TEXT("Failed to get path to calibre-portable.exe"));
+        show_error(_T("Failed to get path to calibre-portable.exe"));
        ExitProcess(1);
    }
    err = _tsplitpath_s(buf, drive, 4, buf2, BUFSIZE, NULL, 0, NULL, 0);
    if (err != 0) {
-        show_last_error_crt(TEXT("Failed to split path to calibre-portable.exe"));
+        show_last_error_crt(_T("Failed to split path to calibre-portable.exe"));
        ExitProcess(1);
    }
-    _sntprintf_s(buf3, BUFSIZE-1, _TRUNCATE, TEXT("%s%s"), drive, buf2);
+    _sntprintf_s(buf3, BUFSIZE-1, _TRUNCATE, _T("%s%s"), drive, buf2);
    free(buf); free(buf2);
    return buf3;
 }
@ -90,18 +96,18 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
    BOOL fSuccess; 
    TCHAR cmdline[BUFSIZE];
-    if (! SetEnvironmentVariable(TEXT("CALIBRE_CONFIG_DIRECTORY"), config_dir)) {
+    if (! SetEnvironmentVariable(_T("CALIBRE_CONFIG_DIRECTORY"), config_dir)) {
-        show_last_error(TEXT("Failed to set environment variables"));
+        show_last_error(_T("Failed to set environment variables"));
        ExitProcess(1);
    }
-    if (! SetEnvironmentVariable(TEXT("CALIBRE_PORTABLE_BUILD"), exe)) {
+    if (! SetEnvironmentVariable(_T("CALIBRE_PORTABLE_BUILD"), exe)) {
-        show_last_error(TEXT("Failed to set environment variables"));
+        show_last_error(_T("Failed to set environment variables"));
        ExitProcess(1);
    }
    dwFlags = CREATE_UNICODE_ENVIRONMENT | CREATE_NEW_PROCESS_GROUP;
-    _sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, TEXT(" \"--with-library=%s\""), library_dir);
+    _sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, _T(" \"--with-library=%s\""), library_dir);
    ZeroMemory( &si, sizeof(si) );
    si.cb = sizeof(si);
@ -119,7 +125,7 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
    );
    if (fSuccess == 0) {
-        show_last_error(TEXT("Failed to launch the calibre program"));
+        show_last_error(_T("Failed to launch the calibre program"));
    }
    // Close process and thread handles.
@ -137,9 +143,9 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine
    library_dir = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
    exe = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
-    _sntprintf_s(config_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Settings"), app_dir);
+    _sntprintf_s(config_dir, BUFSIZE, _TRUNCATE, _T("%sCalibre Settings"), app_dir);
-    _sntprintf_s(exe, BUFSIZE, _TRUNCATE, TEXT("%sCalibre\\calibre.exe"), app_dir);
+    _sntprintf_s(exe, BUFSIZE, _TRUNCATE, _T("%sCalibre\\calibre.exe"), app_dir);
-    _sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Library"), app_dir);
+    _sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, _T("%sCalibre Library"), app_dir);
    launch_calibre(exe, config_dir, library_dir);
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -353,9 +353,14 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
    if user_agent is None:
        user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
    opener.addheaders = [('User-agent', user_agent)]
-    http_proxy = get_proxies().get('http', None)
+    proxies = get_proxies()
    # Collect every configured scheme and install them with a single
    # set_proxies() call. Calling it once per scheme with a one-entry dict
    # risks the second call replacing the mapping set by the first
    # (NOTE(review): this is mechanize's documented behaviour; confirm that
    # opener is a mechanize-based browser).
    to_add = {}
    http_proxy = proxies.get('http', None)
    if http_proxy:
        to_add['http'] = http_proxy
    https_proxy = proxies.get('https', None)
    if https_proxy:
        to_add['https'] = https_proxy
    if to_add:
        opener.set_proxies(to_add)
    return opener
 def fit_image(width, height, pwidth, pheight):
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 8, 10)
+numeric_version = (0, 8, 12)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -570,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
-        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
+        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY)
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
@ -705,7 +705,7 @@ plugins += [
    EEEREADER,
    NEXTBOOK,
    ADAM,
-    MOOVYBOOK,
+    MOOVYBOOK, COBY,
    ITUNES,
    BOEYE_BEX,
    BOEYE_BDX,
@ -1228,17 +1228,6 @@ class StoreEbookscomStore(StoreBase):
    formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
    affiliate = True
 #class StoreEPubBuyDEStore(StoreBase):
 #    name = 'EPUBBuy DE'
 #    author = 'Charles Haley'
 #    description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
 #    actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
 #
 #    drm_free_only = True
 #    headquarters = 'DE'
 #    formats = ['EPUB']
 #    affiliate = True
 class StoreEBookShoppeUKStore(StoreBase):
    name = 'ebookShoppe UK'
    author = u'Charles Haley'
@ -1258,14 +1247,15 @@ class StoreEHarlequinStore(StoreBase):
    formats = ['EPUB', 'PDF']
    affiliate = True
-class StoreEpubBudStore(StoreBase):
+class StoreEKnigiStore(StoreBase):
-    name = 'ePub Bud'
+    name = u'еКниги'
-    description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.'
+    author = 'Alex Stanev'
-    actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'
+    description = u'Онлайн книжарница за електронни книги и аудио риалити романи'
    actual_plugin = 'calibre.gui2.store.stores.eknigi_plugin:eKnigiStore'
-    drm_free_only = True
+    headquarters = 'BG'
-    headquarters = 'US'
+    formats = ['EPUB', 'PDF', 'HTML']
-    formats = ['EPUB']
+    affiliate = True
 class StoreFeedbooksStore(StoreBase):
    name = 'Feedbooks'
@ -1301,6 +1291,7 @@ class StoreGoogleBooksStore(StoreBase):
    headquarters = 'US'
    formats = ['EPUB', 'PDF', 'TXT']
    affiliate = True
 class StoreGutenbergStore(StoreBase):
    name = 'Project Gutenberg'
@ -1384,6 +1375,17 @@ class StoreOReillyStore(StoreBase):
    headquarters = 'US'
    formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF']
 class StoreOzonRUStore(StoreBase):
    name = 'OZON.ru'
    description = u'ebooks from OZON.ru'
    actual_plugin = 'calibre.gui2.store.stores.ozon_ru_plugin:OzonRUStore'
    author = 'Roman Mukhin'
    drm_free_only = True
    headquarters = 'RU'
    formats = ['TXT', 'PDF', 'DJVU', 'RTF', 'DOC', 'JAR', 'FB2']
    affiliate = True
 class StorePragmaticBookshelfStore(StoreBase):
    name = 'Pragmatic Bookshelf'
    description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.'
@ -1481,9 +1483,8 @@ plugins += [
    StoreEbookNLStore,
    StoreEbookscomStore,
    StoreEBookShoppeUKStore,
 #    StoreEPubBuyDEStore,
    StoreEHarlequinStore,
-    StoreEpubBudStore,
+    StoreEKnigiStore,
    StoreFeedbooksStore,
    StoreFoylesUKStore,
    StoreGandalfStore,
@ -1497,6 +1498,7 @@ plugins += [
    StoreNextoStore,
    StoreOpenBooksStore,
    StoreOReillyStore,
    StoreOzonRUStore,
    StorePragmaticBookshelfStore,
    StoreSmashwordsStore,
    StoreVirtualoStore,
--- a/src/calibre/db/tables.py
+++ b/src/calibre/db/tables.py
@ -12,7 +12,7 @@ from datetime import datetime
 from dateutil.tz import tzoffset
 from calibre.constants import plugins
-from calibre.utils.date import parse_date, local_tz
+from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE
 from calibre.ebooks.metadata import author_to_author_sort
 _c_speedup = plugins['speedup'][0]
@ -29,8 +29,11 @@ def _c_convert_timestamp(val):
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
    try:
        return datetime(year, month, day, hour, minutes, seconds,
                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
    except OverflowError:
        return UNDEFINED_DATE.astimezone(local_tz)
 class Table(object):
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -47,10 +47,12 @@ class ANDROID(USBMS):
            # Google
            0x18d1 : {
                0x0001 : [0x0223],
                0x4e11 : [0x0100, 0x226, 0x227],
                0x4e12 : [0x0100, 0x226, 0x227],
                0x4e21 : [0x0100, 0x226, 0x227],
-                0xb058: [0x0222, 0x226, 0x227]},
+                0xb058 : [0x0222, 0x226, 0x227]
            },
            # Samsung
            0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
@ -126,7 +128,7 @@ class ANDROID(USBMS):
            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
            'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
-            'GT-S5830_CARD']
+            'GT-S5830_CARD', 'GT-S5570_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -35,9 +35,9 @@ class EB600(USBMS):
    PRODUCT_ID  = [0x1688]
    BCD         = [0x110]
-    VENDOR_NAME      = ['NETRONIX', 'WOLDER']
+    VENDOR_NAME      = ['NETRONIX', 'WOLDER', 'MD86371']
-    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2']
+    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2', 'MD86371']
-    WINDOWS_CARD_A_MEM = 'EBOOK'
+    WINDOWS_CARD_A_MEM = ['EBOOK', 'MD86371']
    OSX_MAIN_MEM = 'EB600 Internal Storage Media'
    OSX_CARD_A_MEM = 'EB600 Card Storage Media'
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -351,3 +351,29 @@ class MOOVYBOOK(USBMS):
    def get_main_ebook_dir(self, for_upload=False):
        return 'Books' if for_upload else self.EBOOK_DIR_MAIN
 class COBY(USBMS):
    # Descriptive attributes of the COBY MP977 driver
    name = 'COBY MP977 device interface'
    gui_name = 'COBY'
    description = _('Communicate with the COBY')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']
    # Ordered list of supported formats
    FORMATS = ['epub', 'pdf']
    # Vendor/product/BCD values and vendor name for this device
    VENDOR_ID = [0x1e74]
    PRODUCT_ID = [0x7121]
    BCD = [0x02]
    VENDOR_NAME = 'USB_2.0'
    # Main memory and card A use the same Windows identifier string
    WINDOWS_MAIN_MEM = 'MP977_DRIVER'
    WINDOWS_CARD_A_MEM = WINDOWS_MAIN_MEM
    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = False
    def get_carda_ebook_dir(self, for_upload=False):
        # Uploads to card A go to 'eBooks'; otherwise EBOOK_DIR_CARD_A is used
        return 'eBooks' if for_upload else self.EBOOK_DIR_CARD_A
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -210,12 +210,13 @@ def add_pipeline_options(parser, plumber):
            if rec.level < rec.HIGH:
                option_recommendation_to_cli_option(add_option, rec)
    parser.add_option('--list-recipes', default=False, action='store_true',
            help=_('List builtin recipes'))
 def option_parser():
-    return OptionParser(usage=USAGE)
+    parser = OptionParser(usage=USAGE)
-
+    parser.add_option('--list-recipes', default=False, action='store_true',
            help=_('List builtin recipe names. You can create an ebook from '
                'a builtin recipe like this: ebook-convert "Recipe Name.recipe" '
                'output.epub'))
    return parser
 class ProgressBar(object):
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@ -24,10 +24,9 @@ XPath = partial(etree.XPath, namespaces=NAMESPACES)
 tostring = partial(etree.tostring, method='text', encoding=unicode)
 def get_metadata(stream):
-    """ Return fb2 metadata as a L{MetaInformation} object """
+    ''' Return fb2 metadata as a L{MetaInformation} object '''
    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)
@ -181,6 +180,7 @@ def _parse_series(root, mi):
 def _parse_isbn(root, mi):
    # some people try to put several isbn in this field, but it is not allowed.  try to stick to the 1-st one in this case
    isbn = XPath('normalize-space(//fb2:publish-info/fb2:isbn/text())')(root)
    if isbn:
        # some people try to put several isbn in this field, but it is not allowed.  try to stick to the 1-st one in this case
        if ',' in isbn:
            isbn = isbn[:isbn.index(',')]
@ -232,4 +232,3 @@ def _get_fbroot(stream):
    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
    root = etree.fromstring(raw, parser=parser)
    return root
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -22,6 +22,7 @@ from calibre.utils.date import parse_date, isoformat
 from calibre.utils.localization import get_lang
 from calibre import prints, guess_type
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.config import tweaks
 class Resource(object): # {{{
    '''
@ -527,7 +528,12 @@ class OPF(object): # {{{
    category        = MetadataField('type')
    rights          = MetadataField('rights')
    series          = MetadataField('series', is_dc=False)
-    series_index    = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
+    if tweaks['use_series_auto_increment_tweak_when_importing']:
        series_index    = MetadataField('series_index', is_dc=False,
                                        formatter=float, none_is=None)
    else:
        series_index    = MetadataField('series_index', is_dc=False,
                                        formatter=float, none_is=1)
    title_sort      = TitleSortField('title_sort', is_dc=False)
    rating          = MetadataField('rating', is_dc=False, formatter=int)
    pubdate         = MetadataField('date', formatter=parse_date,
@ -1024,8 +1030,10 @@ class OPF(object): # {{{
            attrib = attrib or {}
            attrib['name'] = 'calibre:' + name
            name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta')
        nsmap = dict(self.NAMESPACES)
        del nsmap['opf']
        elem = etree.SubElement(self.metadata, name, attrib=attrib,
-                                nsmap=self.NAMESPACES)
+                                nsmap=nsmap)
        elem.tail = '\n'
        return elem
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -22,6 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz, as_utc
 from calibre.utils.html2text import html2text
 from calibre.utils.icu import lower
 from calibre.utils.date import UNDEFINED_DATE
 # Download worker {{{
 class Worker(Thread):
@ -490,6 +491,8 @@ def identify(log, abort, # {{{
    max_tags = msprefs['max_tags']
    for r in results:
        r.tags = r.tags[:max_tags]
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None
    if msprefs['swap_author_names']:
        for r in results:
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -151,7 +151,7 @@ class ISBNDB(Source):
        bl = feed.find('BookList')
        if bl is None:
-            err = tostring(etree.find('errormessage'))
+            err = tostring(feed.find('errormessage'))
            raise ValueError('ISBNDb query failed:' + err)
        total_results = int(bl.get('total_results'))
        shown_results = int(bl.get('shown_results'))
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -8,11 +8,14 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import struct, datetime, sys, os, shutil
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from lxml import html
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
-from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint,
+from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
-        get_trailing_data)
+        get_trailing_data, decode_tbs)
 from calibre.utils.magick.draw import identify_data
 # PalmDB {{{
@ -73,7 +76,7 @@ class PalmDB(object):
        self.ident = self.type + self.creator
        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise ValueError('Unknown book ident: %r'%self.ident)
-        self.uid_seed = self.raw[68:72]
+        self.uid_seed, = struct.unpack(b'>I', self.raw[68:72])
        self.next_rec_list_id = self.raw[72:76]
        self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])
@ -182,6 +185,7 @@ class EXTHHeader(object):
        self.records = []
        for i in xrange(self.count):
            pos = self.read_record(pos)
        self.records.sort(key=lambda x:x.type)
    def read_record(self, pos):
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
@ -214,10 +218,11 @@ class MOBIHeader(object): # {{{
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
-        self.encryption_type = {0: 'No encryption',
+        self.encryption_type = {
                0: 'No encryption',
                1: 'Old mobipocket encryption',
-                2:'Mobipocket encryption'}.get(self.encryption_type_raw,
+                2: 'Mobipocket encryption'
-                repr(self.encryption_type_raw))
+            }.get(self.encryption_type_raw, repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]
        self.identifier = self.raw[16:20]
@ -289,7 +294,12 @@ class MOBIHeader(object): # {{{
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
-            self.unknown6 = self.raw[216:240]
+            self.unknown6 = self.raw[216:224]
            self.srcs_record_index = struct.unpack(b'>I',
                self.raw[224:228])[0]
            self.num_srcs_records = struct.unpack(b'>I',
                self.raw[228:232])[0]
            self.unknown7 = self.raw[232:240]
            self.extra_data_flags = struct.unpack(b'>I',
                self.raw[240:244])[0]
            self.has_multibytes = bool(self.extra_data_flags & 0b1)
@ -338,7 +348,7 @@ class MOBIHeader(object): # {{{
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('Unknown2: %r'%self.unknown2)
-        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
+        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
@ -355,6 +365,9 @@ class MOBIHeader(object): # {{{
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('SRCS record index: %d'%self.srcs_record_index)
            ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
            ans.append('Unknown7: %r'%self.unknown7)
            ans.append(('Extra data flags: %s (has multibyte: %s) '
                '(has indexing: %s) (has uncrossable breaks: %s)')%(
                    bin(self.extra_data_flags), self.has_multibytes,
@ -398,6 +411,7 @@ class IndexHeader(object): # {{{
    def __init__(self, record):
        self.record = record
        raw = self.record.raw
        #open('/t/index_header.bin', 'wb').write(raw)
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Primary Index Record')
@ -405,7 +419,7 @@ class IndexHeader(object): # {{{
        self.unknown1 = raw[8:16]
        self.index_type, = struct.unpack('>I', raw[16:20])
        self.index_type_desc = {0: 'normal', 2:
-                'inflection'}.get(self.index_type, 'unknown')
+                'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
        self.idxt_start, = struct.unpack('>I', raw[20:24])
        self.index_count, = struct.unpack('>I', raw[24:28])
        self.index_encoding_num, = struct.unpack('>I', raw[28:32])
@ -414,12 +428,7 @@ class IndexHeader(object): # {{{
        if self.index_encoding == 'unknown':
            raise ValueError(
                'Unknown index encoding: %d'%self.index_encoding_num)
-        self.locale_raw, = struct.unpack(b'>I', raw[32:36])
+        self.possibly_language = raw[32:36]
        langcode = self.locale_raw
        langid    = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
        self.num_index_entries, = struct.unpack('>I', raw[36:40])
        self.ordt_start, = struct.unpack('>I', raw[40:44])
        self.ligt_start, = struct.unpack('>I', raw[44:48])
@ -479,8 +488,7 @@ class IndexHeader(object): # {{{
        a('Number of index records: %d'%self.index_count)
        a('Index encoding: %s (%d)'%(self.index_encoding,
                self.index_encoding_num))
-        a('Index language: %s - %s (%s)'%(self.language, self.sublanguage,
+        a('Unknown (possibly language?): %r'%(self.possibly_language))
            hex(self.locale_raw)))
        a('Number of index entries: %d'% self.num_index_entries)
        a('ORDT start: %d'%self.ordt_start)
        a('LIGT start: %d'%self.ligt_start)
@ -530,21 +538,21 @@ class Tag(object): # {{{
            },
            'chapter_with_subchapters' : {
-                    22 : ('First subchapter index', 'first_subchapter_index'),
+                    22 : ('First subchapter index', 'first_child_index'),
-                    23 : ('Last subchapter index', 'last_subchapter_index'),
+                    23 : ('Last subchapter index', 'last_child_index'),
            },
            'periodical' : {
                    5  : ('Class offset in cncx', 'class_offset'),
-                    22 : ('First section index', 'first_section_index'),
+                    22 : ('First section index', 'first_child_index'),
-                    23 : ('Last section index', 'last_section_index'),
+                    23 : ('Last section index', 'last_child_index'),
            },
            'section' : {
                    5  : ('Class offset in cncx', 'class_offset'),
-                    21 : ('Periodical index', 'periodical_index'),
+                    21 : ('Periodical index', 'parent_index'),
-                    22 : ('First article index', 'first_article_index'),
+                    22 : ('First article index', 'first_child_index'),
-                    23 : ('Last article index', 'last_article_index'),
+                    23 : ('Last article index', 'last_child_index'),
            },
    }
@ -595,10 +603,14 @@ class IndexEntry(object): # {{{
            0x3f : 'article',
    }
-    def __init__(self, ident, entry_type, raw, cncx, tagx_entries):
+    def __init__(self, ident, entry_type, raw, cncx, tagx_entries, flags=0):
        self.index = ident
        self.raw = raw
        self.tags = []
        self.entry_type_raw = entry_type
        self.byte_size = len(raw)
        orig_raw = raw
        try:
            self.entry_type = self.TYPES[entry_type]
@ -618,6 +630,27 @@ class IndexEntry(object): # {{{
                vals.append(val)
            self.tags.append(Tag(tag, vals, self.entry_type, cncx))
        if flags & 0b10:
            # Look for optional description and author
            desc_tag = [t for t in tagx_entries if t.tag == 22]
            if desc_tag and raw:
                val, consumed = decint(raw)
                raw = raw[consumed:]
                if val:
                    self.tags.append(Tag(desc_tag[0], [val], self.entry_type,
                        cncx))
        if flags & 0b100:
            aut_tag = [t for t in tagx_entries if t.tag == 23]
            if aut_tag and raw:
                val, consumed = decint(raw)
                raw = raw[consumed:]
                if val:
                    self.tags.append(Tag(aut_tag[0], [val], self.entry_type,
                        cncx))
        self.consumed = len(orig_raw) - len(raw)
        self.trailing_bytes = raw
    @property
    def label(self):
        for tag in self.tags:
@ -625,11 +658,59 @@ class IndexEntry(object): # {{{
                return tag.cncx_value
        return ''
    def _tag_value(self, attr, default):
        '''
        Return the ``value`` of the first tag in ``self.tags`` whose ``attr``
        equals ``attr``, or ``default`` when no such tag exists.
        '''
        for tag in self.tags:
            if tag.attr == attr:
                return tag.value
        return default
    @property
    def offset(self):
        ''' Value of the 'offset' tag, 0 if this entry has none '''
        return self._tag_value('offset', 0)
    @property
    def size(self):
        ''' Value of the 'size' tag, 0 if this entry has none '''
        return self._tag_value('size', 0)
    @property
    def depth(self):
        ''' Value of the 'depth' tag, 0 if this entry has none '''
        return self._tag_value('depth', 0)
    @property
    def parent_index(self):
        ''' Value of the 'parent_index' tag, -1 if this entry has none '''
        return self._tag_value('parent_index', -1)
    @property
    def first_child_index(self):
        ''' Value of the 'first_child_index' tag, -1 if this entry has none '''
        return self._tag_value('first_child_index', -1)
    @property
    def last_child_index(self):
        ''' Value of the 'last_child_index' tag, -1 if this entry has none '''
        return self._tag_value('last_child_index', -1)
    def __str__(self):
-        ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%(
+        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d, byte_size=%d)'%(
-            self.index, self.entry_type, len(self.tags))]
+            self.index, self.entry_type, bin(self.entry_type_raw)[2:],
            len(self.tags), self.byte_size)]
        for tag in self.tags:
            ans.append('\t'+str(tag))
        if self.first_child_index != -1:
            ans.append('\tNumber of children: %d'%(self.last_child_index -
                self.first_child_index + 1))
        if self.trailing_bytes:
            ans.append('\tTrailing bytes: %r'%self.trailing_bytes)
        return '\n'.join(ans)
 # }}}
@ -644,6 +725,7 @@ class IndexRecord(object): # {{{
    def __init__(self, record, index_header, cncx):
        self.record = record
        raw = self.record.raw
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Primary Index Record')
@ -667,8 +749,12 @@ class IndexRecord(object): # {{{
        for i in range(self.idxt_count):
            off, = u(b'>H', indices[i*2:(i+1)*2])
            self.index_offsets.append(off-192)
        rest = indices[(i+1)*2:]
        if rest.replace(b'\0', ''): # There can be padding null bytes
            raise ValueError('Extra bytes after IDXT table: %r'%rest)
        indxt = raw[192:self.idxt_offset]
        self.size_of_indxt_block = len(indxt)
        self.indices = []
        for i, off in enumerate(self.index_offsets):
            try:
@ -677,8 +763,26 @@ class IndexRecord(object): # {{{
                next_off = len(indxt)
            index, consumed = decode_hex_number(indxt[off:])
            entry_type = ord(indxt[off+consumed])
            d, flags = 1, 0
            if index_header.index_type == 6:
                flags = ord(indxt[off+consumed+d])
                d += 1
            pos = off+consumed+d
            self.indices.append(IndexEntry(index, entry_type,
-                indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries))
+                indxt[pos:next_off], cncx,
                index_header.tagx_entries, flags=flags))
        rest = indxt[pos+self.indices[-1].consumed:]
        if rest.replace(b'\0', ''): # There can be padding null bytes
            raise ValueError('Extra bytes after IDXT table: %r'%rest)
    def get_parent(self, index):
        '''
        Look up the parent of the index entry ``index``.

        Returns None for entries whose ``depth`` is below 1.

        NOTE(review): this looks unfinished. The loop below only skips
        entries at other depths and never selects or returns one, so the
        method currently returns None for every input. The intended matching
        rule is not visible here; confirm before relying on this method.
        '''
        if index.depth < 1:
            return None
        parent_depth = index.depth - 1
        for p in self.indices:
            # Entries that are not exactly one level above cannot be the parent
            if p.depth != parent_depth:
                continue
    def __str__(self):
@ -689,14 +793,15 @@ class IndexRecord(object): # {{{
                len(w), not bool(w.replace(b'\0', b'')) ))
        a('Header length: %d'%self.header_length)
        u(self.unknown1)
-        a('Header Type: %d'%self.header_type)
+        a('Unknown (header type? index record number? always 1?): %d'%self.header_type)
        u(self.unknown2)
-        a('IDXT Offset: %d'%self.idxt_offset)
+        a('IDXT Offset (%d block size): %d'%(self.size_of_indxt_block,
            self.idxt_offset))
        a('IDXT Count: %d'%self.idxt_count)
        u(self.unknown3)
        u(self.unknown4)
        a('Index offsets: %r'%self.index_offsets)
-        a('\nIndex Entries:')
+        a('\nIndex Entries (%d entries):'%len(self.indices))
        for entry in self.indices:
            a(str(entry)+'\n')
@ -714,15 +819,17 @@ class CNCX(object) : # {{{
    def __init__(self, records, codec):
        self.records = OrderedDict()
-        pos = 0
+        record_offset = 0
        for record in records:
            raw = record.raw
            pos = 0
            while pos < len(raw):
                length, consumed = decint(raw[pos:])
                if length > 0:
-                    self.records[pos] = raw[pos+consumed:pos+consumed+length].decode(
+                    self.records[pos+record_offset] = raw[
-                        codec)
+                            pos+consumed:pos+consumed+length].decode(codec)
                pos += consumed+length
            record_offset += 0x10000
    def __getitem__(self, offset):
        # Uses dict-style .get(), so an offset with no stored record yields
        # None instead of raising KeyError
        return self.records.get(offset)
@ -738,9 +845,9 @@ class CNCX(object) : # {{{
 class TextRecord(object): # {{{
-    def __init__(self, idx, record, extra_data_flags, decompress, index_record,
+    def __init__(self, idx, record, extra_data_flags, decompress):
            doc_type):
        self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
        raw_trailing_bytes = record.raw[len(self.raw):]
        self.raw = decompress(self.raw)
        if 0 in self.trailing_data:
            self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0)
@ -748,63 +855,10 @@ class TextRecord(object): # {{{
            self.trailing_data['indexing'] = self.trailing_data.pop(1)
        if 2 in self.trailing_data:
            self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2)
        self.trailing_data['raw_bytes'] = raw_trailing_bytes
        self.idx = idx
        if 'indexing' in self.trailing_data and index_record is not None:
            self.interpret_indexing(doc_type, index_record.indices)
    def interpret_indexing(self, doc_type, indices):
        raw = self.trailing_data['indexing']
        ident, consumed = decint(raw)
        raw = raw[consumed:]
        entry_type = ident & 0b111
        index_entry_idx = ident >> 3
        index_entry = None
        for i in indices:
            if i.index == index_entry_idx:
                index_entry = i.label
                break
        self.trailing_data['interpreted_indexing'] = (
                'Type: %s, Index Entry: %s'%(entry_type, index_entry))
        if doc_type == 2: # Book
            self.interpret_book_indexing(raw, entry_type)
    def interpret_book_indexing(self, raw, entry_type):
        arg1, consumed = decint(raw)
        raw = raw[consumed:]
        if arg1 != 0:
            raise ValueError('TBS index entry has unknown arg1: %d'%
                    arg1)
        if entry_type == 2:
            desc = ('This record has only a single starting or a single'
                    ' ending point')
            if raw:
                raise ValueError('TBS index entry has unknown extra bytes:'
                        ' %r'%raw)
        elif entry_type == 3:
            desc = ('This record is spanned by a single node (i.e. it'
                    ' has no start or end points)')
            arg2, consumed = decint(raw)
            if arg2 != 0:
                raise ValueError('TBS index entry has unknown arg2: %d'%
                        arg2)
        elif entry_type == 6:
            if len(raw) != 1:
                raise ValueError('TBS index entry has unknown extra bytes:'
                        ' %r'%raw)
            num = ord(raw[0])
            # An unmatched starting or ending point each contributes 1 to
            # this count. A matched pair of starting and ending points
            # together contribute 1 to this count. Note that you can only
            # ever have either 1 unmatched start point or 1 unmatched end
            # point, never both (logically impossible).
            desc = ('This record has %d starting/ending points and/or complete'
                    ' nodes.')%num
        else:
            raise ValueError('Unknown TBS index entry type: %d for book'%entry_type)
        self.trailing_data['interpreted_indexing'] += ' :: ' + desc
    def dump(self, folder):
        name = '%06d'%self.idx
        with open(os.path.join(folder, name+'.txt'), 'wb') as f:
@ -848,6 +902,189 @@ class BinaryRecord(object): # {{{
 # }}}
 class TBSIndexing(object): # {{{
    '''
    Debugging aid that relates each text record to the index entries that
    overlap it and produces a human readable dump of the record's trailing
    'indexing' (TBS) bytes.

    For every text record, ``record_indices`` maps the record to a dict with
    the keys:

        * 'starts'   - entries that begin in the record but end after it
        * 'ends'     - entries that end in the record but began before it
        * 'complete' - entries that lie entirely inside the record
        * 'geom'     - (first, last) position covered by the record, counted
                       over the concatenated ``raw`` data of all records
    '''
    def __init__(self, text_records, indices, doc_type):
        '''
        :param text_records: Iterable of records providing ``raw``, ``idx``
                             and ``trailing_data``
        :param indices: Index entries providing ``index``, ``offset``,
                        ``size``, ``depth``, ``parent_index`` and ``label``
        :param doc_type: Document type number; 257, 258 and 259 are handled
                         as periodicals when dumping
        '''
        self.record_indices = OrderedDict()
        self.doc_type = doc_type
        self.indices = indices
        pos = 0
        for r in text_records:
            start = pos
            pos += len(r.raw)
            end = pos - 1
            self.record_indices[r] = x = {'starts':[], 'ends':[],
                    'complete':[], 'geom': (start, end)}
            for entry in indices:
                istart, sz = entry.offset, entry.size
                iend = istart + sz - 1
                has_start = start <= istart <= end
                has_end = start <= iend <= end
                rec = None
                if has_start and has_end:
                    rec = 'complete'
                elif has_start and not has_end:
                    rec = 'starts'
                elif not has_start and has_end:
                    rec = 'ends'
                # Entries that merely span the record (neither endpoint
                # inside it) are not recorded
                if rec:
                    x[rec].append(entry)
    def get_index(self, idx):
        '''
        Return the index entry whose ``index`` equals ``idx``.

        :raises IndexError: if there is no such entry
        '''
        for i in self.indices:
            if i.index == idx: return i
        raise IndexError('Index %d not found'%idx)
    def __str__(self):
        ''' Textual dump of all records, one section per record '''
        ans = ['*'*20 + ' TBS Indexing (%d records) '%len(self.record_indices)+ '*'*20]
        for r, dat in self.record_indices.iteritems():
            ans += self.dump_record(r, dat)[-1]
        return '\n'.join(ans)
    def dump(self, bdir):
        '''
        Write one file ``tbs_type_<N>.txt`` into the directory ``bdir`` for
        every non-zero TBS type, containing the dumps of all records of that
        type.
        '''
        types = defaultdict(list)
        for r, dat in self.record_indices.iteritems():
            tbs_type, strings = self.dump_record(r, dat)
            # Type 0 means no flags were decoded for this record
            if tbs_type == 0: continue
            types[tbs_type] += strings
        for typ, strings in types.iteritems():
            with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
                f.write('\n'.join(strings))
    def dump_record(self, r, dat):
        '''
        Describe a single text record.

        :param r: The text record
        :param dat: The entry of ``record_indices`` belonging to ``r``
        :return: ``(tbs_type, lines)`` where ``tbs_type`` is the bitwise OR
                 of all flags decoded from the start of the TBS bytes (0 if
                 the record has no TBS bytes) and ``lines`` is a list of
                 strings
        '''
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
            dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
            '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
                c, s))))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        sbyts = tuple(hex(b)[2:] for b in byts)
        ans.append('TBS bytes: %s'%(' '.join(sbyts)))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if entries:
                ans.append('\t%s:'%typ)
                for x in entries:
                    ans.append(('\t\tIndex Entry: %d (Parent index: %d, '
                            'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                        x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
        def bin4(num):
            # Binary representation, left padded with zeros to 4 digits
            digits = bin(num)[2:]
            return bytes('0'*(4-len(digits)) + digits)
        def repr_extra(x):
            # Render the flag -> value mapping with the flags shown in binary
            return str({bin4(k):v for k, v in x.iteritems()})
        tbs_type = 0
        is_periodical = self.doc_type in (257, 258, 259)
        if len(byts):
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
                    is_periodical else 3)
            byts = byts[consumed:]
            # The TBS type is the union of all flags present
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d'%outermost_index)
            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
            if is_periodical: # Hierarchical periodical
                byts, a = self.interpret_periodical(tbs_type, byts,
                        dat['geom'][0])
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
                ans.append('Remaining bytes: %s'%' '.join(sbyts))
        ans.append('')
        return tbs_type, ans
    def interpret_periodical(self, tbs_type, byts, record_offset):
        '''
        Interpret the TBS bytes that follow the outermost entry in a record
        of a periodical.

        :param tbs_type: Flags decoded from the start of the TBS bytes
        :param byts: The bytes that remain after the outermost entry
        :param record_offset: Start position of the record, used to report
                              absolute offsets
        :return: ``(remaining_bytes, lines)``
        :raises ValueError: for flag combinations that are not understood
        '''
        ans = []
        def read_section_transitions(byts, psi=None): # {{{
            # psi is the index entry of the section being described
            if psi is None:
                # Assume previous section is 1
                psi = self.get_index(1)
            while byts:
                ai, extra, consumed = decode_tbs(byts)
                byts = byts[consumed:]
                if extra.get(0b0010, None) is not None:
                    raise ValueError('Dont know how to interpret flag 0b0010'
                            ' while reading section transitions')
                if extra.get(0b1000, None) is not None:
                    if len(extra) > 1:
                        raise ValueError('Dont know how to interpret flags'
                                ' %r while reading section transitions'%extra)
                    # Move on to the section with the next index
                    nsi = self.get_index(psi.index+1)
                    ans.append('Last article in this record of section %d'
                            ' (relative to next section index [%d]): '
                            '%d [%d absolute index]'%(psi.index, nsi.index, ai,
                                ai+nsi.index))
                    psi = nsi
                    continue
                ans.append('First article in this record of section %d'
                        ' (relative to its parent section): '
                        '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))
                num = extra.get(0b0100, None)
                if num is None:
                    msg = ('The section %d has at most one article'
                            ' in this record')%psi.index
                else:
                    msg = ('Number of articles in this record of '
                        'section %d: %d')%(psi.index, num)
                ans.append(msg)
                offset = extra.get(0b0001, None)
                if offset is not None:
                    if offset == 0:
                        ans.append('This record is spanned by the article:'
                                '%d'%(ai+psi.index))
                    else:
                        ans.append('->Offset to start of next section (%d) from start'
                            ' of record: %d [%d absolute offset]'%(psi.index+1,
                                offset, offset+record_offset))
            return byts
        # }}}
        def read_starting_section(byts): # {{{
            # Returns (index entry of the starting section, remaining bytes)
            orig = byts
            si, extra, consumed = decode_tbs(byts)
            byts = byts[consumed:]
            if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra:
                raise ValueError('Dont know how to interpret flags %r'
                        ' when reading starting section'%extra)
            si = self.get_index(si)
            ans.append('The section at the start of this record is:'
                    ' %d'%si.index)
            if 0b0100 in extra:
                num = extra[0b0100]
                ans.append('The number of articles from the section %d'
                        ' in this record: %d'%(si.index, num))
            elif 0b0001 in extra:
                eof = extra[0b0001]
                if eof != 0:
                    raise ValueError('Unknown eof value %s when reading'
                            ' starting section. All bytes: %r'%(eof, orig))
                ans.append('??This record has more than one article from '
                        ' the section: %d'%si.index)
            return si, byts
        # }}}
        if tbs_type & 0b0100:
            # Starting section is the first section
            ssi = self.get_index(1)
        else:
            ssi, byts = read_starting_section(byts)
        byts = read_section_transitions(byts, ssi)
        return byts, ans
 # }}}
 class MOBIFile(object): # {{{
    def __init__(self, stream):
@ -910,8 +1147,7 @@ class MOBIFile(object): # {{{
        if fntbr == 0xffffffff:
            fntbr = len(self.records)
        self.text_records = [TextRecord(r, self.records[r],
-            self.mobi_header.extra_data_flags, decompress, self.index_record,
+            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
            self.mobi_header.type_raw) for r in xrange(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
        for i in xrange(fntbr, len(self.records)):
@ -930,6 +1166,9 @@ class MOBIFile(object): # {{{
            else:
                self.binary_records.append(BinaryRecord(i, r))
        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, self.mobi_header.type_raw)
    def print_header(self, f=sys.stdout):
        print (str(self.palmdb).encode('utf-8'), file=f)
@ -961,6 +1200,9 @@ def inspect_mobi(path_or_stream, prefix='decompiled'):
            print(str(f.cncx).encode('utf-8'), file=out)
            print('\n\n', file=out)
            print(str(f.index_record), file=out)
        with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
            print(str(f.tbs_indexing), file=out)
        f.tbs_indexing.dump(ddir)
    for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
            ('binary', 'binary_records')]:
@ -969,6 +1211,19 @@ def inspect_mobi(path_or_stream, prefix='decompiled'):
        for rec in getattr(f, attr):
            rec.dump(tdir)
    alltext = os.path.join(ddir, 'text.html')
    with open(alltext, 'wb') as of:
        alltext = b''
        for rec in f.text_records:
            of.write(rec.raw)
            alltext += rec.raw
        of.seek(0)
    root = html.fromstring(alltext.decode('utf-8'))
    with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
        of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
            include_meta_content_type=True))
    print ('Debug data saved to:', ddir)
 def main():
--- a/src/calibre/ebooks/mobi/kindlegen.py
+++ b/src/calibre/ebooks/mobi/kindlegen.py
@ -0,0 +1,86 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os, subprocess, shutil, tempfile
 from lxml import etree
 from calibre.constants import iswindows
 from calibre.customize.ui import plugin_for_output_format
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.mobi.utils import detect_periodical
 from calibre import CurrentDir
 exe = 'kindlegen.exe' if iswindows else 'kindlegen'
 def refactor_opf(opf, is_periodical, toc):
    '''
    Rewrite the OPF file at the path opf in place.

    When is_periodical is true, an <x-metadata> element containing an
    <output> element with the Mobipocket subscription magazine content type
    is appended to the first <metadata> element found in the document.
    The toc argument is accepted but not used.
    '''
    with open(opf, 'rb') as src:
        raw = src.read()
    root = etree.fromstring(raw)
    # Deliberately disabled, kept for reference:
    #
    #   for spine in root.xpath('//*[local-name() = "spine" and @toc]'):
    #       # Do not use the NCX toc as kindlegen requires the section
    #       # structure in the TOC to be duplicated in the HTML, asinine!
    #       del spine.attrib['toc']
    if is_periodical:
        metadata = root.xpath('//*[local-name() = "metadata"]')[0]
        x_meta = etree.SubElement(metadata, 'x-metadata')
        x_meta.text, x_meta.tail = '\n\t', '\n'
        output_attrs = {
            'encoding':"utf-8",
            'content-type':"application/x-mobipocket-subscription-magazine",
        }
        output = etree.SubElement(x_meta, 'output', attrib=output_attrs)
        output.tail = '\n\t'
    with open(opf, 'wb') as dest:
        serialized = etree.tostring(root, method='xml', encoding='utf-8',
                xml_declaration=True)
        dest.write(serialized)
 def refactor_guide(oeb):
    '''
    Remove every entry from oeb.guide except those keyed 'toc', 'start'
    and 'masthead'.
    '''
    keep = ('toc', 'start', 'masthead')
    # Snapshot the keys first so the guide is not mutated while iterating
    unwanted = [ref for ref in list(oeb.guide) if ref not in keep]
    for ref in unwanted:
        oeb.guide.remove(ref)
 def run_kindlegen(opf, log):
    '''
    Run the kindlegen executable on the OPF file opf, asking it to write a
    .mobi file with the same base name as opf. The combined stdout/stderr
    of kindlegen is sent to log at debug level.

    Returns the name of the generated file. Raises RuntimeError if, after
    kindlegen exits, that file does not exist or is smaller than 100 bytes.
    '''
    log.info('Running kindlegen on MOBIML created by calibre')
    oname = os.path.splitext(opf)[0] + '.mobi'
    cmd = [exe, opf, '-c1', '-verbose', '-o', oname]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
    # Only stdout is piped (stderr is merged into it), so this drains the
    # output and then waits for the process to exit
    output = proc.communicate()[0]
    returncode = proc.returncode
    log.debug('kindlegen verbose output:')
    log.debug(output.decode('utf-8', 'replace'))
    log.info('kindlegen returned returncode: %d'%returncode)
    produced = os.path.exists(oname) and os.stat(oname).st_size >= 100
    if not produced:
        raise RuntimeError('kindlegen did not produce any output. '
                'kindlegen return code: %d'%returncode)
    return oname
 def kindlegen(oeb, opts, input_plugin, output_path):
    '''
    Convert oeb to MOBI by running the external kindlegen program instead
    of calibre's own MOBI writer.

    The book is first written out with the 'oeb' output plugin into a
    temporary directory, the resulting OPF is adjusted with refactor_opf(),
    kindlegen is run on it and the file it produces is copied to
    output_path. Errors from run_kindlegen() (RuntimeError when no output
    is produced) propagate to the caller.
    '''
    is_periodical = detect_periodical(oeb.toc, oeb.log)
    refactor_guide(oeb)
    with TemporaryDirectory('_kindlegen_output') as tdir:
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, oeb.log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        refactor_opf(os.path.join(tdir, opf), is_periodical, oeb.toc)
        try:
            # Best effort debugging aid: keep a copy of what is fed to
            # kindlegen in a fixed location in the system temp directory
            td = tempfile.gettempdir()
            kd = os.path.join(td, 'kindlegen')
            if os.path.exists(kd):
                shutil.rmtree(kd)
            shutil.copytree(tdir, kd)
            oeb.log('kindlegen intermediate output stored in: %s'%kd)
        except Exception:
            # Failing to store the debug copy must never abort the
            # conversion, but a bare except would also swallow
            # KeyboardInterrupt and SystemExit, so only catch Exception
            oeb.log.debug('Failed to store kindlegen intermediate output')
        with CurrentDir(tdir):
            # opf is a bare file name, so kindlegen is run from inside tdir
            oname = run_kindlegen(opf, oeb.log)
            shutil.copyfile(oname, output_path)
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@ -50,6 +50,19 @@ class MOBIOutput(OutputFormatPlugin):
            help=_('When adding the Table of Contents to the book, add it at the start of the '
                'book instead of the end. Not recommended.')
        ),
        OptionRecommendation(name='mobi_navpoints_only_deepest',
            recommended_value=False,
            help=_('When adding navpoints for the chapter-to-chapter'
                ' navigation on the kindle, use only the lowest level '
                'of items in the TOC, instead of items at every level.')
        ),
        OptionRecommendation(name='kindlegen',
            recommended_value=False,
            help=('Use kindlegen (must be in your PATH) to generate the'
                ' binary wrapper for the MOBI format. Useful to debug '
                ' the calibre MOBI output.')
        ),
    ])
@ -82,26 +95,6 @@ class MOBIOutput(OutputFormatPlugin):
        else:
            self.oeb.log.debug('Using mastheadImage supplied in manifest...')
    def dump_toc(self, toc):
        '''
        Log the title and href of toc, then of every periodical node in
        toc.nodes, every section inside each periodical and every article
        inside each section, indenting one tab per level.
        '''
        log = self.log
        log("\n         >>> TOC contents <<<")
        log("     toc.title: %s" % toc.title)
        log("      toc.href: %s" % toc.href)
        for per in toc.nodes:
            log("\tperiodical title: %s" % per.title)
            log("\t            href: %s" % per.href)
            for sec in per:
                log("\t\tsection title: %s" % sec.title)
                log("\t\tfirst article: %s" % sec.href)
                for art in sec:
                    # repr() so that non-ascii titles are logged safely
                    log("\t\t\tarticle title: %s" % repr(art.title))
                    log("\t\t\t         href: %s" % art.href)
    def dump_manifest(self):
        '''
        Log a header followed by every href in self.oeb.manifest.hrefs, one
        per line, each prefixed with a tab.
        '''
        log = self.log
        log("\n         >>> Manifest entries <<<")
        for entry in self.oeb.manifest.hrefs:
            log("\t%s" % entry)
    def periodicalize_toc(self):
        from calibre.ebooks.oeb.base import TOC
        toc = self.oeb.toc
@ -156,12 +149,6 @@ class MOBIOutput(OutputFormatPlugin):
            # Fix up the periodical href to point to first section href
            toc.nodes[0].href = toc.nodes[0].nodes[0].href
            # diagnostics
            if self.opts.verbose > 3:
                self.dump_toc(toc)
                self.dump_manifest()
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
        from calibre.ebooks.mobi.mobiml import MobiMLizer
@ -190,6 +177,10 @@ class MOBIOutput(OutputFormatPlugin):
            MobiWriter
        else:
            from calibre.ebooks.mobi.writer import MobiWriter
        if opts.kindlegen:
            from calibre.ebooks.mobi.kindlegen import kindlegen
            kindlegen(oeb, opts, input_plugin, output_path)
        else:
            writer = MobiWriter(opts,
                            write_page_breaks_after_item=write_page_breaks_after_item)
            writer(oeb, output_path)
--- a/src/calibre/ebooks/mobi/tbs_periodicals.rst
+++ b/src/calibre/ebooks/mobi/tbs_periodicals.rst
@ -0,0 +1,363 @@
 Reverse engineering the trailing byte sequences for hierarchical periodicals
 ===============================================================================
 In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing.
 Sequence encoding:
 0b1000 : Continuation bit
 First sequences:
 0b0010 : 80
 0b0011 : 80 80
 0b0110 : 80 2
 0b0111 : 80 2 80
 Other sequences:
 0b0101 : 4 1a
 0b0001 : c b1
 Opening record
 ----------------
 The text record that contains the opening node for the periodical (depth=0 node in the NCX) can have TBS of 3 different forms:
    1. If it has only the periodical node and no section/article nodes, TBS of type 2, like this::
            Record #1: Starts at: 0 Ends at: 4095
                Contains: 1 index entries (0 ends, 0 complete, 1 starts)
            TBS bytes: 82 80
                Starts:
                    Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
            TBS Type: 010 (2)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
    2. A periodical and a section node, but no article nodes, TBS type of 6, like this::
            Record #1: Starts at: 0 Ends at: 4095
                Contains: 2 index entries (0 ends, 0 complete, 2 starts)
            TBS bytes: 86 80 2
                Starts:
                    Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 93254) [j_x's Google reader]
                    Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 49280) [Ars Technica]
            TBS Type: 110 (6)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown (byte: always 2?): 2
    3. If it has both the section 1 node and at least one article node, TBS of type 6, like this::
            Record #1: Starts at: 0 Ends at: 4095
                Contains: 4 index entries (0 ends, 1 complete, 3 starts)
            TBS bytes: 86 80 2 c4 2
                Complete:
                    Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 549, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
                Starts:
                    Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 79253) [j_x's Google reader]
                    Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 35279) [Ars Technica]
                    Index Entry: 6 (Parent index: 1, Depth: 2, Offset: 2415, Size: 2764) [Week in Apple: ZFS on Mac OS X, rogue tethering, DUI apps, and more]
            TBS Type: 110 (6)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown (byte: always 2?): 2
            Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
            Number of article nodes in the record (byte): 2
        If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record.
        Starting record with two section transitions::
            Record #1: Starts at: 0 Ends at: 4095
                Contains: 7 index entries (0 ends, 4 complete, 3 starts)
            TBS bytes: 86 80 2 c0 b8 c4 3
                Complete:
                    Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
                    Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
                    Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review]
                    Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
                Starts:
                    Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader]
                    Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net]
                    Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
            TBS Type: 110 (6)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown (byte: always 2?): 2
            Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
            Remaining bytes: b8 c4 3
        Starting record with three section transitions::
            Record #1: Starts at: 0 Ends at: 4095
                Contains: 10 index entries (0 ends, 7 complete, 3 starts)
            TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4
                Complete:
                    Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
                    Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net]
                    Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
                    Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review]
                    Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results]
                    Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
                    Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
                Starts:
                    Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader]
                    Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews]
                    Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass]
            TBS Type: 110 (6)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown (byte: always 2?): 2
            Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
            Remaining bytes: b8 c0 b8 c4 4
 Records with no nodes
 ------------------------
 subtype = 010
 These records are spanned by a single article. They are of two types:
    1. If the parent section index is 1, TBS type of 6, like this::
            Record #4: Starts at: 12288 Ends at: 16383
                Contains: 0 index entries (0 ends, 0 complete, 0 starts)
            TBS bytes: 86 80 2 c1 80
            TBS Type: 110 (6)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown (byte: always 2?): 2
            Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
            EOF (vwi: should be 0): 0
        If the record is before the first article, the TBS bytes would be: 86 80 2
    2. If the parent section index is > 1, TBS type of 2, like this::
            Record #14: Starts at: 53248 Ends at: 57343
                Contains: 0 index entries (0 ends, 0 complete, 0 starts)
            TBS bytes: 82 80 a0 1 e1 80
            TBS Type: 010 (2)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Parent section index (fvwi): 2
            Flags: 0
            Article index at start of record or first article index, relative to parent section (fvwi): 14 [16 absolute]
            EOF (vwi: should be 0): 0
 Records with only article nodes
 -----------------------------------
 Such records have no section transitions (i.e. a section end/section start pair). They have only one or more article nodes. They are of two types:
    1. If the parent section index is 1, TBS type of 7, like this::
            Record #6: Starts at: 20480 Ends at: 24575
                Contains: 2 index entries (1 ends, 0 complete, 1 starts)
            TBS bytes: 87 80 2 80 1 84 2
                Ends:
                    Index Entry: 9 (Parent index: 1, Depth: 2, Offset: 16453, Size: 4199) [Vaccine's success spurs whooping cough comeback]
                Starts:
                    Index Entry: 10 (Parent index: 1, Depth: 2, Offset: 20652, Size: 4246) [Apple's mobile products do not violate Nokia patents, says ITC]
            TBS Type: 111 (7)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Unknown: '\x02\x80' (vwi?: Always 256)
            Article at start of record (fvwi): 8
            Number of articles in record (byte): 2
        If there was only one article in the record, the last two bytes would be replaced by a single byte: 80
        If this record is the first record with an article, then the article at the start of the record should be the last section index. At least, that's what kindlegen does, though if you ask me, it should be the first section index.
    2. If the parent section index is > 1, TBS type of 2, like this::
            Record #16: Starts at: 61440 Ends at: 65535
                Contains: 5 index entries (1 ends, 3 complete, 1 starts)
            TBS bytes: 82 80 a1 80 1 f4 5
                Ends:
                    Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 60920, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
                Complete:
                    Index Entry: 18 (Parent index: 2, Depth: 2, Offset: 62002, Size: 1016) [Rumour: OS X Lion nearing Golden Master stage]
                    Index Entry: 19 (Parent index: 2, Depth: 2, Offset: 63018, Size: 1045) [iOS 4.3.1 released]
                    Index Entry: 20 (Parent index: 2, Depth: 2, Offset: 64063, Size: 972) [Windows 8 'system reset' image leaks]
                Starts:
                    Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 65035, Size: 1057) [Windows Phone 7: Why it's failing]
            TBS Type: 010 (2)
            Outer Index entry: 0
            Unknown (vwi: always 0?): 0
            Parent section index (fvwi) : 2
            Flags: 1
            Unknown (vwi: always 0?): 0
            Article index at start of record or first article index, relative to parent section (fvwi): 15 [17 absolute]
            Number of article nodes in the record (byte): 5
        If there was only one article in the record, the last two bytes would be replaced by a single byte: f0
 Records with a section transition
 -----------------------------------
 In such a record there is a transition from one section to the next. As such the record must have at least one article ending and one article starting, except in the case of the first section.
    1. The first section::
        Record #2: Starts at: 4096 Ends at: 8191
            Contains: 2 index entries (0 ends, 0 complete, 2 starts)
        TBS bytes: 83 80 80 90 c0
            Starts:
                Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
                Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 7766, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
        TBS Type: 011 (3)
        Outer Index entry: 0
        Unknown (vwi: always 0?): 0
        Unknown (vwi: always 0?): 0
        First section index (fvwi) : 1
        Extra bits: 0
        First section starts
        Article at start of block as offset from parent index (fvwi): 4 [5 absolute]
        Flags: 0
    If there was more than one article at the start then the last byte would be replaced by: c4 n where n is the number of articles
    2. A record with a section transition and only one article from the ending section::
        Record #9: Starts at: 32768 Ends at: 36863
            Contains: 6 index entries (2 ends, 2 complete, 2 starts)
        TBS bytes: 83 80 80 90 1 d0 1 c8 1 d4 3
            Ends:
                Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
                Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
            Complete:
                Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 1014) [Max and the Magic Marker for iPad: Review]
                Index Entry: 16 (Parent index: 2, Depth: 2, Offset: 35059, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
            Starts:
                Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 10368) [Neowin.net]
                Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 36136, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
        TBS Type: 011 (3)
        Outer Index entry: 0
        Unknown (vwi: always 0?): 0
        Unknown (vwi: always 0?): 0
        First section index (fvwi): 1
        Extra bits (flag: always 0?): 0
        First article of ending section, relative to its parent's index (fvwi): 13 [14 absolute]
        Last article of ending section w.r.t. starting section offset (fvwi): 12 [14 absolute]
        Flags (always 8?): 8
        Article index at start of record or first article index, relative to parent section (fvwi): 13 [15 absolute]
        Number of article nodes in the record (byte): 3
    3. A record with a section transition and more than one article from the ending section::
        Record #11: Starts at: 40960 Ends at: 45055
            Contains: 7 index entries (2 ends, 3 complete, 2 starts)
        TBS bytes: 83 80 80 a0 2 b5 4 1a f5 2 d8 2 e0
            Ends:
                Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 10368) [Neowin.net]
                Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 40251, Size: 1057) [Windows Phone 7: Why it's failing]
            Complete:
                Index Entry: 22 (Parent index: 2, Depth: 2, Offset: 41308, Size: 1050) [RIM announces Android app support for Blackberry Playbook]
                Index Entry: 23 (Parent index: 2, Depth: 2, Offset: 42358, Size: 1087) [Microsoft buys $7.5m worth of IPv4 addresses]
                Index Entry: 24 (Parent index: 2, Depth: 2, Offset: 43445, Size: 960) [TechSpot: Apple iPad 2 Review]
            Starts:
                Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 44405, Size: 6829) [OSNews]
                Index Entry: 25 (Parent index: 3, Depth: 2, Offset: 44413, Size: 760) [OSnews Asks on Interrupts: The Results]
        TBS Type: 011 (3)
        Outer Index entry: 0
        Unknown (vwi: always 0?): 0
        Unknown (vwi: always 0?): 0
        First section index (fvwi): 2
        Extra bits (flag: always 0?): 0
        First article of ending section, relative to its parent's index (fvwi): 19 [21 absolute]
        Number of article nodes in the record (byte): 4
        ->Offset from start of record to beginning of last starting section in this record (vwi): 3445
        Last article of ending section w.r.t. starting section offset (fvwi): 21 [24 absolute]
        Flags (always 8?): 8
        Article index at start of record or first article index, relative to parent section (fvwi): 22 [25 absolute]
    The difference to the previous case is the extra two bytes that encode the offset of the opening section from the start of the record.
    4. A record with multiple section transitions::
        Record #9: Starts at: 32768 Ends at: 36863
            Contains: 9 index entries (2 ends, 5 complete, 2 starts)
        TBS bytes: 83 80 80 90 1 d0 1 c8 1 d1 c b1 1 c8 1 d4 4
            Ends:
                Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
                Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
            Complete:
                Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
                Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
                Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 760) [OSnews Asks on Interrupts: The Results]
                Index Entry: 17 (Parent index: 3, Depth: 2, Offset: 35121, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
                Index Entry: 18 (Parent index: 3, Depth: 2, Offset: 35814, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
            Starts:
                Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 6829) [OSNews]
                Index Entry: 19 (Parent index: 3, Depth: 2, Offset: 36561, Size: 666) [Transparent Monitor Embedded in Window Glass]
        TBS Type: 011 (3)
        Outer Index entry: 0
        Unknown (vwi: always 0?): 0
        Unknown (vwi: always 0?): 0
        First section index (fvwi): 1
        Extra bits (flag: always 0?): 0
        First article of ending section, relative to its parent's index (fvwi): 13 [14 absolute]
        Last article of ending section w.r.t. starting section offset (fvwi): 12 [14 absolute]
        Flags (always 8?): 8
        Article index at start of record or first article index, relative to parent section (fvwi): 13 [15 absolute]
        ->Offset from start of record to beginning of next starting section in this record: 1585
        Last article of ending section w.r.t. starting section offset (fvwi): 12 [15 absolute]
        Flags (always 8?): 8
        Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute]
        Number of article nodes in the record belonging to the last section (byte): 4
 Ending record
 ----------------
 Logically, ending records must have at least one article ending, one section ending and the periodical ending. They are of TBS type 2, like this::
    Record #17: Starts at: 65536 Ends at: 68684
        Contains: 4 index entries (3 ends, 1 complete, 0 starts)
    TBS bytes: 82 80 c0 4 f4 2
        Ends:
            Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
            Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 51234, Size: 17451) [Slashdot]
            Index Entry: 43 (Parent index: 4, Depth: 2, Offset: 65422, Size: 1717) [US ITC May Reverse Judge&#39;s Ruling In Kodak vs. Apple]
        Complete:
            Index Entry: 44 (Parent index: 4, Depth: 2, Offset: 67139, Size: 1546) [Google Starts Testing Google Music Internally]
    TBS Type: 010 (2)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Parent section index (fvwi): 4
    Flags: 0
    Article at start of block as offset from parent index (fvwi): 39 [43 absolute]
    Number of nodes (byte): 2
 If the record had only a single article end, the last two bytes would be replaced with: f0
 If the last record has multiple section transitions, it is of type 6 and looks like::
    Record #9: Starts at: 32768 Ends at: 34953
        Contains: 9 index entries (3 ends, 6 complete, 0 starts)
    TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
        Ends:
            Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader]
            Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
            Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
        Complete:
            Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
            Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews]
            Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot]
            Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
            Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results]
            Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute]
    Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
--- a/src/calibre/ebooks/mobi/writer2/utils.py
+++ b/src/calibre/ebooks/mobi/writer2/utils.py
@ -11,6 +11,7 @@ import struct
 from collections import OrderedDict
 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
 from calibre.ebooks import normalize
 IMAGE_MAX_SIZE = 10 * 1024 * 1024
@ -39,7 +40,10 @@ def encode_number_as_hex(num):
    The bytes that follow are simply the hexadecimal representation of the
    number.
    '''
-    num = bytes(hex(num)[2:])
+    num = bytes(hex(num)[2:].upper())
    nlen = len(num)
    if nlen % 2 != 0:
        num = b'0'+num
    ans = bytearray(num)
    ans.insert(0, len(num))
    return bytes(ans)
@ -65,11 +69,14 @@ def encint(value, forward=True):
    If forward is True the bytes returned are suitable for prepending to the
    output buffer, otherwise they must be append to the output buffer.
    '''
    if value < 0:
        raise ValueError('Cannot encode negative numbers as vwi')
    # Encode vwi
    byts = bytearray()
    while True:
        b = value & 0b01111111
        value >>= 7 # shift value to the right by 7 bits
        byts.append(b)
        if value == 0:
            break
@ -79,7 +86,7 @@ def encint(value, forward=True):
 def decint(raw, forward=True):
    '''
-    Read a variable width integer from the bytestring raw and return the
+    Read a variable width integer from the bytestring or bytearray raw and return the
    integer and the number of bytes read. If forward is True bytes are read
    from the start of raw, otherwise from the end of raw.
@ -88,8 +95,10 @@ def decint(raw, forward=True):
    '''
    val = 0
    byts = bytearray()
-    for byte in raw if forward else reversed(raw):
+    src = bytearray(raw)
-        bnum = ord(byte)
+    if not forward:
        src.reverse()
    for bnum in src:
        byts.append(bnum & 0b01111111)
        if bnum & 0b10000000:
            break
@ -161,7 +170,7 @@ def get_trailing_data(record, extra_data_flags):
    '''
    data = OrderedDict()
    for i in xrange(16, -1, -1):
-        flag = 2**i
+        flag = 1 << i # 2**i
        if flag & extra_data_flags:
            if i == 0:
                # Only the first two bits are used for the size since there can
@ -175,3 +184,151 @@ def get_trailing_data(record, extra_data_flags):
            record = record[:-sz]
    return data, record
 def encode_trailing_data(raw):
    '''
    Append a self-describing size suffix to raw and return the result.
    The returned bytestring has the form
        <data><size>
    where data is raw, unchanged, and size is a backwards encoded vwi holding
    the length of the whole returned bytestring, i.e. data plus the size
    field itself. Text records use this layout for their trailing data
    entries, see get_trailing_data() for details.
    '''
    payload_len = len(raw)
    # The size field counts its own bytes, so its width has to be guessed and
    # the guess widened until the encoded value really has that width.
    width = 1
    suffix = encint(payload_len + width, forward=False)
    while len(suffix) != width:
        width += 1
        suffix = encint(payload_len + width, forward=False)
    return raw + suffix
 def encode_fvwi(val, flags, flag_size=4):
    '''
    Pack val together with the lowest flag_size bits of flags into a single
    number (val in the high bits, the flag bits below it) and return that
    number encoded as a forward vwi bytestring. This fvwi encoding is used in
    the trailing byte sequences for indexing.
    '''
    # Only the lowest flag_size bits of flags are kept
    low_bits = flags & ((1 << flag_size) - 1)
    return encint((val << flag_size) | low_bits)
 def decode_fvwi(byts, flag_size=4):
    '''
    Inverse of encode_fvwi: read one forward vwi from the start of byts and
    split it into its value and flag parts. Returns the tuple
    (number, flags, consumed) where consumed is the number of bytes read.
    '''
    packed, consumed = decint(bytes(byts))
    # The lowest flag_size bits are the flags, everything above is the value
    flag_bits = packed & ((1 << flag_size) - 1)
    return packed >> flag_size, flag_bits, consumed
 def decode_tbs(byts, flag_size=4):
    '''
    Decode one entry of a trailing byte sequence used for indexing.
    An entry starts with an fvwi number (see encode_fvwi above). The flag
    bits of that number then say which additional values follow it:
        0b1000 - no extra data, only recorded as True (needs flag_size > 3)
        0b0010 - a vwi
        0b0100 - a single byte
        0b0001 - a vwi
    The additional values are read in the order 0b0010, 0b0100, 0b0001.
    Returns the fvwi number, a dictionary mapping flag bits to the associated
    data and the total number of bytes consumed.
    '''
    remaining = bytes(byts)
    val, flags, consumed = decode_fvwi(remaining, flag_size=flag_size)
    remaining = remaining[consumed:]
    extra = {}
    if flag_size > 3 and flags & 0b1000:
        extra[0b1000] = True
    for bit in (0b0010, 0b0100, 0b0001):
        if not flags & bit:
            continue
        if bit == 0b0100:
            # This flag is followed by one raw byte rather than a vwi
            datum, used = ord(remaining[0]), 1
        else:
            datum, used = decint(remaining)
        extra[bit] = datum
        remaining = remaining[used:]
        consumed += used
    return val, extra, consumed
 def encode_tbs(val, extra, flag_size=4):
    '''
    Encode the number val as an fvwi whose flags are the union of the keys of
    the extra dict, followed by the values from extra. The values are written
    in the order 0b0010 (vwi), 0b0100 (single byte), 0b0001 (vwi), which is
    the order decode_tbs above reads them in. Returns the encoded bytestring.
    '''
    combined_flags = 0
    for bit in extra:
        combined_flags |= bit
    parts = [encode_fvwi(val, combined_flags, flag_size=flag_size)]
    for bit in (0b0010, 0b0100, 0b0001):
        if bit not in extra:
            continue
        datum = extra[bit]
        if bit == 0b0100:
            # Stored as one raw byte, not as a vwi
            parts.append(bytes(bytearray([datum])))
        else:
            parts.append(encint(datum))
    return b''.join(parts)
 def utf8_text(text):
    '''
    Return text as a non empty, normalized, utf-8 encoded bytestring.
    Surrounding whitespace is stripped. Bytestrings are decoded as utf-8 with
    undecodable bytes replaced. If text is null, empty or only whitespace the
    translated string 'Unknown' is returned instead.
    '''
    stripped = text.strip() if text else text
    if not stripped:
        return _('Unknown').encode('utf-8')
    if not isinstance(stripped, unicode):
        stripped = stripped.decode('utf-8', 'replace')
    return normalize(stripped).encode('utf-8')
 def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with as many copies of pad appended as are needed to bring its
    length up to the next multiple of `multiple` (4 by default). If the
    length is already a multiple, raw is returned unchanged.
    '''
    shortfall = -len(raw) % multiple
    return raw + pad*shortfall if shortfall else raw
 def detect_periodical(toc, log=None):
    '''
    Return True if every node of the TOC object toc has the class kindlegen
    requires for a periodical at its depth, False otherwise:
        depth 1 - class "article"
        depth 2 - class "section"
        depth 3 - class "periodical"
    and no node may have a depth greater than 3. If log is given, the reason
    for rejecting the TOC is written to it at debug level.
    '''
    def reject(reason):
        if log is not None:
            log.debug(reason)
        return False
    for entry in toc.iterdescendants():
        level = entry.depth()
        if level == 1:
            if entry.klass != 'article':
                return reject('Not a periodical: Deepest node does not have '
                    'class="article"')
        elif level == 2:
            if entry.klass != 'section':
                return reject('Not a periodical: Second deepest node does '
                    'not have class="section"')
        elif level == 3:
            if entry.klass != 'periodical':
                return reject('Not a periodical: Third deepest node does '
                    'not have class="periodical"')
        elif level > 3:
            return reject('Not a periodical: Has nodes of depth > 3')
    return True
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -1231,6 +1231,9 @@ class MobiWriter(object):
            self._oeb.logger.info('  Compressing markup content...')
        data, overlap = self._read_text_record(text)
        if not self.opts.mobi_periodical:
            self._flatten_toc()
        # Evaluate toc for conformance
        if self.opts.mobi_periodical :
            self._oeb.logger.info('  MOBI periodical specified, evaluating TOC for periodical conformance ...')
@ -1697,6 +1700,32 @@ class MobiWriter(object):
    # Index {{{
    def _flatten_toc(self):
        '''
        Flatten and re-order entries in TOC so that chapter to chapter jumping
        never fails on the Kindle.

        Replaces self._oeb.toc with a new, single level TOC that contains one
        'chapter' entry per distinct text offset, ordered by offset. Entries
        without an href, or whose href has no known offset in
        self._id_offsets, are dropped. If the mobi_navpoints_only_deepest
        option is set, only the deepest entries (depth() == 1) are kept.
        '''
        from calibre.ebooks.oeb.base import TOC
        items = list(self._oeb.toc.iterdescendants())
        if self.opts.mobi_navpoints_only_deepest:
            # depth is a method on TOC nodes; comparing the bound method
            # itself to 1 is always False and would discard every entry
            items = [i for i in items if i.depth() == 1]
        offsets = {i:self._id_offsets.get(i.href, -1) for i in items if i.href}
        # Entries without an href were never added to offsets, so look them up
        # with a default instead of indexing, which would raise KeyError
        items = [i for i in items if offsets.get(i, -1) > -1]
        items.sort(key=lambda i:offsets[i])
        # Keep only the first entry pointing at any given offset
        filt = []
        seen = set()
        for i in items:
            off = offsets[i]
            if off in seen: continue
            seen.add(off)
            filt.append(i)
        items = filt
        newtoc = TOC()
        for c, i in enumerate(items):
            newtoc.add(i.title, i.href, play_order=c+1, id=str(c),
                    klass='chapter')
        self._oeb.toc = newtoc
    def _generate_index(self):
        self._oeb.log('Generating INDX ...')
        self._primary_index_record = None
--- a/src/calibre/ebooks/mobi/writer2/init.py
+++ b/src/calibre/ebooks/mobi/writer2/init.py
@ -12,4 +12,5 @@ UNCOMPRESSED = 1
 PALMDOC = 2
 HUFFDIC = 17480
 PALM_MAX_IMAGE_SIZE = 63 * 1024
 RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@ -0,0 +1,727 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 from future_builtins import filter
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from struct import pack
 from cStringIO import StringIO
 from collections import OrderedDict, defaultdict
 from calibre.ebooks.mobi.writer2 import RECORD_SIZE
 from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
        encode_tbs, align_block, utf8_text, detect_periodical)
 class CNCX(object): # {{{
    '''
    Create the CNCX records. These are records containing all the strings from
    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
    string>
    After construction, records is the list of CNCX records (bytestrings) and
    indexing the object with a string, e.g. cncx[item.title], returns the
    offset that was assigned to that string.
    '''
    MAX_STRING_LENGTH = 500
    def __init__(self, toc, is_periodical):
        '''
        Collect the title (and, if is_periodical is True, also the klass) of
        every node in toc, in breadth first order, and serialize the distinct
        strings into self.records.
        '''
        self.strings = OrderedDict()
        for item in toc.iterdescendants(breadth_first=True):
            self.strings[item.title] = 0
            if is_periodical:
                self.strings[item.klass] = 0
        self.records = []
        offset = 0
        buf = StringIO()
        for key in tuple(self.strings.iterkeys()):
            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
            l = len(utf8)
            sz_bytes = encint(l)
            raw = sz_bytes + utf8
            if 0xfbf8 - buf.tell() < 6 + len(raw):
                # Records in PDB files cannot be larger than 0x10000, so we
                # stop well before that.
                # The amount of padding is measured on buf, the record being
                # built (there is no self._ctoc attribute on this class)
                pad = 0xfbf8 - buf.tell()
                buf.write(b'\0' * pad)
                self.records.append(buf.getvalue())
                buf.truncate(0)
                # Offsets into the n-th record start at n * 0x10000
                offset = len(self.records) * 0x10000
            buf.write(raw)
            self.strings[key] = offset
            offset += len(raw)
        self.records.append(align_block(buf.getvalue()))
    def __getitem__(self, string):
        '''Return the offset assigned to string. Raises KeyError if string was
        not in the TOC.'''
        return self.strings[string]
 # }}}
 class IndexEntry(object): # {{{
    '''
    A single entry in the index. Holds the values for the tags listed in
    TAG_VALUES and knows how to serialize itself (bytestring) and how to
    build the TAGX block that describes the tags (tagx_block).
    '''
    # Attribute name -> tag number
    TAG_VALUES = {
            'offset': 1,
            'size': 2,
            'label_offset': 3,
            'depth': 4,
            'class_offset': 5,
            'parent_index': 21,
            'first_child_index': 22,
            'last_child_index': 23,
    }
    # Tag number -> attribute name
    RTAG_MAP = {num:name for name, num in TAG_VALUES.iteritems()}
    # The position of a tag in this list is the position of its bit in the
    # entry type byte and in the TAGX bitmask
    BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,]
    def __init__(self, offset, label_offset, depth=0, class_offset=None):
        self.offset = offset
        self.label_offset = label_offset
        self.depth = depth
        self.class_offset = class_offset
        self.length = 0
        self.index = 0
        # Optional tags, only serialized when they are not None
        self.parent_index = None
        self.first_child_index = None
        self.last_child_index = None
    def __repr__(self):
        fields = (self.offset, self.depth, self.length, self.index,
                self.parent_index)
        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
                ' parent_index=%r)')%fields
    @dynamic_property
    def size(self):
        def read_length(self): return self.length
        def write_length(self, val): self.length = val
        return property(fget=read_length, fset=write_length,
                doc='Alias for length')
    @classmethod
    def tagx_block(cls, for_periodical=True):
        '''
        Return the TAGX block: the TAGX header followed by one four byte
        (tag, number of values, bitmask, eof) row per tag and an end of table
        row. Tags 5, 21, 22 and 23 are only included if for_periodical is
        True.
        '''
        tags = [1, 2, 3, 4]
        if for_periodical:
            tags.extend((5, 21, 22, 23))
        table = bytearray()
        for tag in tags:
            # tag, number of values, bitmask, eof
            table.extend((tag, 1, 1 << cls.BITMASKS.index(tag), 0))
        # End of TAGX record
        table.extend((0, 0, 0, 1))
        table_length = pack(b'>I', 12+len(table))
        control_byte_count = pack(b'>I', 1)
        return b'TAGX' + table_length + control_byte_count + bytes(table)
    @property
    def next_offset(self):
        '''The offset just past the end of this entry.'''
        return self.offset + self.length
    @property
    def tag_nums(self):
        '''
        The numbers of the tags present in this entry: tags 1 to 4 always,
        followed by the tag of every optional attribute that is not None.
        '''
        for tag in (1, 2, 3, 4):
            yield tag
        optional = ('class_offset', 'parent_index', 'first_child_index',
                'last_child_index')
        for name in optional:
            if getattr(self, name) is not None:
                yield self.TAG_VALUES[name]
    @property
    def entry_type(self):
        '''Bit field with one bit set for every tag present in this entry.'''
        present = set(self.tag_nums)
        ans = 0
        for position, tag in enumerate(self.BITMASKS):
            if tag in present:
                ans |= 1 << position # 1 << x == 2**x
        return ans
    @property
    def bytestring(self):
        '''
        The serialized entry: the index as a hex number, the entry type byte
        and then the value of every present tag as a vwi.
        '''
        parts = [encode_number_as_hex(self.index)]
        parts.append(bytes(bytearray([self.entry_type])))
        for tag in self.tag_nums:
            parts.append(encint(getattr(self, self.RTAG_MAP[tag])))
        return b''.join(parts)
 # }}}
 class TBS(object): # {{{
    '''
    Take the list of index nodes starting/ending on a record and calculate the
    trailing byte sequence for the record.
    The result is available as the bytestring attribute.
    '''
    def __init__(self, data, is_periodical, first=False, section_map=None,
            after_first=False):
        '''
        :param data: Either an empty dict (no index node touches the record)
            or a dict with the keys 'starts', 'ends' and 'completes' (lists
            of index entries), 'spans' (an index entry or None) and 'offset'.
        :param is_periodical: If True a periodical TBS is generated,
            otherwise a book TBS
        :param first: Passed on to periodical_tbs()/book_tbs()
        :param section_map: Mapping of section index to section index entry
        :param after_first: Only used for periodicals when data is empty,
            see the comment below
        '''
        # Do not use a dict literal as the default value for section_map, it
        # would be one object shared by every instance
        self.section_map = {} if section_map is None else section_map
        #import pprint
        #pprint.pprint(data)
        #print()
        if is_periodical:
            # The starting bytes.
            # The value is zero which I think indicates the periodical
            # index entry. The values for the various flags seem to be
            # unused. If the 0b100 is present, it means that the record
            # deals with section 1 (or is the final record with section
            # transitions).
            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
                    flag_size=3)
            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
                    flag_size=3)
            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001:
                0}, flag_size=3)
            if not data:
                byts = b''
                if after_first:
                    # This can happen if a record contains only text between
                    # the periodical start and the first section
                    byts = self.type_011
                self.bytestring = byts
            else:
                # Group all nodes that start and/or end in this record by
                # depth, each group ordered by offset
                depth_map = defaultdict(list)
                for x in ('starts', 'ends', 'completes'):
                    for idx in data[x]:
                        depth_map[idx.depth].append(idx)
                for l in depth_map.itervalues():
                    l.sort(key=lambda x:x.offset)
                self.periodical_tbs(data, first, depth_map)
        else:
            if not data:
                self.bytestring = b''
            else:
                self.book_tbs(data, first)
    def periodical_tbs(self, data, first, depth_map):
        '''
        Calculate the trailing byte sequence for a record of a periodical and
        store it in self.bytestring. depth_map maps depth (0 is the
        periodical, 1 sections, 2 articles) to the list of nodes of that
        depth that start and/or end in the record.
        '''
        buf = StringIO()
        has_section_start = (depth_map[1] and
                set(depth_map[1]).intersection(set(data['starts'])))
        spanner = data['spans']
        parent_section_index = -1
        if depth_map[0]:
            # We have a terminal record
            # Find the first non periodical node
            first_node = None
            for nodes in (depth_map[1], depth_map[2]):
                for node in nodes:
                    if (first_node is None or (node.offset, node.depth) <
                            (first_node.offset, first_node.depth)):
                        first_node = node
            typ = (self.type_110 if has_section_start else self.type_010)
            # parent_section_index is needed for the last record
            if first_node is not None and first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth
                        == 1 else first_node.parent_index)
            else:
                parent_section_index = max(self.section_map.iterkeys())
        else:
            # Non terminal record
            if spanner is not None:
                # record is spanned by a single article
                parent_section_index = spanner.parent_index
                typ = (self.type_110 if parent_section_index == 1 else
                        self.type_010)
            elif not depth_map[1]:
                # has only article nodes, i.e. spanned by a section
                parent_section_index = depth_map[2][0].parent_index
                typ = (self.type_111 if parent_section_index == 1 else
                        self.type_010)
            else:
                # has section transitions
                if depth_map[2]:
                    parent_section_index = depth_map[2][0].parent_index
                else:
                    parent_section_index = depth_map[1][0].index
                typ = self.type_011
        buf.write(typ)
        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
            extra = {}
            # Write starting section information
            if spanner is None:
                num_articles = len([a for a in depth_map[1] if a.parent_index
                    == parent_section_index])
                if not depth_map[1]:
                    extra = {0b0001: 0}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            buf.write(encode_tbs(parent_section_index, extra))
        if spanner is None:
            articles = depth_map[2]
            sections = set([self.section_map[a.parent_index] for a in
                articles])
            sections = sorted(sections, key=lambda x:x.offset)
            section_map = {s:[a for a in articles if a.parent_index ==
                s.index] for s in sections}
            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = section_map[section]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)
                # Only running off the end of the list means that there is no
                # next section, any other error should not be hidden
                try:
                    next_sec = sections[i+1]
                except IndexError:
                    next_sec = None
                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if False and i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # I can't figure out exactly when Kindlegen decides to
                    # write this so I have disabled it for now.
                    extra[0b0001] = next_sec.offset - data['offset']
                buf.write(encode_tbs(first_article.index-section.index, extra))
                if next_sec is not None:
                    buf.write(encode_tbs(last_article.index-next_sec.index,
                        {0b1000: 0}))
        else:
            buf.write(encode_tbs(spanner.index - parent_section_index,
                {0b0001: 0}))
        self.bytestring = buf.getvalue()
    def book_tbs(self, data, first):
        '''
        Trailing byte sequences are not generated for books, the bytestring
        is always empty.
        '''
        self.bytestring = b''
 # }}}
 class Indexer(object): # {{{
    '''
    Generate the index for a MOBI file from the TOC of an OEB book.
    After construction:
        records - list of bytestrings: the index header record, the index
                  record and then the CNCX records, in that order
        indices - list of the IndexEntry objects in the index
        tbs_map - maps text record number (starting at 1) to the TBS object
                  for that record, see get_trailing_byte_sequence()
    '''
    def __init__(self, serializer, number_of_text_records,
            size_of_last_text_record, opts, oeb):
        '''
        :param serializer: Object providing id_offsets, body_start_offset and
            body_end_offset for the serialized text
        :param number_of_text_records: The number of text records
        :param size_of_last_text_record: Size of the last text record, all
            other text records are taken to be RECORD_SIZE in size
        :param opts: Conversion options (only stored)
        :param oeb: The OEB book, its toc and log are used
        '''
        self.serializer = serializer
        self.number_of_text_records = number_of_text_records
        self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) +
                            size_of_last_text_record)
        self.oeb = oeb
        self.log = oeb.log
        self.opts = opts
        self.is_periodical = detect_periodical(self.oeb.toc, self.log)
        self.log('Generating MOBI index for a %s'%('periodical' if
            self.is_periodical else 'book'))
        self.is_flat_periodical = False
        if self.is_periodical:
            # The first node in the TOC is taken to be the periodical itself
            # and its children to be the sections
            periodical_node = iter(oeb.toc).next()
            sections = tuple(periodical_node)
            self.is_flat_periodical = len(sections) == 1
        self.records = []
        self.cncx = CNCX(oeb.toc, self.is_periodical)
        if self.is_periodical:
            self.indices = self.create_periodical_index()
        else:
            self.indices = self.create_book_index()
        # The header is created after the index record, as it stores
        # len(self.records), but it is placed before it
        self.records.append(self.create_index_record())
        self.records.insert(0, self.create_header())
        self.records.extend(self.cncx.records)
        self.calculate_trailing_byte_sequences()
    def create_index_record(self): # {{{
        '''
        Return the index record: an INDX header of header_length bytes, the
        serialized index entries and an IDXT block with the offset of every
        entry from the start of the record.
        Raises ValueError if the record is larger than 0x10000 bytes.
        '''
        header_length = 192
        buf = StringIO()
        indices = self.indices
        # Write index entries
        offsets = []
        for i in indices:
            offsets.append(buf.tell())
            buf.write(i.bytestring)
        index_block = align_block(buf.getvalue())
        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        buf.truncate(0)
        for offset in offsets:
            buf.write(pack(b'>H', header_length+offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block
        # The header: the 4 byte ident plus the 188 bytes written to buf below
        # add up to header_length
        header = b'INDX'
        buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0'*4) # Unknown
        buf.write(pack(b'>I', 1)) # Header type? Or index record number?
        buf.write(b'\0'*4) # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff'*8)
        # Unknown
        buf.write(b'\0'*156)
        header += buf.getvalue()
        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC'%len(offsets))
        return ans
    # }}}
    def create_header(self): # {{{
        '''
        Return the index header record: an INDX header of header_length
        bytes, the TAGX block, the index of the last entry and the number of
        entries, and a single entry IDXT block. The numbers in the comments
        below are byte ranges in the header.
        '''
        buf = StringIO()
        tagx_block = IndexEntry.tagx_block(self.is_periodical)
        header_length = 192
        # Ident 0 - 4
        buf.write(b'INDX')
        # Header length 4 - 8
        buf.write(pack(b'>I', header_length))
        # Unknown 8-16
        buf.write(b'\0'*8)
        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))
        # IDXT offset 20-24
        buf.write(pack(b'>I', 0)) # Filled in later
        # Number of index records 24-28
        # NOTE(review): this is 1 when called from __init__, as only the
        # index record has been added to self.records at that point
        buf.write(pack(b'>I', len(self.records)))
        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001)) # utf-8
        # Unknown 32-36
        buf.write(b'\xff'*4)
        # Number of index entries 36-40
        buf.write(pack(b'>I', len(self.indices)))
        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))
        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))
        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))
        # Number of CNCX records 52-56
        buf.write(pack(b'>I', len(self.cncx.records)))
        # Unknown 56-180
        buf.write(b'\0'*124)
        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))
        # Unknown 184-192
        buf.write(b'\0'*8)
        # TAGX block
        buf.write(tagx_block)
        num = len(self.indices)
        # The index of the last entry in the NCX
        buf.write(encode_number_as_hex(num-1))
        # The number of entries in the NCX
        buf.write(pack(b'>H', num))
        # Padding
        pad = (4 - (buf.tell()%4))%4
        if pad:
            buf.write(b'\0'*pad)
        idxt_offset = buf.tell()
        buf.write(b'IDXT')
        # The only IDXT entry is the offset of the data written directly
        # after the TAGX block above
        buf.write(pack(b'>H', header_length + len(tagx_block)))
        buf.write(b'\0')
        # Go back and fill in the IDXT offset at 20-24
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))
        return align_block(buf.getvalue())
    # }}}
    def create_book_index(self): # {{{
        '''
        Return the list of index entries for a book: one entry per distinct
        offset pointed to by the TOC, sorted by offset, with empty entries
        removed. Every entry extends up to the start of the next entry, the
        last one up to the end of the body.
        '''
        indices = []
        seen = set()
        id_offsets = self.serializer.id_offsets
        for node in self.oeb.toc.iterdescendants():
            # NOTE(review): the bare except below swallows every error, not
            # only a failed lookup of node.href or node.title
            try:
                offset = id_offsets[node.href]
                label = self.cncx[node.title]
            except:
                self.log.warn('TOC item %s not found in document'%node.href)
                continue
            if offset in seen:
                continue
            seen.add(offset)
            index = IndexEntry(offset, label)
            indices.append(index)
        indices.sort(key=lambda x:x.offset)
        # Set lengths
        for i, index in enumerate(indices):
            # The except branch is taken for the last entry, which has no
            # successor (NOTE(review): bare except, presumably IndexError)
            try:
                next_offset = indices[i+1].offset
            except:
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset
        # Remove empty nodes
        indices = [i for i in indices if i.length > 0]
        # Set index values
        for i, index in enumerate(indices):
            index.index = i
        # Set lengths again to close up any gaps left by filtering
        for i, index in enumerate(indices):
            try:
                next_offset = indices[i+1].offset
            except:
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset
        return indices
    # }}}
    def create_periodical_index(self): # {{{
        '''
        Return the list of index entries for a periodical: the entry for the
        periodical itself (index 0), followed by all sections and then by all
        articles. Sections are numbered from 1 in order of offset, articles
        are numbered after the last section, section by section.
        Sections without articles and sections/articles without a known
        offset, with a duplicated offset or with a length of zero are left
        out. Raises ValueError if the resulting entries do not cover the text
        contiguously.
        '''
        periodical_node = iter(self.oeb.toc).next()
        periodical_node_offset = self.serializer.body_start_offset
        periodical_node_size = (self.serializer.body_end_offset -
                periodical_node_offset)
        normalized_sections = []
        id_offsets = self.serializer.id_offsets
        periodical = IndexEntry(periodical_node_offset,
                self.cncx[periodical_node.title],
                class_offset=self.cncx[periodical_node.klass])
        periodical.length = periodical_node_size
        periodical.first_child_index = 1
        seen_sec_offsets = set()
        seen_art_offsets = set()
        for sec in periodical_node:
            normalized_articles = []
            # NOTE(review): unlike create_book_index(), nodes that cannot be
            # looked up are skipped here without a warning (bare except)
            try:
                offset = id_offsets[sec.href]
                label = self.cncx[sec.title]
                klass = self.cncx[sec.klass]
            except:
                continue
            if offset in seen_sec_offsets:
                continue
            seen_sec_offsets.add(offset)
            section = IndexEntry(offset, label, class_offset=klass, depth=1)
            section.parent_index = 0
            for art in sec:
                try:
                    offset = id_offsets[art.href]
                    label = self.cncx[art.title]
                    klass = self.cncx[art.klass]
                except:
                    continue
                if offset in seen_art_offsets:
                    continue
                seen_art_offsets.add(offset)
                article = IndexEntry(offset, label, class_offset=klass,
                        depth=2)
                normalized_articles.append(article)
            if normalized_articles:
                normalized_articles.sort(key=lambda x:x.offset)
                normalized_sections.append((section, normalized_articles))
        normalized_sections.sort(key=lambda x:x[0].offset)
        # Set lengths
        for s, x in enumerate(normalized_sections):
            sec, normalized_articles = x
            # The except branches are taken for the last section/article,
            # which have no successor
            try:
                sec.length = normalized_sections[s+1][0].offset - sec.offset
            except:
                sec.length = self.serializer.body_end_offset - sec.offset
            for i, art in enumerate(normalized_articles):
                try:
                    art.length = normalized_articles[i+1].offset - art.offset
                except:
                    art.length = sec.offset + sec.length - art.offset
        # Filter
        for i, x in list(enumerate(normalized_sections)):
            sec, normalized_articles = x
            normalized_articles = list(filter(lambda x: x.length > 0,
                normalized_articles))
            normalized_sections[i] = (sec, normalized_articles)
        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
            normalized_sections))
        # Set indices
        i = 0
        for sec, articles in normalized_sections:
            i += 1
            sec.index = i
            sec.parent_index = 0
        for sec, articles in normalized_sections:
            for art in articles:
                i += 1
                art.index = i
                art.parent_index = sec.index
        for sec, normalized_articles in normalized_sections:
            sec.first_child_index = normalized_articles[0].index
            sec.last_child_index = normalized_articles[-1].index
        # Set lengths again to close up any gaps left by filtering
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_offset = normalized_sections[s+1][0].offset
            except:
                next_offset = self.serializer.body_end_offset
            sec.length = next_offset - sec.offset
            for a, art in enumerate(articles):
                try:
                    next_offset = articles[a+1].offset
                except:
                    next_offset = sec.next_offset
                art.length = next_offset - art.offset
        # Sanity check
        # Every section must end where the next one starts (the last one at
        # the end of the body) and likewise for the articles in a section
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_sec = normalized_sections[s+1][0]
            except:
                if (sec.length == 0 or sec.next_offset !=
                        self.serializer.body_end_offset):
                    raise ValueError('Invalid section layout')
            else:
                if next_sec.offset != sec.next_offset or sec.length == 0:
                    raise ValueError('Invalid section layout')
            for a, art in enumerate(articles):
                try:
                    next_art = articles[a+1]
                except:
                    if (art.length == 0 or art.next_offset !=
                            sec.next_offset):
                        raise ValueError('Invalid article layout')
                else:
                    if art.length == 0 or art.next_offset != next_art.offset:
                        raise ValueError('Invalid article layout')
        # Flatten
        indices = [periodical]
        for sec, articles in normalized_sections:
            indices.append(sec)
            periodical.last_child_index = sec.index
        for sec, articles in normalized_sections:
            for a in articles:
                indices.append(a)
        return indices
    # }}}
    # TBS {{{
    def calculate_trailing_byte_sequences(self):
        '''
        Create the TBS object for every text record and store it in
        self.tbs_map, keyed by record number (starting at 1). For every
        record the index entries are classified as starting in, ending in,
        completely contained in or (deepest entries only) spanning the
        record. Every text record is taken to be RECORD_SIZE bytes long.
        '''
        self.tbs_map = {}
        found_node = False
        sections = [i for i in self.indices if i.depth == 1]
        section_map = OrderedDict((i.index, i) for i in
                sorted(sections, key=lambda x:x.offset))
        # NOTE(review): max() raises ValueError if self.indices is empty
        deepest = max(i.depth for i in self.indices)
        for i in xrange(self.number_of_text_records):
            offset = i * RECORD_SIZE
            next_offset = offset + RECORD_SIZE
            data = {'ends':[], 'completes':[], 'starts':[],
                    'spans':None, 'offset':offset, 'record_number':i+1}
            for index in self.indices:
                if index.offset >= next_offset:
                    # Node starts after current record
                    # NOTE(review): stopping here presumably relies on the
                    # deepest entries being last in self.indices and sorted
                    # by offset - confirm
                    if index.depth == deepest:
                        break
                    else:
                        continue
                if index.next_offset <= offset:
                    # Node ends before current record
                    continue
                if index.offset >= offset:
                    # Node starts in current record
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['completes'].append(index)
                    else:
                        data['starts'].append(index)
                else:
                    # Node starts before current records
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['ends'].append(index)
                    elif index.depth == deepest:
                        data['spans'] = index
            if (data['ends'] or data['completes'] or data['starts'] or
                    data['spans'] is not None):
                self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not
                        found_node, section_map=section_map)
                found_node = True
            else:
                # No index entry touches this record
                self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False,
                        after_first=found_node, section_map=section_map)
    def get_trailing_byte_sequence(self, num):
        '''
        Return the trailing byte sequence (a bytestring) for the text record
        number num. Record numbers start at 1.
        '''
        return self.tbs_map[num].bytestring
    # }}}
 # }}}
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -17,8 +17,10 @@ from calibre.ebooks.mobi.writer2.serializer import Serializer
 from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.utils.filenames import ascii_filename
-from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
+from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
-from calibre.ebooks.mobi.writer2.utils import (rescale_image, encint)
+from calibre.ebooks.mobi.utils import (rescale_image, encint,
        encode_trailing_data, align_block)
 from calibre.ebooks.mobi.writer2.indexer import Indexer
 EXTH_CODES = {
    'creator': 100,
@ -27,7 +29,6 @@ EXTH_CODES = {
    'identifier': 104,
    'subject': 105,
    'pubdate': 106,
    'date': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
@ -39,9 +40,6 @@ EXTH_CODES = {
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False
 RECORD_SIZE = 0x1000 # 4096
 MAX_THUMB_SIZE = 16 * 1024
 MAX_THUMB_DIMEN = (180, 240)
@ -53,8 +51,10 @@ class MobiWriter(object):
        self.write_page_breaks_after_item = write_page_breaks_after_item
        self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
        self.prefer_author_sort = opts.prefer_author_sort
        self.last_text_record_idx = 1
    def __call__(self, oeb, path_or_stream):
        self.log = oeb.log
        if hasattr(path_or_stream, 'write'):
            return self.dump_stream(oeb, path_or_stream)
        with open(path_or_stream, 'w+b') as stream:
@ -79,9 +79,63 @@ class MobiWriter(object):
    def generate_content(self):
        self.map_image_names()
        self.generate_text()
-        # Image records come after text records
+        # Index records come after text records
        self.generate_index()
        self.write_uncrossable_breaks()
        # Image records come after index records
        self.generate_images()
    # Indexing {{{
    def generate_index(self):
        '''
        Build the MOBI index and append its records to ``self.records``.

        On success ``self.primary_index_record_idx`` is set to the position
        the first index record will occupy, the trailing byte sequence
        reported by the indexer is appended to every existing record except
        record 0, and finally the indexer's own records are appended.

        If the Indexer cannot be constructed the failure is logged and
        ``self.primary_index_record_idx`` is left as None, i.e. the book is
        written without an index.
        '''
        self.primary_index_record_idx = None
        try:
            self.indexer = Indexer(self.serializer, self.last_text_record_idx,
                    len(self.records[self.last_text_record_idx]),
                    self.opts, self.oeb)
        except Exception:
            # Indexing is best-effort, so ordinary errors are only logged.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            self.log.exception('Failed to generate MOBI index:')
            return

        self.primary_index_record_idx = len(self.records)
        # Record 0 never gets a trailing byte sequence, so start at 1
        for i in xrange(1, len(self.records)):
            tbs = self.indexer.get_trailing_byte_sequence(i)
            self.records[i] += encode_trailing_data(tbs)
        self.records.extend(self.indexer.records)
    @property
    def is_periodical(self):
        '''
        True when no primary index record was generated, otherwise the
        negation of ``self.indexer.is_periodical``.

        NOTE(review): this reads as inverted relative to the property name.
        The callers visible in this file (the EXTH cdetype ``EBOK`` entry and
        the EXTH flags) use the value as it is computed here, so confirm
        against them before changing it.
        '''
        if self.primary_index_record_idx is None:
            # No index was built; self.indexer must not be touched here
            return True
        return not self.indexer.is_periodical
    # }}}
    def write_uncrossable_breaks(self): # {{{
        '''
        Append information about uncrossable breaks (non linear items in the
        spine) to every text record.

        Does nothing unless WRITE_UNCROSSABLE_BREAKS is set. Break offsets are
        consumed (popped) from ``self.serializer.breaks`` as they are written.
        '''
        if WRITE_UNCROSSABLE_BREAKS:
            pending = self.serializer.breaks
            for rec_idx in xrange(1, self.last_text_record_idx+1):
                rec_offset = rec_idx * RECORD_SIZE
                cursor = rec_offset
                out = StringIO()
                # Take every break that lies less than RECORD_SIZE bytes past
                # this record's offset
                while pending and (pending[0] - rec_offset) < RECORD_SIZE:
                    # Stored as a distance from the previous position, in
                    # units of 8 bytes
                    delta = (pending.pop(0) - cursor) >> 3
                    out.write(encint(delta))
                    cursor += delta << 3
                self.records[rec_idx] += encode_trailing_data(out.getvalue())
    # }}}
    # Images {{{
    def map_image_names(self):
        '''
        Map image names to record indices, ensuring that the masthead image if
@ -120,65 +174,49 @@ class MobiWriter(object):
            if self.first_image_record is None:
                self.first_image_record = len(self.records) - 1
    def add_thumbnail(self, item):
        '''
        Create a thumbnail of ``item`` and store it as a new record.

        The image data is rescaled to fit MAX_THUMB_DIMEN / MAX_THUMB_SIZE,
        registered in the OEB manifest as a JPEG, recorded in ``self.images``
        and appended to ``self.records``.

        Returns the image index assigned to the thumbnail, or None (after
        logging a warning) if rescaling raised an IOError.
        '''
        try:
            thumb = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                    maxsizeb=MAX_THUMB_SIZE)
        except IOError:
            self.oeb.logger.warn('Bad image file %r' % item.href)
            return None

        mf = self.oeb.manifest
        thumb_id, thumb_href = mf.generate('thumbnail', 'thumbnail.jpeg')
        mf.add(thumb_id, thumb_href, 'image/jpeg', data=thumb)

        # Image indices are 1-based
        image_index = len(self.images) + 1
        self.images[thumb_href] = image_index
        self.records.append(thumb)
        return image_index
    # }}}
    # Text {{{
    def generate_text(self):
        self.oeb.logger.info('Serializing markup content...')
-        serializer = Serializer(self.oeb, self.images,
+        self.serializer = Serializer(self.oeb, self.images,
                write_page_breaks_after_item=self.write_page_breaks_after_item)
-        text = serializer()
+        text = self.serializer()
        breaks = serializer.breaks
        self.anchor_offset_kindle = serializer.anchor_offset_kindle
        self.id_offsets = serializer.id_offsets
        self.content_length = len(text)
        self.text_length = len(text)
        text = StringIO(text)
        buf = []
        nrecords = 0
        offset = 0
        if self.compression != UNCOMPRESSED:
            self.oeb.logger.info('  Compressing markup content...')
        data, overlap = self.read_text_record(text)
-        while len(data) > 0:
+        while text.tell() < self.text_length:
            data, overlap = self.read_text_record(text)
            if self.compression == PALMDOC:
                data = compress_doc(data)
            record = StringIO()
            record.write(data)
-            self.records.append(record.getvalue())
+            data += overlap
-            buf.append(self.records[-1])
+            data += pack(b'>B', len(overlap))
            self.records.append(data)
            nrecords += 1
            offset += RECORD_SIZE
            data, overlap = self.read_text_record(text)
-            # Write information about the mutibyte character overlap, if any
+        self.last_text_record_idx = nrecords
            record.write(overlap)
            record.write(pack(b'>B', len(overlap)))
            # Write information about uncrossable breaks (non linear items in
            # the spine)
            if WRITE_UNCROSSABLE_BREAKS:
                nextra = 0
                pbreak = 0
                running = offset
                # Write information about every uncrossable break that occurs in
                # the next record.
                while breaks and (breaks[0] - offset) < RECORD_SIZE:
                    pbreak = (breaks.pop(0) - running) >> 3
                    encoded = encint(pbreak)
                    record.write(encoded)
                    running += pbreak << 3
                    nextra += len(encoded)
                lsize = 1
                while True:
                    size = encint(nextra + lsize, forward=False)
                    if len(size) == lsize:
                        break
                    lsize += 1
                record.write(size)
        self.text_nrecords = nrecords + 1
    def read_text_record(self, text):
        '''
@ -230,25 +268,40 @@ class MobiWriter(object):
        return data, overlap
-    def generate_end_records(self):
+    # }}}
        self.flis_number = len(self.records)
        self.records.append('\xE9\x8E\x0D\x0A')
-    def generate_record0(self): # {{{
+    def generate_record0(self): #  MOBI header {{{
        metadata = self.oeb.metadata
        exth = self.build_exth()
        last_content_record = len(self.records) - 1
-        self.generate_end_records()
+        # FCIS/FLIS (Seem to serve no purpose)
        flis_number = len(self.records)
        self.records.append(
            b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
            b'\xff'*4)
        fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
        fcis += pack(b'>I', self.text_length)
        fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
        fcis_number = len(self.records)
        self.records.append(fcis)
        # EOF record
        self.records.append(b'\xE9\x8E\x0D\x0A')
        record0 = StringIO()
-        # The PalmDOC Header
+        # The MOBI Header
-        record0.write(pack(b'>HHIHHHH', self.compression, 0,
+        record0.write(pack(b'>HHIHHHH',
-            self.text_length,
+            self.compression, # compression type # compression type
-            self.text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
+            0, # Unused
            self.text_length, # Text length
            self.last_text_record_idx, # Number of text records or last tr idx
            RECORD_SIZE, # Text record size
            0, # Unused
            0  # Unused
        )) # 0 - 15 (0x0 - 0xf)
        uid = random.randint(0, 0xffffffff)
        title = normalize(unicode(metadata.title[0])).encode('utf-8')
        # The MOBI Header
        # 0x0 - 0x3
        record0.write(b'MOBI')
@ -264,13 +317,19 @@ class MobiWriter(object):
        # 0x10 - 0x13 : UID
        # 0x14 - 0x17 : Generator version
        bt = 0x002
        if self.primary_index_record_idx is not None:
            if self.indexer.is_flat_periodical:
                bt = 0x102
            elif self.indexer.is_periodical:
                bt = 0x103
        record0.write(pack(b'>IIIII',
-            0xe8, 0x002, 65001, uid, 6))
+            0xe8, bt, 65001, uid, 6))
        # 0x18 - 0x1f : Unknown
        record0.write(b'\xff' * 8)
        # 0x20 - 0x23 : Secondary index record
        record0.write(pack(b'>I', 0xffffffff))
@ -279,7 +338,7 @@ class MobiWriter(object):
        # 0x40 - 0x43 : Offset of first non-text record
        record0.write(pack(b'>I',
-            self.text_nrecords + 1))
+            self.last_text_record_idx + 1))
        # 0x44 - 0x4b : title offset, title length
        record0.write(pack(b'>II',
@ -289,13 +348,14 @@ class MobiWriter(object):
        record0.write(iana2mobi(
            str(metadata.language[0])))
-        # 0x50 - 0x57 : Unknown
+        # 0x50 - 0x57 : Input language and Output language
        record0.write(b'\0' * 8)
        # 0x58 - 0x5b : Format version
        # 0x5c - 0x5f : First image record number
        record0.write(pack(b'>II',
-            6, self.first_image_record if self.first_image_record else 0))
+            6, self.first_image_record if self.first_image_record else
            len(self.records)-1))
        # 0x60 - 0x63 : First HUFF/CDIC record number
        # 0x64 - 0x67 : Number of HUFF/CDIC records
@ -304,7 +364,12 @@ class MobiWriter(object):
        record0.write(b'\0' * 16)
        # 0x70 - 0x73 : EXTH flags
-        record0.write(pack(b'>I', 0x50))
+        # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
        # The purpose of the other bits is unknown
        exth_flags = 0b1010000
        if self.is_periodical:
            exth_flags |= 0b1000
        record0.write(pack(b'>I', exth_flags))
        # 0x74 - 0x93 : Unknown
        record0.write(b'\0' * 32)
@ -329,13 +394,13 @@ class MobiWriter(object):
        record0.write(b'\0\0\0\x01')
        # 0xb8 - 0xbb : FCIS record number
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', fcis_number))
        # 0xbc - 0xbf : Unknown (FCIS record count?)
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', 1))
        # 0xc0 - 0xc3 : FLIS record number
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', flis_number))
        # 0xc4 - 0xc7 : Unknown (FLIS record count?)
        record0.write(pack(b'>I', 1))
@ -348,19 +413,20 @@ class MobiWriter(object):
        # 0xe0 - 0xe3 : Extra record data
        # Extra record data flags:
-        #   - 0x1: <extra multibyte bytes><size> (?)
+        #   - 0b1  : <extra multibyte bytes><size>
-        #   - 0x2: <TBS indexing description of this HTML record><size> GR
+        #   - 0b10 : <TBS indexing description of this HTML record><size>
-        #   - 0x4: <uncrossable breaks><size>
+        #   - 0b100: <uncrossable breaks><size>
        # GR: Use 7 for indexed files, 5 for unindexed
        # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
        extra_data_flags = 0b1 # Has multibyte overlap bytes
        if self.primary_index_record_idx is not None:
            extra_data_flags |= 0b10
        if WRITE_UNCROSSABLE_BREAKS:
            extra_data_flags |= 0b100
        record0.write(pack(b'>I', extra_data_flags))
        # 0xe4 - 0xe7 : Primary index record
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
            is None else self.primary_index_record_idx))
        record0.write(exth)
        record0.write(title)
@ -368,10 +434,10 @@ class MobiWriter(object):
        # Add some buffer so that Amazon can add encryption information if this
        # MOBI is submitted for publication
        record0 += (b'\0' * (1024*8))
-        self.records[0] = record0
+        self.records[0] = align_block(record0)
    # }}}
-    def build_exth(self): # {{{
+    def build_exth(self): # EXTH Header {{{
        oeb = self.oeb
        exth = StringIO()
        nrecs = 0
@ -426,25 +492,32 @@ class MobiWriter(object):
        nrecs += 1
        # Write cdetype
-        if not self.opts.mobi_periodical:
+        if self.is_periodical:
            data = b'EBOK'
            exth.write(pack(b'>II', 501, len(data)+8))
            exth.write(data)
            nrecs += 1
        # Add a publication date entry
-        if oeb.metadata['date'] != [] :
+        if oeb.metadata['date']:
            datestr = str(oeb.metadata['date'][0])
-        elif oeb.metadata['timestamp'] != [] :
+        elif oeb.metadata['timestamp']:
            datestr = str(oeb.metadata['timestamp'][0])
        if datestr is not None:
            datestr = bytes(datestr)
            exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
            exth.write(datestr)
            nrecs += 1
        else:
            raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
        # Write the same creator info as kindlegen 1.2
        for code, val in [(204, 201), (205, 1), (206, 2), (207, 33307)]:
            exth.write(pack(b'>II', code, 12))
            exth.write(pack(b'>I', val))
            nrecs += 1
        if (oeb.metadata.cover and
                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
            id = unicode(oeb.metadata.cover[0])
@ -467,23 +540,12 @@ class MobiWriter(object):
        return b''.join(exth)
    # }}}
-    def add_thumbnail(self, item):
+    def write_header(self): # PalmDB header {{{
-        try:
+        '''
-            data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
+        Write the PalmDB header
-                    maxsizeb=MAX_THUMB_SIZE)
+        '''
-        except IOError:
+        title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace(
-            self.oeb.logger.warn('Bad image file %r' % item.href)
+                ' ', '_')
            return None
        manifest = self.oeb.manifest
        id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(id, href, 'image/jpeg', data=data)
        index = len(self.images) + 1
        self.images[href] = index
        self.records.append(data)
        return index
    def write_header(self):
        title = ascii_filename(unicode(self.oeb.metadata.title[0]))
        title = title + (b'\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self.records)
@ -494,6 +556,7 @@ class MobiWriter(object):
            self.write(pack(b'>I', offset), b'\0', pack(b'>I', 2*i)[1:])
            offset += len(record)
        self.write(b'\0\0')
    # }}}
    def write_content(self):
        for record in self.records:
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -53,6 +53,50 @@ class Serializer(object):
        # become uncrossable breaks in the MOBI
        self.breaks = []
        self.find_blocks()
    def find_blocks(self):
        '''
        Mark every item in the spine if it is the start/end of a
        section/article, so that it can be wrapped in divs appropriately.

        Four boolean attributes are set on every spine item:
        ``is_section_start``, ``is_section_end``, ``is_article_start`` and
        ``is_article_end``. Starts are taken from TOC nodes whose ``klass`` is
        ``'section'`` and from their direct children (the articles). An
        article/section ends on the spine item just before the next
        article/section starts, and the last spine item always ends both.

        TOC entries whose href does not match any spine item are ignored.
        '''
        items = list(self.oeb.spine)
        for item in items:
            item.is_section_start = item.is_section_end = False
            item.is_article_start = item.is_article_end = False

        def spine_item(tocitem):
            # Returns None if no spine item has this (fragment-less) href
            href = urldefrag(tocitem.href)[0]
            for candidate in items:
                if candidate.href == href:
                    return candidate
            return None

        for node in self.oeb.toc.iterdescendants():
            if node.klass != 'section':
                continue
            articles = list(node)
            if not articles:
                continue
            si = spine_item(node)
            if si is not None:
                si.is_section_start = True
            for article in articles:
                si = spine_item(article)
                if si is not None:
                    si.is_article_start = True

        in_sec = in_art = False
        prev_item = None
        for item in items:
            # in_art/in_sec can only be set once an item has been seen, so
            # prev_item is never None inside these branches
            if in_art and item.is_article_start:
                prev_item.is_article_end = True
                in_art = False
            if in_sec and item.is_section_start:
                prev_item.is_section_end = True
                in_sec = False
            if item.is_section_start:
                in_sec = True
            if item.is_article_start:
                in_art = True
            prev_item = item

        if items:
            # Whatever is still open is closed by the last spine item
            last = items[-1]
            last.is_section_end = last.is_article_end = True
    def __call__(self):
        '''
        Return the document serialized as a single UTF-8 encoded bytestring.
@ -138,11 +182,12 @@ class Serializer(object):
        buf = self.buf
        self.anchor_offset = buf.tell()
        buf.write(b'<body>')
-        self.anchor_offset_kindle = buf.tell()
+        self.body_start_offset = buf.tell()
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        self.body_end_offset = buf.tell()
        buf.write(b'</body>')
    def serialize_item(self, item):
@ -154,14 +199,20 @@ class Serializer(object):
        if not item.linear:
            self.breaks.append(buf.tell() - 1)
        self.id_offsets[urlnormalize(item.href)] = buf.tell()
-        # Kindle periodical articles are contained in a <div> tag
+        if item.is_section_start:
            buf.write(b'<div>')
        if item.is_article_start:
            buf.write(b'<div>')
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        if item.is_article_end:
            # Kindle periodical article end marker
            buf.write(b'<div></div>')
        if self.write_page_breaks_after_item:
            buf.write(b'<mbp:pagebreak/>')
        if item.is_article_end:
            buf.write(b'</div>')
        if item.is_section_end:
            buf.write(b'</div>')
        self.anchor_offset = None
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -1680,8 +1680,15 @@ class TOC(object):
                return True
        return False
-    def iterdescendants(self):
+    def iterdescendants(self, breadth_first=False):
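        """Iterate over all descendant nodes. The default order is
        depth-first; if breadth_first is True, all children of a node are
        yielded before any of their descendants."""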
        if breadth_first:
            for child in self.nodes:
                yield child
            for child in self.nodes:
                for node in child.iterdescendants(breadth_first=True):
                    yield node
        else:
            for child in self.nodes:
                for node in child.iter():
                    yield node
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@ -165,6 +165,7 @@ class PDFWriter(QObject): # {{{
            printer = get_pdf_printer(self.opts)
            printer.setOutputFileName(item_path)
            self.view.print_(printer)
            printer.abort()
        self._render_book()
    def _delete_tmpdir(self):
@ -186,6 +187,7 @@ class PDFWriter(QObject): # {{{
            draw_image_page(printer, painter, p,
                    preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
        printer.abort()
    def _write(self):
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@ -8,7 +8,8 @@ from functools import partial
 from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer
 from calibre.gui2.dialogs.progress import ProgressDialog
-from calibre.gui2 import question_dialog, error_dialog, info_dialog, gprefs
+from calibre.gui2 import (question_dialog, error_dialog, info_dialog, gprefs,
        warning_dialog)
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata import MetaInformation
 from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
@ -275,6 +276,24 @@ class Adder(QObject): # {{{
                    _('No books found'), show=True)
            return self.canceled()
        books = [[b] if isinstance(b, basestring) else b for b in books]
        # Drop files that cannot be read, remembering all of them so that the
        # user can be told which ones were skipped
        restricted = set()
        for i, files in enumerate(books):
            restrictedi = set(f for f in files if not os.access(f, os.R_OK))
            if restrictedi:
                files = [f for f in files if f not in restrictedi]
                books[i] = files
                restricted |= restrictedi
        # Report the files skipped across all books, not just the last one
        if restricted:
            det_msg = u'\n'.join(sorted(restricted))
            warning_dialog(self.pd, _('No permission'),
                    _('Cannot add some files as you do not have '
                        ' permission to access them. Click Show'
                        ' Details to see the list of such files.'),
                    det_msg=det_msg, show=True)
        books = list(filter(None, books))
        if not books:
            return self.canceled()
        self.rfind = None
        from calibre.ebooks.metadata.worker import read_metadata
        self.rq = Queue()
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -133,6 +133,7 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                if mi.author_link_map[aut]:
                    link = mi.author_link_map[aut]
                elif gprefs.get('default_author_link'):
--- a/src/calibre/gui2/convert/mobi_output.py
+++ b/src/calibre/gui2/convert/mobi_output.py
@ -25,7 +25,8 @@ class PluginWidget(Widget, Ui_Form):
        Widget.__init__(self, parent,
                ['prefer_author_sort', 'rescale_images', 'toc_title',
                    'mobi_ignore_margins', 'mobi_toc_at_start',
-                'dont_compress', 'no_inline_toc', 'masthead_font','personal_doc']
+                'dont_compress', 'no_inline_toc',
                'masthead_font','personal_doc', 'mobi_navpoints_only_deepest']
                )
        from calibre.utils.fonts import fontconfig
        self.db, self.book_id = db, book_id
--- a/src/calibre/gui2/convert/mobi_output.ui
+++ b/src/calibre/gui2/convert/mobi_output.ui
@ -55,7 +55,7 @@
     </property>
    </widget>
   </item>
-   <item row="8" column="0" colspan="2">
+   <item row="9" column="0" colspan="2">
    <widget class="QGroupBox" name="groupBox">
     <property name="title">
      <string>Kindle options</string>
@ -101,7 +101,7 @@
     </layout>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <spacer name="verticalSpacer_2">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -128,6 +128,13 @@
     </property>
    </widget>
   </item>
   <item row="7" column="0" colspan="2">
    <widget class="QCheckBox" name="opt_mobi_navpoints_only_deepest">
     <property name="text">
      <string>Use only &amp;lowest level of items in the TOC for chapter-to-chapter navigation</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/cover_flow.py
+++ b/src/calibre/gui2/cover_flow.py
@ -29,12 +29,14 @@ if pictureflow is not None:
            pictureflow.FlowImages.__init__(self)
            self.images = []
            self.captions = []
            self.subtitles = []
            for f in os.listdir(dirpath):
                f = os.path.join(dirpath, f)
                img = QImage(f)
                if not img.isNull():
                    self.images.append(img)
                    self.captions.append(os.path.basename(f))
                    self.subtitles.append('%d bytes'%os.stat(f).st_size)
        def count(self):
            return len(self.images)
@ -45,6 +47,9 @@ if pictureflow is not None:
        def caption(self, index):
            return self.captions[index]
        def subtitle(self, index):
            return self.subtitles[index]
        def currentChanged(self, index):
            print 'current changed:', index
--- a/src/calibre/gui2/dialogs/quickview.py
+++ b/src/calibre/gui2/dialogs/quickview.py
@ -183,7 +183,6 @@ class Quickview(QDialog, Ui_Quickview):
        self.items.blockSignals(False)
    def indicate_no_items(self):
        print 'no items'
        self.no_valid_items = True
        self.items.clear()
        self.items.addItem(QListWidgetItem(_('**No items found**')))
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -477,6 +477,8 @@ class BooksView(QTableView): # {{{
        # arbitrary: scroll bar + header + some
        max_width = self.width() - (self.verticalScrollBar().width() +
                                    self.verticalHeader().width() + 10)
        if max_width < 200:
            max_width = 200
        if new_size > max_width:
            self.column_header.blockSignals(True)
            self.setColumnWidth(col, max_width)
@ -567,7 +569,8 @@ class BooksView(QTableView): # {{{
        if md.hasFormat('text/uri-list') and not \
                md.hasFormat('application/calibre+from_library'):
            urls = [unicode(u.toLocalFile()) for u in md.urls()]
-            return [u for u in urls if os.path.splitext(u)[1] and os.access(u, os.R_OK)]
+            return [u for u in urls if os.path.splitext(u)[1] and
                    os.path.exists(u)]
    def drag_icon(self, cover, multiple):
        cover = cover.scaledToHeight(120, Qt.SmoothTransformation)
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@ -99,6 +99,8 @@ typedef unsigned short QRgb565;
 #define PFREAL_ONE (1 << PFREAL_SHIFT)
 #define PFREAL_HALF (PFREAL_ONE >> 1)
 #define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextWrapAnywhere|Qt::TextHideMnemonic|Qt::AlignCenter)
 inline PFreal fmul(PFreal a, PFreal b)
 {
  return ((long long)(a))*((long long)(b)) >> PFREAL_SHIFT;
@ -401,6 +403,7 @@ private:
  QImage* surface(int slideIndex);
  void triggerRender();
  void resetSlides();
  void render_text(QPainter*, int);
 };
 PictureFlowPrivate::PictureFlowPrivate(PictureFlow* w, int queueLength_)
@ -663,6 +666,34 @@ void PictureFlowPrivate::triggerRender()
  triggerTimer.start();
 }
 // Draw the caption and subtitle of slide `index` at the bottom of the
 // offscreen buffer: the subtitle on the bottom edge, the caption directly
 // above it. Both are word wrapped (see TEXT_FLAGS).
 void PictureFlowPrivate::render_text(QPainter *painter, int index) {
    const QString caption = slideImages->caption(index);
    const QString subtitle = slideImages->subtitle(index);
    const int buffer_width = buffer.width();
    const int buffer_height = buffer.height();
    const QRect line_rect(0, 0, buffer_width, fontSize);

    QRect brect = painter->boundingRect(line_rect, TEXT_FLAGS, caption);
    QRect brect2 = painter->boundingRect(line_rect, TEXT_FLAGS, subtitle);

    // So that if there is no subtitle, the caption is not flush with the bottom
    if (brect2.height() < fontSize) brect2.setHeight(fontSize);

    // So that the caption does not occupy more than a third of the buffer
    // less two lines of text. The limit is computed once so that the test and
    // the assignment cannot disagree.
    const int max_caption_height = ((int)(buffer_height/3.0)) - fontSize*2;
    if (brect.height() > max_caption_height)
        brect.setHeight(max_caption_height);

    brect.moveTop(buffer_height - (brect.height() + brect2.height()));
    painter->drawText(brect, TEXT_FLAGS, caption);

    brect2.moveTop(buffer_height - brect2.height());
    painter->drawText(brect2, TEXT_FLAGS, subtitle);
 }
 // Render the slides. Updates only the offscreen buffer.
 void PictureFlowPrivate::render()
 {
@ -708,10 +739,7 @@ void PictureFlowPrivate::render()
    //painter.setPen(QColor(255,255,255,127));
    if (centerIndex < slideCount() && centerIndex > -1) { 
-    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2-fontSize*4),
+        render_text(&painter, centerIndex);
                      Qt::AlignCenter, slideImages->caption(centerIndex));
    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2-fontSize*2),
                      Qt::AlignCenter, slideImages->subtitle(centerIndex));
    }
    painter.end();
@ -764,20 +792,12 @@ void PictureFlowPrivate::render()
    painter.setPen(QColor(255,255,255, (255-fade) ));
    if (leftTextIndex < sc && leftTextIndex > -1) {
-    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2 - fontSize*4),
+        render_text(&painter, leftTextIndex);
                      Qt::AlignCenter, slideImages->caption(leftTextIndex));
    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2 - fontSize*2),
                      Qt::AlignCenter, slideImages->subtitle(leftTextIndex));
    }
    painter.setPen(QColor(255,255,255, fade));
    if (leftTextIndex+1 < sc && leftTextIndex > -2) {
-    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2 - fontSize*4),
+        render_text(&painter, leftTextIndex+1);
                      Qt::AlignCenter, slideImages->caption(leftTextIndex+1));
    	painter.drawText( QRect(0,0, buffer.width(), buffer.height()*2 - fontSize*2),
                      Qt::AlignCenter, slideImages->subtitle(leftTextIndex+1));
    }
    painter.end();
--- a/src/calibre/gui2/store/init.py
+++ b/src/calibre/gui2/store/init.py
@ -6,6 +6,8 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.utils.filenames import ascii_filename
 class StorePlugin(object): # {{{
    '''
    A plugin representing an online ebook repository (store). The store can
@ -53,7 +55,7 @@ class StorePlugin(object): # {{{
        self.gui = gui
        self.name = name
        self.base_plugin = None
-        self.config = JSONConfig('store/stores/' + self.name)
+        self.config = JSONConfig('store/stores/' + ascii_filename(self.name))
    def open(self, gui, parent=None, detail_item=None, external=False):
        '''
--- a/src/calibre/gui2/store/search/models.py
+++ b/src/calibre/gui2/store/search/models.py
@ -22,11 +22,15 @@ from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import SearchQueryParser
 def comparable_price(text):
-    text = re.sub(r'[^0-9.,]', '', text)
+    # this keep thousand and fraction separators
-    if len(text) < 3 or text[-3] not in ('.', ','):
+    match = re.search(r'(?:\d|[,.](?=\d))(?:\d*(?:[,.\' ](?=\d))?)+', text)
-        text += '00'
+    if match:
-    text = re.sub(r'\D', '', text)
+        # replace all separators with '.'
-    text = text.rjust(6, '0')
+        m = re.sub(r'[.,\' ]', '.', match.group())
        # remove all separators except the fraction separator,
        # leave only 2 digits in fraction
        m = re.sub(r'\.(?!\d*$)', r'', m)
        text = '{0:0>8.0f}'.format(float(m) * 100.)
    return text  
@ -334,6 +338,11 @@ class SearchFilter(SearchQueryParser):
        }
        for x in ('author', 'download', 'format'):
            q[x+'s'] = q[x]
        # make the price in query the same format as result
        if location == 'price':
            query = comparable_price(query)
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -6,7 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re, urllib
+import urllib
 from contextlib import closing
 from lxml import html
@ -45,24 +45,26 @@ class AmazonDEKindleStore(StorePlugin):
            doc = html.fromstring(f.read())
            # Amazon has two results pages.
-            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+            # 20110725: seems that is_shot is gone.
-            # Horizontal grid of books.
+#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            if is_shot:
+#            # Horizontal grid of books.
-                data_xpath = '//div[contains(@class, "result")]'
+#            if is_shot:
-                format_xpath = './/div[@class="productTitle"]/text()'
+#                data_xpath = '//div[contains(@class, "result")]'
-                cover_xpath = './/div[@class="productTitle"]//img/@src'
+#                format_xpath = './/div[@class="productTitle"]/text()'
-            # Vertical list of books.
+#                cover_xpath = './/div[@class="productTitle"]//img/@src'
-            else:
+#            # Vertical list of books.
-                data_xpath = '//div[@class="productData"]'
+#            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
-                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+            cover_xpath = './/img[@class="productImage"]/@src'
 # end is_shot else
            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break
                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). Se we need
+                # put in results for non Kindle books (author pages). So we need
                # to explicitly check if the item is a Kindle book and ignore it
                # if it isn't.
                format = ''.join(data.xpath(format_xpath))
@ -71,27 +73,17 @@ class AmazonDEKindleStore(StorePlugin):
                # We must have an asin otherwise we can't easily reference the
                # book later.
-                asin_href = None
+                asin = ''.join(data.xpath("@name"))
                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
                if asin_a:
                    asin_href = asin_a[0].get('href', '')
                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
                    if m:
                        asin = m.group('asin')
                    else:
                        continue
                else:
                    continue
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
-                if is_shot:
+#                if is_shot:
-                    author = format.split(' von ')[-1]
+#                    author = format.split(' von ')[-1]
-                else:
+#                else:
-                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
+                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                author = author.split('von ')[-1]
                counter -= 1
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -42,48 +42,55 @@ class AmazonUKKindleStore(StorePlugin):
            doc = html.fromstring(f.read())
            # Amazon has two results pages.
-            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+            # 20110725: seems that is_shot is gone.
-            # Horizontal grid of books.
+#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            if is_shot:
+#            # Horizontal grid of books.
-                data_xpath = '//div[contains(@class, "result")]'
+#            if is_shot:
-                cover_xpath = './/div[@class="productTitle"]//img/@src'
+#                data_xpath = '//div[contains(@class, "result")]'
-            # Vertical list of books.
+#                format_xpath = './/div[@class="productTitle"]/text()'
-            else:
+#                cover_xpath = './/div[@class="productTitle"]//img/@src'
-                data_xpath = '//div[contains(@class, "product")]'
+#            # Vertical list of books.
-                cover_xpath = './div[@class="productImage"]/a/img/@src'
+#            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'
 # end is_shot else
            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break
                # Even though we are searching digital-text only Amazon will still
                # put in results for non Kindle books (author pages). So we need
                # to explicitly check if the item is a Kindle book and ignore it
                # if it isn't.
                format = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format.lower():
                    continue
                # We must have an asin otherwise we can't easily reference the
                # book later.
-                asin = ''.join(data.xpath('./@name'))
+                asin = ''.join(data.xpath("@name"))
-                if not asin:
+
                    continue
                cover_url = ''.join(data.xpath(cover_xpath))
-                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 #                if is_shot:
 #                    author = format.split(' von ')[-1]
 #                else:
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                author = author.split('by ')[-1]
                counter -= 1
                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.formats = ''
                if is_shot:
                    # Amazon UK does not include the author on the grid layout
                    s.author = ''
                    self.get_details(s, timeout)
                    if s.formats != 'Kindle':
                        continue
                else:
                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
                    s.author = author.split(' by ')[-1].strip()
                s.formats = 'Kindle'
                yield s
--- a/src/calibre/gui2/store/stores/chitanka_plugin.py
+++ b/src/calibre/gui2/store/stores/chitanka_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, Alex Stanev <alex@stanev.org>'
 __docformat__ = 'restructuredtext en'
 import re
 import urllib
 from contextlib import closing
@ -55,36 +54,21 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
                if counter <= 0:
                    break
-                id = ''.join(data.xpath('.//a[@class="booklink"]/@href'))
+                id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
                if not id:
                    continue
                cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()'))
                author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()'))
                fb2 = ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href'))
                epub = ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href'))
                txt = ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href'))
                # remove .zip extensions
                if fb2.find('.zip') != -1:
                    fb2 = fb2[:fb2.find('.zip')]
                if epub.find('.zip') != -1:
                    epub = epub[:epub.find('.zip')]
                if txt.find('.zip') != -1:
                    txt = txt[:txt.find('.zip')]
                counter -= 1
                s = SearchResult()
-                s.cover_url = cover_url
+                s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
-                s.title = title.strip()
+                s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
-                s.author = author.strip()
+                s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
-                s.detail_item = id.strip()
+                s.detail_item = id
                s.drm = SearchResult.DRM_UNLOCKED
-                s.downloads['FB2'] = base_url + fb2.strip()
+                s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
-                s.downloads['EPUB'] = base_url + epub.strip()
+                s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
-                s.downloads['TXT'] = base_url + txt.strip()
+                s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
                s.formats = 'FB2, EPUB, TXT, SFB'
                yield s
@ -106,35 +90,20 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
                    if counter <= 0:
                        break
-                    id = ''.join(data.xpath('.//a[@class="booklink"]/@href'))
+                    id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip()
                    if not id:
                        continue
                    cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src'))
                    title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()'))
                    author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()'))
                    fb2 = ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href'))
                    epub = ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href'))
                    txt = ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href'))
                    # remove .zip extensions
                    if fb2.find('.zip') != -1:
                        fb2 = fb2[:fb2.find('.zip')]
                    if epub.find('.zip') != -1:
                        epub = epub[:epub.find('.zip')]
                    if txt.find('.zip') != -1:
                        txt = txt[:txt.find('.zip')]
                    counter -= 1
                    s = SearchResult()
-                    s.cover_url = cover_url
+                    s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip()
-                    s.title = title.strip()
+                    s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip()
-                    s.author = author.strip()
+                    s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip()
-                    s.detail_item = id.strip()
+                    s.detail_item = id
                    s.drm = SearchResult.DRM_UNLOCKED
-                    s.downloads['FB2'] = base_url + fb2.strip()
+                    s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '')
-                    s.downloads['EPUB'] = base_url + epub.strip()
+                    s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '')
-                    s.downloads['TXT'] = base_url + txt.strip()
+                    s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '')
                    s.formats = 'FB2, EPUB, TXT, SFB'
                    yield s
--- a/src/calibre/gui2/store/stores/eknigi_plugin.py
+++ b/src/calibre/gui2/store/stores/eknigi_plugin.py
@ -0,0 +1,88 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, Alex Stanev <alex@stanev.org>'
 __docformat__ = 'restructuredtext en'
 import random
 import urllib2
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser, url_slash_cleaner
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class eKnigiStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for http://e-knigi.net: opens the shop with an affiliate
    id appended and scrapes its search result pages.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front page, or the page given by detail_item.

        :param parent: passed through to WebStoreDialog as its parent
        :param detail_item: URL of a specific product page, if any
        :param external: if True (or if the 'open_external' config option
               is set) the URL is handed to open_url() instead of being
               shown in a WebStoreDialog
        '''
        # Kovid's affiliate id (23) is used when the draw lands on 1, 2 or 3
        # out of 1..10, i.e. 30% of the time.
        aff_suffix = '&amigosid=23' if random.randint(1, 10) in (1, 2, 3) else '&amigosid=22'

        if detail_item:
            url = detail_item + aff_suffix
        else:
            # The front page has no query string yet, so drop the leading '&'.
            url = 'http://e-knigi.net/?' + aff_suffix[1:]

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(url)))
            return

        # The dialog is always given the final URL directly; no separate
        # detail URL is passed.
        dialog = WebStoreDialog(self.gui, url, parent, None)
        dialog.setWindowTitle(self.name)
        dialog.set_tags(self.config.get('tags', ''))
        dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Generator yielding a SearchResult for each product found for query.

        :param query: search text, URL-quoted into the request
        :param max_results: sent to the store as the page limit and used
               as the cap on the number of list results yielded
        :param timeout: timeout passed to the browser when opening the URL
        '''
        def joined(node, path):
            # Concatenate every match of an XPath query and trim whitespace.
            return ''.join(node.xpath(path)).strip()

        base_url = 'http://e-knigi.net'
        url = (base_url
               + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&limitstart=0&limit='
               + str(max_results)
               + '&keyword=' + urllib2.quote(query))

        br = browser()
        remaining = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

            # When the store finds only one product it shows the detail view
            # directly; in that case report that single product and stop.
            details = doc.xpath('//div[@class="prod_details"]')
            if details:
                data = details[0]
                s = SearchResult()
                s.cover_url = joined(data, './/div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@src')
                s.title = joined(data, './/div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@alt')
                s.author = joined(data, './/div[@class="td_bg clearfix"]/div[@class="gk_product_tab"]/div/table/tr[3]/td[2]/text()')
                s.price = joined(data, './/span[@class="productPrice"]/text()')
                # The search URL itself is the product page in this case.
                s.detail_item = url
                s.drm = SearchResult.DRM_UNLOCKED
                yield s
                return

            # Otherwise walk the regular list of search results.
            for data in doc.xpath('//div[@class="browseProductContainer"]'):
                if remaining <= 0:
                    break

                href = joined(data, './/a[1]/@href')
                if not href:
                    # Entries without a link cannot be referenced later.
                    continue

                remaining -= 1

                s = SearchResult()
                s.cover_url = joined(data, './/a[@class="gk_vm_product_image"]/img/@src')
                s.title = joined(data, './/a[@class="gk_vm_product_image"]/img/@title')
                s.author = joined(data, './/div[@style="float:left;width:90%"]/b/text()').replace('Автор: ', '')
                s.price = joined(data, './/span[@class="productPrice"]/text()')
                s.detail_item = base_url + href
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
--- a/src/calibre/gui2/store/stores/epubbud_plugin.py
+++ b/src/calibre/gui2/store/stores/epubbud_plugin.py
@ -1,27 +0,0 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
 from calibre.gui2.store.search_result import SearchResult
 class EpubBudStore(BasicStoreConfig, OpenSearchOPDSStore):
    '''
    OpenSearch/OPDS based store plugin for epubbud.com. Every result is
    reported as a DRM-free EPUB priced at $0.00 without download links.
    '''

    # Catalog feed, for reference: http://www.epubbud.com/feeds/catalog.atom
    open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml'
    web_url = 'http://www.epubbud.com/'

    def search(self, query, max_results=10, timeout=60):
        '''
        Generator wrapping OpenSearchOPDSStore.search() that overrides the
        price, DRM status, formats and downloads of every result.
        '''
        results = OpenSearchOPDSStore.search(self, query, max_results, timeout)
        for result in results:
            result.price = '$0.00'
            result.drm = SearchResult.DRM_UNLOCKED
            result.formats = 'EPUB'
            # Download links are broken for this store, so discard them.
            result.downloads = {}
            yield result
--- a/src/calibre/gui2/store/stores/epubbuy_de_plugin.py
+++ b/src/calibre/gui2/store/stores/epubbuy_de_plugin.py
@ -1,80 +0,0 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import urllib2
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class EPubBuyDEStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for epubbuy.com: opens the shop through an affiliate
    redirect and scrapes the shop's search result page.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front page, or the product identified by detail_item.

        :param parent: passed through to WebStoreDialog as its parent
        :param detail_item: product id substituted into the affiliate
               detail URL, if any
        :param external: if True (or if the 'open_external' config option
               is set) the URL is handed to open_url() instead of being
               shown in a WebStoreDialog
        '''
        url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627'
        url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653'
                       '&pid=32307&prid=2627&prodid={0}')

        detail_url = url_details.format(detail_item) if detail_item else None

        if external or self.config.get('open_external', False):
            # Prefer the product page when one was requested.
            open_url(QUrl(detail_url if detail_url else url))
            return

        dialog = WebStoreDialog(self.gui, url, parent, detail_url)
        dialog.setWindowTitle(self.name)
        dialog.set_tags(self.config.get('tags', ''))
        dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Generator yielding a SearchResult for each product found for query.

        :param query: search text, URL-quoted into the request
        :param max_results: cap on the number of results yielded
        :param timeout: timeout passed to the browser when opening the URL
        '''
        url = 'http://www.epubbuy.com/search.php?search_query=' + urllib2.quote(query)

        br = browser()
        remaining = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'):
                if remaining <= 0:
                    break

                # The article number is shown as text starting with "artnr:";
                # drop those first six characters to get the bare id.
                artnr = ''.join(data.xpath('./div[@class="center_block"]'
                                           '/p[contains(text(), "artnr:")]/text()')).strip()
                artnr = artnr[6:].strip()
                if not artnr:
                    continue

                cover_url = ''.join(data.xpath('./div[@class="center_block"]'
                                               '/a[@class="product_img_link"]/img/@src'))
                if cover_url:
                    # The scraped cover path is prefixed with the shop host.
                    cover_url = 'http://www.epubbuy.com' + cover_url
                title = ''.join(data.xpath('./div[@class="center_block"]'
                                           '/a[@class="product_img_link"]/@title'))
                author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))

                remaining -= 1

                result = SearchResult()
                result.cover_url = cover_url
                result.title = title.strip()
                result.author = author.strip()
                result.price = price
                result.drm = SearchResult.DRM_UNLOCKED
                result.detail_item = artnr
                result.formats = 'ePub'

                yield result
--- a/src/calibre/gui2/store/stores/google_books_plugin.py
+++ b/src/calibre/gui2/store/stores/google_books_plugin.py
@ -6,6 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import random
 import urllib
 from contextlib import closing
@ -23,7 +24,24 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class GoogleBooksStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://books.google.com/'
+        aff_id = {
            'lid': '41000000033185143',
            'pubid': '21000000000352219',
            'ganpub': 'k352219',
            'ganclk': 'GOOG_1335334761',
        }
        # Use Kovid's affiliate id 30% of the time.
        if random.randint(1, 10) in (1, 2, 3):
            aff_id = {
                'lid': '41000000031855266',
                'pubid': '21000000000352583',
                'ganpub': 'k352583',
                'ganclk': 'GOOG_1335335464',
            }
        url = 'http://gan.doubleclick.net/gan_click?lid=%(lid)s&pubid=%(pubid)s' % aff_id
        if detail_item:
            detail_item += '&ganpub=%(ganpub)s&ganclk=%(ganclk)s' % aff_id
        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
--- a/src/calibre/gui2/store/stores/libri_de_plugin.py
+++ b/src/calibre/gui2/store/stores/libri_de_plugin.py
@ -24,7 +24,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
-        url_details = ('http://ad.zanox.com/ppc/?18845780C1371495675T&ULP=[['
+        url_details = ('http://ad.zanox.com/ppc/?18848208C1197627693T&ULP=[['
                       'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
        if external or self.config.get('open_external', False):
--- a/src/calibre/gui2/store/stores/ozon_ru_plugin.py
+++ b/src/calibre/gui2/store/stores/ozon_ru_plugin.py
@ -0,0 +1,126 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
 __docformat__ = 'restructuredtext en'
 import random
 import re
 import urllib2
 from contextlib import closing
 from lxml import etree, html
 from PyQt4.Qt import QUrl
 from calibre import browser, url_slash_cleaner
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class OzonRUStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for ozon.ru: opens the shop with an affiliate id, queries
    its SearchWebService endpoint and reads the available formats from the
    product detail page.
    '''

    shop_url = 'http://www.ozon.ru'

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front page, or the product identified by detail_item.

        :param parent: passed through to WebStoreDialog as its parent
        :param detail_item: product id used to build the detail page URL
        :param external: if True (or if the 'open_external' config option
               is set) the URL is handed to open_url() instead of being
               shown in a WebStoreDialog
        '''
        # Kovid's affiliate id is used when the draw lands on 1, 2 or 3 out
        # of 1..10, i.e. 30% of the time.
        if random.randint(1, 10) in (1, 2, 3):
            partner = '?partner=kovidgoyal'
        else:
            partner = '?partner=romuk'

        front_page = self.shop_url + partner
        if detail_item:
            # e.g. http://www.ozon.ru/context/detail/id/3037277/
            detail_url = self.shop_url + '/context/detail/id/' + urllib2.quote(detail_item) + partner
        else:
            detail_url = None

        if not (external or self.config.get('open_external', False)):
            dialog = WebStoreDialog(self.gui, front_page, parent, detail_url)
            dialog.setWindowTitle(self.name)
            dialog.set_tags(self.config.get('tags', ''))
            dialog.exec_()
            return

        # Prefer the product page when one was requested.
        open_url(QUrl(url_slash_cleaner(detail_url or front_page)))

    def search(self, query, max_results=10, timeout=60):
        '''
        Generator yielding a SearchResult for each item returned by the
        store's search web service.

        :param query: search text, URL-quoted into the request
        :param max_results: cap on the number of results yielded
        :param timeout: timeout passed to the browser when opening the URL
        '''
        endpoint = ('/webservice/webservice.asmx/SearchWebService?'
                    'searchText=%s&searchContext=ebook' % urllib2.quote(query))
        search_url = self.shop_url + endpoint

        # Each field is the whitespace-normalized text of the child element
        # with the given local name (namespaces are ignored).
        field_xpath = 'normalize-space(./*[local-name() = "{0}"]/text())'

        remaining = max_results
        with closing(browser().open(search_url, timeout=timeout)) as f:
            raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
            doc = etree.fromstring(raw)
            for item in doc.xpath('//*[local-name() = "SearchItems"]'):
                if remaining <= 0:
                    break
                remaining -= 1

                result = SearchResult()
                result.detail_item = item.xpath(field_xpath.format('ID'))
                result.title = item.xpath(field_xpath.format('Name'))
                result.author = item.xpath(field_xpath.format('Author'))
                result.price = item.xpath(field_xpath.format('Price'))
                result.cover_url = item.xpath(field_xpath.format('Picture'))

                # A plain decimal price is reformatted with two decimals and
                # the rouble suffix; anything else is passed through as is.
                if re.match("^\d+?\.\d+?$", result.price):
                    result.price = u'{:.2F} руб.'.format(float(result.price))

                yield result

    def get_details(self, search_result, timeout=60):
        '''
        Fetch the product page of search_result and fill in its drm and
        formats attributes.

        :param search_result: result whose detail_item identifies the page
        :param timeout: timeout passed to the browser when opening the URL
        :return: True if a formats string was found on the page, else False
        '''
        url = self.shop_url + '/context/detail/id/' + urllib2.quote(search_result.detail_item)

        # Example of the markup the formats are read from (the label reads
        # "Available formats:"):
        # <div class="box">
        # ...
        #     <b>Доступные&nbsp;форматы:</b>
        #     <div class="vertpadd">.epub, .fb2, .pdf, .pdf, .txt</div>
        # ...
        # </div>
        xpt = u'normalize-space(//div[@class="box"]//*[contains(normalize-space(text()), "Доступные форматы:")][1]/following-sibling::div[1]/text())'

        with closing(browser().open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            formats = doc.xpath(xpt)
            if not formats:
                return False

            search_result.drm = SearchResult.DRM_UNLOCKED
            search_result.formats = ', '.join(_parse_ebook_formats(formats))
            # The store offers no direct download links (only a buy link),
            # so search_result.downloads is left untouched.
        return True
 def _parse_ebook_formats(formatsStr):
    '''
    Creates a list with displayable names of the formats
    :param formatsStr: string with comma separated book formats 
           as it provided by ozon.ru
    :return: a list with displayable book formats
    '''
    formatsUnstruct = formatsStr.lower()
    formats = []
    if 'epub' in formatsUnstruct:
        formats.append('ePub')
    if 'pdf' in formatsUnstruct:
        formats.append('PDF')
    if 'fb2' in formatsUnstruct:
        formats.append('FB2')
    if 'rtf' in formatsUnstruct:
        formats.append('RTF')
    if 'txt' in formatsUnstruct:
        formats.append('TXT')
    if 'djvu' in formatsUnstruct:
        formats.append('DjVu')
    if 'doc' in formatsUnstruct:
        formats.append('DOC')
    return formats
--- a/src/calibre/gui2/update.py
+++ b/src/calibre/gui2/update.py
@ -15,6 +15,7 @@ from calibre.gui2 import config, dynamic, open_url
 from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available
 URL = 'http://status.calibre-ebook.com/latest'
 #URL = 'http://localhost:8000/latest'
 NO_CALIBRE_UPDATE = '-0.0.0'
 VSEP = '|'
--- a/src/calibre/library/check_library.py
+++ b/src/calibre/library/check_library.py
@ -150,6 +150,8 @@ class CheckLibrary(object):
        if not ext:
            return False
        ext = ext[1:].lower()
        if ext.startswith('original_'):
            ext = ext[len('original_'):]
        if ext in EBOOK_EXTENSIONS:
            return True
        return False
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1892,6 +1892,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                            yield r[iindex]
    def get_next_series_num_for(self, series):
        series_id = None
        if series:
            series_id = self.conn.get('SELECT id from series WHERE name=?',
                (series,), all=False)
        if series_id is None:
@ -3023,8 +3025,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        stream.seek(0)
        mi = get_metadata(stream, format, use_libprs_metadata=False)
        stream.seek(0)
-        if not mi.series_index:
+        if mi.series_index is None:
-            mi.series_index = 1.0
+            mi.series_index = self.get_next_series_num_for(mi.series)
        mi.tags = [_('News')]
        if arg['add_title_tag']:
            mi.tags += [arg['title']]
@ -3076,7 +3078,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self._add_newbook_tag(mi)
        if not add_duplicates and self.has_book(mi):
            return None
-        series_index = 1.0 if mi.series_index is None else mi.series_index
+        series_index = self.get_next_series_num_for(mi.series) \
                    if mi.series_index is None else mi.series_index
        aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
        title = mi.title
        if isbytestring(aus):
@ -3123,7 +3126,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            if not add_duplicates and self.has_book(mi):
                duplicates.append((path, format, mi))
                continue
-            series_index = 1.0 if mi.series_index is None else mi.series_index
+            series_index = self.get_next_series_num_for(mi.series) \
                            if mi.series_index is None else mi.series_index
            aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
            title = mi.title
            if isinstance(aus, str):
@ -3157,7 +3161,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    def import_book(self, mi, formats, notify=True, import_hooks=True,
            apply_import_tags=True, preserve_uuid=False):
-        series_index = 1.0 if mi.series_index is None else mi.series_index
+        series_index = self.get_next_series_num_for(mi.series) \
                        if mi.series_index is None else mi.series_index
        if apply_import_tags:
            self._add_newbook_tag(mi)
        if not mi.title:
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@ -110,8 +110,9 @@ def config(defaults=None):
                'saving, depending on how well the filesystem you are saving '
                'to supports unicode.'))
    x('timefmt', default='%b, %Y',
-            help=_('The format in which to display dates. %d - day, %b - month, '
+            help=_('The format in which to display dates. %(day)s - day,'
-                '%Y - year. Default is: %b, %Y'))
+                ' %(month)s - month, %(year)s - year. Default is: %(default)s'
                )%dict(day='%d', month='%b', year='%Y', default='%b, %Y'))
    x('send_timefmt', default='%b, %Y',
            help=_('The format in which to display dates. %(day)s - day,'
                ' %(month)s - month, %(year)s - year. Default is: %(default)s'
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@ -10,13 +10,14 @@ import re, os, posixpath
 import cherrypy
 from calibre import fit_image, guess_type
-from calibre.utils.date import fromtimestamp, utcnow
+from calibre.utils.date import fromtimestamp
 from calibre.library.caches import SortKeyGenerator
 from calibre.library.save_to_disk import find_plugboard
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.magick.draw import (save_cover_data_to, Image,
        thumbnail as generate_thumbnail)
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 plugboard_content_server_value = 'content_server'
 plugboard_content_server_formats = ['epub']
@ -32,7 +33,7 @@ class CSSortKeyGenerator(SortKeyGenerator):
 class ContentServer(object):
    '''
-    Handles actually serving content files/covers. Also has
+    Handles actually serving content files/covers/metadata. Also has
    a few utility methods.
    '''
@ -68,9 +69,8 @@ class ContentServer(object):
    # }}}
    def get(self, what, id):
-        'Serves files, covers, thumbnails from the calibre database'
+        'Serves files, covers, thumbnails, metadata from the calibre database'
        try:
            id = int(id)
        except ValueError:
@ -90,6 +90,8 @@ class ContentServer(object):
                    thumb_height=height)
        if what == 'cover':
            return self.get_cover(id)
        if what == 'opf':
            return self.get_metadata_as_opf(id)
        return self.get_format(id, what)
    def static(self, name):
@ -180,6 +182,17 @@ class ContentServer(object):
            cherrypy.log.error(traceback.print_exc())
            raise cherrypy.HTTPError(404, 'Failed to generate cover: %r'%err)
    def get_metadata_as_opf(self, id_):
        '''
        Serve the metadata of the book with database id ``id_`` as an OPF
        document. Sets the Content-Type and Last-Modified response headers
        and returns the serialized OPF data.
        '''
        cherrypy.response.headers['Content-Type'] = (
            'application/oebps-package+xml; charset=UTF-8')
        metadata = self.db.get_metadata(id_, index_is_id=True)
        opf = metadata_to_opf(metadata)
        cherrypy.response.timeout = 3600
        # Report the modification time recorded on the metadata object
        cherrypy.response.headers['Last-Modified'] = self.last_modified(
            metadata.last_modified)
        return opf
    def get_format(self, id, format):
        format = format.upper()
        fmt = self.db.format(id, format, index_is_id=True, as_file=True,
@ -217,7 +230,8 @@ class ContentServer(object):
        cherrypy.response.headers['Content-Disposition'] = \
                b'attachment; filename="%s"'%fname
        cherrypy.response.timeout = 3600
-        cherrypy.response.headers['Last-Modified'] = self.last_modified(utcnow())
+        cherrypy.response.headers['Last-Modified'] = \
            self.last_modified(self.db.format_last_modified(id, format))
        return fmt
    # }}}
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@ -17,7 +17,7 @@ from datetime import datetime
 from functools import partial
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
-from calibre.utils.date import parse_date, isoformat, local_tz
+from calibre.utils.date import parse_date, isoformat, local_tz, UNDEFINED_DATE
 from calibre import isbytestring, force_unicode
 from calibre.constants import iswindows, DEBUG, plugins
 from calibre.utils.icu import strcmp
@ -39,8 +39,11 @@ def _c_convert_timestamp(val):
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
    try:
        return datetime(year, month, day, hour, minutes, seconds,
                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
    except OverflowError:
        return UNDEFINED_DATE.astimezone(local_tz)
 def _py_convert_timestamp(val):
    if val:
--- a/src/calibre/library/sqlite_custom.c
+++ b/src/calibre/library/sqlite_custom.c
@ -45,7 +45,7 @@ static void sort_concat_step(sqlite3_context *context, int argc, sqlite3_value *
    }
    if (list->count == list->length) {
-        list->vals = (SortConcatItem**)realloc(list->vals, list->length + 100);
+        list->vals = (SortConcatItem**)realloc(list->vals, sizeof(SortConcatItem*)*(list->length + 100));
        if (list->vals == NULL) return;
        list->length = list->length + 100;
    }
@ -122,7 +122,6 @@ static void sort_concat_finalize(sqlite3_context *context) {
        free(ans);
        sort_concat_free(list);
    }
 }
 static void sort_concat_finalize2(sqlite3_context *context) {
@ -190,7 +189,7 @@ static void identifiers_concat_step(sqlite3_context *context, int argc, sqlite3_
    }
    if (list->count == list->length) {
-        list->vals = (IdentifiersConcatItem**)realloc(list->vals, list->length + 100);
+        list->vals = (IdentifiersConcatItem**)realloc(list->vals, sizeof(IdentifiersConcatItem*)*(list->length + 100));
        if (list->vals == NULL) return;
        list->length = list->length + 100;
    }
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@ -401,7 +401,7 @@ with undefined values in the column. Searching for ``true`` will find all books
 values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column.
 Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column.
-Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.C`, but not the tag `A`.
+Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`.
 Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. Examples:
--- a/src/calibre/manual/metadata.rst
+++ b/src/calibre/manual/metadata.rst
@ -72,7 +72,7 @@ Regular expression mode has some differences from character mode, beyond (of cou
 The third and most important is that the replace string can make reference to parts of the search string by using backreferences. A backreference is ``\\n`` where n is an integer that refers to the n'th parenthesized group in the search expression. For example, given the same example as above, `a bad cat`, a search expression `a (...) (...)`, and a replace expression `a \\2 \\1`, the result will be `a cat bad`. Please see the :ref:`regexptutorial` for more information on backreferences.
-One useful pattern: assume you want to change the case of an entire field. The easiest way to do this is to use character mode, but lets further assume you want to use regular expression mode. The search expression should be `(.*)` the replace expression should be `\1`, and the desired case change function should be selected.
+One useful pattern: assume you want to change the case of an entire field. The easiest way to do this is to use character mode, but let's further assume you want to use regular expression mode. The search expression should be `(.*)`, the replace expression should be `\\1`, and the desired case change function should be selected.
 Finally, in regular expression mode you can copy values from one field to another. Simply make the source and destination field different. The copy can replace the destination field, prepend to the field (add to the front), or append to the field (add at the end). The 'use comma' checkbox tells |app| to (or not to) add a comma between the text and the destination field in prepend and append modes. If the destination is multiple (e.g., tags), then you cannot uncheck this box.
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/da.po
+++ b/src/calibre/translations/da.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/en_AU.po
+++ b/src/calibre/translations/en_AU.po
--- a/src/calibre/translations/en_CA.po
+++ b/src/calibre/translations/en_CA.po
--- a/src/calibre/translations/en_GB.po
+++ b/src/calibre/translations/en_GB.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/et.po
+++ b/src/calibre/translations/et.po
--- a/src/calibre/translations/eu.po
+++ b/src/calibre/translations/eu.po
--- a/src/calibre/translations/fa.po
+++ b/src/calibre/translations/fa.po
--- a/src/calibre/translations/fi.po
+++ b/src/calibre/translations/fi.po
--- a/src/calibre/translations/fo.po
+++ b/src/calibre/translations/fo.po
--- a/Show More
+++ b/Show More