Merge from trunk

This commit is contained in:
Charles Haley 2012-04-28 14:10:32 +02:00
commit ee82fdac05
213 changed files with 58766 additions and 45210 deletions

@@ -19,6 +19,158 @@
# new recipes:
# - title:
- version: 0.8.49
date: 2012-04-27
new features:
- title: "Experimental support for generating Amazon's new KF8 format MOBI files"
description: "calibre can now generate Amazon's new KF8 format MOBI files.
To turn on this feature, go to Preferences->Tweaks and click Plugin Tweaks. In the box add:
test_mobi_output_type = 'both'
calibre will now produce MOBI files that have both the old MOBI format and the new KF8 format in them.
To learn more about KF8, see: http://www.amazon.com/gp/feature.html?docId=1000729511
Note that calibre support for KF8 is still experimental and there will likely be bugs."
- title: "Upgrade to using cssutils 0.9.9 for CSS parsing. Improved speed and robustness."
- title: "Show cover size in a tooltip in the conversion dialog"
tickets: [986958]
- title: "Driver for Nook Simple Touch with Glow Light"
tickets: [989264]
bug fixes:
- title: "Heuristics: When italicizing words do not operate on words not in between HTML tags."
tickets: [986298]
- title: "Fix (I hope) the bulk metadata download process crashing for some people on OS X when clicking the Yes button to apply the updates."
tickets: [986658]
- title: "Fix tooltip not being updated in the book details panel when pasting in a new cover"
tickets: [986958]
- title: "Cover Browser: Wrap the title on space only, not in between words."
tickets: [986516]
- title: "Edit metadata dialog: If a permission denied error occurs when clicking the next or prev buttons, stay on the current book."
tickets: [986903]
- title: "Fix heuristics not removing unnecessary hyphens from the end of lines."
tickets: [822744]
improved recipes:
- Metro Nieuws NL
- Der Tagesspiegel
new recipes:
- title: Berria
author: Alayn Gortazar
- title: Sol Haber
author: Onur Gungor
- title: Telam
author: Darko Miletic
- title: Richmond Times-Dispatch
author: jde
- version: 0.8.48
date: 2012-04-20
new features:
- title: "Conversion: The search and replace feature has been completely revamped."
description: "You can now use any number of search and replace
expressions, not just three. You can also store and load frequently used
sets of search and replace expressions. Also, the wizard generates its
preview in a separate process to protect against crashes/memory leaks."
tickets: [983476,983484,983478]
- title: "Support for the new '.azw3' files that Amazon recently started generating. calibre will now detect them as ebooks. It can also view/convert them, if they are DRM free."
- title: "Drivers for Samsung Galaxy ACE GT-S5830L and HTC One X"
tickets: [981185]
bug fixes:
- title: "Get Books: Support the new website design of Barnes & Noble"
- title: "T1 driver: Fix books sent to SD card sometimes resulting problems when deleted."
tickets: [943586]
- title: "Do not allow author names to be set to blank via the Manage authors function. Blank authors are now automatically set to 'Unknown'"
- title: "MOBI Output: Handle background color specified on <td> and <tr> in addition to <table> tags."
tickets: [980813]
- title: "MOBI Output: Fix underline style applied to parent element not getting inherited by <a> children."
tickets: [985711]
improved recipes:
- xkcd
- Metro Nieuws
- Calgary Herald
- Orlando Sentinel
- countryfile
- Heise
new recipes:
- title: Various new Polish news sources
author: fenuks
- title: Various Italian news sources
author: faber1971
- title: Jakarta Globe
author: rty
- title: Acim Bilim Dergisi
author: thomass
- version: 0.8.47
date: 2012-04-13
new features:
- title: "Conversion pipeline: Add support for all the named entities in the HTML 5 spec."
tickets: [976056]
- title: "Support for viewing and converting the Haodoo PDB ebook format"
tickets: [976478]
- title: "Device driver for Laser EB720"
bug fixes:
- title: "Fix regression in automatic adding in 0.8.46 that broke automatic adding if adding of duplicates is enabled and auto convert is also enabled"
tickets: [976336]
- title: 'Fix "Tags" field in advanced search does not obey regex setting'
tickets: [980221]
- title: "EPUB Input: Automatically extract cover image from simple HTML title page that consists of only a single <img> tag, instead of rendering the page"
- title: "Prevent errors when both author and author_sort are used in a template for reading metadata from filenames for files on a device"
- title: "Amazon metadata download: Handle books whose titles start with a bracket."
tickets: [976365]
- title: "Get Books: Fix downloading of purchased books from Baen"
tickets: [975929]
improved recipes:
- Forbes
- Caros Amigos
- Trouw
- Sun UK
- Metro
- Daily Mirror
new recipes:
- title: "Melbourne Herald Sun"
author: Ray Hartley
- title: "Editoriali and Zerocalcare"
author: faber1971
- version: 0.8.46
date: 2012-04-06

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1334868409(BasicNewsRecipe):
title = u'AÇIK BİLİM DERGİSİ'
description = 'Aylık çevrimiçi bilim dergisi'
__author__ = u'thomass'
oldest_article = 30
max_articles_per_feed = 300
auto_cleanup = True
encoding = 'UTF-8'
publisher = 'açık bilim'
category = 'haber, bilim,TR,dergi'
language = 'tr'
publication_type = 'magazine'
conversion_options = {
'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]

@@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
index='http://www.adventure-zone.info/fusion/'
use_embedded_content=False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
skip_tag = skip_tag.findAll(name='a')
for r in skip_tag:
if r.strong:
word=r.strong.string.lower()
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
def preprocess_html(self, soup):
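# remove the second footer link, strip inline styles and make relative article links absolute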
footer=soup.find(attrs={'class':'news-footer middle-border'})
if footer and len(footer('a'))>=2:
footer('a')[1].extract()
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
self.image_article(soup, soup.body)
else:
self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.INDEX + a['href']
return soup

recipes/berria.recipe (new file)

@@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
'''
www.berria.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Berria(BasicNewsRecipe):
title = 'Berria'
__author__ = 'Alayn Gortazar'
description = 'Euskal Herriko euskarazko egunkaria'
publisher = 'Berria'
category = 'news, politics, sports, Basque Country'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'eu'
remove_empty_feeds = True
masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'
keep_only_tags = [
dict(id='goiburua'),
dict(name='div', attrs={'class':['ber_ikus']}),
dict(name='section', attrs={'class':'ber_ikus'})
]
remove_tags = [
dict(name='a', attrs={'class':'iruzkinak'}),
dict(name='div', attrs={'class':'laguntzaileak'})
]
extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'
feeds = [
(u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
(u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
(u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
(u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
(u'Mundua', u'http://berria.info/rss/mundua.xml'),
(u'Kirola', u'http://berria.info/rss/kirola.xml'),
(u'Plaza', u'http://berria.info/rss/plaza.xml')
]

@@ -1,220 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CalgaryHerald(BasicNewsRecipe):
title = u'Calgary Herald'
oldest_article = 3
max_articles_per_feed = 100
feeds = [
(u'News', u'http://rss.canada.com/get/?F233'),
(u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
(u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
(u'Politics', u'http://rss.canada.com/get/?F7551'),
(u'National', u'http://rss.canada.com/get/?F7552'),
(u'World', u'http://rss.canada.com/get/?F7553'),
]
__author__ = 'rty'
publisher = 'Calgary Herald'
description = 'Calgary, Alberta, Canada'
category = 'News, Calgary, Alberta, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
##masthead_url = 'http://www.calgaryherald.com/index.html'
keep_only_tags = [
dict(name='div', attrs={'id':'storyheader'}),
dict(name='div', attrs={'id':'storycontent'})
]
remove_tags_after = {'class':"story_tool_hr"}

@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1331729727(BasicNewsRecipe):
title = u'Camera di Commercio di Bari'
oldest_article = 7
__author__ = 'faber1971'
description = 'News from the Chamber of Commerce of Bari'
language = 'it'
max_articles_per_feed = 100
auto_cleanup = True
masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png'
feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')]
__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '17, April 2012'

@@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
description = 'cdaction.pl - polish games magazine site'
category = 'games'
language = 'pl'
index='http://www.cdaction.pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
@@ -17,4 +18,10 @@ class CD_Action(BasicNewsRecipe):
def get_cover_url(self):
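# pull the current issue's cover image from the magazine archive page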
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
return getattr(self, 'cover_url', self.cover_url)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -1,11 +1,12 @@
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'Countryfile.com'
cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
__author__ = 'Dave Asbury'
description = 'The official website of Countryfile Magazine'
# last updated 15/4/12
language = 'en_GB'
oldest_article = 30
max_articles_per_feed = 25
@@ -13,7 +14,23 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
no_stylesheets = True
auto_cleanup = True
#articles_are_obfuscated = True
def get_cover_url(self):
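# scrape the current cover thumbnail from the homepage image cache; fall back to a known cover URL if it cannot be fetched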
soup = self.index_to_soup('http://www.countryfile.com/')
cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
#print '******** ',cov,' ***'
cov2 = str(cov)
cov2=cov2[124:-90]
#print '******** ',cov2,' ***'
# try to get cover - if can't get known cover
br = browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = cov2
except:
cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
return cover_url
remove_tags = [
# dict(attrs={'class' : ['player']}),

@@ -1,20 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
import mechanize
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'The Daily Mirror'
description = 'News as provide by The Daily Mirror -UK'
__author__ = 'Dave Asbury'
# last updated 7/4/12
language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
oldest_article = 1
max_articles_per_feed = 10
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
@@ -75,3 +76,28 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
img { display:block}
'''
def get_cover_url(self):
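# fetch today's Mirror front page image via politicshome.com; fall back to a static cover if unavailable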
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
# look for the block containing the mirror button and url
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
cov2 = str(cov)
cov2='http://www.politicshome.com'+cov2[9:-142]
#cov2 now contains url of the page containing pic
soup = self.index_to_soup(cov2)
cov = soup.find(attrs={'id' : 'large'})
cov2 = str(cov)
cov2=cov2[27:-18]
#cov2 now is pic url, now go back to original function
br = mechanize.Browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = cov2
except:
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
#cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url

@@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
description = u'Aktualności i blogi z dobreprogramy.pl'
encoding = 'utf-8'
index='http://www.dobreprogramy.pl/'
no_stylesheets = True
language = 'pl'
extra_css = '.title {font-size:22px;}'
@@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
category = 'history'
language = 'pl'
index='http://dzieje.pl'
oldest_article = 8
max_articles_per_feed = 100
remove_javascript=True
@@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
remove_tags_after= dict(id='dogory')
remove_tags=[dict(id='dogory')]
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

@@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
index='http://www.emuzyka.pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
@@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -7,7 +7,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
__author__ = 'Dave Asbury'
# last updated 14/4/12
language = 'en_GB'
oldest_article = 28
max_articles_per_feed = 12
@@ -28,7 +28,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#]
feeds = [
(u'From the Homepage',u'http://feed43.com/0032328550253453.xml'),
#http://feed43.com/8053226782885416.xml'),
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),

@@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
category = 'movies'
language = 'pl'
index='http://www.filmweb.pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
@@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
self.log.warn(skip_tag)
return self.index_to_soup(skip_tag['href'], raw=True)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -1,39 +1,49 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Forbes(BasicNewsRecipe):
title = u'Forbes'
description = 'Business and Financial News'
__author__ = 'Kovid Goyal'
oldest_article = 30
max_articles_per_feed = 20
language = 'en'
encoding = 'utf-8'
recursions = 1
no_stylesheets = True
cover_url = u'http://www.forbes.com/media/current_covers/forbes_120_160.gif'
feeds = [(u'Latest', u'http://www.forbes.com/news/index.xml'),
(u'Most Popular', u'http://www.forbes.com/feeds/popstories.xml'),
(u'Technology', u'http://www.forbes.com/technology/index.xml'),
(u'Business', u'http://www.forbes.com/business/index.xml'),
(u'Sports Money', u'http://www.forbes.com/sportsmoney/index.xml'),
(u'Leadership', u'http://www.forbes.com/leadership/index.xml'),]
keep_only_tags = \
{'class':lambda x: x and (set(x.split()) & {'body', 'pagination',
'articleHead', 'article_head'})}
remove_tags_before = {'name':'h1'}
remove_tags = [
{'class':['comment_bug', 'engagement_block',
'video_promo_block', 'article_actions']},
{'id':'comments'}
]
def is_link_wanted(self, url, tag):
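# follow links to continuation pages (URLs ending in /2/ through /9/) so multi-page articles are fetched whole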
ans = re.match(r'http://.*/[2-9]/', url) is not None
if ans:
self.log('Following multipage link: %s'%url)
return ans
def postprocess_html(self, soup, first_fetch):
for pag in soup.findAll(True, 'pagination'):
pag.extract()
if not first_fetch:
h1 = soup.find('h1')
if h1 is not None:
h1.extract()
return soup

@@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Fotoblogia_pl(BasicNewsRecipe):
title = u'Fotoblogia.pl'
__author__ = 'fenuks'
category = 'photography'
language = 'pl'
masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})]
remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})]
feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]

@@ -6,16 +6,24 @@ class Gameplay_pl(BasicNewsRecipe):
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
category = 'games, movies, books, music'
language = 'pl'
index='http://gameplay.pl'
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
max_articles_per_feed = 100
remove_javascript= True
no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url):
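# image sources are protocol-relative ('//...'); strip the slashes and prepend the site root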
if 'http' not in url:
return 'http://gameplay.pl'+ url[2:]
else:
return url
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and '../' in a['href']:
a['href']=self.index + a['href'][2:]
return soup

@@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
remove_empty_feeds=True
no_stylesheets=True
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
self.log.warn('odnosnik')
self.log.warn(link['href'])
return self.index_to_soup(link['href'], raw=True)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
if '/gry/' in a['href']:
a['href']='http://www.gry.gildia.pl' + a['href']
elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
a['href']='http://www.literatura.gildia.pl' + a['href']
else:
a['href']='http://www.gildia.pl' + a['href']
return soup

@@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
category = 'games'
language = 'pl'
oldest_article = 8
index='http://www.gram.pl'
max_articles_per_feed = 100
no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
@@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
tag=soup.findAll(name='div', attrs={'class':'picbox'})
for t in tag:
t['style']='float: left;'
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -59,6 +59,7 @@ class heiseDe(BasicNewsRecipe):
dict(name='span', attrs={'class':'rsaquo'}),
dict(name='div', attrs={'class':'news_logo'}),
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
dict(name='div', attrs={'class':'navi_top_container'}),
dict(name='p', attrs={'class':'news_option'}),
dict(name='p', attrs={'class':'news_navi'}),
dict(name='div', attrs={'class':'news_foren'})]
@@ -69,3 +70,5 @@

@@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class historia_news(BasicNewsRecipe):
title = u'historia-news'
__author__ = 'fenuks'
description = u'Historia-news to portal dla ludzi kochających historię. Najnowsze newsy z historii bliższej i dalszej, archeologii, paleontologii oraz ciekawostki i podcasty z historii kultury, sportu, motoryzacji i inne.'
masthead_url = 'http://historia-news.pl/templates/hajak4/images/header.jpg'
cover_url= 'http://www.historia-news.pl/templates/hajak4/images/header.jpg'
category = 'history'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_tags=[dict(name='form'), dict(name='img', attrs={'alt':'Print'}), dict(attrs={'class':['commbutt', 'cpr']}), dict(id=['plusone', 'facebook'])]
feeds = [(u'Wiadomo\u015bci', u'http://historia-news.pl/wiadomoci.feed?type=rss'), (u'Artyku\u0142y', u'http://historia-news.pl/artykuy.feed?type=rss')]
def print_version(self, url):
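# request the printer-friendly single-page view by appending the site's print-template query parameters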
return url + '?tmpl=component&print=1&layout=default&page='

[Binary files not shown: four new recipe icons added, including recipes/icons/telam.png (1.9 KiB).]

@@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
category = 'IT'
language = 'pl'
index='http://www.in4.pl/'
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
no_stylesheets = True
remove_empty_feeds = True
@@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):
def preprocess_html(self, soup):
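# stitch appended pages into the article body, then make relative links absolute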
self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO'
index='http://infra.org.pl'
language = 'pl'
max_articles_per_feed = 100
no_stylesheets = True
@@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

@@ -0,0 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe
class JakartaGlobe(BasicNewsRecipe):
title = u'Jakarta Globe'
oldest_article = 3
max_articles_per_feed = 100
feeds = [
(u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
(u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
(u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
(u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
(u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
(u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
]
__author__ = 'rty'
publisher = 'JakartaGlobe.com'
description = 'JakartaGlobe, Indonesia, Newspaper'
category = 'News, Indonesia'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_ID'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
keep_only_tags = [
dict(name='div', attrs={'class':'story'}),
dict(name='span', attrs={'class':'headline'}),
dict(name='p', attrs={'id':'bodytext'})
]

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Konflikty(BasicNewsRecipe):
title = u'Konflikty Zbrojne'
@@ -10,6 +11,23 @@ class Konflikty(BasicNewsRecipe):
category='military, history'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
def preprocess_html(self, soup):
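# drop inline styles and wrap linked images in divs captioned with the image's alt text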
for item in soup.findAll(style=True):
del item['style']
for image in soup.findAll(name='a', attrs={'class':'image'}):
if image.img and image.img.has_key('alt'):
image.name='div'
pos = len(image.contents)
image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
return soup

@@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1334649829(BasicNewsRecipe):
title = u'Liberatorio Politico'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
masthead_url = 'http://liberatorio.altervista.org/wp-content/uploads/2012/01/Testata-LIBERATORIO-Altervista1.jpg'
feeds = [(u'Liberatorio Politico', u'http://liberatorio.altervista.org/feed/')]
__author__ = 'faber1971'
description = 'Inquiry journalism - a blog on Molfetta, Land of Bari, Apulia and Italy - v1.00 (07, April 2012)'
language = 'it'

recipes/limes.recipe (new file)

@@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '16, April 2012'
__description__ = 'Geopolitical Italian magazine'
from calibre.web.feeds.news import BasicNewsRecipe
class Limes(BasicNewsRecipe):
description = 'Italian weekly magazine'
__author__ = 'faber1971'
cover_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
title = 'Limes'
category = 'Geopolitical news'
language = 'it'
# encoding = 'cp1252'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 16
max_articles_per_feed = 100
use_embedded_content = False
recursions = 10
remove_javascript = True
no_stylesheets = True
masthead_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
feeds = [
(u'Limes', u'http://temi.repubblica.it/limes/feed/')
]
keep_only_tags = [
dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
dict(name='div', attrs={'id':['content-second-right','content2']})
]
remove_tags = [
dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
dict(name='ul',attrs={'id':'user-utility'}),
dict(name=['script','noscript','iframe'])
]

@@ -1,11 +1,13 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1327062445(BasicNewsRecipe):
title = u'Marketing Magazine'
description = 'Collection of Italian marketing websites'
language = 'it'
__author__ = 'faber1971'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
@@ -16,4 +18,4 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
dict(name='ul', attrs={'id':'ads0'})
]
masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'Marketing Journal', u'http://feeds.feedburner.com/marketingjournal/jPwA'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'[4]marketing.biz', u'http://feeds.feedburner.com/4marketing'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Bloguerrilla', u'http://feeds.feedburner.com/Bloguerrilla'), (u'Nonconvenzionale', u'http://feeds.feedburner.com/nonconvenzionale'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]

@@ -3,25 +3,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
from BeautifulSoup import BeautifulSoup
''' Version 1.2, updated cover image to match the changed website.
added info date on title
@@ -43,80 +24,75 @@ except:
extended timeout from 2 to 10
changed oldest article from 10 to 1.2
changed max articles from 15 to 25
Version 1.9.1 18-04-2012
removed some debug settings
updated code to match new metro-layout
Version 1.9.2 24-04-2012
updated code to match new metro-layout
Version 1.9.3 25-04-2012
Changed a lot of custom code into calibre code as the default code of calibre has become much faster since the first version fo this recipe
Added new feeds
Updated css
Changed order of regex to speedup proces
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL'
oldest_article = 1.2
max_articles_per_feed = 25
__author__ = u'DrMerry'
description = u'Metro Nederland'
language = u'nl'
simultaneous_downloads = 5
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
timeout = 10
center_navbar = True
timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper'
encoding = 'utf-8'
remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']
use_embedded_content = False
conversion_options = {
'authors' : 'Metro Nederland & calibre & DrMerry',
'author_sort' : 'Metro Nederland & calibre & DrMerry',
'publisher' : 'DrMerry/Metro Nederland'
}
extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption .credits,.calibrenavbar{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact.module-title{margin:8px 0}.article-box-fact.module-title,h2{font-size:1.1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2{border:0;padding:0}.column1,h1,h2{margin:0}'
preprocess_regexps = [
(re.compile(r'(&nbsp;|\s|<img[^>]+metronieuws\.nl/([^>]+/templates/[^>]+\.jpe?g|internal\-roxen\-unit\.gif)[^>]+>)', re.DOTALL|re.IGNORECASE),lambda match: ' '),
#(re.compile(r'(&nbsp;|\s)+', re.DOTALL|re.IGNORECASE),lambda match:' '),
#(re.compile(r'<(a |/a)[^>]*>', re.DOTALL|re.IGNORECASE),lambda match:'')
#(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em')
]
remove_tags_before= dict(id='date')
remove_tags_after = [dict(name='div', attrs={'class':['column-1-3','gallery-text']})]#id='share-and-byline')]
remove_tags = [
dict(name=['iframe','script','noscript','style']),
dict(name='div', attrs={'class':[re.compile('column-[14]-5'),'col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)')]}),
dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'sidebar',re.compile('^article-\d'),'comments','gallery-1']),
dict(name='a', attrs={'name':'comments'}),
#dict(name='div', attrs={'data-href'}),
dict(name='img', attrs={'class':'top-line'}),
dict(attrs={'style':re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'),'title':'volledig scherm'})]
'''removed by before/after:
id:
column-1-5-top,'hidden_div','footer',
class:
'header',re.compile('^footer-[a-zA-Z0-9]+$),'header-links',
'''
def preprocess_html(self, soup):
myProcess = MerryProcess()
myProcess.moveTitleAndAuthor(soup)
myProcess.removeUnwantedTags(soup)
return soup
def postprocess_html(self, soup, first):
myProcess = MerryProcess()
myProcess.optimizeLayout(soup)
return soup
feeds = [
@@ -128,295 +104,109 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
(u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
(u'Strips',u'http://www.metronieuws.nl/rss.xml?c=1325037714-0'),
(u'Tech', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),
(u'Familie', u'http://www.metronieuws.nl/rss.xml?c=1283166782-9'),
(u'Blogs', u'http://www.metronieuws.nl/rss.xml?c=1295586825-6'),
(u'Reizen', u'http://www.metronieuws.nl/rss.xml?c=1277377288-13'),
(u'Carrière', u'http://www.metronieuws.nl/rss.xml?c=1278070988-1'),
(u'Wetenschap',u'http://www.metronieuws.nl/rss.xml?c=1303088437-0'),
(u'Planeet',u'http://www.metronieuws.nl/rss.xml?c=1277377288-14'),
(u'Gezondheid',u'http://www.metronieuws.nl/rss.xml?c=1277377288-15'),
(u'Sport', u'http://www.metronieuws.nl/rss.xml?c=1277377288-12')
]
class MerryPreProcess():
def replacePictures(self, soup):
#to be implemented
return soup
def optimizePicture(self,soup):
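# trim whitespace borders from each downloaded image with calibre's magick wrapper; images that fail to process are skipped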
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
try:
iurl = tag['src']
img = Image()
img.open(iurl)
img.trim(0)
img.save(iurl)
except:
print '\n!!image optimize failed!!\n'
continue
return soup
class MerryExtract():
def safeRemovePart(self, killingSoup, soupIsArray):
if killingSoup and not killingSoup == None:
try:
if soupIsArray == True:
for killer in killingSoup:
killer.extract()
else:
killingSoup.extract()
except:
return False
else:
return False
return killingSoup
class MerryReplace():
myKiller = MerryExtract()
def replaceATag(self, soup):
anchors = []
anchors = soup.findAll('a')
if anchors and not (anchors == None or anchors == []):
try:
for link in anchors:
# print str(link)
if link and not link == None:
# print ('type: %s'%(str(type(link))))
# print ('link: %s' % (link))
myParent = link.parent
# print str('parent: %s'%(myParent))
try:
myIndex = link.parent.index(link)
hasIndex = True
except:
myIndex = 0
hasIndex = False
# print str('index %s'%(myIndex))
if not link.string == None:
# print 'link=notnone'
if hasIndex == True:
myParent.insert(myIndex, link.string)
else:
myParent.append(link.string)
else:
# print 'link=none'
myParent.insert(myIndex, link.contents)
self.myKiller.safeRemovePart(link, False)
else:
notshown = 'tag received is empty' # print
except:
notshown = 'tag received is empty' # print notshown
return soup
class MerryProcess(BeautifulSoup):
myKiller = MerryExtract()
myReplacer = MerryReplace()
myPrepare = MerryPreProcess()
def optimizeLayout(self,soup):
self.myPrepare.optimizePicture(soup)
return soup
def insertFacts(self, soup):
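# move the 'article-box-fact' block to the top of the article body, dropping unrelated sibling tags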
thefactpart = re.compile('^article-box-fact.*$')
allfacts = soup.findAll('div', {'class':thefactpart})
if allfacts and not allfacts == None:
allfactsparent = soup.find('div', {'class':thefactpart}).parent
for part in allfactsparent:
if not part in allfacts:
self.myKiller.safeRemovePart(part, True)
articlefacts = soup.find('div', {'class':'article-box-fact column'})
errorOccured=False
if (articlefacts and not articlefacts==None):
try:
contenttag = soup.find('div', {'class':'article-body'})
foundrighttag = False
if contenttag and not contenttag == None:
foundrighttag = True
if foundrighttag == True:
contenttag.insert(0, allfactsparent)
except:
errorOccured=True
else:
errorOccured=True
pass
return soup
def moveTitleAndAuthor(self, soup):
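# reposition the headline above the date block and move the byline toward the end of the article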
moveitem = soup.h1
pubdate = soup.find(id="date")
if moveitem and not moveitem == None and pubdate and not pubdate == None:
try:
pubdate.parent.insert(0, moveitem)
except:
print '\n!!error in moving title!!\n'
pass
moveitem = None
moveitem = soup.find('div', {'class':'byline'})
if moveitem and not moveitem == None:
try:
moveitem.parent.parent.insert(-1, moveitem)
except:
print '\n!!error in moving byline!!\n'
pass
return soup
def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
findsibsof = soup
firstpart = previous
if findsibsof and not findsibsof == None:
if soupIsArray == True:
for foundsib in findsibsof:
self.previousNextSibRemover(foundsib, firstpart, soupIsArray=False)
else:
if firstpart == True and soupIsArray == False:
sibs = findsibsof.previousSiblingGenerator()
else:
sibs = findsibsof.nextSiblingGenerator()
for sib in sibs:
self.myKiller.safeRemovePart(sib, True)
return
def removeUnwantedTags(self,soup):
if SHOWDEBUG1 == True:
mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))])
mlog.showDebug()
self.removeTagsByName(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup)))
mlog.showDebug()
self.insertFacts(soup)
self.removeFirstAndLastPart(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedParts(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup)))
mlog.showDebug()
self.removeEmptyTags(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup)))
mlog.showDebug()
self.myReplacer.replaceATag(soup)
return soup
def removeUnwantedParts(self, soup):
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByID(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before Class: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByClass(soup)
if SHOWDEBUG1 == True:
mlog.addDebug('Len of Soup before Style: %s' % len(str(soup)))
mlog.showDebug()
self.removeUnwantedTagsByStyle(soup)
return soup
def removeUnwantedTagsByStyle(self,soup):
self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
if SHOWDEBUG0 == True:
mlog.addDebug('end remove by style')
self.removeArrayOfTags(soup.findAll(attrs={'class': 'share-tools-bottom'})) # at end to keep author
return soup
def removeArrayOfTags(self,souparray):
return self.myKiller.safeRemovePart(souparray, True)
def removeUnwantedTagsByClass(self,soup):
if SHOWDEBUG0 == True:
mlog.addDebug('start remove by class')
self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')}))
return soup
def removeUnwantedTagsByID(self,soup):
defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer']
for removeid in defaultids:
if SHOWDEBUG1 == True:
mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup))))
mlog.showDebug()
self.removeArrayOfTags(soup.findAll(id=removeid))
return soup
# def safeRemoveTag(self, subtree):
# return self.myKiller.safeRemovePart(subtree, True)
def removeTagsByName(self, soup):
self.myKiller.safeRemovePart(soup.script, True)
self.myKiller.safeRemovePart(soup.iframe, True)
self.myKiller.safeRemovePart(soup.style, True)
self.myKiller.safeRemovePart(soup.noscript, True)
return soup
def removeEmptyTags(self,soup,run=0):
if SHOWDEBUG0 == True:
mlog.addDebug('starting removeEmptyTags')
if SHOWDEBUG1 == True:
run += 1
mlog.addDebug(run)
if SHOWDEBUG2 == True:
mlog.addDebug(str(soup.prettify()))
mlog.showDebug()
emptymatches = re.compile(r'^(&nbsp;|\s)*$')
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or emptymatches.match(tag.string)) and not tag.isSelfClosing)
if emptytags:
if SHOWDEBUG1 == True:
mlog.addDebug('tags found')
mlog.addDebug(str(emptytags))
self.removeArrayOfTags(emptytags)
#recursive in case removing empty tag creates new empty tag
self.removeEmptyTags(soup, run=run)
else:
if SHOWDEBUG1 == True:
mlog.addDebug('no empty tags found')
mlog.showDebug()
if SHOWDEBUG0 == True:
if SHOWDEBUG2 == True:
mlog.addDebug('new soup:')
mlog.addDebug(str(soup.prettify()))
mlog.addDebug('RemoveEmptyTags Completed')
mlog.showDebug()
return soup
def removeFirstAndLastPart(self,soup):
def findparenttag(lookuptag):
if lookuptag is not None:
return lookuptag.findParents()
findtag = soup.find(id="date")
self.previousNextSibRemover(findtag, previous=True, soupIsArray=False)
self.previousNextSibRemover(findparenttag(findtag), previous=True, soupIsArray=True)
for endtag in [soup.find(id="share-and-byline"), soup.find("div", { "class" : "gallery-text" })]:
self.previousNextSibRemover(endtag, previous=False, soupIsArray=False)
self.previousNextSibRemover(findparenttag(endtag), previous=False, soupIsArray=True)
return soup

View File

@ -1,52 +1,30 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
description = 'News as provided by The Metro - UK'
#timefmt = ''
__author__ = 'Dave Asbury'
#last update 3/12/11
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
no_stylesheets = True
#no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 20
max_articles_per_feed = 10
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
#preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
preprocess_regexps = [
(re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
preprocess_regexps = [
(re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
keep_only_tags = [
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
dict(attrs={'class':['img-cnt figure']}),
dict(attrs={'class':['art-img']}),
dict(name='div', attrs={'class':'art-lft'}),
dict(name='p')
]
remove_tags = [
dict(name = 'div',attrs={'id' : ['comments-news','formSubmission']}),
dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r','username','clrd' ]}),
dict(attrs={'class':['username', 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime','addYourComment','displayName']})
,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
]
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
extra_css = '''
body {font: sans-serif medium;}'
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

View File

@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe):
title = 'National Geographic PL'
__author__ = 'Marcin Urban 2011'
__modified_by__ = 'fenuks'
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
]
remove_attributes = ['width','height']
feeds=[]
feeds = [
('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
]
def find_articles(self, url):
articles = []
soup=self.index_to_soup(url)
tag=soup.find(attrs={'class':'arl'})
art=tag.ul.findAll('li')
for i in art:
title=i.a['title']
url=i.a['href']
#date=soup.find(id='footer').ul.li.string[41:-1]
desc=i.div.p.string
articles.append({'title' : title,
'url' : url,
'date' : '',
'description' : desc
})
return articles
def parse_index(self):
feeds = []
feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
return feeds
def print_version(self, url):
return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
if 'artykuly' in url:
return url.replace('artykuly/pokaz', 'drukuj-artykul')
elif 'aktualnosci' in url:
return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
else:
return url
def get_cover_url(self):
soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
tag=soup.find(attrs={'class':'txt jus'})
self.cover_url=tag.img['src']
return self.cover_url

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1335362999(BasicNewsRecipe):
title = u'Non leggerlo'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = False
keep_only_tags = [
dict(name='div', attrs={'class':'post hentry'})
]
feeds = [(u'Non leggerlo', u'http://nonleggerlo.blogspot.com/feeds/posts/default')]
description = 'An Italian satirical blog'
language = 'it'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '24, April 2012'

View File

@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
title=soup.find(attrs={'class':'tytul'})
if title:
title['style']='font-size: 20px; font-weight: bold;'
self.log.warn(soup)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.INDEX + a['href']
return soup

View File

@ -1,3 +1,4 @@
import urllib, re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
@ -27,12 +28,30 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'
keep_only_tags = [
dict(name='div', attrs={'class':'story'})
]
remove_tags = [
dict(name='div', attrs={'class':['articlerail','tools','comment-group','clearfix']}),
]
remove_tags_after = [
dict(name='p', attrs={'class':'copyright'}),
]
auto_cleanup = True
def get_article_url(self, article):
ans = None
try:
s = article.summary
ans = urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
if ans is None:
link = article.get('feedburner_origlink', None)
if link and link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
'0S':'//'}
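# Hypothetical example of the decoding this table performs (not taken from
# a real feed): the component '0L0Swww0Borlandosentinel0Ncom0Cnews'
# would decode to 'http://www.orlandosentinel.com/news'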
for k, v in encoding.iteritems():
link = link.replace(k, v)
ans = link
elif link:
ans = link
if ans is not None:
return ans.replace('?track=rss', '')

View File

@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
category = 'IT'
language = 'pl'
index='http://pcarena.pl'
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
no_stylesheets = True
@ -22,4 +23,10 @@ class PC_Arena(BasicNewsRecipe):
if 'http' not in url:
return 'http://pcarena.pl' + url
else:
return url
return url
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -1,5 +1,5 @@
"""
readitlaterlist.com
Pocket Calibre Recipe v1.0
"""
__license__ = 'GPL v3'
__copyright__ = '''
@ -12,22 +12,23 @@ from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'ReadItLater'
class Pocket(BasicNewsRecipe):
title = 'Pocket'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
up your news. This version displays pages of articles from \
description = '''Personalized news feeds. Go to getpocket.com to set up \
your news. This version displays pages of articles from \
oldest to newest, with max & minimum counts, and marks articles \
read after downloading.'''
publisher = 'readitlaterlist.com'
publisher = 'getpocket.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 50
minimum_articles = 1
minimum_articles = 10
mark_as_read_after_dl = True
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
INDEX = u'http://readitlaterlist.com'
INDEX = u'http://getpocket.com'
LOGIN = INDEX + u'/l'
readList = []
@ -100,9 +101,31 @@ class Readitlater(BasicNewsRecipe):
br = self.get_browser()
for link in markList:
url = self.INDEX + link
print 'Marking read: ', url
response = br.open(url)
response
print response.info()
def cleanup(self):
self.mark_as_read(self.readList)
if self.mark_as_read_after_dl:
self.mark_as_read(self.readList)
else:
pass
def default_cover(self, cover_file):
'''
Create a generic cover for recipes that don't have a cover
This override adds time to the cover
'''
try:
from calibre.ebooks import calibre_cover
title = self.title if isinstance(self.title, unicode) else \
self.title.decode('utf-8', 'replace')
date = strftime(self.timefmt)
time = strftime('[%I:%M %p]')
img_data = calibre_cover(title, date, time)
cover_file.write(img_data)
cover_file.flush()
except:
self.log.exception('Failed to generate default cover')
return False
return True

View File

@ -0,0 +1,59 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1335532466(BasicNewsRecipe):
title = u'Richmond Times-Dispatch'
description = 'News from Richmond, Virginia, USA'
__author__ = 'jde'
cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
language = 'en'
encoding = 'utf8'
oldest_article = 1 #days
max_articles_per_feed = 25
needs_subscription = False
remove_javascript = True
recursions = 0
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('News',
'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
('Breaking News',
'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
('National News',
'http://www2.timesdispatch.com/list/feed/rss/national-news'),
('Local News',
'http://www2.timesdispatch.com/list/feed/rss/local-news'),
('Business',
'http://www2.timesdispatch.com/list/feed/rss/business'),
('Local Business',
'http://www2.timesdispatch.com/list/feed/rss/local-business'),
('Politics',
'http://www2.timesdispatch.com/list/feed/rss/politics'),
('Virginia Politics',
'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
('Editorials',
'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
('Columnists and Blogs',
'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
('Opinion Columnists',
'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
('Letters to the Editor',
'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
('Traffic',
'http://www2.timesdispatch.com/list/feed/rss/traffic'),
('Sports',
'http://www2.timesdispatch.com/list/feed/rss/sports2'),
('Entertainment/Life',
'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
('Movies',
'http://www2.timesdispatch.com/list/feed/rss/movies'),
('Music',
'http://www2.timesdispatch.com/list/feed/rss/music'),
('Dining & Food',
'http://www2.timesdispatch.com/list/feed/rss/dining'),
]

recipes/sol_haber.recipe Normal file
View File

@ -0,0 +1,141 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Onur Gungor onurgu@gmail.com'
__docformat__ = 'restructuredtext en'
'''
www.sol.org.tr
'''
import datetime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class SolHaberRecipe(BasicNewsRecipe):
title = u'soL Haber'
oldest_article = 7
max_articles_per_feed = 100
language = 'tr'
__author__ = 'Onur Güngör'
description = 'Hayata soL\'dan bakın..'
publisher = 'soL Haber'
tags = 'news, haberler, siyaset, türkiye, turkey, politics'
conversion_options = {
'comment' : description
, 'tags' : tags
, 'publisher' : publisher
, 'language' : language
}
category_dict = { 'sonuncu-kavga':'Sonuncu Kavga',
'devlet-ve-siyaset':'Devlet ve Siyaset',
'ekonomi':'Ekonomi',
'enternasyonal-gundem':'Enternasyonel Gündem',
'kent-gundemleri':'Kent Gündemleri',
'kultur-sanat':'Kültür Sanat',
'dunyadan':'Dünyadan',
'serbest-kursu':'Serbest Kürsü',
'medya':'Medya',
'liseliler':'Liseliler',
'yazarlar':'Köşe Yazıları'}
end_date = datetime.date.today().isoformat()
start_date = (datetime.date.today()-datetime.timedelta(days=1)).isoformat()
section_tuples = [['Köşe Yazıları', 'http://haber.sol.org.tr/arsiv?icerik=kose_yazisi&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
['Haberler', 'http://haber.sol.org.tr/arsiv?icerik=haber&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
['soL postal', 'http://haber.sol.org.tr/arsiv?icerik=postal&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)],
['Bizim Amerika', 'http://haber.sol.org.tr/arsiv?icerik=bizim_amerika&tarih%%5Bmin%%5D%%5Bdate%%5D=%s&tarih%%5Bmax%%5D%%5Bdate%%5D=%s' % (start_date, end_date)]]
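# After the % substitution each archive URL looks like (dates illustrative):
# http://haber.sol.org.tr/arsiv?icerik=haber&tarih%5Bmin%5D%5Bdate%5D=2012-04-26&tarih%5Bmax%5D%5Bdate%5D=2012-04-27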
# Disable stylesheets from site.
no_stylesheets = True
cover_margins = (20, 20, '#ffffff')
storybody_reg_exp = '^\s*(haber|kose)\s*$'
comments_reg_exp = '^\s*makale-elestiri\s*$'
remove_tags = [dict(name='div', attrs={'class':re.compile(comments_reg_exp, re.IGNORECASE)})]
keep_only_tags = [dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)})]
def get_masthead_title(self):
return self.title + "(" + self.end_date + ")"
def parse_index(self):
result = []
articles_dict = dict()
author_regexp = re.compile('^http://.*?/yazarlar/(.*?)/.*$')
category_regexp = re.compile('^http://.*?/(.+?)/.*$')
for section_tuple in self.section_tuples:
section_title = section_tuple[0]
section_index_url = section_tuple[1]
self.log('Bölüm:', section_title, 'URL:', section_index_url)
soup = self.index_to_soup(section_index_url)
logo = soup.find('div', id='logo').find('img', src=True)
if logo is not None:
self.cover_url = logo['src']
if self.cover_url.startswith('/'):
self.cover_url = 'http://haber.sol.org.tr'+self.cover_url
view_content = soup.find('div', id='ana-icerik').find('div', attrs={'class':'view-content'})
if view_content is None:
break
rows = view_content.find('tbody').findAll('tr')
self.log('Row sayısı', len(rows))
for row in rows:
cells = row.findAll('td')
a = cells[1].find('a', href=True)
url = a['href']
title = self.tag_to_string(a)
if url.startswith('/'):
url = 'http://haber.sol.org.tr'+url
category = section_title
category_match_result = category_regexp.match(url)
if category_match_result:
category = category_match_result.group(1)
date = self.tag_to_string(cells[2])
author = 'soL haber'
author_match_result = author_regexp.match(url)
if author_match_result:
author = author_match_result.group(1)
self.log('\tFound article:', title, 'at', url, 'published at ', date, 'by', author)
article = {'title':title, 'url':url, 'description':None, 'date':date, 'author':author}
if category in articles_dict:
articles_dict[category].append(article)
else:
articles_dict[category] = [article]
for category in articles_dict.keys():
if category in self.category_dict:
result.append((self.category_dict[category], articles_dict[category]))
else:
result.append((category, articles_dict[category]))
return result

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Swiat_Obrazu(BasicNewsRecipe):
title = u'Swiat Obrazu'
__author__ = 'fenuks'
description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.'
category = 'photography'
masthead_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
cover_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript= True
use_embedded_content = False
feeds = [(u'Wszystko', u'http://www.swiatobrazu.pl/rss')]
def print_version(self, url):
return url + ',drukuj'
def image_url_processor(self, baseurl, url):
if 'http://' not in url and 'https://' not in url:
return 'http://www.swiatobrazu.pl' + url[5:]
else:
return url

View File

@ -34,7 +34,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
no_javascript = True
remove_empty_feeds = True
encoding = 'utf-8'
remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]
remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-colon'}, {'class':'hcf-date hcf-separate'}]
def print_version(self, url):
url = url.split('/')
@ -51,6 +51,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
return ''.join(div.findAll(text=True, recursive=False)).strip() if div is not None else None
articles = {}
links = set()
key = None
ans = []
maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
@ -59,7 +60,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
if div['class'] == 'hcf-header':
try:
key = string.capwords(feed_title(div.em.a))
key = string.capwords(feed_title(div.em))
articles[key] = []
ans.append(key)
except:
@ -70,6 +71,12 @@ class TagesspiegelRSS(BasicNewsRecipe):
if not a:
continue
url = 'http://www.tagesspiegel.de' + a['href']
# check for duplicates
if url in links:
continue
links.add(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')

View File

@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
if 'tanuki-anime' in soup.title.string.lower():
a['href']='http://anime.tanuki.pl' + a['href']
elif 'tanuki-manga' in soup.title.string.lower():
a['href']='http://manga.tanuki.pl' + a['href']
elif 'tanuki-czytelnia' in soup.title.string.lower():
a['href']='http://czytelnia.tanuki.pl' + a['href']
return soup

recipes/telam.recipe Normal file
View File

@ -0,0 +1,62 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.telam.com.ar
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Telam(BasicNewsRecipe):
title = 'Telam'
__author__ = 'Darko Miletic'
description = 'AGENCIA DE NOTICIAS DE LA REPUBLICA ARGENTINA'
publisher = 'Telam S.E.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newsportal'
masthead_url = 'http://www.telam.com.ar/front/imagenes/encabezado/logotelam.jpg'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['meta','link'])]
remove_tags_before = dict(attrs={'class':'nota_fecha'})
remove_tags_after = dict(attrs={'class':'nota_completa'})
remove_attributes = ['lang']
feeds = [
(u'Ultimas noticias', u'http://www.telam.com.ar/xml/rss/' )
,(u'Politica' , u'http://www.telam.com.ar/xml/rss/1')
,(u'Economia' , u'http://www.telam.com.ar/xml/rss/2')
,(u'Sociedad' , u'http://www.telam.com.ar/xml/rss/3')
,(u'Policiales' , u'http://www.telam.com.ar/xml/rss/4')
,(u'Internacionales' , u'http://www.telam.com.ar/xml/rss/6')
,(u'Espectaculos' , u'http://www.telam.com.ar/xml/rss/7')
,(u'Cultura' , u'http://www.telam.com.ar/xml/rss/8')
,(u'Deportes' , u'http://www.telam.com.ar/xml/rss/9')
,(u'Telam Investiga' , u'http://www.telam.com.ar/xml/rss/5')
]
def print_version(self, url):
artid = url.rpartition('/')[2]
return 'http://www.telam.com.ar/?codProg=imprimir-nota&id=' + artid
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,9 +1,8 @@
import re
import re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'The Sun UK'
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
description = 'A Recipe for The Sun tabloid UK'
__author__ = 'Dave Asbury'
@ -24,37 +23,69 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
no_stylesheets = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
preprocess_regexps = [
(re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
keep_only_tags = [
dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
dict(name='div',attrs={'class' : 'text-center'}),
dict(name='div',attrs={'id' : 'bodyText'})
# dict(name='p')
]
remove_tags=[
#dict(name='head'),
dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
dict(name='div',attrs={'class' : 'cf'}),
dict(attrs={'title' : 'download flash'}),
dict(attrs={'style' : 'padding: 5px'})
]
feeds = [
#(u'News', u'http://www.thesun.co.uk/sol/homepage/news/rss'),
(u'News','http://feed43.com/2517447382644748.xml'),
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
(u'Film',u'http://feed43.com/1307545221226200.xml'),
(u'Music',u'http://feed43.com/1701513435064132.xml'),
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
]
def get_cover_url(self):
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
# look for the block containing the sun button and url
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})
#cov = soup.find(attrs={'id' : 'large'})
cov2 = str(cov)
cov2='http://www.politicshome.com'+cov2[9:-133]
#cov2 now contains url of the page containing pic
soup = self.index_to_soup(cov2)
cov = soup.find(attrs={'id' : 'large'})
cov2 = str(cov)
cov2=cov2[27:-18]
#cov2 now is pic url, now go back to original function
br = mechanize.Browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = cov2
except:
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
#cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url

View File

@ -11,6 +11,8 @@ class TPM_uk(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = 'Title says it all'
publisher = "The Philosophers' Magazine"
recipe_disabled = ('This recipe has been disabled as the website has'
' started providing articles only in PDF form')
category = 'philosophy, news'
oldest_article = 25
max_articles_per_feed = 200

View File

@ -2,65 +2,50 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
''' Changelog
2012-04-27 DrMerry:
Added cover picture
removed some extra tags
'''
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Tweakers(BasicNewsRecipe):
title = u'Tweakers.net - with Reactions'
__author__ = 'Roedi06'
title = u'Tweakers.net'
__author__ = 'Kovid Goyal'
language = 'nl'
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
oldest_article = 4
max_articles_per_feed = 40
cover_url = 'http://tweakers.net/ext/launch/g/logo.gif'
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
{'id':'reacties'},
]
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'})]
remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
{'id' : ['channelNav']},
{'id' : ['contentArea']},
{'class' : ['breadCrumb']},
{'class' : ['nextPrevious ellipsis']},
{'class' : ['advertorial']},
{'class' : ['sidebar']},
{'class' : ['filterBox']},
{'id' : ['toggleButtonTxt']},
{'id' : ['socialButtons']},
{'class' : ['button']},
{'class' : ['textadTop']},
{'class' : ['commentLink']},
{'title' : ['Reageer op deze reactie']},
{'class' : ['pageIndex']},
{'class' : ['reactieHeader collapsed']},
remove_tags = [dict(name='div', attrs={'class':'reacties'}),
{'id' : ['utracker','socialButtons','b_ac']},
{'class' : ['sidebar','advertorial']},
{'class' : re.compile('nextPrevious')},
]
no_stylesheets=True
filter_regexps = [r'ads\.doubleclick\.net',r'ad\.doubleclick\.net']
preprocess_regexps = [
(re.compile(r'<hr*?>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<a.*?>'), lambda h1: '<b><u>'),
(re.compile(r'</a>'), lambda h2: '</u></b>'),
(re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
(re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),
]
feeds = [(u'Tweakers.net', u'http://tweakers.net/feeds/nieuws.xml')]
extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
.reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
.quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
def print_version(self, url):
return url + '?max=200'
def preprocess_html(self, soup):
for a in soup.findAll('a', href=True, rel=True):
if a['rel'].startswith('imageview'):
a['src'] = a['href']
del a['href']
a.name = 'img'
for x in a.findAll(True):
x.extract()
return soup
def postprocess_html(self, soup, first):
for base in soup.findAll('base'):
base.extract()
return soup

recipes/vignette.recipe Normal file
View File

@ -0,0 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1334935485(BasicNewsRecipe):
title = u'Vignette'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = False
keep_only_tags = [
dict(name='div', attrs={'class':['HomeFirstNewsfoto', 'photo']}),
dict(name='img', attrs={'class':'altan-big'})
]
masthead_url = 'http://vauro.globalist.it/vauroglobalistit/Img/vauro-logo-beta.gif'
feeds = [(u'Altan', u'http://feed43.com/3556647724071522.xml'), (u'Ellekappa', u'http://ellekappa.tumblr.com/rss'), (u'Vauro', u'http://feeds.feedburner.com/vauro')]
description = 'Ellekappa, Altan, Vauro - the best Italian satirical cartoons'
language = 'it'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '24, April 2012'

View File

@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
cover_url='http://webhosting.pl/images/logo.png'
masthead_url='http://webhosting.pl/images/logo.png'
oldest_article = 7
index='http://webhosting.pl'
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
@ -36,4 +37,10 @@ class webhosting_pl(BasicNewsRecipe):
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
def print_version(self, url):
return url.replace('webhosting.pl', 'webhosting.pl/print')
return url.replace('webhosting.pl', 'webhosting.pl/print')
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -21,7 +21,7 @@ class XkcdCom(BasicNewsRecipe):
use_embedded_content = False
oldest_article = 60
keep_only_tags = [dict(id='middleContent')]
keep_only_tags = [dict(id='middleContainer')]
remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
no_stylesheets = True
# turn image bubblehelp into a paragraph

View File

@ -377,7 +377,7 @@
<xsl:apply-templates/><br/>
</xsl:template>
<!-- image -->
<xsl:template match="fb:image">
<xsl:template match="fb:body/fb:image|fb:section/fb:image">
<div align="center">
<xsl:element name="img">
<xsl:attribute name="border">1</xsl:attribute>
@ -395,4 +395,20 @@
</xsl:element>
</div>
</xsl:template>
<xsl:template match="fb:image">
<xsl:element name="img">
<xsl:choose>
<xsl:when test="starts-with(@xlink:href,'#')">
<xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute>
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute>
</xsl:otherwise>
</xsl:choose>
<xsl:if test="@title">
<xsl:attribute name="title"><xsl:value-of select="@title"/></xsl:attribute>
</xsl:if>
</xsl:element>
</xsl:template>
</xsl:stylesheet>

View File

@ -26,7 +26,7 @@ def login_to_google(username, password):
br.form['Email'] = username
br.form['Passwd'] = password
raw = br.submit().read()
if re.search(br'<title>.*?Account Settings</title>', raw) is None:
if re.search(br'(?i)<title>.*?Account Settings</title>', raw) is None:
x = re.search(br'(?is)<title>.*?</title>', raw)
if x is not None:
print ('Title of post login page: %s'%x.group())

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-12-14 19:48+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"PO-Revision-Date: 2012-04-12 09:56+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-12-15 05:18+0000\n"
"X-Generator: Launchpad (build 14487)\n"
"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n"
"X-Generator: Launchpad (build 15070)\n"
"Language: ca\n"
#. name for aaa
@ -9584,31 +9584,31 @@ msgstr ""
#. name for hoi
msgid "Holikachuk"
msgstr ""
msgstr "Holikachuk"
#. name for hoj
msgid "Hadothi"
msgstr ""
msgstr "Hadothi"
#. name for hol
msgid "Holu"
msgstr ""
msgstr "Holu"
#. name for hom
msgid "Homa"
msgstr ""
msgstr "Homa"
#. name for hoo
msgid "Holoholo"
msgstr ""
msgstr "Holoholo"
#. name for hop
msgid "Hopi"
msgstr ""
msgstr "Hopi"
#. name for hor
msgid "Horo"
msgstr ""
msgstr "Horo"
#. name for hos
msgid "Ho Chi Minh City Sign Language"
@ -9616,15 +9616,15 @@ msgstr "Llenguatge de signes de la ciutat de Ho Chi Minh"
#. name for hot
msgid "Hote"
msgstr ""
msgstr "Hote"
#. name for hov
msgid "Hovongan"
msgstr ""
msgstr "Hovongan"
#. name for how
msgid "Honi"
msgstr ""
msgstr "Honi"
#. name for hoy
msgid "Holiya"
@ -9636,7 +9636,7 @@ msgstr ""
#. name for hpo
msgid "Hpon"
msgstr ""
msgstr "Hpon"
#. name for hps
msgid "Hawai'i Pidgin Sign Language"
@ -9644,35 +9644,35 @@ msgstr "Hawaià Pidgin; llenguatge de signes"
#. name for hra
msgid "Hrangkhol"
msgstr ""
msgstr "Hrangkhol"
#. name for hre
msgid "Hre"
msgstr ""
msgstr "Hre"
#. name for hrk
msgid "Haruku"
msgstr ""
msgstr "Haruku"
#. name for hrm
msgid "Miao; Horned"
msgstr ""
msgstr "Miao; Horned"
#. name for hro
msgid "Haroi"
msgstr ""
msgstr "Haroi"
#. name for hrr
msgid "Horuru"
msgstr ""
msgstr "Horuru"
#. name for hrt
msgid "Hértevin"
msgstr ""
msgstr "Hértevin"
#. name for hru
msgid "Hruso"
msgstr ""
msgstr "Hruso"
#. name for hrv
msgid "Croatian"
@ -9680,11 +9680,11 @@ msgstr "Croat"
#. name for hrx
msgid "Hunsrik"
msgstr ""
msgstr "Hunsrik"
#. name for hrz
msgid "Harzani"
msgstr ""
msgstr "Harzani"
#. name for hsb
msgid "Sorbian; Upper"
@ -9704,31 +9704,31 @@ msgstr "Xinès; Xiang"
#. name for hss
msgid "Harsusi"
msgstr ""
msgstr "Harsusi"
#. name for hti
msgid "Hoti"
msgstr ""
msgstr "Hoti"
#. name for hto
msgid "Huitoto; Minica"
msgstr ""
msgstr "Huitoto; Minica"
#. name for hts
msgid "Hadza"
msgstr ""
msgstr "Hadza"
#. name for htu
msgid "Hitu"
msgstr ""
msgstr "Hitu"
#. name for htx
msgid "Hittite; Middle"
msgstr ""
msgstr "Hittite; Middle"
#. name for hub
msgid "Huambisa"
msgstr ""
msgstr "Huambisa"
#. name for huc
msgid "=/Hua"
@ -9736,27 +9736,27 @@ msgstr ""
#. name for hud
msgid "Huaulu"
msgstr ""
msgstr "Huaulu"
#. name for hue
msgid "Huave; San Francisco Del Mar"
msgstr ""
msgstr "Huave; San Francisco Del Mar"
#. name for huf
msgid "Humene"
msgstr ""
msgstr "Humene"
#. name for hug
msgid "Huachipaeri"
msgstr ""
msgstr "Huachipaeri"
#. name for huh
msgid "Huilliche"
msgstr ""
msgstr "Huilliche"
#. name for hui
msgid "Huli"
msgstr ""
msgstr "Huli"
#. name for huj
msgid "Miao; Northern Guiyang"
@ -9764,15 +9764,15 @@ msgstr "Miao; Guiyang septentrional"
#. name for huk
msgid "Hulung"
msgstr ""
msgstr "Hulung"
#. name for hul
msgid "Hula"
msgstr ""
msgstr "Hula"
#. name for hum
msgid "Hungana"
msgstr ""
msgstr "Hungana"
#. name for hun
msgid "Hungarian"
@ -9780,43 +9780,43 @@ msgstr "Hongarès"
#. name for huo
msgid "Hu"
msgstr ""
msgstr "Hu"
#. name for hup
msgid "Hupa"
msgstr ""
msgstr "Hupa"
#. name for huq
msgid "Tsat"
msgstr ""
msgstr "Tsat"
#. name for hur
msgid "Halkomelem"
msgstr ""
msgstr "Halkomelem"
#. name for hus
msgid "Huastec"
msgstr ""
msgstr "Huastec"
#. name for hut
msgid "Humla"
msgstr ""
msgstr "Humla"
#. name for huu
msgid "Huitoto; Murui"
msgstr ""
msgstr "Huitoto; Murui"
#. name for huv
msgid "Huave; San Mateo Del Mar"
msgstr ""
msgstr "Huave; San Mateo Del Mar"
#. name for huw
msgid "Hukumina"
msgstr ""
msgstr "Hukumina"
#. name for hux
msgid "Huitoto; Nüpode"
msgstr ""
msgstr "Huitoto; Nüpode"
#. name for huy
msgid "Hulaulá"

View File

@ -18,27 +18,27 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-05 19:08+0000\n"
"Last-Translator: Dennis Baudys <Unknown>\n"
"PO-Revision-Date: 2012-04-21 14:42+0000\n"
"Last-Translator: SimonFS <simonschuette@arcor.de>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-06 04:47+0000\n"
"X-Generator: Launchpad (build 14900)\n"
"X-Launchpad-Export-Date: 2012-04-22 04:43+0000\n"
"X-Generator: Launchpad (build 15120)\n"
"Language: de\n"
#. name for aaa
msgid "Ghotuo"
msgstr ""
msgstr "Ghotuo (Nigeria)"
#. name for aab
msgid "Alumu-Tesu"
msgstr "Alumu-Tesu"
msgstr "Alumu-Tesu (Nigeria)"
#. name for aac
msgid "Ari"
msgstr "Ari"
msgstr "Ari (Papua-Neuguinea)"
#. name for aad
msgid "Amal"
@ -66,7 +66,7 @@ msgstr "Arifama-Miniafia"
#. name for aak
msgid "Ankave"
msgstr "Ankave"
msgstr "Ankave (Papua-Neuguinea)"
#. name for aal
msgid "Afade"
@ -110,7 +110,7 @@ msgstr ""
#. name for aaw
msgid "Solong"
msgstr ""
msgstr "Solong"
#. name for aax
msgid "Mandobo Atas"
@ -30860,7 +30860,7 @@ msgstr ""
#. name for zxx
msgid "No linguistic content"
msgstr ""
msgstr "Kein linguistischer Inhalt"
#. name for zyb
msgid "Zhuang; Yongbei"

File diff suppressed because it is too large

View File

@ -9,14 +9,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-06 13:55+0000\n"
"PO-Revision-Date: 2012-04-18 13:08+0000\n"
"Last-Translator: Asier Iturralde Sarasola <Unknown>\n"
"Language-Team: Euskara <itzulpena@comtropos.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n"
"X-Generator: Launchpad (build 14907)\n"
"X-Launchpad-Export-Date: 2012-04-19 04:36+0000\n"
"X-Generator: Launchpad (build 15108)\n"
"Language: eu\n"
#. name for aaa
@ -27125,7 +27125,7 @@ msgstr ""
#. name for vie
msgid "Vietnamese"
msgstr "Mahastiak"
msgstr "Vietnamera"
#. name for vif
msgid "Vili"

View File

@ -10,14 +10,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-11-11 00:16+0000\n"
"PO-Revision-Date: 2012-04-22 07:11+0000\n"
"Last-Translator: kulkke <Unknown>\n"
"Language-Team: Turkish <gnome-turk@gnome.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-26 05:42+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2012-04-23 04:45+0000\n"
"X-Generator: Launchpad (build 15135)\n"
"Language: tr\n"
#. name for aaa
@ -7371,7 +7371,7 @@ msgstr ""
#. name for est
msgid "Estonian"
msgstr "Estonyaca"
msgstr "Estonca"
#. name for esu
msgid "Yupik; Central"
@ -11131,7 +11131,7 @@ msgstr ""
#. name for kaz
msgid "Kazakh"
msgstr "Kazak Dili"
msgstr "Kazakça"
#. name for kba
msgid "Kalarko"
@ -13767,7 +13767,7 @@ msgstr ""
#. name for lav
msgid "Latvian"
msgstr "Letonyaca"
msgstr "Letonca"
#. name for law
msgid "Lauje"
@ -16031,7 +16031,7 @@ msgstr ""
#. name for mkd
msgid "Macedonian"
msgstr "Makedonyaca"
msgstr "Makedonca"
#. name for mke
msgid "Mawchi"
@ -22227,7 +22227,7 @@ msgstr ""
#. name for ron
msgid "Romanian"
msgstr "Romence"
msgstr "Rumence"
#. name for roo
msgid "Rotokas"

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 46)
numeric_version = (0, 8, 49)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -259,7 +259,7 @@ class LRXMetadataReader(MetadataReaderPlugin):
class MOBIMetadataReader(MetadataReaderPlugin):
name = 'Read MOBI metadata'
file_types = set(['mobi', 'prc', 'azw', 'azw4', 'pobi'])
file_types = set(['mobi', 'prc', 'azw', 'azw3', 'azw4', 'pobi'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):

View File

@ -10,6 +10,8 @@ import cStringIO
from calibre.devices.usbms.driver import USBMS
HTC_BCDS = [0x100, 0x0222, 0x0226, 0x227, 0x228]
class ANDROID(USBMS):
name = 'Android driver'
@ -23,23 +25,24 @@ class ANDROID(USBMS):
VENDOR_ID = {
# HTC
0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222],
0xc01 : [0x100, 0x0227, 0x0226],
0xff9 : [0x0100, 0x0227, 0x0226],
0xc86 : [0x100, 0x0227, 0x0226, 0x222],
0xc87 : [0x0100, 0x0227, 0x0226],
0xc8d : [0x100, 0x0227, 0x0226, 0x222],
0xc91 : [0x0100, 0x0227, 0x0226],
0xc92 : [0x100, 0x0227, 0x0226, 0x222],
0xc97 : [0x100, 0x0227, 0x0226, 0x222],
0xc99 : [0x100, 0x0227, 0x0226, 0x222],
0xca2 : [0x100, 0x0227, 0x0226, 0x222],
0xca3 : [0x100, 0x0227, 0x0226, 0x222],
0xca4 : [0x100, 0x0227, 0x0226, 0x222],
0xca9 : [0x100, 0x0227, 0x0226, 0x222],
0xcac : [0x100, 0x0227, 0x0226, 0x222],
0xccf : [0x100, 0x0227, 0x0226, 0x222],
0x2910 : [0x222],
0x0bb4 : { 0xc02 : HTC_BCDS,
0xc01 : HTC_BCDS,
0xff9 : HTC_BCDS,
0xc86 : HTC_BCDS,
0xc87 : HTC_BCDS,
0xc8d : HTC_BCDS,
0xc91 : HTC_BCDS,
0xc92 : HTC_BCDS,
0xc97 : HTC_BCDS,
0xc99 : HTC_BCDS,
0xca2 : HTC_BCDS,
0xca3 : HTC_BCDS,
0xca4 : HTC_BCDS,
0xca9 : HTC_BCDS,
0xcac : HTC_BCDS,
0xccf : HTC_BCDS,
0x2910 : HTC_BCDS,
0xff9 : [0x9999],
},
# Eken
@ -174,7 +177,7 @@ class ANDROID(USBMS):
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC']
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -189,7 +192,8 @@ class ANDROID(USBMS):
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD']
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -197,7 +201,7 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD']
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -325,6 +325,10 @@ class KINDLE2(KINDLE):
OPT_APNX_ACCURATE = 1
OPT_APNX_CUST_COL = 2
def formats_to_scan_for(self):
ans = USBMS.formats_to_scan_for(self) | {'azw3'}
return ans
def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information
@ -423,6 +427,8 @@ class KINDLE_FIRE(KINDLE2):
name = 'Kindle Fire Device Interface'
description = _('Communicate with the Kindle Fire')
gui_name = 'Fire'
FORMATS = list(KINDLE2.FORMATS)
FORMATS.insert(0, 'azw3')
PRODUCT_ID = [0x0006]
BCD = [0x216, 0x100]

View File

@ -298,7 +298,7 @@ class KOBO(USBMS):
changed = False
for i, row in enumerate(cursor):
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
if row[3].startswith("file:///usr/local/Kobo/help/"):
if not hasattr(row[3], 'startswith') or row[3].startswith("file:///usr/local/Kobo/help/"):
# These are internal to the Kobo device and do not exist
continue
path = self.path_from_contentid(row[3], row[5], row[4], oncard)

View File

@ -86,7 +86,8 @@ class NOOK_COLOR(NOOK):
PRODUCT_ID = [0x002, 0x003, 0x004]
BCD = [0x216]
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_DISK', 'NOOK_TABLET']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_DISK', 'NOOK_TABLET',
'NOOK_SIMPLETOUCH']
EBOOK_DIR_MAIN = 'My Files'
NEWS_IN_FOLDER = False

View File

@ -307,11 +307,21 @@ class PRST1(USBMS):
# Work-around for Sony Bug (SD Card DB not using right SQLite sequence)
if source_id == 1:
# Update any existing sequence numbers in the table that aren't in the required range
sdcard_sequence_start = '4294967296'
query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?'
t = (sdcard_sequence_start, sdcard_sequence_start,)
cursor.execute(query, t)
# Insert sequence numbers for tables we will be manipulating, if they don't already exist
query = ('INSERT INTO sqlite_sequence (name, seq) '
'SELECT ?, ? '
'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
cursor.execute(query, ('books',sdcard_sequence_start,'books',))
cursor.execute(query, ('collection',sdcard_sequence_start,'collection',))
cursor.execute(query, ('collections',sdcard_sequence_start,'collections',))
for book in booklist:
# Run through plugboard if needed
if plugboard is not None:

View File

@ -128,6 +128,9 @@ class USBMS(CLI, Device):
elif location_code == 'B':
self._update_driveinfo_file(self._card_b_prefix, location_code, name)
def formats_to_scan_for(self):
return set(self.settings().format_map) | set(self.FORMATS)
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
@ -166,7 +169,7 @@ class USBMS(CLI, Device):
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
all_formats = set(self.settings().format_map) | set(self.FORMATS)
all_formats = self.formats_to_scan_for()
def update_booklist(filename, path, prefix):
changed = False

View File

@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
'textile', 'markdown', 'ibook', 'iba']
'textile', 'markdown', 'ibook', 'iba', 'azw3']
class HTMLRenderer(object):
@ -93,6 +93,20 @@ def extract_calibre_cover(raw, base, log):
if os.path.exists(img):
return open(img, 'rb').read()
# Look for a simple cover, i.e. a body with no text and only one <img> tag
if matches is None:
body = soup.find('body')
if body is not None:
text = u''.join(map(unicode, body.findAll(text=True)))
if text.strip():
# Body has text, abort
return
images = body.findAll('img', src=True)
if 0 < len(images) < 2:
img = os.path.join(base, *images[0]['src'].split('/'))
if os.path.exists(img):
return open(img, 'rb').read()
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
from calibre.ebooks.oeb.base import SVG_NS
raw = open(path_to_html, 'rb').read()
@ -108,6 +122,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
except:
pass
if data is None:
renderer = render_html(path_to_html, width, height)
data = getattr(renderer, 'data', None)

View File

@ -156,9 +156,10 @@ def add_pipeline_options(parser, plumber):
'SEARCH AND REPLACE' : (
_('Modify the document text and structure using user defined patterns.'),
[
'sr1_search', 'sr1_replace',
'sr2_search', 'sr2_replace',
'sr3_search', 'sr3_replace',
'search_replace',
]
),
@ -211,6 +212,7 @@ def add_pipeline_options(parser, plumber):
if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec)
def option_parser():
parser = OptionParser(usage=USAGE)
parser.add_option('--list-recipes', default=False, action='store_true',
@ -271,6 +273,34 @@ def abspath(x):
return x
return os.path.abspath(os.path.expanduser(x))
def read_sr_patterns(path, log=None):
import json, re, codecs
pats = []
with codecs.open(path, 'r', 'utf-8') as f:
pat = None
for line in f.readlines():
if line.endswith(u'\n'):
line = line[:-1]
if pat is None:
if not line.strip():
continue
try:
re.compile(line)
except:
msg = u'Invalid regular expression: %r from file: %r'%(
line, path)
if log is not None:
log.error(msg)
raise SystemExit(1)
else:
raise ValueError(msg)
pat = line
else:
pats.append((pat, line))
pat = None
return json.dumps(pats)
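# A minimal sketch of the patterns file this function expects (hypothetical
# content, not part of this commit): alternating lines of regular expression
# and replacement text, UTF-8 encoded; a pattern with an empty replacement is
# followed by a blank line. For example:
#
#     (?i)chapter\s+(\d+)
#     Chapter \1
#     <font[^>]*>
#     <blank line: empty replacement>
#
# Assuming calibre's usual underscore-to-dash mapping for CLI option names,
# this would be used as:
#     ebook-convert book.epub book.mobi --search-replace patterns.txt
# The pairs are re-serialized via json.dumps() above, which is the form the
# pipeline's search_replace option consumes.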
def main(args=sys.argv):
log = Log()
parser, plumber = create_option_parser(args, log)
@ -278,6 +308,9 @@ def main(args=sys.argv):
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, abspath(getattr(opts, x)))
if opts.search_replace:
opts.search_replace = read_sr_patterns(opts.search_replace, log)
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()

View File

@ -7,41 +7,17 @@ import os
from calibre.customize.conversion import InputFormatPlugin
def run_mobi_unpack(stream, options, log, accelerators):
from mobiunpack.mobi_unpack import Mobi8Reader
from calibre.customize.ui import plugin_for_input_format
from calibre.ptempfile import PersistentTemporaryDirectory
wdir = PersistentTemporaryDirectory('_unpack_space')
m8r = Mobi8Reader(stream, wdir)
if m8r.isK8():
epub_path = m8r.processMobi8()
epub_input = plugin_for_input_format('epub')
for opt in epub_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = m8r.getCodec()
return epub_input.convert(open(epub_path,'rb'), options,
'epub', log, accelerators)
class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'
author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw'])
file_types = set(['mobi', 'prc', 'azw', 'azw3'])
def convert(self, stream, options, file_ext, log,
accelerators):
self.is_kf8 = False
if os.environ.get('USE_MOBIUNPACK', None) is not None:
pos = stream.tell()
try:
return run_mobi_unpack(stream, options, log, accelerators)
except Exception:
log.exception('mobi_unpack code not working')
stream.seek(pos)
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
from lxml import html
parse_cache = {}

View File

@ -6,8 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin):
def check_for_masthead(self):
found = 'masthead' in self.oeb.guide
if not found:
from calibre.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
try:
from PIL import Image as PILImage
PILImage
except ImportError:
import Image as PILImage
raw = open(P('content_server/calibre_banner.png'), 'rb')
im = PILImage.open(raw)
of = StringIO()
im.save(of, 'GIF')
raw = of.getvalue()
raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href)
@ -151,13 +140,70 @@ class MOBIOutput(OutputFormatPlugin):
# Fix up the periodical href to point to first section href
toc.nodes[0].href = toc.nodes[0].nodes[0].href
def remove_html_cover(self):
from calibre.ebooks.oeb.base import OEB_DOCS
oeb = self.oeb
if not oeb.metadata.cover \
or 'cover' not in oeb.guide:
return
href = oeb.guide['cover'].href
del oeb.guide['cover']
item = oeb.manifest.hrefs[href]
if item.spine_position is not None:
self.log.warn('Found an HTML cover: ', item.href, 'removing it.',
'If you find some content missing from the output MOBI, it '
'is because you misidentified the HTML cover in the input '
'document')
oeb.spine.remove(item)
if item.media_type in OEB_DOCS:
self.oeb.manifest.remove(item)
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.utils.config import tweaks
from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb
mobi_type = tweaks.get('test_mobi_output_type', 'old')
if self.is_periodical:
mobi_type = 'old' # Amazon does not support KF8 periodicals
create_kf8 = mobi_type in ('new', 'both')
self.remove_html_cover()
resources = Resources(oeb, opts, self.is_periodical,
add_fonts=create_kf8)
self.check_for_periodical()
if create_kf8:
# Split on pagebreaks so that the resulting KF8 works better with
# calibre's viewer, which does not support CSS page breaks
from calibre.ebooks.oeb.transforms.split import Split
Split()(self.oeb, self.opts)
kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
) if create_kf8 else None
if mobi_type == 'new':
kf8.write(output_path)
self.extract_mobi(output_path, opts)
return
self.log('Creating MOBI 6 output')
self.write_mobi(input_plugin, output_path, kf8, resources)
def create_kf8(self, resources, for_joint=False):
from calibre.ebooks.mobi.writer8.main import create_kf8_book
return create_kf8_book(self.oeb, self.opts, resources,
for_joint=for_joint)
def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.customize.ui import plugin_for_input_format
opts, oeb = self.opts, self.oeb
if not opts.no_inline_toc:
tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
opts.mobi_toc_at_start else 'end')
@ -169,15 +215,19 @@ class MOBIOutput(OutputFormatPlugin):
rasterizer(oeb, opts)
except Unavailable:
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
else:
# Add rasterized SVG images
resources.add_extra_images()
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.ebooks.mobi.writer2.main import MobiWriter
writer = MobiWriter(opts,
writer = MobiWriter(opts, resources, kf8,
write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path)
self.extract_mobi(output_path, opts)
def extract_mobi(self, output_path, opts):
if opts.extract_to is not None:
from calibre.ebooks.mobi.debug.main import inspect_mobi
ddir = opts.extract_to

View File

@ -536,7 +536,7 @@ OptionRecommendation(name='pubdate',
OptionRecommendation(name='timestamp',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the book timestamp (used by the date column in calibre).')),
help=_('Set the book timestamp (no longer used anywhere)')),
OptionRecommendation(name='enable_heuristics',
recommended_value=False, level=OptionRecommendation.LOW,
@ -626,6 +626,14 @@ OptionRecommendation(name='sr3_search',
OptionRecommendation(name='sr3_replace',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replacement to replace the text found with sr3-search.')),
OptionRecommendation(name='search_replace',
recommended_value=None, level=OptionRecommendation.LOW, help=_(
'Path to a file containing search and replace regular expressions. '
'The file must contain alternating lines of regular expression '
'followed by replacement pattern (which can be an empty line). '
'The regular expression must be in Python regex syntax and '
'the file must be UTF-8 encoded.')),
]
# }}}
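For reference, a minimal sketch (an assumption, not the actual read_sr_patterns implementation referenced elsewhere in this commit) of how such an alternating-lines file could be reduced to the JSON list that HTMLPreProcessor later loads with json.loads():

import codecs, json

def read_sr_patterns_sketch(path):
    # Pair up alternating lines: regex, then its replacement (possibly empty)
    pats = []
    with codecs.open(path, 'r', 'utf-8') as f:
        lines = f.read().splitlines()
    for i in range(0, len(lines) - 1, 2):
        pattern, replacement = lines[i], lines[i + 1]
        if pattern:
            pats.append([pattern, replacement])
    return json.dumps(pats)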

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import functools, re
import functools, re, json
from calibre import entity_to_unicode, as_unicode
@ -515,18 +515,31 @@ class HTMLPreProcessor(object):
if not getattr(self.extra_opts, 'keep_ligatures', False):
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]:
# Function for processing search and replace
def do_search_replace(search_pattern, replace_txt):
try:
search_re = re.compile(search_pattern)
if not replace_txt:
replace_txt = ''
rules.insert(0, (search_re, replace_txt))
except Exception as e:
self.log.error('Failed to parse %r regexp because %s' %
(search, as_unicode(e)))
# search / replace using the sr?_search / sr?_replace options
for i in range(1, 4):
search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
search_pattern = getattr(self.extra_opts, search, '')
replace_txt = getattr(self.extra_opts, replace, '')
if search_pattern:
try:
search_re = re.compile(search_pattern)
replace_txt = getattr(self.extra_opts, replace, '')
if not replace_txt:
replace_txt = ''
rules.insert(0, (search_re, replace_txt))
except Exception as e:
self.log.error('Failed to parse %r regexp because %s' %
(search, as_unicode(e)))
do_search_replace(search_pattern, replace_txt)
# multi-search / replace using the search_replace option
search_replace = getattr(self.extra_opts, 'search_replace', None)
if search_replace:
search_replace = json.loads(search_replace)
for search_pattern, replace_txt in search_replace:
do_search_replace(search_pattern, replace_txt)
end_rules = []
# delete soft hyphens - moved here so it's executed after header/footer removal
@ -546,7 +559,7 @@ class HTMLPreProcessor(object):
end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
end_rules.append(
# Un wrap using punctuation
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:

View File

@ -148,6 +148,7 @@ class HeuristicProcessor(object):
return wordcount.words
def markup_italicis(self, html):
self.log.debug("\n\n\nitalicize debugging \n\n\n")
ITALICIZE_WORDS = [
'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
@ -156,28 +157,30 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_',
ur'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_',
ur'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/',
ur'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/',
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
]
for word in ITALICIZE_WORDS:
html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
def sub(mo):
return '<i>%s</i>'%mo.group('words')
search_text = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
search_text = re.sub(r'<[^>]*>', '', search_text)
for pat in ITALICIZE_STYLE_PATS:
html = re.sub(pat, sub, html)
for match in re.finditer(pat, search_text):
ital_string = str(match.group('words'))
#self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
return html
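As an illustration (not part of the commit), the first style pattern above turns underscore-delimited text into italics; this uses the direct substitution form for brevity, whereas the new code above searches a tag-stripped copy of the text first:

import re

ITAL = r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
sample = 'He was <b>very</b> _quite sure_ of it'
print(re.sub(ITAL, lambda m: '<i>%s</i>' % m.group('words'), sample))
# -> He was <b>very</b> <i>quite sure</i> of it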
def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
@ -316,13 +319,20 @@ class HeuristicProcessor(object):
'''
Unwraps lines based on line length and punctuation
supports a range of html markup and text files
The lookahead regex below is meant to look for any non-full-stop characters. Punctuation
characters which can be used as a full stop should *not* be added below (e.g. ?!.);
the reason for this is to prevent false positive unwrapping. False positives are more
difficult to detect than false negatives during a manual review of the document.
This function intentionally leaves hyphenated content alone, as that is handled by the
dehyphenation routine in a separate step.
'''
# define the pieces of the regex
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
# define the pieces of the regex
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad"
dash = u"\x2d" # some ocrs doesn't convert dashes to hyphens
line_ending = "\s*</(span|[iubp]|div)>\s*(</(span|[iubp]|div)>)?"
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
line_opening = "<(span|[iubp]|div)[^>]*>\s*(<(span|[iubp]|div)[^>]*>)?\s*"
@ -331,23 +341,19 @@ class HeuristicProcessor(object):
unwrap_regex = lookahead+line_ending+blanklines+line_opening
em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening
shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening
dash_unwrap_regex = dash+line_ending+blanklines+line_opening
if format == 'txt':
unwrap_regex = lookahead+txt_line_wrap
em_en_unwrap_regex = em_en_lookahead+txt_line_wrap
shy_unwrap_regex = soft_hyphen+txt_line_wrap
dash_unwrap_regex = dash+txt_line_wrap
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
dash_unwrap = re.compile(u"%s" % dash_unwrap_regex, re.UNICODE)
content = unwrap.sub(' ', content)
content = em_en_unwrap.sub('', content)
content = shy_unwrap.sub('', content)
content = dash_unwrap.sub('', content)
return content
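A short, hedged sketch of the unwrap idea above (the real lookahead and txt_line_wrap are much longer; both are trimmed here so the example stays readable):

import re

length = 10
lookahead = u'(?<=.{%i}[a-z,:)])' % length       # trimmed character class
txt_line_wrap = u'((\u0020|\u0009)*\n){1,2}'     # assumption: approximate form
unwrap = re.compile(lookahead + txt_line_wrap, re.UNICODE)
wrapped = u'This line was wrapped by the\nconversion mid sentence.'
print(unwrap.sub(u' ', wrapped))
# -> This line was wrapped by the conversion mid sentence.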
def txt_process(self, match):
@ -460,27 +466,31 @@ class HeuristicProcessor(object):
return html
def detect_whitespace(self, html):
blanks_around_headings = re.compile(r'(?P<initparas>(<(p|div)[^>]*>\s*</(p|div)>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<(p|div)[^>]*>\s*</(p|div)>\s*){1,})?', re.IGNORECASE|re.DOTALL)
blanks_around_headings = re.compile(r'(?P<initparas>(<(p|div)[^>]*>\s*</(p|div)>\s*){1,}\s*)?(?P<content><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<(p|div)[^>]*>\s*</(p|div)>\s*){1,})?', re.IGNORECASE|re.DOTALL)
blanks_around_scene_breaks = re.compile(r'(?P<initparas>(<(p|div)[^>]*>\s*</(p|div)>\s*){1,}\s*)?(?P<content><p class="scenebreak"[^>]*>.*?</p>)(?P<endparas>\s*(<(p|div)[^>]*>\s*</(p|div)>\s*){1,})?', re.IGNORECASE|re.DOTALL)
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE|re.DOTALL)
def merge_header_whitespace(match):
initblanks = match.group('initparas')
endblanks = match.group('initparas')
heading = match.group('heading')
endblanks = match.group('endparas')
content = match.group('content')
top_margin = ''
bottom_margin = ''
if initblanks is not None:
top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
if endblanks is not None:
bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(initblanks)))+'em;'
bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(endblanks)))+'em;'
if initblanks == None and endblanks == None:
return heading
return content
elif content.find('scenebreak') != -1:
return content
else:
heading = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
return heading
content = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content)
return content
html = blanks_around_headings.sub(merge_header_whitespace, html)
html = blanks_around_scene_breaks.sub(merge_header_whitespace, html)
def markup_whitespaces(match):
blanks = match.group(0)
@ -515,6 +525,12 @@ class HeuristicProcessor(object):
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
return html
def detect_scene_breaks(self, html):
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+'<))(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
html = scene_breaks.sub(self.scene_break_open+'\g<break>'+'</p>', html)
return html
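For illustration (with simplified line_open/line_close, not the exact ones defined on the class), the detector above marks separator lines like '* * *' so later passes can find them by class:

import re

line_open, line_close = r'<p[^>]*>\s*', r'\s*</p>'
pat = line_open + r'(?P<break>((?P<c>(?!\s)\W)\s*(?P=c)?)+)' + line_close
print(re.sub(pat, r'<p class="scenebreak">\g<break></p>', '<p>* * *</p>'))
# -> <p class="scenebreak">* * *</p>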
def markup_user_break(self, replacement_break):
'''
Takes string a user supplies and wraps it in markup that will be centered with
@ -781,25 +797,25 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'format_scene_breaks', False):
self.log.debug('Formatting scene breaks')
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
html = self.detect_scene_breaks(html)
html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html)
blanks_count = len(self.any_multi_blank.findall(html))
if blanks_count >= 1:
html = self.merge_blanks(html, blanks_count)
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+'<))(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
detected_scene_break = re.compile(r'<p class="scenebreak"[^>]*>.*?</p>')
scene_break_count = len(detected_scene_break.findall(html))
# If the user has enabled scene break replacement, then either softbreaks
# or 'hard' scene breaks are replaced, depending on which is in use
# Otherwise separator lines are centered, with a slightly larger margin in this case
replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
if replacement_break:
replacement_break = self.markup_user_break(replacement_break)
if len(scene_break.findall(html)) >= 1:
html = scene_break.sub(replacement_break, html)
if scene_break_count >= 1:
html = detected_scene_break.sub(replacement_break, html)
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
else:
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
else:
html = scene_break.sub(self.scene_break_open+'\g<break>'+'</p>', html)
if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs so they render correctly

View File

@ -18,6 +18,7 @@ from lxml import etree
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.utils.magick import Image
from calibre.utils.localization import lang_as_iso639_1
class FB2MLizer(object):
'''
@ -103,7 +104,10 @@ class FB2MLizer(object):
metadata['version'] = __version__
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
if self.oeb_book.metadata.language:
metadata['lang'] = self.oeb_book.metadata.language[0].value
lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
if not lc:
lc = self.oeb_book.metadata.language[0].value
metadata['lang'] = lc or 'en'
else:
metadata['lang'] = u'en'
metadata['id'] = None

View File

@ -197,14 +197,18 @@ class OverDrive(Source):
title_tokens = list(self.get_title_tokens(title,
strip_joiners=False, strip_subtitle=True))
if len(title_tokens) >= len(author_tokens):
xref_q = ''
if len(author_tokens) <= 1:
initial_q = ' '.join(title_tokens)
xref_q = '+'.join(author_tokens)
else:
initial_q = ' '.join(author_tokens)
xref_q = '+'.join(title_tokens)
#log.error('Initial query is %s'%initial_q)
#log.error('Cross reference query is %s'%xref_q)
for token in title_tokens:
if len(xref_q) < len(token):
xref_q = token
log.error('Initial query is %s'%initial_q)
log.error('Cross reference query is %s'%xref_q)
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
query = '{"szKeyword":"'+initial_q+'"}'
@ -219,27 +223,30 @@ class OverDrive(Source):
# get the search results object
results = False
iterations = 0
while results == False:
iterations += 1
xreq = mechanize.Request(q_xref)
xreq.add_header('X-Requested-With', 'XMLHttpRequest')
xreq.add_header('Referer', q_init_search)
xreq.add_header('Accept', 'application/json, text/javascript, */*')
raw = br.open_novisit(xreq).read()
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
if int(m.group('displayrecords')) >= 1:
results = True
elif int(m.group('totalrecords')) >= 1:
if int(m.group('totalrecords')) >= 100:
if xref_q.find('+') != -1:
xref_tokens = xref_q.split('+')
xref_q = xref_tokens[0]
#log.error('xref_q is '+xref_q)
else:
xref_q = ''
xref_q = ''
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
elif int(m.group('totalrecords')) == 0:
if int(m.group('totalrecords')) == 0:
return ''
elif int(m.group('displayrecords')) >= 1:
results = True
elif int(m.group('totalrecords')) >= 1 and iterations < 3:
if xref_q.find('+') != -1:
xref_tokens = xref_q.split('+')
xref_q = xref_tokens[0]
for token in xref_tokens:
if len(xref_q) < len(token):
xref_q = token
#log.error('rewrote xref_q, new query is '+xref_q)
else:
xref_q = ''
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
return self.sort_ovrdrv_results(raw, log, title, title_tokens, author, author_tokens)
@ -263,6 +270,7 @@ class OverDrive(Source):
else:
if creators:
creators = creators.split(', ')
# if an exact match in a preferred format occurs
if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
return self.format_results(reserveid, od_title, subtitle, series, publisher,
@ -330,9 +338,9 @@ class OverDrive(Source):
def find_ovrdrv_data(self, br, log, title, author, isbn, ovrdrv_id=None):
q = base_url
if ovrdrv_id is None:
return self.overdrive_search(br, log, q, title, author)
return self.overdrive_search(br, log, q, title, author)
else:
return self.overdrive_get_record(br, log, q, ovrdrv_id)
return self.overdrive_get_record(br, log, q, ovrdrv_id)
@ -461,10 +469,10 @@ if __name__ == '__main__':
[
(
{'title':'Foundation and Earth',
'authors':['Asimov']},
[title_test('Foundation and Earth', exact=True),
authors_test(['Isaac Asimov'])]
{'title':'The Sea Kings Daughter',
'authors':['Elizabeth Peters']},
[title_test('The Sea Kings Daughter', exact=False),
authors_test(['Elizabeth Peters'])]
),
(

View File

@ -48,7 +48,8 @@ def merge_result(oldmi, newmi, ensure_fields=None):
return newmi
def main(do_identify, covers, metadata, ensure_fields):
def main(do_identify, covers, metadata, ensure_fields, tdir):
os.chdir(tdir)
failed_ids = set()
failed_covers = set()
all_failed = True
@ -103,7 +104,8 @@ def single_identify(title, authors, identifiers):
return [metadata_to_opf(r) for r in results], [r.has_cached_cover_url for
r in results], dump_caches(), log.dump()
def single_covers(title, authors, identifiers, caches):
def single_covers(title, authors, identifiers, caches, tdir):
os.chdir(tdir)
load_caches(caches)
log = GUILog()
results = Queue()

View File

@ -295,21 +295,21 @@ class MOBIHeader(object): # {{{
self.datp_record_count, = struct.unpack(b'>I', self.raw[124:128])
self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
self.has_exth = bool(self.exth_flags & 0x40)
self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
self.has_drm_data = self.length >= 174 and len(self.raw) >= 184
if self.has_drm_data:
self.unknown3 = self.raw[132:164]
self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
self.unknown3 = self.raw[132:168]
self.drm_offset, self.drm_count, self.drm_size, self.drm_flags = \
struct.unpack(b'>4I', self.raw[168:184])
self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
self.has_fcis_flis = False
self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[180:192]
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
self.unknown4 = self.raw[184:192]
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
if self.fdst_count <= 1:
self.fdst_idx = NULL_INDEX
(self.fcis_number, self.fcis_count, self.flis_number,
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])
@ -327,7 +327,7 @@ class MOBIHeader(object): # {{{
self.primary_index_record, = struct.unpack(b'>I',
self.raw[244:248])
if self.file_version >= 8:
if self.length >= 248:
(self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
) = struct.unpack_from(b'>4L', self.raw, 248)
self.unknown9 = self.raw[264:self.length]
@ -337,12 +337,13 @@ class MOBIHeader(object): # {{{
# The following are all relative to the position of the header record
# make them absolute for ease of debugging
for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
self.relative_records = {'sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
'meta_orth_indx', 'huffman_record_offset',
'first_non_book_record', 'datp_record_offset', 'fcis_number',
'flis_number', 'primary_index_record', 'fdst_idx',
'first_image_index'):
if hasattr(self, x):
'first_image_index'}
for x in self.relative_records:
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
setattr(self, x, self.header_offset+getattr(self, x))
if self.has_exth:
@ -355,70 +356,79 @@ class MOBIHeader(object): # {{{
def __str__(self):
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
a = ans.append
i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
ans.append('Compression: %s'%self.compression)
ans.append('Unused: %r'%self.unused)
ans.append('Number of text records: %d'%self.number_of_text_records)
ans.append('Text record size: %d'%self.text_record_size)
ans.append('Encryption: %s'%self.encryption_type)
ans.append('Unknown: %r'%self.unknown)
ans.append('Identifier: %r'%self.identifier)
ans.append('Header length: %d'% self.length)
ans.append('Type: %s'%self.type)
ans.append('Encoding: %s'%self.encoding)
ans.append('UID: %r'%self.uid)
ans.append('File version: %d'%self.file_version)
i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
i('Meta Infl Index', self.meta_infl_indx)
ans.append('Secondary index record: %d (null val: %d)'%(
self.secondary_index_record, NULL_INDEX))
ans.append('Reserved: %r'%self.reserved)
ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
self.first_non_book_record))
ans.append('Full name offset: %d'%self.fullname_offset)
ans.append('Full name length: %d bytes'%self.fullname_length)
ans.append('Langcode: %r'%self.locale_raw)
ans.append('Language: %s'%self.language)
ans.append('Sub language: %s'%self.sublanguage)
ans.append('Input language: %r'%self.input_language)
ans.append('Output language: %r'%self.output_langauage)
ans.append('Min version: %d'%self.min_version)
ans.append('First Image index: %d'%self.first_image_index)
ans.append('Huffman record offset: %d'%self.huffman_record_offset)
ans.append('Huffman record count: %d'%self.huffman_record_count)
ans.append('DATP record offset: %r'%self.datp_record_offset)
ans.append('DATP record count: %r'%self.datp_record_count)
ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
def i(d, x):
x = 'NULL' if x == NULL_INDEX else x
a('%s: %s'%(d, x))
def r(d, attr):
x = getattr(self, attr)
if attr in self.relative_records and x != NULL_INDEX:
a('%s: Absolute: %d Relative: %d'%(d, x, x-self.header_offset))
else:
i(d, x)
a('Compression: %s'%self.compression)
a('Unused: %r'%self.unused)
a('Number of text records: %d'%self.number_of_text_records)
a('Text record size: %d'%self.text_record_size)
a('Encryption: %s'%self.encryption_type)
a('Unknown: %r'%self.unknown)
a('Identifier: %r'%self.identifier)
a('Header length: %d'% self.length)
a('Type: %s'%self.type)
a('Encoding: %s'%self.encoding)
a('UID: %r'%self.uid)
a('File version: %d'%self.file_version)
r('Meta Orth Index', 'meta_orth_indx')
r('Meta Infl Index', 'meta_infl_indx')
r('Secondary index record', 'secondary_index_record')
a('Reserved: %r'%self.reserved)
r('First non-book record', 'first_non_book_record')
a('Full name offset: %d'%self.fullname_offset)
a('Full name length: %d bytes'%self.fullname_length)
a('Langcode: %r'%self.locale_raw)
a('Language: %s'%self.language)
a('Sub language: %s'%self.sublanguage)
a('Input language: %r'%self.input_language)
a('Output language: %r'%self.output_langauage)
a('Min version: %d'%self.min_version)
r('First Image index', 'first_image_index')
r('Huffman record offset', 'huffman_record_offset')
a('Huffman record count: %d'%self.huffman_record_count)
r('DATP record offset', 'datp_record_offset')
a('DATP record count: %r'%self.datp_record_count)
a('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
if self.has_drm_data:
ans.append('Unknown3: %r'%self.unknown3)
ans.append('DRM Offset: %s'%self.drm_offset)
ans.append('DRM Count: %s'%self.drm_count)
ans.append('DRM Size: %s'%self.drm_size)
ans.append('DRM Flags: %r'%self.drm_flags)
a('Unknown3: %r'%self.unknown3)
r('DRM Offset', 'drm_offset')
a('DRM Count: %s'%self.drm_count)
a('DRM Size: %s'%self.drm_size)
a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
ans.append('Unknown4: %r'%self.unknown4)
ans.append('FDST Index: %d'% self.fdst_idx)
ans.append('FDST Count: %d'% self.fdst_count)
ans.append('FCIS number: %d'% self.fcis_number)
ans.append('FCIS count: %d'% self.fcis_count)
ans.append('FLIS number: %d'% self.flis_number)
ans.append('FLIS count: %d'% self.flis_count)
ans.append('Unknown6: %r'% self.unknown6)
ans.append('SRCS record index: %d'%self.srcs_record_index)
ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
ans.append('Unknown7: %r'%self.unknown7)
ans.append(('Extra data flags: %s (has multibyte: %s) '
a('Unknown4: %r'%self.unknown4)
r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number')
a('FCIS count: %d'% self.fcis_count)
r('FLIS number', 'flis_number')
a('FLIS count: %d'% self.flis_count)
a('Unknown6: %r'% self.unknown6)
r('SRCS record index', 'srcs_record_index')
a('Number of SRCS records?: %d'%self.num_srcs_records)
a('Unknown7: %r'%self.unknown7)
a(('Extra data flags: %s (has multibyte: %s) '
'(has indexing: %s) (has uncrossable breaks: %s)')%(
bin(self.extra_data_flags), self.has_multibytes,
self.has_indexing_bytes, self.has_uncrossable_breaks ))
ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
self.primary_index_record))
if self.file_version >= 8:
i('Sections Index', self.sect_idx)
i('SKEL Index', self.skel_idx)
i('DATP Index', self.datp_idx)
i('Other Index', self.oth_idx)
r('NCX index', 'primary_index_record')
if self.length >= 248:
r('Sections Index', 'sect_idx')
r('SKEL Index', 'skel_idx')
r('DATP Index', 'datp_idx')
r('Other Index', 'oth_idx')
if self.unknown9:
a('Unknown9: %r'%self.unknown9)

View File

@ -0,0 +1,185 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import OrderedDict, namedtuple
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)
File = namedtuple('File',
'file_number name divtbl_count start_position length')
Elem = namedtuple('Chunk',
'insert_pos toc_text file_number sequence_number start_pos '
'length')
GuideRef = namedtuple('GuideRef', 'type title pos_fid')
def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec)
data = sections[idx].raw
indx_header = parse_indx_header(data)
indx_count = indx_header['count']
if indx_header['ncncx'] > 0:
off = idx + indx_count + 1
cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
cncx = CNCX(cncx_records, codec)
tag_section_start = indx_header['tagx']
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
for i in xrange(idx + 1, idx + 1 + indx_count):
# Index record
data = sections[i].raw
parse_index_record(table, data, control_byte_count, tags, codec,
indx_header['ordt_map'], strict=True)
return table, cncx, indx_header
class Index(object):
def __init__(self, idx, records, codec):
self.table = self.cncx = self.header = self.records = None
if idx != NULL_INDEX:
self.table, self.cncx, self.header = read_index(records, idx, codec)
def render(self):
ans = ['*'*10 + ' Index Header ' + '*'*10]
a = ans.append
if self.header is not None:
for field in INDEX_HEADER_FIELDS:
a('%-12s: %r'%(field, self.header[field]))
ans.extend(['', ''])
if self.cncx:
a('*'*10 + ' CNCX ' + '*'*10)
for offset, val in self.cncx.iteritems():
a('%10s: %s'%(offset, val))
ans.extend(['', ''])
if self.table is not None:
a('*'*10 + ' %d Index Entries '%len(self.table) + '*'*10)
for k, v in self.table.iteritems():
a('%s: %r'%(k, v))
if self.records:
ans.extend(['', '', '*'*10 + ' Parsed Entries ' + '*'*10])
for f in self.records:
a(repr(f))
return ans + ['']
def __str__(self):
return '\n'.join(self.render())
def __iter__(self):
return iter(self.records)
class SKELIndex(Index):
def __init__(self, skelidx, records, codec):
super(SKELIndex, self).__init__(skelidx, records, codec)
self.records = []
if self.table is not None:
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) != {1, 6}:
raise ValueError('SKEL Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{1,6}))
self.records.append(File(
i, # file_number
text, # name
tag_map[1][0], # divtbl_count
tag_map[6][0], # start_pos
tag_map[6][1]) # length
)
class SECTIndex(Index):
def __init__(self, sectidx, records, codec):
super(SECTIndex, self).__init__(sectidx, records, codec)
self.records = []
if self.table is not None:
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) != {2, 3, 4, 6}:
raise ValueError('Chunk Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{2, 3, 4, 6}))
toc_text = self.cncx[tag_map[2][0]]
self.records.append(Elem(
int(text), # insert_pos
toc_text, # toc_text
tag_map[3][0], # file_number
tag_map[4][0], # sequence_number
tag_map[6][0], # start_pos
tag_map[6][1] # length
)
)
class GuideIndex(Index):
def __init__(self, guideidx, records, codec):
super(GuideIndex, self).__init__(guideidx, records, codec)
self.records = []
if self.table is not None:
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) not in ({1, 6}, {1, 2, 3}):
raise ValueError('Guide Index has unknown tags: %s'%
tag_map)
title = self.cncx[tag_map[1][0]]
self.records.append(GuideRef(
text,
title,
tag_map[6] if 6 in tag_map else (tag_map[2], tag_map[3])
)
)
class NCXIndex(Index):
def __init__(self, ncxidx, records, codec):
super(NCXIndex, self).__init__(ncxidx, records, codec)
self.records = []
if self.table is not None:
for num, x in enumerate(self.table.iteritems()):
text, tag_map = x
entry = default_entry.copy()
entry['name'] = text
entry['num'] = num
for tag in tag_fieldname_map.iterkeys():
fieldname, i = tag_fieldname_map[tag]
if tag in tag_map:
fieldvalue = tag_map[tag][i]
if tag == 6:
# Appears to be an idx into the KF8 elems table with an
# offset
fieldvalue = tuple(tag_map[tag])
entry[fieldname] = fieldvalue
for which, name in {3:'text', 5:'kind', 70:'description',
71:'author', 72:'image_caption',
73:'image_attribution'}.iteritems():
if tag == which:
entry[name] = self.cncx.get(fieldvalue,
default_entry[name])
self.records.append(entry)

View File

@ -10,8 +10,11 @@ __docformat__ = 'restructuredtext en'
import sys, os, imghdr, struct
from itertools import izip
from calibre import CurrentDir
from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
GuideIndex)
from calibre.ebooks.mobi.utils import read_font_record, decode_tbs
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@ -42,6 +45,24 @@ class FDST(object):
return '\n'.join(ans)
class File(object):
def __init__(self, skel, skeleton, text, first_aid, sections):
self.name = 'part%04d'%skel.file_number
self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
self.sections = sections
def dump(self, ddir):
with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
f.write(self.text)
base = os.path.join(ddir, self.name + '-parts')
os.mkdir(base)
with CurrentDir(base):
with open('skeleton.html', 'wb') as f:
f.write(self.skeleton)
for i, text in enumerate(self.sections):
with open('sect-%04d.html'%i, 'wb') as f:
f.write(text)
class MOBIFile(object):
@ -65,6 +86,9 @@ class MOBIFile(object):
self.header = self.mf.mobi8_header
self.extract_resources()
self.read_fdst()
self.read_indices()
self.build_files()
self.read_tbs()
def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f)
@ -85,6 +109,45 @@ class MOBIFile(object):
if self.fdst.num_sections != self.header.fdst_count:
raise ValueError('KF8 Header contains invalid FDST count')
def read_indices(self):
self.skel_index = SKELIndex(self.header.skel_idx, self.mf.records,
self.header.encoding)
self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records,
self.header.encoding)
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)
self.guide_index = GuideIndex(self.header.oth_idx, self.mf.records,
self.header.encoding)
def build_files(self):
text = self.raw_text
self.files = []
for skel in self.skel_index.records:
sects = [x for x in self.sect_index.records if x.file_number
== skel.file_number]
skeleton = text[skel.start_position:skel.start_position+skel.length]
ftext = skeleton
first_aid = sects[0].toc_text
sections = []
for sect in sects:
start_pos = skel.start_position + skel.length + sect.start_pos
sect_text = text[start_pos:start_pos+sect.length]
insert_pos = sect.insert_pos - skel.start_position
ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
sections.append(sect_text)
self.files.append(File(skel, skeleton, ftext, first_aid, sections))
def dump_flows(self, ddir):
if self.fdst is None:
raise ValueError('This MOBI file has no FDST record')
for i, x in enumerate(self.fdst.sections):
start, end = x
raw = self.raw_text[start:end]
with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f:
f.write(raw)
def extract_resources(self):
self.resource_map = []
known_types = {b'FLIS', b'FCIS', b'SRCS',
@ -121,6 +184,54 @@ class MOBIFile(object):
self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
payload))
def read_tbs(self):
from calibre.ebooks.mobi.writer8.tbs import (Entry,
collect_indexing_data)
entry_map = []
for index in self.ncx_index:
enders = [e['pos'] for e in self.ncx_index if e['pos'] >
index['pos'] and
e['hlvl'] <= index['hlvl']]
end = min(enders+[len(self.raw_text)])
entry_map.append(Entry(index=index['num'], title=index['text'],
depth=index['hlvl'],
parent=index['parent'] if index['parent'] > -1 else None,
first_child=index['child1'] if index['child1'] > -1 else None,
last_child=index['childn'] if index['childn'] > -1 else None,
start=index['pos'], length=end-index['pos']))
indexing_data = collect_indexing_data(entry_map,
len(self.text_records))
self.indexing_data = []
for i, data in enumerate(indexing_data):
rec = self.text_records[i]
tbs_bytes = rec.trailing_data.get('indexing', b'')
desc = ['Record #%d'%i]
for x in ('starts', 'completes', 'ends', 'spans'):
points = ['\t%d at depth: %d'%(e.index, e.depth) for e in
getattr(data, x)]
if points:
desc.append(x+':')
desc.extend(points)
desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
flag_sz = 3
sequences = []
while tbs_bytes:
try:
val, extra, consumed = decode_tbs(tbs_bytes, flag_size=flag_sz)
except:
break
flag_sz = 4
tbs_bytes = tbs_bytes[consumed:]
extra = {bin(k):v for k, v in extra.iteritems()}
sequences.append((val, extra))
for i, seq in enumerate(sequences):
desc.append('Sequence #%d: %r %r'%(i, seq[0], seq[1]))
if tbs_bytes:
desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
desc.append('')
self.indexing_data.append('\n'.join(desc))
def inspect_mobi(mobi_file, ddir):
f = MOBIFile(mobi_file)
@ -131,7 +242,8 @@ def inspect_mobi(mobi_file, ddir):
with open(alltext, 'wb') as of:
of.write(f.raw_text)
for x in ('text_records', 'images', 'fonts', 'binary'):
for x in ('text_records', 'images', 'fonts', 'binary', 'files', 'flows',
'tbs'):
os.mkdir(os.path.join(ddir, x))
for rec in f.text_records:
@ -145,3 +257,24 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'fdst.record'), 'wb') as fo:
fo.write(str(f.fdst).encode('utf-8'))
with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
fo.write(str(f.skel_index).encode('utf-8'))
with open(os.path.join(ddir, 'chunks.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8'))
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))
with open(os.path.join(ddir, 'guide.record'), 'wb') as fo:
fo.write(str(f.guide_index).encode('utf-8'))
with open(os.path.join(ddir, 'tbs', 'all.txt'), 'wb') as fo:
fo.write(('\n'.join(f.indexing_data)).encode('utf-8'))
for part in f.files:
part.dump(os.path.join(ddir, 'files'))
f.dump_flows(os.path.join(ddir, 'flows'))

View File

@ -10,7 +10,7 @@ import copy
import re
from lxml import etree
from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.utils.magick.draw import identify_data
@ -109,26 +109,8 @@ class MobiMLizer(object):
self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
self.remove_html_cover()
self.mobimlize_spine()
def remove_html_cover(self):
oeb = self.oeb
if not oeb.metadata.cover \
or 'cover' not in oeb.guide:
return
href = oeb.guide['cover'].href
del oeb.guide['cover']
item = oeb.manifest.hrefs[href]
if item.spine_position is not None:
self.log.warn('Found an HTML cover,', item.href, 'removing it.',
'If you find some content missing from the output MOBI, it '
'is because you misidentified the HTML cover in the input '
'document')
oeb.spine.remove(item)
if item.media_type in OEB_DOCS:
self.oeb.manifest.remove(item)
def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine:
@ -473,7 +455,7 @@ class MobiMLizer(object):
if tag in TABLE_TAGS and self.ignore_tables:
tag = 'span' if tag == 'td' else 'div'
if tag == 'table':
if tag in ('table', 'td', 'tr'):
col = style.backgroundColor
if col:
elem.set('bgcolor', col)

View File

@ -111,6 +111,13 @@ class CNCX(object): # {{{
def get(self, offset, default=None):
return self.records.get(offset, default)
def __bool__(self):
return bool(self.records)
__nonzero__ = __bool__
def iteritems(self):
return self.records.iteritems()
# }}}
def parse_tagx_section(data):

View File

@ -223,15 +223,15 @@ def insert_images_into_markup(parts, resource_map, log):
# Handle any embedded raster images links in the xhtml text
# kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''')
img_index_pattern = re.compile(r'''[('"]kindle:embed:([0-9|A-V]+)[^')"]*[)'"]''')
style_pattern = re.compile(r'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''',
re.IGNORECASE)
for i in xrange(len(parts)):
part = parts[i]
#[partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
# links to raster image files
# image_pattern
srcpieces = img_pattern.split(part)
for j in range(1, len(srcpieces), 2):
for j in xrange(1, len(srcpieces), 2):
tag = srcpieces[j]
if tag.startswith('<im'):
for m in img_index_pattern.finditer(tag):
@ -248,6 +248,30 @@ def insert_images_into_markup(parts, resource_map, log):
# store away modified version
parts[i] = part
# Replace urls used in style attributes
for i in xrange(len(parts)):
part = parts[i]
srcpieces = style_pattern.split(part)
for j in xrange(1, len(srcpieces), 2):
tag = srcpieces[j]
if 'kindle:embed' in tag:
for m in img_index_pattern.finditer(tag):
num = int(m.group(1), 32)
href = resource_map[num-1]
osep = m.group()[0]
csep = m.group()[-1]
if href:
replacement = '%s%s%s'%(osep, '../' + href, csep)
tag = img_index_pattern.sub(replacement, tag, 1)
else:
log.warn('Referenced image %s was not recognized as '
'a valid image in %s' % (num, tag))
srcpieces[j] = tag
part = "".join(srcpieces)
# store away modified version
parts[i] = part
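The XXXX in kindle:embed:XXXX is a base-32 resource index, which is why the loops above decode it with int(m.group(1), 32) and index resource_map at num-1. For example:

print(int('0001', 32))  # -> 1, i.e. resource_map[0]
print(int('000A', 32))  # -> 10, i.e. resource_map[9]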
def upshift_markup(parts):
tag_pattern = re.compile(r'''(<(?:svg)[^>]*>)''', re.IGNORECASE)

View File

@ -109,7 +109,7 @@ class Mobi8Reader(object):
table, cncx = read_index(self.kf8_sections, self.header.othidx,
self.header.codec)
Item = namedtuple('Item',
'type title div_frag_num')
'type title pos_fid')
for i, ref_type in enumerate(table.iterkeys()):
tag_map = table[ref_type]
@ -119,7 +119,7 @@ class Mobi8Reader(object):
if 3 in tag_map.keys():
fileno = tag_map[3][0]
if 6 in tag_map.keys():
fileno = tag_map[6][0]
fileno = tag_map[6]
self.guide.append(Item(ref_type.decode(self.header.codec),
title, fileno))
@ -287,23 +287,24 @@ class Mobi8Reader(object):
def create_guide(self):
guide = Guide()
for ref_type, ref_title, fileno in self.guide:
has_start = False
for ref_type, ref_title, pos_fid in self.guide:
try:
elem = self.elems[fileno]
except IndexError:
# Happens for thumbnailstandard in Amazon book samples
continue
fi = self.get_file_info(elem.insert_pos)
idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
linktgt = fi.filename
if len(pos_fid) != 2:
continue
except TypeError:
continue # thumbnailstandard record, ignore it
linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
if idtext:
linktgt += b'#' + idtext
g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu())
g = Guide.Reference(linktgt, os.getcwdu())
g.title, g.type = ref_title, ref_type
if g.title == 'start' or g.type == 'text':
has_start = True
guide.append(g)
so = self.header.exth.start_offset
if so not in {None, NULL_INDEX}:
if so not in {None, NULL_INDEX} and not has_start:
fi = self.get_file_info(so)
if fi.filename is not None:
idtext = self.get_id_tag(so).decode(self.header.codec)

View File

@ -7,13 +7,15 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, string, imghdr, zlib
import struct, string, imghdr, zlib, os
from collections import OrderedDict
from io import BytesIO
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
from calibre.ebooks import normalize
IMAGE_MAX_SIZE = 10 * 1024 * 1024
RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
def decode_string(raw, codec='utf-8', ordt_map=''):
length, = struct.unpack(b'>B', raw[0])
@ -364,15 +366,17 @@ def count_set_bits(num):
num >>= 1
return ans
def to_base(num, base=32):
def to_base(num, base=32, min_num_digits=None):
digits = string.digits + string.ascii_uppercase
sign = 1 if num >= 0 else -1
if num == 0: return '0'
if num == 0: return ('0' if min_num_digits is None else '0'*min_num_digits)
num *= sign
ans = []
while num:
ans.append(digits[(num % base)])
num //= base
if min_num_digits is not None and len(ans) < min_num_digits:
ans.extend('0'*(min_num_digits - len(ans)))
if sign < 0:
ans.append('-')
ans.reverse()
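Assuming the elided tail of to_base() joins the digit list, the new min_num_digits parameter zero-pads short values, e.g.:

# to_base(255, base=32, min_num_digits=4) -> '007V'
# to_base(0, min_num_digits=4)            -> '0000'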
@ -388,27 +392,8 @@ def mobify_image(data):
data = im.export('gif')
return data
def read_zlib_header(header):
header = bytearray(header)
# See sec 2.2 of RFC 1950 for the zlib stream format
# http://www.ietf.org/rfc/rfc1950.txt
if (header[0]*256 + header[1])%31 != 0:
return None, 'Bad zlib header, FCHECK failed'
cmf = header[0] & 0b1111
cinfo = header[0] >> 4
if cmf != 8:
return None, 'Unknown zlib compression method: %d'%cmf
if cinfo > 7:
return None, 'Invalid CINFO field in zlib header: %d'%cinfo
fdict = (header[1]&0b10000)>>5
if fdict != 0:
return None, 'FDICT based zlib compression not supported'
wbits = cinfo + 8
return wbits, None
def read_font_record(data, extent=1040): # {{{
# Font records {{{
def read_font_record(data, extent=1040):
'''
Return the font encoded in the MOBI FONT record represented by data.
The return value in a dict with fields raw_data, font_data, err, ext,
@ -466,15 +451,8 @@ def read_font_record(data, extent=1040): # {{{
if flags & 0b1:
# ZLIB compressed data
wbits, err = read_zlib_header(font_data[:2])
if err is not None:
ans['err'] = err
return ans
adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4)
try:
# remove two bytes of zlib header and 4 bytes of trailing checksum
# negative wbits indicates no standard gzip header
font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
font_data = zlib.decompress(font_data)
except Exception as e:
ans['err'] = 'Failed to zlib decompress font data (%s)'%e
return ans
@ -483,23 +461,146 @@ def read_font_record(data, extent=1040): # {{{
ans['err'] = 'Uncompressed font size mismatch'
return ans
if False:
# For some reason these almost never match, probably Amazon has a
# buggy Adler32 implementation
sig = (zlib.adler32(font_data) & 0xffffffff)
if sig != adler32:
ans['err'] = ('Adler checksum did not match. Stored: %d '
'Calculated: %d')%(adler32, sig)
return ans
ans['font_data'] = font_data
sig = font_data[:4]
ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'}
else 'otf' if sig == b'OTTO' else 'dat')
return ans
def write_font_record(data, obfuscate=True, compress=True):
'''
Write the ttf/otf font represented by data into a font record. See
read_font_record() for details on the format of the record.
'''
flags = 0
key_len = 20
usize = len(data)
xor_key = b''
if compress:
flags |= 0b1
data = zlib.compress(data, 9)
if obfuscate:
flags |= 0b10
xor_key = os.urandom(key_len)
key = bytearray(xor_key)
data = bytearray(data)
for i in xrange(1040):
data[i] ^= key[i%key_len]
data = bytes(data)
key_start = struct.calcsize(b'>5L') + 4
data_start = key_start + len(xor_key)
header = b'FONT' + struct.pack(b'>5L', usize, flags, data_start,
len(xor_key), key_start)
return header + xor_key + data
# }}}
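A hedged round-trip sketch of the new write_font_record() against the existing reader (assumes this module imports cleanly; the random payload stands in for a real TTF, since the XOR obfuscation touches the first 1040 bytes of the compressed data, so the payload must not compress below that):

import os
from calibre.ebooks.mobi.utils import read_font_record, write_font_record

font_bytes = b'\x00\x01\x00\x00' + os.urandom(4096)  # fake TTF: sfnt version + noise
rec = write_font_record(font_bytes)                   # compressed + obfuscated FONT record
ans = read_font_record(rec)
assert ans['err'] is None
assert ans['font_data'] == font_bytes and ans['ext'] == 'ttf'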
def create_text_record(text):
'''
Return a Palmdoc record of size RECORD_SIZE from the text file object.
In case the record ends in the middle of a multibyte character return
the overlap as well.
Returns data, overlap: where both are byte strings. overlap is the
extra bytes needed to complete the truncated multibyte character.
'''
opos = text.tell()
text.seek(0, 2)
# npos is the position of the next record
npos = min((opos + RECORD_SIZE, text.tell()))
# Number of bytes from the next record needed to complete the last
# character in this record
extra = 0
last = b''
while not last.decode('utf-8', 'ignore'):
# last contains no valid utf-8 characters
size = len(last) + 1
text.seek(npos - size)
last = text.read(size)
# last now has one valid utf-8 char and possibly some bytes that belong
# to a truncated char
try:
last.decode('utf-8', 'strict')
except UnicodeDecodeError:
# There are some truncated bytes in last
prev = len(last)
while True:
text.seek(npos - prev)
last = text.read(len(last) + 1)
try:
last.decode('utf-8')
except UnicodeDecodeError:
pass
else:
break
extra = len(last) - prev
text.seek(opos)
data = text.read(RECORD_SIZE)
overlap = text.read(extra)
text.seek(npos)
return data, overlap
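A quick usage check of the boundary handling described in the docstring: a 3-byte character straddling the 4096-byte record boundary yields 2 overlap bytes.

from io import BytesIO
from calibre.ebooks.mobi.utils import create_text_record

text = BytesIO((u'x' * 4095 + u'\u20ac' + u'y' * 10).encode('utf-8'))
data, overlap = create_text_record(text)
print(len(data), len(overlap))  # -> 4096 2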
class CNCX(object): # {{{
'''
Create the CNCX records. These are records containing all the strings from
an index. Each record is of the form: <vwi string size><utf-8 encoded
string>
'''
MAX_STRING_LENGTH = 500
def __init__(self, strings=()):
self.strings = OrderedDict((s, 0) for s in strings)
self.records = []
offset = 0
buf = BytesIO()
for key in tuple(self.strings.iterkeys()):
utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
l = len(utf8)
sz_bytes = encint(l)
raw = sz_bytes + utf8
if 0xfbf8 - buf.tell() < 6 + len(raw):
# Records in PDB files cannot be larger than 0x10000, so we
# stop well before that.
pad = 0xfbf8 - buf.tell()
buf.write(b'\0' * pad)
self.records.append(buf.getvalue())
buf.seek(0), buf.truncate(0)
offset = len(self.records) * 0x10000
buf.write(raw)
self.strings[key] = offset
offset += len(raw)
val = buf.getvalue()
if val:
self.records.append(align_block(val))
def __getitem__(self, string):
return self.strings[string]
def __bool__(self):
return bool(self.records)
__nonzero__ = __bool__
def __len__(self):
return len(self.records)
# }}}
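Usage sketch: the stored offsets are byte positions of each encoded string within the record stream, so lookups are cheap at index-build time:

from calibre.ebooks.mobi.utils import CNCX

cncx = CNCX([u'Chapter One', u'Chapter Two'])
print(cncx[u'Chapter One'], cncx[u'Chapter Two'])  # -> 0 12
print(len(cncx))  # -> 1 record, since both strings fit well under 0xfbf8 bytes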
def is_guide_ref_start(ref):
return (ref.title.lower() == 'start' or
(ref.type and ref.type.lower() in {'start',
'other.start', 'text'}))

View File

@ -12,5 +12,4 @@ UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480
PALM_MAX_IMAGE_SIZE = 63 * 1024
RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))

View File

@ -12,56 +12,22 @@ from struct import pack
from cStringIO import StringIO
from collections import OrderedDict, defaultdict
from calibre.ebooks.mobi.writer2 import RECORD_SIZE
from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
encode_tbs, align_block, utf8_text)
encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
class CNCX(object): # {{{
'''
Create the CNCX records. These are records containing all the strings from
the NCX. Each record is of the form: <vwi string size><utf-8 encoded
string>
'''
MAX_STRING_LENGTH = 500
class CNCX(CNCX_): # {{{
def __init__(self, toc, is_periodical):
self.strings = OrderedDict()
strings = []
for item in toc.iterdescendants(breadth_first=True):
self.strings[item.title] = 0
strings.append(item.title)
if is_periodical:
self.strings[item.klass] = 0
strings.append(item.klass)
if item.author:
self.strings[item.author] = 0
strings.append(item.author)
if item.description:
self.strings[item.description] = 0
self.records = []
offset = 0
buf = StringIO()
for key in tuple(self.strings.iterkeys()):
utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
l = len(utf8)
sz_bytes = encint(l)
raw = sz_bytes + utf8
if 0xfbf8 - buf.tell() < 6 + len(raw):
# Records in PDB files cannot be larger than 0x10000, so we
# stop well before that.
pad = 0xfbf8 - buf.tell()
buf.write(b'\0' * pad)
self.records.append(buf.getvalue())
buf.truncate(0)
offset = len(self.records) * 0x10000
buf.write(raw)
self.strings[key] = offset
offset += len(raw)
self.records.append(align_block(buf.getvalue()))
def __getitem__(self, string):
return self.strings[string]
strings.append(item.description)
CNCX_.__init__(self, strings)
# }}}
class TAGX(object): # {{{
@ -534,14 +500,14 @@ class Indexer(object): # {{{
# Write offsets to index entries as an IDXT block
idxt_block = b'IDXT'
buf.truncate(0)
buf.seek(0), buf.truncate(0)
for offset in offsets:
buf.write(pack(b'>H', header_length+offset))
idxt_block = align_block(idxt_block + buf.getvalue())
body = index_block + idxt_block
header = b'INDX'
buf.truncate(0)
buf.seek(0), buf.truncate(0)
buf.write(pack(b'>I', header_length))
buf.write(b'\0'*4) # Unknown
buf.write(pack(b'>I', 1)) # Header type? Or index record number?

View File

@ -7,51 +7,31 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, random, time
import random, time
from cStringIO import StringIO
from struct import pack
from calibre.ebooks import normalize, generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks import normalize
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical, RECORD_SIZE, create_text_record)
from calibre.ebooks.mobi.writer2.indexer import Indexer
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
EXTH_CODES = {
'creator': 100,
'publisher': 101,
'description': 103,
'identifier': 104,
'subject': 105,
'pubdate': 106,
'review': 107,
'contributor': 108,
'rights': 109,
'type': 111,
'source': 112,
'versionnumber': 114,
'startreading': 116,
'coveroffset': 201,
'thumboffset': 202,
'hasfakecover': 203,
'lastupdatetime': 502,
'title': 503,
}
# Disabled as I don't care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False
NULL_INDEX = 0xffffffff
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, opts, write_page_breaks_after_item=True):
def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
self.opts = opts
self.resources = resources
self.kf8 = kf8
self.for_joint = kf8 is not None
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort
@ -83,7 +63,7 @@ class MobiWriter(object):
self.stream = stream
self.records = [None]
self.generate_content()
self.generate_record0()
self.generate_joint_record0() if self.for_joint else self.generate_record0()
self.write_header()
self.write_content()
@ -151,73 +131,19 @@ class MobiWriter(object):
# Images {{{
def generate_images(self):
oeb = self.oeb
oeb.logger.info('Serializing images...')
self.image_records = []
self.image_map = {}
self.masthead_offset = 0
index = 1
resources = self.resources
image_records = resources.records
self.image_map = resources.item_map
self.masthead_offset = resources.masthead_offset
self.cover_offset = resources.cover_offset
self.thumbnail_offset = resources.thumbnail_offset
mh_href = None
if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
mh_href = oeb.guide['masthead'].href
self.image_records.append(None)
index += 1
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
self.image_records.append(data)
index += 1
cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href
for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = item.data
if self.opts.mobi_keep_original_images:
data = mobify_image(data)
else:
data = rescale_image(data)
except:
oeb.logger.warn('Bad image file %r' % item.href)
continue
else:
if mh_href and item.href == mh_href:
self.image_records[0] = data
continue
self.image_records.append(data)
self.image_map[item.href] = index
index += 1
if cover_href and item.href == cover_href:
self.cover_offset = self.image_map[item.href] - 1
try:
data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
maxsizeb=MAX_THUMB_SIZE)
except:
oeb.logger.warn('Failed to generate thumbnail')
else:
self.image_records.append(data)
self.thumbnail_offset = index - 1
index += 1
finally:
item.unload_data_from_memory()
if self.image_records and self.image_records[0] is None:
if image_records and image_records[0] is None:
raise ValueError('Failed to find masthead image in manifest')
# }}}
# Text {{{
def generate_text(self):
def generate_text(self): # {{{
self.oeb.logger.info('Serializing markup content...')
self.serializer = Serializer(self.oeb, self.image_map,
self.is_periodical,
@ -232,7 +158,7 @@ class MobiWriter(object):
self.oeb.logger.info(' Compressing markup content...')
while text.tell() < self.text_length:
data, overlap = self.read_text_record(text)
data, overlap = create_text_record(text)
if self.compression == PALMDOC:
data = compress_doc(data)
@ -249,57 +175,6 @@ class MobiWriter(object):
if records_size % 4 != 0:
self.records.append(b'\x00'*(records_size % 4))
self.first_non_text_record_idx += 1
def read_text_record(self, text):
'''
Return a Palmdoc record of size RECORD_SIZE from the text file object.
In case the record ends in the middle of a multibyte character return
the overlap as well.
Returns data, overlap: where both are byte strings. overlap is the
extra bytes needed to complete the truncated multibyte character.
'''
opos = text.tell()
text.seek(0, 2)
# npos is the position of the next record
npos = min((opos + RECORD_SIZE, text.tell()))
# Number of bytes from the next record needed to complete the last
# character in this record
extra = 0
last = b''
while not last.decode('utf-8', 'ignore'):
# last contains no valid utf-8 characters
size = len(last) + 1
text.seek(npos - size)
last = text.read(size)
# last now has one valid utf-8 char and possibly some bytes that belong
# to a truncated char
try:
last.decode('utf-8', 'strict')
except UnicodeDecodeError:
# There are some truncated bytes in last
prev = len(last)
while True:
text.seek(npos - prev)
last = text.read(len(last) + 1)
try:
last.decode('utf-8')
except UnicodeDecodeError:
pass
else:
break
extra = len(last) - prev
text.seek(opos)
data = text.read(RECORD_SIZE)
overlap = text.read(extra)
text.seek(npos)
return data, overlap
# }}}
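# A minimal standalone sketch (not the calibre implementation) of the
# record/overlap contract documented above, which now lives in
# create_text_record in calibre.ebooks.mobi.utils. A tiny record size is
# used instead of the real RECORD_SIZE; the bytes needed to finish a
# truncated multibyte character are returned as overlap, and the next
# record re-reads them.
def split_with_overlap(text, record_size=3):
    buf = bytearray(text)
    pos = 0
    while pos < len(buf):
        npos = min(pos + record_size, len(buf))
        extra = 0
        # UTF-8 continuation bytes have the form 0b10xxxxxx
        while npos + extra < len(buf) and 0x80 <= buf[npos + extra] < 0xC0:
            extra += 1
        yield bytes(buf[pos:npos]), bytes(buf[npos:npos + extra])
        pos = npos

# list(split_with_overlap(u'ab\xe9cd'.encode('utf-8'))) yields
# [(b'ab\xc3', b'\xa9'), (b'\xa9cd', b'')]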
def generate_record0(self): # MOBI header {{{
@ -315,11 +190,20 @@ class MobiWriter(object):
# header as well
bt = 0x103 if self.indexer.is_flat_periodical else 0x101
exth = self.build_exth(bt)
from calibre.ebooks.mobi.writer8.exth import build_exth
exth = build_exth(metadata,
prefer_author_sort=self.opts.prefer_author_sort,
is_periodical=self.is_periodical,
share_not_sync=self.opts.share_not_sync,
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
start_offset=self.serializer.start_offset, mobi_doctype=bt
)
first_image_record = None
if self.image_records:
if self.resources:
used_images = self.serializer.used_images
first_image_record = len(self.records)
self.records.extend(self.image_records)
self.resources.serialize(self.records, used_images)
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose)
@ -481,125 +365,72 @@ class MobiWriter(object):
self.records[0] = align_block(record0)
# }}}
def build_exth(self, mobi_doctype): # EXTH Header {{{
oeb = self.oeb
exth = StringIO()
nrecs = 0
for term in oeb.metadata:
if term not in EXTH_CODES: continue
code = EXTH_CODES[term]
items = oeb.metadata[term]
if term == 'creator':
if self.prefer_author_sort:
creators = [normalize(unicode(c.file_as or c)) for c in
items][:1]
else:
creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)]
for item in items:
data = normalize(unicode(item))
if term != 'description':
data = self.COLLAPSE_RE.sub(' ', data)
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
elif item.scheme.lower() == 'isbn':
pass
else:
continue
data = data.encode('utf-8')
exth.write(pack(b'>II', code, len(data) + 8))
exth.write(data)
nrecs += 1
if term == 'rights' :
try:
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except:
rights = b'Unknown'
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
exth.write(rights)
nrecs += 1
def generate_joint_record0(self): # {{{
from calibre.ebooks.mobi.writer8.mobi import (MOBIHeader,
HEADER_FIELDS)
from calibre.ebooks.mobi.writer8.exth import build_exth
# Write UUID as ASIN
uuid = None
from calibre.ebooks.oeb.base import OPF
for x in oeb.metadata['identifier']:
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
unicode(x).startswith('urn:uuid:')):
uuid = unicode(x).split(':')[-1]
break
if uuid is None:
from uuid import uuid4
uuid = str(uuid4())
# Insert resource records
first_image_record = None
old = len(self.records)
if self.resources:
used_images = self.serializer.used_images | self.kf8.used_images
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
resource_record_count = len(self.records) - old
if isinstance(uuid, unicode):
uuid = uuid.encode('utf-8')
if not self.opts.share_not_sync:
exth.write(pack(b'>II', 113, len(uuid) + 8))
exth.write(uuid)
nrecs += 1
# Insert KF8 records
self.records.append(b'BOUNDARY')
kf8_header_index = len(self.records)
self.kf8.start_offset = (self.serializer.start_offset,
self.kf8.start_offset)
self.records.append(self.kf8.record0)
self.records.extend(self.kf8.records[1:])
# Write cdetype
if not self.is_periodical:
if not self.opts.share_not_sync:
exth.write(pack(b'>II', 501, 12))
exth.write(b'EBOK')
nrecs += 1
else:
ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
if ids:
exth.write(pack(b'>II', 501, 12))
exth.write(ids)
nrecs += 1
first_image_record = (first_image_record if first_image_record else
len(self.records))
# Add a publication date entry
if oeb.metadata['date']:
datestr = str(oeb.metadata['date'][0])
elif oeb.metadata['timestamp']:
datestr = str(oeb.metadata['timestamp'][0])
header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS}
if datestr is None:
raise ValueError("missing date or timestamp")
# Now change the header fields that need to be different in the MOBI 6
# header
header_fields['first_resource_record'] = first_image_record
header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
header_fields['fdst_record'] = NULL_INDEX
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
extra_data_flags = 0b1 # Has multibyte overlap bytes
if self.primary_index_record_idx is not None:
extra_data_flags |= 0b10
header_fields['extra_data_flags'] = extra_data_flags
datestr = bytes(datestr)
exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
exth.write(datestr)
nrecs += 1
if self.is_periodical:
exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
exth.write(datestr)
nrecs += 1
for k, v in {'last_text_record':'last_text_record_idx',
'first_non_text_record':'first_non_text_record_idx',
'ncx_index':'primary_index_record_idx',
}.iteritems():
header_fields[k] = getattr(self, v)
if header_fields['ncx_index'] is None:
header_fields['ncx_index'] = NULL_INDEX
if self.is_periodical:
# Pretend to be Amazon's super secret periodical generator
vals = {204:201, 205:2, 206:0, 207:101}
else:
# Pretend to be kindlegen 1.2
vals = {204:201, 205:1, 206:2, 207:33307}
for code, val in vals.iteritems():
exth.write(pack(b'>III', code, 12, val))
nrecs += 1
for x in ('skel', 'chunk', 'guide'):
header_fields[x+'_index'] = NULL_INDEX
if self.cover_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
self.cover_offset))
exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
nrecs += 2
if self.thumbnail_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
self.thumbnail_offset))
nrecs += 1
# Create the MOBI 6 EXTH
opts = self.opts
kuc = 0 if resource_record_count > 0 else None
if self.serializer.start_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
self.serializer.start_offset))
nrecs += 1
header_fields['exth'] = build_exth(self.oeb.metadata,
prefer_author_sort=opts.prefer_author_sort,
is_periodical=opts.mobi_periodical,
share_not_sync=opts.share_not_sync,
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
num_of_resources=resource_record_count,
kf8_unknown_count=kuc, be_kindlegen2=True,
kf8_header_index=kf8_header_index,
start_offset=self.serializer.start_offset,
mobi_doctype=2)
self.records[0] = MOBIHeader(file_version=6)(**header_fields)
exth = exth.getvalue()
trail = len(exth) % 4
pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
return b''.join(exth)
# }}}
def write_header(self): # PalmDB header {{{

View File

@ -0,0 +1,136 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import imghdr
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
write_font_record)
from calibre.ebooks import generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'
class Resources(object):
def __init__(self, oeb, opts, is_periodical, add_fonts=False):
self.oeb, self.log, self.opts = oeb, oeb.log, opts
self.is_periodical = is_periodical
self.item_map = {}
self.records = []
self.mime_map = {}
self.masthead_offset = 0
self.used_image_indices = set()
self.image_indices = set()
self.cover_offset = self.thumbnail_offset = None
self.add_resources(add_fonts)
def process_image(self, data):
return (mobify_image(data) if self.opts.mobi_keep_original_images else
rescale_image(data))
def add_resources(self, add_fonts):
oeb = self.oeb
oeb.logger.info('Serializing resources...')
index = 1
mh_href = None
if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
mh_href = oeb.guide['masthead'].href
self.records.append(None)
index += 1
self.used_image_indices.add(0)
self.image_indices.add(0)
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
self.records.append(data)
self.used_image_indices.add(0)
self.image_indices.add(0)
index += 1
cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href
for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = self.process_image(item.data)
except:
self.log.warn('Bad image file %r' % item.href)
continue
else:
if mh_href and item.href == mh_href:
self.records[0] = data
continue
self.image_indices.add(len(self.records))
self.records.append(data)
self.item_map[item.href] = index
self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data)
index += 1
if cover_href and item.href == cover_href:
self.cover_offset = self.item_map[item.href] - 1
self.used_image_indices.add(self.cover_offset)
try:
data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
maxsizeb=MAX_THUMB_SIZE)
except:
self.log.warn('Failed to generate thumbnail')
else:
self.image_indices.add(len(self.records))
self.records.append(data)
self.thumbnail_offset = index - 1
self.used_image_indices.add(self.thumbnail_offset)
index += 1
finally:
item.unload_data_from_memory()
if add_fonts:
for item in self.oeb.manifest.values():
if item.href and item.href.rpartition('.')[-1].lower() in {
'ttf', 'otf'} and isinstance(item.data, bytes):
self.records.append(write_font_record(item.data))
self.item_map[item.href] = len(self.records)
def add_extra_images(self):
'''
Add any images that were created after the call to add_resources()
'''
for item in self.oeb.manifest.values():
if (item.media_type not in OEB_RASTER_IMAGES or item.href in
self.item_map): continue
try:
data = self.process_image(item.data)
except:
self.log.warn('Bad image file %r' % item.href)
else:
self.records.append(data)
self.item_map[item.href] = len(self.records)
finally:
item.unload_data_from_memory()
def serialize(self, records, used_images):
used_image_indices = self.used_image_indices | {
v-1 for k, v in self.item_map.iteritems() if k in used_images}
for i in self.image_indices-used_image_indices:
self.records[i] = PLACEHOLDER_GIF
records.extend(self.records)
def __bool__(self):
return bool(self.records)
__nonzero__ = __bool__
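# Hypothetical standalone rendering (serialize_with_placeholders is not a
# calibre name) of the substitution serialize() performs above: image
# records whose index was never referenced by any document are swapped for
# the 1x1 transparent PLACEHOLDER_GIF, keeping record numbering stable
# without shipping unused image data.
def serialize_with_placeholders(image_records, image_indices, used_indices):
    out = list(image_records)
    for i in image_indices - used_indices:
        out[i] = PLACEHOLDER_GIF  # placeholder for an unreferenced image
    return out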

View File

@ -12,6 +12,7 @@ import re
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
namespace, prefixname, urlnormalize)
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.mobi.utils import is_guide_ref_start
from collections import defaultdict
from urlparse import urldefrag
@ -39,6 +40,7 @@ class Serializer(object):
self.oeb = oeb
# Map of image hrefs to image index in the MOBI file
self.images = images
self.used_images = set()
self.logger = oeb.logger
self.is_periodical = is_periodical
self.write_page_breaks_after_item = write_page_breaks_after_item
@ -160,9 +162,7 @@ class Serializer(object):
buf.write(b'title="')
self.serialize_text(ref.title, quot=True)
buf.write(b'" ')
if (ref.title.lower() == 'start' or
(ref.type and ref.type.lower() in {'start',
'other.start', 'text'})):
if is_guide_ref_start(ref):
self._start_href = ref.href
self.serialize_href(ref.href)
# Space required or won't work, I kid you not
@ -329,6 +329,7 @@ class Serializer(object):
href = urlnormalize(item.abshref(val))
if href in self.images:
index = self.images[href]
self.used_images.add(href)
buf.write(b'recindex="%05d"' % index)
continue
buf.write(attr.encode('utf-8'))

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,188 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from struct import pack
from io import BytesIO
from calibre.ebooks.mobi.utils import utf8_text
EXTH_CODES = {
'creator': 100,
'publisher': 101,
'description': 103,
'identifier': 104,
'subject': 105,
'pubdate': 106,
'review': 107,
'contributor': 108,
'rights': 109,
'type': 111,
'source': 112,
'versionnumber': 114,
'startreading': 116,
'kf8_header_index': 121,
'num_of_resources': 125,
'kf8_unknown_count': 131,
'coveroffset': 201,
'thumboffset': 202,
'hasfakecover': 203,
'lastupdatetime': 502,
'title': 503,
}
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
share_not_sync=True, cover_offset=None, thumbnail_offset=None,
start_offset=None, mobi_doctype=2, num_of_resources=None,
kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None):
exth = BytesIO()
nrecs = 0
for term in metadata:
if term not in EXTH_CODES: continue
code = EXTH_CODES[term]
items = metadata[term]
if term == 'creator':
if prefer_author_sort:
creators = [unicode(c.file_as or c) for c in
items][:1]
else:
creators = [unicode(c) for c in items]
items = ['; '.join(creators)]
for item in items:
data = unicode(item)
if term != 'description':
data = COLLAPSE_RE.sub(' ', data)
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
elif item.scheme.lower() == 'isbn':
pass
else:
continue
data = utf8_text(data)
exth.write(pack(b'>II', code, len(data) + 8))
exth.write(data)
nrecs += 1
if term == 'rights' :
try:
rights = utf8_text(unicode(metadata.rights[0]))
except:
rights = b'Unknown'
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
exth.write(rights)
nrecs += 1
# Write UUID as ASIN
uuid = None
from calibre.ebooks.oeb.base import OPF
for x in metadata['identifier']:
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
unicode(x).startswith('urn:uuid:')):
uuid = unicode(x).split(':')[-1]
break
if uuid is None:
from uuid import uuid4
uuid = str(uuid4())
if isinstance(uuid, unicode):
uuid = uuid.encode('utf-8')
if not share_not_sync:
exth.write(pack(b'>II', 113, len(uuid) + 8))
exth.write(uuid)
nrecs += 1
# Write cdetype
if not is_periodical:
if not share_not_sync:
exth.write(pack(b'>II', 501, 12))
exth.write(b'EBOK')
nrecs += 1
else:
ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
if ids:
exth.write(pack(b'>II', 501, 12))
exth.write(ids)
nrecs += 1
# Add a publication date entry
datestr = None
if metadata['date']:
datestr = str(metadata['date'][0])
elif metadata['timestamp']:
datestr = str(metadata['timestamp'][0])
if datestr is None:
raise ValueError("missing date or timestamp")
datestr = bytes(datestr)
exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
exth.write(datestr)
nrecs += 1
if is_periodical:
exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
exth.write(datestr)
nrecs += 1
if be_kindlegen2:
vals = {204:201, 205:2, 206:2, 207:35621}
elif is_periodical:
# Pretend to be Amazon's super secret periodical generator
vals = {204:201, 205:2, 206:0, 207:101}
else:
# Pretend to be kindlegen 1.2
vals = {204:201, 205:1, 206:2, 207:33307}
for code, val in vals.iteritems():
exth.write(pack(b'>III', code, 12, val))
nrecs += 1
if cover_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
cover_offset))
exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
nrecs += 2
if thumbnail_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
thumbnail_offset))
nrecs += 1
if start_offset is not None:
try:
len(start_offset)
except TypeError:
start_offset = [start_offset]
for so in start_offset:
if so is not None:
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
so))
nrecs += 1
if kf8_header_index is not None:
exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
kf8_header_index))
nrecs += 1
if num_of_resources is not None:
exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
num_of_resources))
nrecs += 1
if kf8_unknown_count is not None:
exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
kf8_unknown_count))
nrecs += 1
exth = exth.getvalue()
trail = len(exth) % 4
pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
return b''.join(exth)
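# A minimal sketch of a reader (iter_exth_records is hypothetical, not a
# calibre API) for the EXTH block laid out above: b'EXTH', a 4-byte length
# covering the 12-byte header plus the records (but not the trailing NUL
# padding), a 4-byte record count, then records of (code, length, data)
# where length includes the 8-byte code/length prefix.
from struct import unpack

def iter_exth_records(exth):
    if exth[:4] != b'EXTH':
        raise ValueError('Not an EXTH block')
    length, nrecs = unpack(b'>II', exth[4:12])
    pos = 12
    for _ in xrange(nrecs):
        code, size = unpack(b'>II', exth[pos:pos + 8])
        yield code, exth[pos + 8:pos + size]
        pos += size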

View File

@ -0,0 +1,86 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import random
from io import BytesIO
from collections import OrderedDict
from struct import pack
from calibre.ebooks.mobi.utils import align_block
NULL = 0xffffffff
zeroes = lambda x: b'\0'*x
nulls = lambda x: b'\xff'*x
short = lambda x: pack(b'>H', x)
class Header(OrderedDict):
HEADER_NAME = b''
DEFINITION = '''
'''
ALIGN_BLOCK = False
POSITIONS = {} # Mapping of position field to field whose position should
# be stored in the position field
SHORT_FIELDS = set()
def __init__(self):
OrderedDict.__init__(self)
for line in self.DEFINITION.splitlines():
line = line.strip()
if not line or line.startswith('#'): continue
name, val = [x.strip() for x in line.partition('=')[0::2]]
if val:
val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
'nulls':nulls, 'short':short, 'random':random})
else:
val = 0
if name in self:
raise ValueError('Duplicate field in definition: %r'%name)
self[name] = val
@property
def dynamic_fields(self):
return tuple(k for k, v in self.iteritems() if v is None)
def __call__(self, **kwargs):
positions = {}
for name, val in kwargs.iteritems():
if name not in self:
raise KeyError('Not a valid header field: %r'%name)
self[name] = val
buf = BytesIO()
buf.write(bytes(self.HEADER_NAME))
for name, val in self.iteritems():
val = self.format_value(name, val)
positions[name] = buf.tell()
if val is None:
raise ValueError('Dynamic field %r not set'%name)
if isinstance(val, (int, long)):
fmt = 'H' if name in self.SHORT_FIELDS else 'I'
val = pack(b'>'+fmt, val)
buf.write(val)
for pos_field, field in self.POSITIONS.iteritems():
buf.seek(positions[pos_field])
buf.write(pack(b'>I', positions[field]))
ans = buf.getvalue()
if self.ALIGN_BLOCK:
ans = align_block(ans)
return ans
def format_value(self, name, val):
return val
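# Hypothetical illustration (ExampleHeader is not a calibre class) of the
# DEFINITION mini-language parsed above: each non-comment line is
# "name = value", where the value is eval'd with zeroes, nulls, NULL, DYN,
# short and random in scope; a bare name defaults to 0 and any field set to
# DYN must be supplied as a keyword argument when the header is called.
class ExampleHeader(Header):
    HEADER_NAME = b'DEMO'
    DEFINITION = '''
    # A fixed 32-bit field
    magic = 42
    # Filled in at call time
    payload_length = DYN
    # Eight bytes of zero padding
    padding = zeroes(8)
    '''

# ExampleHeader()(payload_length=10) returns
# b'DEMO' + pack(b'>I', 42) + pack(b'>I', 10) + b'\0'*8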

View File

@ -0,0 +1,335 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import namedtuple
from struct import pack
from io import BytesIO
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
from calibre.ebooks.mobi.writer8.header import Header
TagMeta_ = namedtuple('TagMeta',
'name number values_per_entry bitmask end_flag')
TagMeta = lambda x:TagMeta_(*x)
EndTagTable = TagMeta(('eof', 0, 0, 0, 1))
# Map of mask to the number of shifts needed; works with one-bit and
# two-bit wide masks, and could be extended to four-bit wide ones as well
mask_to_bit_shifts = { 1:0, 2:1, 3:0, 4:2, 8:3, 12:2, 16:4, 32:5, 48:4, 64:6,
128:7, 192: 6 }
class IndexHeader(Header): # {{{
HEADER_NAME = b'INDX'
ALIGN_BLOCK = True
HEADER_LENGTH = 192
DEFINITION = '''
# 4 - 8: Header Length
header_length = {header_length}
# 8 - 16: Unknown
unknown1 = zeroes(8)
# 16 - 20: Index type: 0 - normal 2 - inflection
type = 2
# 20 - 24: IDXT offset (filled in later)
idxt_offset
# 24 - 28: Number of index records
num_of_records = 1
# 28 - 32: Index encoding (65001 = utf-8)
encoding = 65001
# 32 - 36: Unknown
unknown2 = NULL
# 36 - 40: Number of Index entries
num_of_entries = DYN
# 40 - 44: ORDT offset
ordt_offset
# 44 - 48: LIGT offset
ligt_offset
# 48 - 52: Number of ORDT/LIGT? entries
num_of_ordt_entries
# 52 - 56: Number of CNCX records
num_of_cncx = DYN
# 56 - 180: Unknown
unknown3 = zeroes(124)
# 180 - 184: TAGX offset
tagx_offset = {header_length}
# 184 - 192: Unknown
unknown4 = zeroes(8)
# TAGX
tagx = DYN
# Last Index entry
last_index = DYN
# IDXT
idxt = DYN
'''.format(header_length=HEADER_LENGTH)
POSITIONS = {'idxt_offset':'idxt'}
# }}}
class Index(object): # {{{
control_byte_count = 1
cncx = CNCX()
tag_types = (EndTagTable,)
HEADER_LENGTH = IndexHeader.HEADER_LENGTH
@classmethod
def generate_tagx(cls):
header = b'TAGX'
byts = bytearray()
for tag_meta in cls.tag_types:
byts.extend(tag_meta[1:])
# table length, control byte count
header += pack(b'>II', 12+len(byts), cls.control_byte_count)
return header + bytes(byts)
@classmethod
def calculate_control_bytes_for_each_entry(cls, entries):
control_bytes = []
for lead_text, tags in entries:
cbs = []
ans = 0
for (name, number, vpe, mask, endi) in cls.tag_types:
if endi == 1:
cbs.append(ans)
ans = 0
continue
try:
nvals = len(tags.get(name, ()))
except TypeError:
nvals = 1
nentries = nvals // vpe
shifts = mask_to_bit_shifts[mask]
ans |= mask & (nentries << shifts)
if len(cbs) != cls.control_byte_count:
raise ValueError('The entry %r is invalid'%[lead_text, tags])
control_bytes.append(cbs)
return control_bytes
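# Worked example, using the ChunkIndex tag table defined below: a chunk
# entry carries one value each for cncx_offset (mask 1), file_number
# (mask 2) and sequence_number (mask 4), plus a single 2-value geometry
# tuple (mask 8, values_per_entry 2). Every tag thus contributes
# nentries == 1 and the single control byte is 1 | 2 | 4 | 8 == 0b1111.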
def __call__(self):
self.control_bytes = self.calculate_control_bytes_for_each_entry(
self.entries)
rendered_entries = []
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
last_lead_text = b''
too_large = ValueError('Index has too many entries, calibre does not'
' support generating multiple index records at this'
' time.')
for i, x in enumerate(self.entries):
control_bytes = self.control_bytes[i]
leading_text, tags = x
buf.seek(0), buf.truncate(0)
leading_text = (leading_text.encode('utf-8') if
isinstance(leading_text, unicode) else leading_text)
raw = bytearray(leading_text)
raw.insert(0, len(leading_text))
buf.write(bytes(raw))
buf.write(bytes(bytearray(control_bytes)))
for tag in self.tag_types:
values = tags.get(tag.name, None)
if values is None: continue
try:
len(values)
except TypeError:
values = [values]
if values:
for val in values:
try:
buf.write(encint(val))
except ValueError:
raise ValueError('Invalid values for %r: %r'%(
tag, values))
raw = buf.getvalue()
offset = index.tell()
if offset + self.HEADER_LENGTH >= 0x10000:
raise too_large
rendered_entries.append(IndexEntry(offset, len(raw), raw))
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
index.write(raw)
last_lead_text = leading_text
index_block = align_block(index.getvalue())
idxt_block = align_block(b'IDXT' + idxt.getvalue())
body = index_block + idxt_block
if len(body) + self.HEADER_LENGTH >= 0x10000:
raise too_large
header = b'INDX'
buf.seek(0), buf.truncate(0)
buf.write(pack(b'>I', self.HEADER_LENGTH))
buf.write(b'\0'*4) # Unknown
buf.write(pack(b'>I', 1)) # Header type? Or index record number?
buf.write(b'\0'*4) # Unknown
# IDXT block offset
buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
# Number of index entries
buf.write(pack(b'>I', len(rendered_entries)))
buf.write(b'\xff'*8) # Unknown
buf.write(b'\0'*156) # Unknown
header += buf.getvalue()
index_record = header + body
tagx = self.generate_tagx()
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
b'\0')
# Last index
idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
idx += pack(b'>H', len(rendered_entries))
header = {
'num_of_entries': len(rendered_entries),
'num_of_cncx': len(self.cncx),
'tagx':tagx,
'last_index':align_block(idx),
'idxt':idxt
}
header = IndexHeader()(**header)
self.records = [header, index_record]
self.records.extend(self.cncx.records)
return self.records
# }}}
class SkelIndex(Index):
tag_types = tuple(map(TagMeta, (
('chunk_count', 1, 1, 3, 0),
('geometry', 6, 2, 12, 0),
EndTagTable
)))
def __init__(self, skel_table):
self.entries = [
(s.name, {
# Don't ask me why these entries have to be repeated twice
'chunk_count':(s.chunk_count, s.chunk_count),
'geometry':(s.start_pos, s.length, s.start_pos, s.length),
}) for s in skel_table
]
class ChunkIndex(Index):
tag_types = tuple(map(TagMeta, (
('cncx_offset', 2, 1, 1, 0),
('file_number', 3, 1, 2, 0),
('sequence_number', 4, 1, 4, 0),
('geometry', 6, 2, 8, 0),
EndTagTable
)))
def __init__(self, chunk_table):
self.cncx = CNCX(c.selector for c in chunk_table)
self.entries = [
('%010d'%c.insert_pos, {
'cncx_offset':self.cncx[c.selector],
'file_number':c.file_number,
'sequence_number':c.sequence_number,
'geometry':(c.start_pos, c.length),
}) for c in chunk_table
]
class GuideIndex(Index):
tag_types = tuple(map(TagMeta, (
('title', 1, 1, 1, 0),
('pos_fid', 6, 2, 2, 0),
EndTagTable
)))
def __init__(self, guide_table):
self.cncx = CNCX(c.title for c in guide_table)
self.entries = [
(r.type, {
'title':self.cncx[r.title],
'pos_fid':r.pos_fid,
}) for r in guide_table
]
class NCXIndex(Index):
''' The commented out parts have been seen in NCX indexes from MOBI 6
periodicals. Since we have no MOBI 8 periodicals to reverse engineer, leave
it for now. '''
# control_byte_count = 2
tag_types = tuple(map(TagMeta, (
('offset', 1, 1, 1, 0),
('length', 2, 1, 2, 0),
('label', 3, 1, 4, 0),
('depth', 4, 1, 8, 0),
('parent', 21, 1, 16, 0),
('first_child', 22, 1, 32, 0),
('last_child', 23, 1, 64, 0),
('pos_fid', 6, 2, 128, 0),
EndTagTable,
# ('image', 69, 1, 1, 0),
# ('description', 70, 1, 2, 0),
# ('author', 71, 1, 4, 0),
# ('caption', 72, 1, 8, 0),
# ('attribution', 73, 1, 16, 0),
# EndTagTable
)))
def __init__(self, toc_table):
strings = []
for entry in toc_table:
strings.append(entry['label'])
aut = entry.get('author', None)
if aut:
strings.append(aut)
desc = entry.get('description', None)
if desc:
strings.append(desc)
self.cncx = CNCX(strings)
def to_entry(x):
ans = {}
for f in ('offset', 'length', 'depth', 'pos_fid', 'parent',
'first_child', 'last_child'):
if f in x:
ans[f] = x[f]
for f in ('label', 'description', 'author'):
if f in x:
ans[f] = self.cncx[x[f]]
return ('%02x'%x['index'], ans)
self.entries = list(map(to_entry, toc_table))

View File

@ -0,0 +1,406 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import copy, logging
from functools import partial
from collections import defaultdict, namedtuple
from io import BytesIO
from struct import pack
import cssutils
from lxml import etree
from calibre import isbytestring, force_unicode
from calibre.ebooks.mobi.utils import (create_text_record, to_base,
is_guide_ref_start)
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
extract, XHTML, urlnormalize)
from calibre.ebooks.oeb.parse_utils import barename
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
ChunkIndex, GuideIndex)
from calibre.ebooks.mobi.writer8.mobi import KF8Book
from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
from calibre.ebooks.mobi.writer8.toc import TOCAdder
XML_DOCS = OEB_DOCS | {SVG_MIME}
# References to record numbers in KF8 are stored as base-32 encoded integers,
# with 4 digits
to_ref = partial(to_base, base=32, min_num_digits=4)
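# For instance to_ref(11) == '000B', assuming to_base's base-32 digit
# alphabet is 0-9 followed by A-V.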
class KF8Writer(object):
def __init__(self, oeb, opts, resources):
self.oeb, self.opts, self.log = oeb, opts, oeb.log
self.compress = not self.opts.dont_compress
self.has_tbs = False
self.log.info('Creating KF8 output')
# Create an inline ToC if one does not already exist
self.toc_adder = TOCAdder(oeb, opts)
self.used_images = set()
self.resources = resources
self.flows = [None] # First flow item is reserved for the text
self.records = [None] # Placeholder for zeroth record
self.log('\tGenerating KF8 markup...')
self.dup_data()
self.replace_resource_links()
self.extract_css_into_flows()
self.extract_svg_into_flows()
self.replace_internal_links_with_placeholders()
self.insert_aid_attributes()
self.chunk_it_up()
# Dump the cloned data as it is no longer needed
del self._data_cache
self.create_text_records()
self.log('\tCreating indices...')
self.create_fdst_records()
self.create_indices()
self.create_guide()
# We do not want to use this ToC for MOBI 6, so remove it
self.toc_adder.remove_generated_toc()
def dup_data(self):
''' Duplicate data so that any changes we make to markup/CSS only
affect KF8 output and not MOBI 6 output '''
self._data_cache = {}
# Suppress cssutils logging output as it is duplicated anyway earlier
# in the pipeline
cssutils.log.setLevel(logging.CRITICAL)
for item in self.oeb.manifest:
if item.media_type in XML_DOCS:
self._data_cache[item.href] = copy.deepcopy(item.data)
elif item.media_type in OEB_STYLES:
# I can't figure out how to make an efficient copy of the
# in-memory CSSStylesheet, as deepcopy doesn't work (raises an
# exception)
self._data_cache[item.href] = cssutils.parseString(
item.data.cssText, validate=False)
def data(self, item):
return self._data_cache.get(item.href, item.data)
def replace_resource_links(self):
''' Replace links to resources (raster images/fonts) with pointers to
the MOBI record containing the resource. The pointers are of the form:
kindle:embed:XXXX?mime=image/* The ?mime= is apparently optional and
not used for fonts. '''
def pointer(item, oref):
ref = item.abshref(oref)
idx = self.resources.item_map.get(ref, None)
if idx is not None:
is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
idx = to_ref(idx)
if is_image:
self.used_images.add(ref)
return 'kindle:embed:%s?mime=%s'%(idx,
self.resources.mime_map[ref])
else:
return 'kindle:embed:%s'%idx
return oref
for item in self.oeb.manifest:
if item.media_type in XML_DOCS:
root = self.data(item)
for tag in XPath('//h:img|//svg:image')(root):
for attr, ref in tag.attrib.iteritems():
if attr.split('}')[-1].lower() in {'src', 'href'}:
tag.attrib[attr] = pointer(item, ref)
for tag in XPath('//h:style')(root):
if tag.text:
sheet = cssutils.parseString(tag.text, validate=False)
replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer,
ignoreImportRules=True)
repl = sheet.cssText
if isbytestring(repl):
repl = repl.decode('utf-8')
tag.text = '\n'+ repl + '\n'
elif item.media_type in OEB_STYLES:
sheet = self.data(item)
replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
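# For example, an <img src="images/cover.jpg"> whose data sits in resource
# record 3 becomes <img src="kindle:embed:0003?mime=image/jpeg"> (0003
# being to_ref(3)), while a font resource is referenced as a bare
# kindle:embed:XXXX with no ?mime= query.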
def extract_css_into_flows(self):
inlines = defaultdict(list) # Ensure identical <style>s not repeated
sheets = {}
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
data = self.data(item).cssText
sheets[item.href] = len(self.flows)
self.flows.append(force_unicode(data, 'utf-8'))
for item in self.oeb.spine:
root = self.data(item)
for link in XPath('//h:link[@href]')(root):
href = item.abshref(link.get('href'))
idx = sheets.get(href, None)
if idx is not None:
idx = to_ref(idx)
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
for tag in XPath('//h:style')(root):
p = tag.getparent()
idx = p.index(tag)
raw = tag.text
if not raw or not raw.strip():
extract(tag)
continue
repl = etree.Element(XHTML('link'), type='text/css',
rel='stylesheet')
repl.tail='\n'
p.insert(idx, repl)
extract(tag)
inlines[raw].append(repl)
for raw, elems in inlines.iteritems():
idx = to_ref(len(self.flows))
self.flows.append(raw)
for link in elems:
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
def extract_svg_into_flows(self):
images = {}
for item in self.oeb.manifest:
if item.media_type == SVG_MIME:
data = self.data(item)
images[item.href] = len(self.flows)
self.flows.append(etree.tostring(data, encoding='UTF-8',
with_tail=True, xml_declaration=True))
for item in self.oeb.spine:
root = self.data(item)
for svg in XPath('//svg:svg')(root):
raw = etree.tostring(svg, encoding=unicode, with_tail=False)
idx = len(self.flows)
self.flows.append(raw)
p = svg.getparent()
pos = p.index(svg)
img = etree.Element(XHTML('img'),
src="kindle:flow:%s?mime=image/svg+xml"%to_ref(idx))
p.insert(pos, img)
extract(svg)
for img in XPath('//h:img[@src]')(root):
src = img.get('src')
abshref = item.abshref(src)
idx = images.get(abshref, None)
if idx is not None:
img.set('src', 'kindle:flow:%s?mime=image/svg+xml'%
to_ref(idx))
def replace_internal_links_with_placeholders(self):
self.link_map = {}
count = 0
hrefs = {item.href for item in self.oeb.spine}
for item in self.oeb.spine:
root = self.data(item)
for a in XPath('//h:a[@href]')(root):
count += 1
ref = item.abshref(a.get('href'))
href, _, frag = ref.partition('#')
href = urlnormalize(href)
if href in hrefs:
placeholder = 'kindle:pos:fid:0000:off:%s'%to_href(count)
self.link_map[placeholder] = (href, frag)
a.set('href', placeholder)
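# For example, if the third <a href> encountered points at 'ch02.html#sec1',
# it is rewritten to href="kindle:pos:fid:0000:off:0000000003" for now;
# set_internal_links() in skeleton.py later replaces the zero fid and the
# counter-based offset with the real chunk position.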
def insert_aid_attributes(self):
self.id_map = {}
for i, item in enumerate(self.oeb.spine):
root = self.data(item)
aidbase = i * int(1e6)
j = 0
for tag in root.iterdescendants(etree.Element):
id_ = tag.attrib.get('id', None)
if id_ is not None or barename(tag.tag).lower() in aid_able_tags:
aid = aidbase + j
tag.attrib['aid'] = to_base(aid, base=32)
if tag.tag == XHTML('body'):
self.id_map[(item.href, '')] = tag.attrib['aid']
if id_ is not None:
self.id_map[(item.href, id_)] = tag.attrib['aid']
j += 1
def chunk_it_up(self):
placeholder_map = {}
for placeholder, x in self.link_map.iteritems():
href, frag = x
aid = self.id_map.get(x, None)
if aid is None:
aid = self.id_map.get((href, ''))
placeholder_map[placeholder] = aid
chunker = Chunker(self.oeb, self.data, placeholder_map)
for x in ('skel_table', 'chunk_table', 'aid_offset_map'):
setattr(self, x, getattr(chunker, x))
self.flows[0] = chunker.text
def create_text_records(self):
self.flows = [x.encode('utf-8') if isinstance(x, unicode) else x for x
in self.flows]
text = b''.join(self.flows)
self.text_length = len(text)
text = BytesIO(text)
nrecords = 0
records_size = 0
if self.compress:
self.oeb.logger.info('\tCompressing markup...')
while text.tell() < self.text_length:
data, overlap = create_text_record(text)
if self.compress:
data = compress_doc(data)
data += overlap
data += pack(b'>B', len(overlap))
self.records.append(data)
records_size += len(data)
nrecords += 1
self.last_text_record_idx = nrecords
self.first_non_text_record_idx = nrecords + 1
# Pad so that the next record starts at a 4 byte boundary
if records_size % 4 != 0:
self.records.append(b'\x00'*(records_size % 4))
self.first_non_text_record_idx += 1
def create_fdst_records(self):
FDST = namedtuple('Flow', 'start end')
entries = []
self.fdst_table = []
for i, flow in enumerate(self.flows):
start = 0 if i == 0 else self.fdst_table[-1].end
self.fdst_table.append(FDST(start, start + len(flow)))
entries.extend(self.fdst_table[-1])
rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) +
pack(b'>%dL'%len(entries), *entries))
self.fdst_records = [rec]
self.fdst_count = len(self.fdst_table)
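# The FDST record built above is b'FDST', a 4-byte offset to the entry
# table (always 12), a 4-byte flow count, then one (start, end) pair of
# 4-byte offsets per flow. For example, a 7000 byte text flow followed by a
# 300 byte CSS flow serializes as
# b'FDST' + pack(b'>LL', 12, 2) + pack(b'>4L', 0, 7000, 7000, 7300).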
def create_indices(self):
self.skel_records = SkelIndex(self.skel_table)()
self.chunk_records = ChunkIndex(self.chunk_table)()
self.ncx_records = []
toc = self.oeb.toc
entries = []
is_periodical = self.opts.mobi_periodical
if toc.count() < 2:
self.log.warn('Document has no ToC, MOBI will have no NCX index')
return
# Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href,
'label':(item.title or _('Unknown')),
'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)
p = getattr(item, 'ncx_parent', None)
if p is not None:
entry['parent_id'] = p
for child in item:
child.ncx_parent = entry['id']
child.ncx_hlvl = entry['depth'] + 1
entry['children'].append(id(child))
if is_periodical:
if item.author:
entry['author'] = item.author
if item.description:
entry['description'] = item.description
entries.append(entry)
# The Kindle requires entries to be sorted by (depth, playorder)
entries.sort(key=lambda entry: (entry['depth'], entry['index']))
for i, entry in enumerate(entries):
entry['index'] = i
id_to_index = {entry['id']:entry['index'] for entry in entries}
# Write the hierarchical and start offset information
for entry in entries:
children = entry.pop('children')
if children:
entry['first_child'] = id_to_index[children[0]]
entry['last_child'] = id_to_index[children[-1]]
if 'parent_id' in entry:
entry['parent'] = id_to_index[entry.pop('parent_id')]
href = entry.pop('href')
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''), None)
if aid is None:
pos, fid = 0, 0
else:
pos, fid = self.aid_offset_map[aid]
chunk = self.chunk_table[pos]
offset = chunk.insert_pos + fid
entry['pos_fid'] = (pos, fid)
entry['offset'] = offset
# Write the lengths
def get_next_start(entry):
enders = [e['offset'] for e in entries if e['depth'] <=
entry['depth'] and e['offset'] > entry['offset']]
if enders:
return min(enders)
return len(self.flows[0])
for entry in entries:
entry['length'] = get_next_start(entry) - entry['offset']
self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
self.last_text_record_idx+1)
self.ncx_records = NCXIndex(entries)()
def create_guide(self):
self.start_offset = None
self.guide_table = []
self.guide_records = []
GuideRef = namedtuple('GuideRef', 'title type pos_fid')
for ref in self.oeb.guide.values():
href, frag = ref.href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
if aid is None:
aid = self.id_map.get((href, ''))
if aid is None:
continue
pos, fid = self.aid_offset_map[aid]
if is_guide_ref_start(ref):
chunk = self.chunk_table[pos]
skel = [s for s in self.skel_table if s.file_number ==
chunk.file_number][0]
self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
self.guide_table.append(GuideRef(ref.title or
_('Unknown'), ref.type, (pos, fid)))
if self.guide_table:
self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
self.guide_records = GuideIndex(self.guide_table)()
def create_kf8_book(oeb, opts, resources, for_joint=False):
writer = KF8Writer(oeb, opts, resources)
return KF8Book(writer, for_joint=for_joint)

View File

@ -0,0 +1,311 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time, random
from struct import pack
from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
from calibre.ebooks.mobi.writer8.header import Header
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.writer8.exth import build_exth
from calibre.utils.filenames import ascii_filename
NULL_INDEX = 0xffffffff
class MOBIHeader(Header): # {{{
'''
Represents the first record in a MOBI file, contains all the metadata about
the file.
'''
DEFINITION = '''
# 0: Compression
compression = DYN
# 2: Unused
unused1 = zeroes(2)
# 4: Text length
text_length = DYN
# 8: Last text record
last_text_record = DYN
# 10: Text record size
record_size = {record_size}
# 12: Encryption Type
encryption_type
# 14: Unused
unused2
# 16: Ident
ident = b'MOBI'
# 20: Header length
header_length = 248
# 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News
# (flat), 0x103 - News magazine same as 0x101)
book_type = DYN
# 28: Text encoding (utf-8 = 65001)
encoding = 65001
# 32: UID
uid = DYN
# 36: File version
file_version = {file_version}
# 40: Meta orth record (used in dictionaries)
meta_orth_record = NULL
# 44: Meta infl index
meta_infl_index = NULL
# 48: Extra indices
extra_index0 = NULL
extra_index1 = NULL
extra_index2 = NULL
extra_index3 = NULL
extra_index4 = NULL
extra_index5 = NULL
extra_index6 = NULL
extra_index7 = NULL
# 80: First non text record
first_non_text_record = DYN
# 84: Title offset
title_offset
# 88: Title Length
title_length = DYN
# 92: Language code
language_code = DYN
# 96: Dictionary in and out languages
in_lang
out_lang
# 104: Min version
min_version = {file_version}
# 108: First resource record
first_resource_record = DYN
# 112: Huff/CDIC compression
huff_first_record
huff_count
# 120: Unknown (Maybe DATP related, maybe HUFF/CDIC related)
maybe_datp = zeroes(8)
# 128: EXTH flags
exth_flags = DYN
# 132: Unknown
unknown = zeroes(36)
# 168: DRM
drm_offset = NULL
drm_count
drm_size
drm_flags
# 184: Unknown
unknown2 = zeroes(8)
# 192: FDST
fdst_record = DYN
fdst_count = DYN
# 200: FCI
fcis_record = NULL
fcis_count
# 208: FLIS
flis_record = NULL
flis_count
# 216: Unknown
unknown3 = zeroes(8)
# 224: SRCS
srcs_record = NULL
srcs_count
# 232: Unknown
unknown4 = nulls(8)
# 240: Extra data flags
# 0b1 - extra multibyte bytes after text records
# 0b10 - TBS indexing data (only used in MOBI 6)
# 0b100 - uncrossable breaks (only used in MOBI 6)
extra_data_flags = DYN
# 244: KF8 Indices
ncx_index = DYN
chunk_index = DYN
skel_index = DYN
datp_index = NULL
guide_index = DYN
# 264: EXTH
exth = DYN
# Full title
full_title = DYN
# Padding to allow Amazon's DTP service to add data
padding = zeroes(8192)
'''
SHORT_FIELDS = {'compression', 'last_text_record', 'record_size',
'encryption_type', 'unused2'}
ALIGN_BLOCK = True
POSITIONS = {'title_offset':'full_title'}
def __init__(self, file_version=8):
self.DEFINITION = self.DEFINITION.format(file_version=file_version,
record_size=RECORD_SIZE)
super(MOBIHeader, self).__init__()
def format_value(self, name, val):
if name == 'compression':
val = PALMDOC if val else UNCOMPRESSED
return super(MOBIHeader, self).format_value(name, val)
# }}}
HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
'first_non_text_record', 'title_length', 'language_code',
'first_resource_record', 'exth_flags', 'fdst_record',
'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
'guide_index', 'exth', 'full_title', 'extra_data_flags',
'uid'}
class KF8Book(object):
def __init__(self, writer, for_joint=False):
self.build_records(writer, for_joint)
self.used_images = writer.used_images
def build_records(self, writer, for_joint):
metadata = writer.oeb.metadata
# The text records
for x in ('last_text_record_idx', 'first_non_text_record_idx'):
setattr(self, x.rpartition('_')[0], getattr(writer, x))
self.records = writer.records
self.text_length = writer.text_length
# KF8 Indices
self.chunk_index = len(self.records)
self.records.extend(writer.chunk_records)
self.skel_index = len(self.records)
self.records.extend(writer.skel_records)
self.guide_index = NULL_INDEX
if writer.guide_records:
self.guide_index = len(self.records)
self.records.extend(writer.guide_records)
self.ncx_index = NULL_INDEX
if writer.ncx_records:
self.ncx_index = len(self.records)
self.records.extend(writer.ncx_records)
# Resources
resources = writer.resources
for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'):
setattr(self, x, getattr(resources, x))
self.first_resource_record = NULL_INDEX
before = len(self.records)
if resources.records:
self.first_resource_record = len(self.records)
if not for_joint:
resources.serialize(self.records, writer.used_images)
self.num_of_resources = len(self.records) - before
# FDST
self.fdst_count = writer.fdst_count
self.fdst_record = len(self.records)
self.records.extend(writer.fdst_records)
# EOF
self.records.append(b'\xe9\x8e\r\n') # EOF record
# Miscellaneous header fields
self.compression = writer.compress
self.book_type = 0x101 if writer.opts.mobi_periodical else 2
self.full_title = utf8_text(unicode(metadata.title[0]))
self.title_length = len(self.full_title)
self.extra_data_flags = 0b1
if writer.has_tbs:
self.extra_data_flags |= 0b10
self.uid = random.randint(0, 0xffffffff)
self.language_code = iana2mobi(str(metadata.language[0]))
self.exth_flags = 0b1010000
if writer.opts.mobi_periodical:
self.exth_flags |= 0b1000
self.opts = writer.opts
self.start_offset = writer.start_offset
self.metadata = metadata
self.kuc = 0 if len(resources.records) > 0 else None
@property
def record0(self):
''' We generate the EXTH header and record0 dynamically, to allow other
code to customize various values after build_records() has been
called'''
opts = self.opts
self.exth = build_exth(self.metadata,
prefer_author_sort=opts.prefer_author_sort,
is_periodical=opts.mobi_periodical,
share_not_sync=opts.share_not_sync,
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
num_of_resources=self.num_of_resources,
kf8_unknown_count=self.kuc, be_kindlegen2=True,
start_offset=self.start_offset, mobi_doctype=self.book_type)
kwargs = {field:getattr(self, field) for field in HEADER_FIELDS}
return MOBIHeader()(**kwargs)
def write(self, outpath):
records = [self.record0] + self.records[1:]
with open(outpath, 'wb') as f:
# Write PalmDB Header
title = ascii_filename(self.full_title.decode('utf-8')).replace(
' ', '_')[:31]
title += (b'\0' * (32 - len(title)))
now = int(time.time())
nrecords = len(records)
f.write(title)
f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
f.write(b'BOOKMOBI')
f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
offset = f.tell() + (8 * nrecords) + 2
for i, record in enumerate(records):
f.write(pack(b'>I', offset))
f.write(b'\0' + pack(b'>I', 2*i)[1:])
offset += len(record)
f.write(b'\0\0')
for rec in records:
f.write(rec)
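# For reference, the PalmDB layout written above is: a NUL-padded 32-byte
# name, attribute and version words, creation and modification timestamps
# (backup time, modification number, appinfo and sortinfo left at zero),
# the b'BOOKMOBI' type/creator pair, a uid seed, a zero next-record-list
# pointer, a record count, one 8-byte slot per record (a 4-byte file
# offset, a zero flag byte and a 3-byte record id of 2*i), and two pad
# bytes before the concatenated record payloads.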

View File

@ -0,0 +1,417 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from collections import namedtuple
from functools import partial
from lxml import etree
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.constants import ispy3
from calibre.ebooks.mobi.utils import to_base
CHUNK_SIZE = 8192
# References in links are stored with 10 digits
to_href = partial(to_base, base=32, min_num_digits=10)
# Tags to which kindlegen adds the aid attribute
aid_able_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details',
'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd',
'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p',
'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small',
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
'video'}
_self_closing_pat = re.compile(bytes(
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags))),
re.IGNORECASE)
def close_self_closing_tags(raw):
return _self_closing_pat.sub(br'<\g<tag>\g<arg>></\g<tag>>', raw)
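# For example:
# close_self_closing_tags(b'<div class="x"/>') == b'<div class="x"></div>'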
def path_to_node(node):
ans = []
parent = node.getparent()
while parent is not None:
ans.append(parent.index(node))
node = parent
parent = parent.getparent()
return tuple(reversed(ans))
def node_from_path(root, path):
parent = root
for idx in path:
parent = parent[idx]
return parent
mychr = chr if ispy3 else unichr
def tostring(raw, **kwargs):
''' lxml *sometimes* represents non-ascii characters as hex entities in
attribute values. I can't figure out exactly what circumstances cause it.
It seems to happen when serializing a part of a larger tree. Since we need
serialization to be the same when serializing full and partial trees, we
manually replace all hex entities with their unicode codepoints. '''
xml_declaration = kwargs.pop('xml_declaration', False)
encoding = kwargs.pop('encoding', 'UTF-8')
kwargs['encoding'] = unicode
kwargs['xml_declaration'] = False
ans = etree.tostring(raw, **kwargs)
if xml_declaration:
ans = '<?xml version="1.0" encoding="%s"?>\n'%encoding + ans
return re.sub(r'&#x([0-9A-Fa-f]+);', lambda m:mychr(int(m.group(1), 16)),
ans).encode(encoding)
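# For example, lxml may serialize <a title="é"> as <a title="&#xe9;"> when
# dumping a subtree; the substitution above folds such entities back to
# their codepoints so that full and partial serializations stay
# byte-identical.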
class Chunk(object):
def __init__(self, raw, parent_tag):
self.raw = raw
self.starts_tags = []
self.ends_tags = []
self.insert_pos = None
self.parent_tag = parent_tag
self.parent_is_body = False
self.is_last_chunk = False
self.is_first_chunk = False
def __len__(self):
return len(self.raw)
def merge(self, chunk):
self.raw += chunk.raw
self.ends_tags = chunk.ends_tags
def __repr__(self):
return 'Chunk(len=%r insert_pos=%r starts_tags=%r ends_tags=%r)'%(
len(self.raw), self.insert_pos, self.starts_tags, self.ends_tags)
@property
def selector(self):
typ = 'S' if (self.is_last_chunk and not self.parent_is_body) else 'P'
return "%s-//*[@aid='%s']"%(typ, self.parent_tag)
__str__ = __repr__
class Skeleton(object):
def __init__(self, file_number, item, root, chunks):
self.file_number, self.item = file_number, item
self.chunks = chunks
self.skeleton = self.render(root)
self.body_offset = self.skeleton.find('<body')
self.calculate_metrics(root)
self.calculate_insert_positions()
def render(self, root):
raw = tostring(root, xml_declaration=True)
raw = raw.replace(b'<html', bytes('<html xmlns="%s"'%XHTML_NS), 1)
return raw
def calculate_metrics(self, root):
Metric = namedtuple('Metric', 'start end')
self.metrics = {}
for tag in root.xpath('//*[@aid]'):
text = (tag.text or '').encode('utf-8')
raw = tostring(tag, with_tail=True)
start_length = len(raw.partition(b'>')[0]) + len(text) + 1
end_length = len(raw.rpartition(b'<')[-1]) + 1
self.metrics[tag.get('aid')] = Metric(start_length, end_length)
def calculate_insert_positions(self):
pos = self.body_offset
for chunk in self.chunks:
for tag in chunk.starts_tags:
pos += self.metrics[tag].start
chunk.insert_pos = pos
pos += len(chunk)
for tag in chunk.ends_tags:
pos += self.metrics[tag].end
def rebuild(self):
ans = self.skeleton
for chunk in self.chunks:
i = chunk.insert_pos
ans = ans[:i] + chunk.raw + ans[i:]
return ans
def __len__(self):
return len(self.skeleton) + sum([len(x.raw) for x in self.chunks])
@property
def raw_text(self):
return b''.join([self.skeleton] + [x.raw for x in self.chunks])
class Chunker(object):
def __init__(self, oeb, data_func, placeholder_map):
self.oeb, self.log = oeb, oeb.log
self.data = data_func
self.placeholder_map = placeholder_map
self.skeletons = []
# Set this to a list to enable dumping of the original and rebuilt
# html files for debugging
orig_dumps = None
for i, item in enumerate(self.oeb.spine):
root = self.remove_namespaces(self.data(item))
body = root.xpath('//body')[0]
body.tail = '\n'
if orig_dumps is not None:
orig_dumps.append(tostring(root, xml_declaration=True,
with_tail=True))
orig_dumps[-1] = close_self_closing_tags(
orig_dumps[-1].replace(b'<html',
bytes('<html xmlns="%s"'%XHTML_NS), 1))
# First pass: break up document into rendered strings of length no
# more than CHUNK_SIZE
chunks = []
self.step_into_tag(body, chunks)
# Second pass: Merge neighboring small chunks within the same
# skeleton tag so as to have chunks as close to the CHUNK_SIZE as
# possible.
chunks = self.merge_small_chunks(chunks)
# Third pass: Create the skeleton and calculate the insert position
# for all chunks
self.skeletons.append(Skeleton(i, item, root, chunks))
if orig_dumps:
self.dump(orig_dumps)
# Create the SKEL and Chunk tables
self.skel_table = []
self.chunk_table = []
self.create_tables()
# Set internal links
text = b''.join(x.raw_text for x in self.skeletons)
self.text = self.set_internal_links(text)
def remove_namespaces(self, root):
lang = None
for attr, val in root.attrib.iteritems():
if attr.rpartition('}')[-1] == 'lang':
lang = val
# Remove all namespace information from the tree. This means namespaced
# tags have their namespaces removed and all namespace declarations are
# removed. We have to do this manual cloning of the tree as there is no
# other way to remove namespace declarations in lxml. This is done so
# that serialization creates clean HTML 5 markup with no namespaces. We
# insert the XHTML namespace manually after serialization. The
# preceding layers should have removed svg and any other non html
# namespaced tags.
attrib = {'lang':lang} if lang else {}
nroot = etree.Element('html', attrib=attrib)
nroot.text = root.text
nroot.tail = '\n'
for tag in root.iterdescendants(etree.Element):
# We are ignoring all non tag entities in the tree
# like comments and processing instructions, as they make the
# chunking code even harder, for minimal gain.
elem = nroot.makeelement(tag.tag.rpartition('}')[-1],
attrib={k.rpartition('}')[-1]:v for k, v in
tag.attrib.iteritems()})
elem.text, elem.tail = tag.text, tag.tail
parent = node_from_path(nroot, path_to_node(tag.getparent()))
parent.append(elem)
return nroot
def step_into_tag(self, tag, chunks):
aid = tag.get('aid')
is_body = tag.tag == 'body'
first_chunk_idx = len(chunks)
# First handle any text
if tag.text and tag.text.strip(): # Leave pure whitespace in the skel
chunks.extend(self.chunk_up_text(tag.text, aid))
tag.text = None
# Now loop over children
for child in list(tag):
raw = tostring(child, with_tail=False)
raw = close_self_closing_tags(raw)
if len(raw) > CHUNK_SIZE and child.get('aid', None):
self.step_into_tag(child, chunks)
if child.tail and child.tail.strip(): # Leave pure whitespace
chunks.extend(self.chunk_up_text(child.tail, aid))
child.tail = None
else:
if len(raw) > CHUNK_SIZE:
self.log.warn('Tag %s has no aid and is too large for a'
' single chunk. Adding it anyway.'%child.tag)
chunks.append(Chunk(raw, aid))
if child.tail:
chunks.extend(self.chunk_up_text(child.tail, aid))
tag.remove(child)
if len(chunks) <= first_chunk_idx and chunks:
raise ValueError('Stepped into a tag that generated no chunks.')
# Mark the first and last chunks of this tag
if chunks:
chunks[first_chunk_idx].starts_tags.append(aid)
chunks[-1].ends_tags.append(aid)
my_chunks = chunks[first_chunk_idx:]
if my_chunks:
my_chunks[0].is_first_chunk = True
my_chunks[-1].is_last_chunk = True
if is_body:
for chunk in my_chunks:
chunk.parent_is_body = True
def chunk_up_text(self, text, parent_tag):
text = text.encode('utf-8')
ans = []
def split_multibyte_text(raw):
if len(raw) <= CHUNK_SIZE:
return raw, b''
l = raw[:CHUNK_SIZE]
l = l.decode('utf-8', 'ignore').encode('utf-8')
return l, raw[len(l):]
start, rest = split_multibyte_text(text)
ans.append(start)
while rest:
start, rest = split_multibyte_text(rest)
ans.append(b'<span class="AmznBigTextBlock">' + start + b'</span>')
return [Chunk(x, parent_tag) for x in ans]
def merge_small_chunks(self, chunks):
ans = chunks[:1]
for chunk in chunks[1:]:
prev = ans[-1]
if (
chunk.starts_tags or # Starts a tag in the skel
len(chunk) + len(prev) > CHUNK_SIZE or # Too large
prev.ends_tags # Prev chunk ended a tag
):
ans.append(chunk)
else:
prev.merge(chunk)
return ans
def create_tables(self):
Skel = namedtuple('Skel',
'file_number name chunk_count start_pos length')
sp = 0
for s in self.skeletons:
s.start_pos = sp
sp += len(s)
self.skel_table = [Skel(s.file_number, 'SKEL%010d'%s.file_number,
len(s.chunks), s.start_pos, len(s.skeleton)) for s in self.skeletons]
Chunk = namedtuple('Chunk',
'insert_pos selector file_number sequence_number start_pos length')
num = 0
for skel in self.skeletons:
cp = 0
for chunk in skel.chunks:
self.chunk_table.append(
Chunk(chunk.insert_pos + skel.start_pos, chunk.selector,
skel.file_number, num, cp, len(chunk.raw)))
cp += len(chunk.raw)
num += 1

    def set_internal_links(self, text):
        ''' Update the internal link placeholders to point to the correct
        location, based on the chunk table. '''
        # A kindle:pos:fid link contains two base-32 numbers of the form
        # XXXX:YYYYYYYYYY, where the first number is an index into the chunk
        # table and the second is an offset from the start of the chunk to
        # the start of the tag pointed to by the link.
        aid_map = {}  # Map of aid to (pos, fid)
        for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', text):
            offset = match.start()
            pos_fid = None
            for chunk in self.chunk_table:
                if chunk.insert_pos <= offset < chunk.insert_pos + chunk.length:
                    pos_fid = (chunk.sequence_number, offset-chunk.insert_pos)
                    break
                if chunk.insert_pos > offset:
                    # This aid is in the skeleton, not in a chunk, so we use
                    # the chunk immediately after it
                    pos_fid = (chunk.sequence_number, 0)
                    break
            if pos_fid is None:
                raise ValueError('Could not find chunk for aid: %r'%
                        match.group(1))
            aid_map[match.group(1)] = pos_fid

        self.aid_offset_map = aid_map

        def to_placeholder(aid):
            pos, fid = aid_map[aid]
            pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
            return bytes(':off:'.join((pos, fid)))

        placeholder_map = {bytes(k):to_placeholder(v) for k, v in
                self.placeholder_map.iteritems()}

        # Now update the links
        def sub(match):
            raw = match.group()
            pl = match.group(1)
            try:
                # Replace the 19 byte dummy suffix 0000:off:0000000000 with
                # the real position
                return raw[:-19] + placeholder_map[pl]
            except KeyError:
                pass
            return raw

        return re.sub(br'<[^>]+(kindle:pos:fid:0000:off:[0-9A-Za-z]{10})', sub,
                text)
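
    # Worked example (illustrative, not from the original source): for an aid
    # that resolves to chunk table entry 12 at byte offset 100 from the start
    # of its chunk, to_base(12, min_num_digits=4) yields '000C' and, assuming
    # to_href() is a ten digit base-32 encoder, the offset 100 becomes
    # '0000000034', so the rewritten link reads
    #   kindle:pos:fid:000C:off:0000000034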

    def dump(self, orig_dumps):
        import tempfile, shutil, os
        tdir = os.path.join(tempfile.gettempdir(), 'skeleton')
        self.log('Skeletons dumped to:', tdir)
        if os.path.exists(tdir):
            shutil.rmtree(tdir)
        orig = os.path.join(tdir, 'orig')
        rebuilt = os.path.join(tdir, 'rebuilt')
        chunks = os.path.join(tdir, 'chunks')
        for x in (orig, rebuilt, chunks):
            os.makedirs(x)
        error = False
        for i, skeleton in enumerate(self.skeletons):
            for j, chunk in enumerate(skeleton.chunks):
                with open(os.path.join(chunks, 'file-%d-chunk-%d.html'%(i, j)),
                        'wb') as f:
                    f.write(chunk.raw)
            oraw, rraw = orig_dumps[i], skeleton.rebuild()
            with open(os.path.join(orig, '%04d.html'%i), 'wb') as f:
                f.write(oraw)
            with open(os.path.join(rebuilt, '%04d.html'%i), 'wb') as f:
                f.write(rraw)
            if oraw != rraw:
                error = True
        if error:
            raise ValueError('The before and after HTML differs. Run a diff '
                    'tool on the orig and rebuilt directories')
        else:
            self.log('Skeleton HTML before and after is identical.')


@@ -0,0 +1,109 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from collections import namedtuple
from functools import partial

from calibre.ebooks.mobi.utils import (RECORD_SIZE, encode_trailing_data,
        encode_tbs)

Entry = namedtuple('IndexEntry', 'index start length depth parent '
        'first_child last_child title')
Data = namedtuple('Data', 'starts ends completes spans')

def collect_indexing_data(entries, number_of_text_records):
    ''' For every text record calculate which index entries start, end, span
    or are contained within that record. '''
    data = []
    for i in xrange(number_of_text_records):
        record_start, next_record_start = i*RECORD_SIZE, (i+1)*RECORD_SIZE
        datum = Data([], [], [], [])
        data.append(datum)

        for entry in entries:
            end = entry.start + entry.length - 1
            if (entry.start >= next_record_start or end < record_start):
                # This entry does not have any overlap with this record
                continue
            if (entry.start < record_start and end >= next_record_start):
                # This entry spans this record
                datum.spans.append(entry)
                continue
            if (entry.start >= record_start and end < next_record_start):
                # This entry is contained in this record
                datum.completes.append(entry)
                continue
            if (entry.start >= record_start and end >= next_record_start):
                # This entry starts in this record
                datum.starts.append(entry)
                continue
            if (entry.start < record_start and end < next_record_start):
                # This entry ends in this record
                datum.ends.append(entry)

        for x in datum:
            # Should be unnecessary as entries are already in this order, but
            # best to be safe.
            x.sort(key=lambda x:x.depth)

    return data
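
# Worked example (illustrative, not from the original source): an entry with
# start == RECORD_SIZE//2 and length == 2*RECORD_SIZE has
# end == 2.5*RECORD_SIZE - 1, so over three text records it is classified as
# starting in record 0, spanning record 1 and ending in record 2.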

def generate_tbs_for_flat_index(indexing_data):
    ans = []
    record_type = 8  # 8 for KF8, 0 for MOBI 6
    enc = partial(encode_tbs, flag_size=3)
    for datum in indexing_data:
        tbs = b''
        extra = {0b010 : record_type}
        if not (datum.starts or datum.ends or datum.completes or datum.spans):
            # No index entry touches this record
            pass
        elif datum.spans:
            extra[0b001] = 0
            tbs = enc(datum.spans[0].index, extra)
        else:
            starts, ends, completes = datum[:3]
            if (not completes and len(starts) + len(ends) == 1):
                # This record contains either the start or the end of a
                # single index entry, and nothing else
                node = (starts+ends)[0]
                tbs = enc(node.index, extra)
            else:
                # This record contains the end of an index and some complete
                # index entries. Or it contains some complete entries and a
                # start. Or it contains an end, a start and optionally some
                # completes. In every case, we encode the first entry to
                # touch this record and the number of entries that touch this
                # record.
                nodes = starts + completes + ends
                nodes.sort(key=lambda x:x.index)
                extra[0b100] = len(nodes)
                tbs = enc(nodes[0].index, extra)
        ans.append(tbs)
    return ans
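
# Illustrative example (not from the original source): a record touched by
# entry 3 (which ends in it), entry 4 (contained in it) and entry 5 (which
# starts in it) takes the final branch above and is encoded as
#   encode_tbs(3, {0b010: 8, 0b100: 3}, flag_size=3)
# that is, the first entry to touch the record has index 3, and three entries
# touch the record in total.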

def apply_trailing_byte_sequences(index_table, records, number_of_text_records):
    entries = tuple(Entry(r['index'], r['offset'], r['length'], r['depth'],
        r.get('parent', None), r.get('first_child', None), r.get('last_child',
            None), r['label']) for r in index_table)

    indexing_data = collect_indexing_data(entries, number_of_text_records)
    max_depth = max(e['depth'] for e in index_table)
    if max_depth > 0:
        # TODO: Implement for hierarchical ToCs
        tbs = []
    else:
        tbs = generate_tbs_for_flat_index(indexing_data)
    if not tbs:
        return False
    for i, tbs_bytes in enumerate(tbs):
        records[i+1] += encode_trailing_data(tbs_bytes)
    return True
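
if __name__ == '__main__':
    # Minimal smoke test, added for illustration only (not part of the
    # original file): a single flat entry that starts half way into record 0
    # and ends inside record 1 must be classified accordingly.
    demo_entry = Entry(index=0, start=RECORD_SIZE//2, length=RECORD_SIZE,
            depth=0, parent=None, first_child=None, last_child=None,
            title='Chapter 1')
    data = collect_indexing_data((demo_entry,), 2)
    assert demo_entry in data[0].starts
    assert demo_entry in data[1].ends
    print('collect_indexing_data smoke test passed')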
