From 2b826c4974152c707f6b3a54aa9310710f6769a1 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 9 Apr 2012 01:02:09 +0200
Subject: [PATCH 01/36] Improved Read It Later recipe, uses API to get articles
 feed, and new "Article View" data to get enhanced article content (with
 images)

---
 recipes/readitlater.recipe | 171 +++++++++++++++++++++----------------
 1 file changed, 96 insertions(+), 75 deletions(-)
diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 38f7ec1a9a..08196d3a3d 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,36 +1,39 @@
-"""
+'''
 readitlaterlist.com
-"""
+'''
 __license__   = 'GPL v3'
 __copyright__ = '''
-2010, Darko Miletic <darko.miletic at gmail.com>
-2011, Przemyslaw Kryger <pkryger at gmail.com>
-2012, tBunnyMan <Wag That Tail At Me dot com>
+2011, Keith Callenberg <keithcallenberg@gmail.com>
+2012, Alayn Gortazar <zutoin at gmail dot com>
 '''
 
-from calibre import strftime
+from contextlib import closing
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+import json
+import urllib
+import urllib2
 
-
-class Readitlater(BasicNewsRecipe):
-    title                 = 'ReadItLater'
-    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
-    description           = '''Personalized news feeds. Go to readitlaterlist.com to setup \
-                            up your news. This version displays pages of articles from \
-                            oldest to newest, with max & minimum counts, and marks articles \
-                            read after downloading.'''
+class Readitlaterv2(BasicNewsRecipe):
+    title                 = 'Read It Later v2'
+    __author__            = 'Keith Callenberg'
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to
+                               setup up your news. Fill in your account
+                               username, and optionally you can add your password.'''
     publisher             = 'readitlaterlist.com'
     category              = 'news, custom'
     oldest_article        = 7
-    max_articles_per_feed = 50
-    minimum_articles      = 1
+    max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     needs_subscription    = True
-    INDEX                 = u'http://readitlaterlist.com'
+    KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
+    INDEX                 = 'https://readitlaterlist.com/'
     LOGIN                 = INDEX + u'/l'
-    readList              = []
 
+    articles           = []
+    
+    feeds = [(u'Unread articles' , INDEX)]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -43,66 +46,84 @@ class Readitlater(BasicNewsRecipe):
             br.submit()
         return br
 
-    def get_feeds(self):
-        self.report_progress(0, ('Fetching list of pages...'))
-        lfeeds = []
-        i = 1
-        feedurl = self.INDEX + u'/unread/1'
-        while True:
-            title = u'Unread articles, page ' + str(i)
-            lfeeds.insert(0, (title, feedurl))
-            self.report_progress(0, ('Got ') + str(i) + (' pages'))
-            i += 1
-            soup = self.index_to_soup(feedurl)
-            ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
-            if ritem is None:
-                break
-            feedurl = self.INDEX + ritem['href']
-        return lfeeds
+
 
     def parse_index(self):
-        totalfeeds = []
-        articlesToGrab = self.max_articles_per_feed
-        lfeeds = self.get_feeds()
-        for feedobj in lfeeds:
-            if articlesToGrab < 1:
-                break
-            feedtitle, feedurl = feedobj
-            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
-            articles = []
-            soup = self.index_to_soup(feedurl)
-            ritem = soup.find('ul', attrs={'id':'list'})
-            for item in reversed(ritem.findAll('li')):
-                if articlesToGrab < 1:
-                    break
-                else:
-                    articlesToGrab -= 1
-                description = ''
-                atag = item.find('a', attrs={'class':'text'})
-                if atag and atag.has_key('href'):
-                    url         = self.INDEX + atag['href']
-                    title       = self.tag_to_string(item.div)
-                    date        = strftime(self.timefmt)
-                    articles.append({
-                                      'title'      :title
-                                     ,'date'       :date
-                                     ,'url'        :url
-                                     ,'description':description
-                                    })
-                    readLink = item.find('a', attrs={'class':'check'})['href']
-                    self.readList.append(readLink)
-            totalfeeds.append((feedtitle, articles))
-        if len(self.readList) < self.minimum_articles:
-            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
-        return totalfeeds
+        index = self.INDEX + 'v2/get?'
+        index += 'apikey=' + self.KEY
+        index += '&username=' + self.username + '&password=' + self.password 
+        index += '&state=unread'
+        index += '&count=' + str(self.max_articles_per_feed) 
 
-    def mark_as_read(self, markList):
-        br = self.get_browser()
-        for link in markList:
-            url = self.INDEX + link
-            response = br.open(url)
-            response
+        open_func = getattr(self.browser, 'open_novisit', self.browser.open)
+        with closing(open_func(index)) as f:
+            results = f.read()
+        if not results:
+            raise RuntimeError('Could not fetch index!')
+
+        json_obj = json.loads(results)
+
+        if len(json_obj['list']) > 0:
+            for item in json_obj['list'].iteritems():
+                dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id']
+                self.articles.append({
+                                 'title':item[1]['title'],
+                                 'date':item[1]['time_added'],
+                                 'url':dataurl,
+                                 'description':item[1]['item_id'],
+                                 'real_url':item[1]['url']
+                            })
+        return [('Unread', self.articles)]
+
+    def preprocess_raw_html(self, raw_html, url):
+        # get article and image urls from json object
+        json_obj = json.loads(raw_html)
+        self.images = {}
+        for image in json_obj['article']['images']:
+            self.images[image] = json_obj['article']['images'][image]['src']
+        return json_obj['article']['article']
+
+    def preprocess_html(self, soup):
+        # Insert images on RIL_IMG_# divs
+        for key, url in self.images.iteritems():
+            tag = Tag(soup, 'img')
+            tag['src'] = url
+            div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
+            div.insert(0, tag)
+        return soup
 
     def cleanup(self):
-        self.mark_as_read(self.readList)
+        # From a list of urls, create a human-readable JSON string
+        # suitable for passing to the ReadItLater SEND::READ method.
+    
+        self.markAsRead(self.createMarkList(self.articles))
+    
 
+    def createMarkList(self, articles):
+        urls = []
+        for article in self.articles:
+            urls.append(article['real_url'])
+        items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)]
+        s = '{\n %s\n}' % (',\n '.join(items),)
+        return s
+
+    def markAsRead(self, markList):
+        url = self.INDEX + 'v2/send'
+        values = {
+            'username' : self.username,
+            'password' : self.password,
+            'apikey' : self.KEY,
+            'read' : markList
+            }
+        data = urllib.urlencode(values)
+    
+        try:
+            print 'Calling ReadItLater API...'
+            request = urllib2.Request(url,data)
+            response = urllib2.urlopen(request)
+            the_page = response.read()
+            print 'response =', response.code
+        except urllib2.HTTPError as e:
+            print 'The server could not fulfill the request: ', e
+        except urllib2.URLError as e:
+            print 'The call to ReadItLater API failed:', e

From b81deec83a040ab2645cd14017e69f92edc60410 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 16 Apr 2012 23:05:06 +0200
Subject: [PATCH 02/36] Added title to each article and minimum_articles support

---
 recipes/readitlater.recipe | 42 +++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 08196d3a3d..53061dd72a 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -3,7 +3,10 @@ readitlaterlist.com
 '''
 __license__   = 'GPL v3'
 __copyright__ = '''
+2010, Darko Miletic <darko.miletic at gmail.com>
+2011, Przemyslaw Kryger <pkryger at gmail.com>
 2011, Keith Callenberg <keithcallenberg@gmail.com>
+2012, tBunnyMan <Wag That Tail At Me dot com>
 2012, Alayn Gortazar <zutoin at gmail dot com>
 '''
 
@@ -14,16 +17,17 @@ import json
 import urllib
 import urllib2
 
-class Readitlaterv2(BasicNewsRecipe):
-    title                 = 'Read It Later v2'
-    __author__            = 'Keith Callenberg'
+class Readitlater(BasicNewsRecipe):
+    title                 = 'Read It Later'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar'
     description           = '''Personalized news feeds. Go to readitlaterlist.com to
                                setup up your news. Fill in your account
                                username, and optionally you can add your password.'''
     publisher             = 'readitlaterlist.com'
     category              = 'news, custom'
     oldest_article        = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 50
+    minimum_articles      = 1
     no_stylesheets        = True
     use_embedded_content  = False
     needs_subscription    = True
@@ -51,7 +55,10 @@ class Readitlaterv2(BasicNewsRecipe):
     def parse_index(self):
         index = self.INDEX + 'v2/get?'
         index += 'apikey=' + self.KEY
-        index += '&username=' + self.username + '&password=' + self.password 
+        if self.username is not None:
+            index += '&username=' + self.username 
+            if self.password is not None:
+                index += '&password=' + self.password 
         index += '&state=unread'
         index += '&count=' + str(self.max_articles_per_feed) 
 
@@ -62,10 +69,12 @@ class Readitlaterv2(BasicNewsRecipe):
             raise RuntimeError('Could not fetch index!')
 
         json_obj = json.loads(results)
-
-        if len(json_obj['list']) > 0:
+        
+        if len(json_obj['list']) >= self.minimum_articles:
             for item in json_obj['list'].iteritems():
-                dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id']
+                # TODO: This URL should be modified by it's corresponding API call in a future. 
+                #       Actually is not possible to get the Article View potential throught an API call (12/04/2012)
+                dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id']
                 self.articles.append({
                                  'title':item[1]['title'],
                                  'date':item[1]['time_added'],
@@ -73,6 +82,9 @@ class Readitlaterv2(BasicNewsRecipe):
                                  'description':item[1]['item_id'],
                                  'real_url':item[1]['url']
                             })
+        else:
+            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
+
         return [('Unread', self.articles)]
 
     def preprocess_raw_html(self, raw_html, url):
@@ -81,23 +93,25 @@ class Readitlaterv2(BasicNewsRecipe):
         self.images = {}
         for image in json_obj['article']['images']:
             self.images[image] = json_obj['article']['images'][image]['src']
-        return json_obj['article']['article']
+        title = '<h1>{title}</h1>'.format(title=json_obj['article']['title']) 
+        link = '<p>Original: <a href="{url}">{url}</a></p>'.format(url=json_obj['article']['resolvedUrl'])
+        return link + title + json_obj['article']['article']
 
     def preprocess_html(self, soup):
         # Insert images on RIL_IMG_# divs
         for key, url in self.images.iteritems():
-            tag = Tag(soup, 'img')
-            tag['src'] = url
+            imgtag = Tag(soup, 'img')
+            imgtag['src'] = url
             div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
-            div.insert(0, tag)
+            div.insert(0, imgtag)
         return soup
 
     def cleanup(self):
         # From a list of urls, create a human-readable JSON string
         # suitable for passing to the ReadItLater SEND::READ method.
     
-        self.markAsRead(self.createMarkList(self.articles))
-    
+        #self.markAsRead(self.createMarkList(self.articles))
+        return 
 
     def createMarkList(self, articles):
         urls = []

From 56aec322cd7aca25ff550b532a1019d12d6cafeb Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Tue, 17 Apr 2012 22:38:46 +0200
Subject: [PATCH 03/36] Added horizontal line between articles

---
 recipes/readitlater.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 53061dd72a..5e425b8b5f 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -95,7 +95,7 @@ class Readitlater(BasicNewsRecipe):
             self.images[image] = json_obj['article']['images'][image]['src']
         title = '<h1>{title}</h1>'.format(title=json_obj['article']['title']) 
         link = '<p>Original: <a href="{url}">{url}</a></p>'.format(url=json_obj['article']['resolvedUrl'])
-        return link + title + json_obj['article']['article']
+        return link + title + json_obj['article']['article'] + '<hr />'
 
     def preprocess_html(self, soup):
         # Insert images on RIL_IMG_# divs

From 2a2ae6bb1403ba96999cb142e90f89c7f1606777 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Tue, 17 Apr 2012 22:40:30 +0200
Subject: [PATCH 04/36] Added recipe for Berria

---
 recipes/berria.recipe | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 recipes/berria.recipe

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
new file mode 100644
index 0000000000..240682231e
--- /dev/null
+++ b/recipes/berria.recipe
@@ -0,0 +1,37 @@
+__license__   = 'GPL v3'
+__copyright__ = '2012, Alayn Gortazar <zutoin at gmail.com>'
+'''
+www.berria.info
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Berria(BasicNewsRecipe):
+    title                 = 'Berria'
+    __author__            = 'Alayn Gortazar'
+    description           = 'Euskal Herriko euskarazko egunkaria'
+    publisher             = 'Berria'
+    category              = 'news, politics, Basque Country'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    language              = 'eu'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'
+
+    keep_only_tags = [
+                        dict(id='goiburua')
+                        ,dict(name='div', attrs={'class':'testua' })
+                     ]
+    remove_tags = [
+                        dict(name='a', attrs={'class':'iruzkinak'})
+                  ]
+     
+    feeds = [
+              (u'Edizioa jarraia'  , u'http://berria.info/rss/ediziojarraia.xml')
+#             ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml'       )
+#             ,(u'Iritzia'          , u'http://berria.info/rss/iritzia.xml'      )
+#             ,(u'Kirola'           , u'http://berria.info/rss/kirola.xml'       )
+#             ,(u'Plaza'            , u'http://berria.info/rss/plaza.xml'        )
+            ]

From adf67292fb4641ed4ad10d21348fe6dfb749ce0b Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Tue, 17 Apr 2012 22:42:11 +0200
Subject: [PATCH 05/36] Mark downloaded articles as read

---
 recipes/readitlater.recipe | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 5e425b8b5f..50c0cc27eb 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -110,8 +110,7 @@ class Readitlater(BasicNewsRecipe):
         # From a list of urls, create a human-readable JSON string
         # suitable for passing to the ReadItLater SEND::READ method.
     
-        #self.markAsRead(self.createMarkList(self.articles))
-        return 
+        self.markAsRead(self.createMarkList(self.articles))
 
     def createMarkList(self, articles):
         urls = []

From 7bcb500a4766119cae3bea01fb58a77b202c9fd2 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Wed, 18 Apr 2012 00:47:00 +0200
Subject: [PATCH 06/36] Improved Berria recipe visualization

---
 recipes/berria.recipe | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
index 240682231e..9d5bfe1585 100644
--- a/recipes/berria.recipe
+++ b/recipes/berria.recipe
@@ -22,16 +22,20 @@ class Berria(BasicNewsRecipe):
 
     keep_only_tags = [
                         dict(id='goiburua')
+                        ,dict(name='div', attrs={'class':'burukoak'})
                         ,dict(name='div', attrs={'class':'testua' })
+                        ,dict(name='div', attrs={'class':'ber_ikus' })
                      ]
     remove_tags = [
                         dict(name='a', attrs={'class':'iruzkinak'})
                   ]
+
+    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}'
      
     feeds = [
               (u'Edizioa jarraia'  , u'http://berria.info/rss/ediziojarraia.xml')
-#             ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml'       )
-#             ,(u'Iritzia'          , u'http://berria.info/rss/iritzia.xml'      )
-#             ,(u'Kirola'           , u'http://berria.info/rss/kirola.xml'       )
-#             ,(u'Plaza'            , u'http://berria.info/rss/plaza.xml'        )
+             ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml'       )
+             ,(u'Iritzia'          , u'http://berria.info/rss/iritzia.xml'      )
+             ,(u'Kirola'           , u'http://berria.info/rss/kirola.xml'       )
+             ,(u'Plaza'            , u'http://berria.info/rss/plaza.xml'        )
             ]

From 1111868a36c66e58ba7b02a06876fd0139dd0d8e Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Wed, 18 Apr 2012 13:20:27 +0200
Subject: [PATCH 07/36] Improved Berria recipe styles

---
 recipes/berria.recipe | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
index 9d5bfe1585..d987e1224b 100644
--- a/recipes/berria.recipe
+++ b/recipes/berria.recipe
@@ -22,15 +22,15 @@ class Berria(BasicNewsRecipe):
 
     keep_only_tags = [
                         dict(id='goiburua')
-                        ,dict(name='div', attrs={'class':'burukoak'})
-                        ,dict(name='div', attrs={'class':'testua' })
-                        ,dict(name='div', attrs={'class':'ber_ikus' })
+                        ,dict(name='div', attrs={'class':['ber_ikus']})
+                        ,dict(name='section', attrs={'class':'ber_ikus' })
                      ]
     remove_tags = [
                         dict(name='a', attrs={'class':'iruzkinak'})
+                        ,dict(name='div', attrs={'class':'laguntzaileak'})
                   ]
 
-    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}'
+    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'
      
     feeds = [
               (u'Edizioa jarraia'  , u'http://berria.info/rss/ediziojarraia.xml')

From dda955e67c15baec96482d1e17fe79057b6a27dd Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Wed, 18 Apr 2012 13:49:25 +0200
Subject: [PATCH 08/36] Added correct feed URLs to Berria recipe

---
 recipes/berria.recipe | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
index d987e1224b..6d2b5e05ec 100644
--- a/recipes/berria.recipe
+++ b/recipes/berria.recipe
@@ -34,8 +34,10 @@ class Berria(BasicNewsRecipe):
      
     feeds = [
               (u'Edizioa jarraia'  , u'http://berria.info/rss/ediziojarraia.xml')
-             ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml'       )
              ,(u'Iritzia'          , u'http://berria.info/rss/iritzia.xml'      )
+             ,(u'Euskal Herria'    , u'http://berria.info/rss/euskalherria.xml' )
+             ,(u'Ekonomia'          , u'http://berria.info/rss/ekonomia.xml'    )
+             ,(u'Mundua'          , u'http://berria.info/rss/mundua.xml'        )
              ,(u'Kirola'           , u'http://berria.info/rss/kirola.xml'       )
              ,(u'Plaza'            , u'http://berria.info/rss/plaza.xml'        )
             ]

From f9817538923c9d929d3da6193187b46f470d6f85 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Wed, 18 Apr 2012 23:06:32 +0200
Subject: [PATCH 09/36] Migrating to getpocket.com

---
 recipes/readitlater.recipe | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 50c0cc27eb..ec0b9c83b7 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -32,7 +32,7 @@ class Readitlater(BasicNewsRecipe):
     use_embedded_content  = False
     needs_subscription    = True
     KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
-    INDEX                 = 'https://readitlaterlist.com/'
+    INDEX                 = 'https://getpocket.com/'
     LOGIN                 = INDEX + u'/l'
 
     articles           = []
@@ -109,7 +109,6 @@ class Readitlater(BasicNewsRecipe):
     def cleanup(self):
         # From a list of urls, create a human-readable JSON string
         # suitable for passing to the ReadItLater SEND::READ method.
-    
         self.markAsRead(self.createMarkList(self.articles))
 
     def createMarkList(self, articles):

From de81f45215f18feb3c98338e8abd8a1f90535379 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Thu, 19 Apr 2012 01:25:40 +0200
Subject: [PATCH 10/36] Added "Enhanced version" option to read it later recipe

---
 recipes/readitlater.recipe | 72 +++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index ec0b9c83b7..c9d39e9082 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -32,33 +32,38 @@ class Readitlater(BasicNewsRecipe):
     use_embedded_content  = False
     needs_subscription    = True
     KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
+    API_TEXT_INDEX             = 'https://text.readitlaterlist.com/'
+    API_INDEX             = 'https://readitlaterlist.com/'
     INDEX                 = 'https://getpocket.com/'
     LOGIN                 = INDEX + u'/l'
+    enhanced_version      = True
 
-    articles           = []
+    articles              = []
     
     feeds = [(u'Unread articles' , INDEX)]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
-        if self.username is not None:
-            br.open(self.LOGIN)
-            br.select_form(nr=0)
-            br['feed_id'] = self.username
-            if self.password is not None:
-                br['password'] = self.password
-            br.submit()
+        if self.enhanced_version:
+            if self.username is not None:
+                br.open(self.LOGIN)
+                br.select_form(nr=0)
+                br['feed_id'] = self.username
+                if self.password is not None:
+                    br['password'] = self.password
+                br.submit()
         return br
 
-
+    def get_auth_params(self):
+        auth_params = 'apikey=' + self.KEY
+        if self.username is not None:
+            auth_params += '&username=' + self.username 
+            if self.password is not None:
+                auth_params += '&password=' + self.password 
+        return auth_params
 
     def parse_index(self):
-        index = self.INDEX + 'v2/get?'
-        index += 'apikey=' + self.KEY
-        if self.username is not None:
-            index += '&username=' + self.username 
-            if self.password is not None:
-                index += '&password=' + self.password 
+        index = self.API_INDEX + 'v2/get?' + self.get_auth_params()
         index += '&state=unread'
         index += '&count=' + str(self.max_articles_per_feed) 
 
@@ -74,7 +79,11 @@ class Readitlater(BasicNewsRecipe):
             for item in json_obj['list'].iteritems():
                 # TODO: This URL should be modified by it's corresponding API call in a future. 
                 #       Actually is not possible to get the Article View potential throught an API call (12/04/2012)
-                dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id']
+                if self.enhanced_version:
+                    dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id']
+                else:
+                    dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params()
+                    dataurl += '&url=' + item[1]['url']
                 self.articles.append({
                                  'title':item[1]['title'],
                                  'date':item[1]['time_added'],
@@ -89,21 +98,26 @@ class Readitlater(BasicNewsRecipe):
 
     def preprocess_raw_html(self, raw_html, url):
         # get article and image urls from json object
-        json_obj = json.loads(raw_html)
-        self.images = {}
-        for image in json_obj['article']['images']:
-            self.images[image] = json_obj['article']['images'][image]['src']
-        title = '<h1>{title}</h1>'.format(title=json_obj['article']['title']) 
-        link = '<p>Original: <a href="{url}">{url}</a></p>'.format(url=json_obj['article']['resolvedUrl'])
-        return link + title + json_obj['article']['article'] + '<hr />'
+        if self.enhanced_version:
+            json_obj = json.loads(raw_html)
+            self.images = {}
+            for image in json_obj['article']['images']:
+                self.images[image] = json_obj['article']['images'][image]['src']
+            title = '<h1>{title}</h1>'.format(title=json_obj['article']['title']) 
+            link = '<p>Original: <a href="{url}">{url}</a></p>'.format(url=json_obj['article']['resolvedUrl'])
+            html = link + title + json_obj['article']['article']
+        else:
+            html = raw_html
+        return html + '<hr />'
 
     def preprocess_html(self, soup):
         # Insert images on RIL_IMG_# divs
-        for key, url in self.images.iteritems():
-            imgtag = Tag(soup, 'img')
-            imgtag['src'] = url
-            div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
-            div.insert(0, imgtag)
+        if self.enhanced_version:
+            for key, url in self.images.iteritems():
+                imgtag = Tag(soup, 'img')
+                imgtag['src'] = url
+                div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
+                div.insert(0, imgtag)
         return soup
 
     def cleanup(self):
@@ -120,7 +134,7 @@ class Readitlater(BasicNewsRecipe):
         return s
 
     def markAsRead(self, markList):
-        url = self.INDEX + 'v2/send'
+        url = self.API_INDEX + 'v2/send'
         values = {
             'username' : self.username,
             'password' : self.password,

From b32e6085768ae0659b6878ef31ba6d1c25145685 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Sat, 21 Apr 2012 16:29:22 +0800
Subject: [PATCH 11/36] attempt to eliminate the general problem of italicize
 matching things inside of tags, headers, etc

---
 src/calibre/ebooks/conversion/utils.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index acfa80e877..2c1a5cd4d3 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -148,6 +148,7 @@ class HeuristicProcessor(object):
         return wordcount.words
 
     def markup_italicis(self, html):
+        self.log.debug("\n\n\nitalicize debugging \n\n\n")
         ITALICIZE_WORDS = [
             'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
             'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
@@ -156,28 +157,30 @@ class HeuristicProcessor(object):
         ]
 
         ITALICIZE_STYLE_PATS = [
-            ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
-            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
+            ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
             ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
-            ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
-            ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
             ur'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_',
             ur'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_',
             ur'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*',
-            ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
             ur'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/',
             ur'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|',
+            ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
+            ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
+            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
+            ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'
         ]
 
         for word in ITALICIZE_WORDS:
             html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
 
-        def sub(mo):
-            return '<i>%s</i>'%mo.group('words')
-
+        search_text = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
+        search_text = re.sub(r'<[^>]*>', '', search_text)
         for pat in ITALICIZE_STYLE_PATS:
-            html = re.sub(pat, sub, html)
-
+            for match in re.finditer(pat, search_text):
+                ital_string = str(match.group('words'))
+                #self.log.debug("italicising "+str(match.group(0))+"    with <i>"+ital_string+"</i>")
+                html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
+                
         return html
 
     def markup_chapters(self, html, wordcount, blanks_between_paragraphs):

From 43ada84eef994851a79e80fe00bb5a43408fd043 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 23 Apr 2012 00:48:28 +0200
Subject: [PATCH 12/36] Oldest to newest order. Added time to cover

---
 recipes/readitlater.recipe | 43 ++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index c9d39e9082..9cda772354 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -10,9 +10,11 @@ __copyright__ = '''
 2012, Alayn Gortazar <zutoin at gmail dot com>
 '''
 
+from operator import itemgetter 
 from contextlib import closing
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
+from calibre import strftime
 import json
 import urllib
 import urllib2
@@ -31,16 +33,16 @@ class Readitlater(BasicNewsRecipe):
     no_stylesheets        = True
     use_embedded_content  = False
     needs_subscription    = True
+    mark_as_read_after_dl = False
+    enhanced_version      = True
+    
     KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
-    API_TEXT_INDEX             = 'https://text.readitlaterlist.com/'
+    API_TEXT_INDEX        = 'https://text.readitlaterlist.com/'
     API_INDEX             = 'https://readitlaterlist.com/'
     INDEX                 = 'https://getpocket.com/'
     LOGIN                 = INDEX + u'/l'
-    enhanced_version      = True
 
     articles              = []
-    
-    feeds = [(u'Unread articles' , INDEX)]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -63,9 +65,10 @@ class Readitlater(BasicNewsRecipe):
         return auth_params
 
     def parse_index(self):
-        index = self.API_INDEX + 'v2/get?' + self.get_auth_params()
+        index = self.API_INDEX + 'v3/get?' + self.get_auth_params()
         index += '&state=unread'
         index += '&count=' + str(self.max_articles_per_feed) 
+        index += '&sort=oldest' 
 
         open_func = getattr(self.browser, 'open_novisit', self.browser.open)
         with closing(open_func(index)) as f:
@@ -85,15 +88,17 @@ class Readitlater(BasicNewsRecipe):
                     dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params()
                     dataurl += '&url=' + item[1]['url']
                 self.articles.append({
-                                 'title':item[1]['title'],
+                                 'title':item[1]['resolved_title'],
                                  'date':item[1]['time_added'],
                                  'url':dataurl,
                                  'description':item[1]['item_id'],
-                                 'real_url':item[1]['url']
+                                 'sort_id':int(item[1]['sort_id']),
+                                 'real_url':item[1]['given_url']
                             })
         else:
             raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
-
+        
+        self.articles = sorted(self.articles, key=itemgetter('sort_id'))
         return [('Unread', self.articles)]
 
     def preprocess_raw_html(self, raw_html, url):
@@ -123,7 +128,8 @@ class Readitlater(BasicNewsRecipe):
     def cleanup(self):
         # From a list of urls, create a human-readable JSON string
         # suitable for passing to the ReadItLater SEND::READ method.
-        self.markAsRead(self.createMarkList(self.articles))
+        if self.mark_as_read_after_dl:        
+            self.markAsRead(self.createMarkList(self.articles))
 
     def createMarkList(self, articles):
         urls = []
@@ -153,3 +159,22 @@ class Readitlater(BasicNewsRecipe):
             print 'The server could not fulfill the request: ', e
         except urllib2.URLError as e:
             print 'The call to ReadItLater API failed:', e
+
+    def default_cover(self, cover_file):
+        '''
+        Create a generic cover for recipes that don't have a cover
+        This override adds time to the cover
+        '''
+        try:
+            from calibre.ebooks import calibre_cover
+            title = self.title if isinstance(self.title, unicode) else \
+                    self.title.decode(preferred_encoding, 'replace')
+            date = strftime(self.timefmt)
+            time = strftime('[%I:%M %p]')
+            img_data = calibre_cover(title, date, time)
+            cover_file.write(img_data)
+            cover_file.flush()
+        except:
+            self.log.exception('Failed to generate default cover')
+            return False
+        return True

From 6185fa15528f487366fd9f48d1d9f90e684f21c4 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 23 Apr 2012 00:55:24 +0200
Subject: [PATCH 13/36] Changing 'unread' state with 'queue'

---
 recipes/readitlater.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 9cda772354..26dbe5baa7 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -66,7 +66,7 @@ class Readitlater(BasicNewsRecipe):
 
     def parse_index(self):
         index = self.API_INDEX + 'v3/get?' + self.get_auth_params()
-        index += '&state=unread'
+        index += '&state=queue'
         index += '&count=' + str(self.max_articles_per_feed) 
         index += '&sort=oldest' 
 

From 211ff892b235f1c6d56d88df61870293f902686c Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 23 Apr 2012 01:17:10 +0200
Subject: [PATCH 14/36] Making code more PEP8 friendly

---
 recipes/readitlater.recipe | 54 ++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 26dbe5baa7..e1c622ee0d 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,7 +1,7 @@
 '''
 readitlaterlist.com
 '''
-__license__   = 'GPL v3'
+__license__ = 'GPL v3'
 __copyright__ = '''
 2010, Darko Miletic <darko.miletic at gmail.com>
 2011, Przemyslaw Kryger <pkryger at gmail.com>
@@ -10,7 +10,7 @@ __copyright__ = '''
 2012, Alayn Gortazar <zutoin at gmail dot com>
 '''
 
-from operator import itemgetter 
+from operator import itemgetter
 from contextlib import closing
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
@@ -19,6 +19,7 @@ import json
 import urllib
 import urllib2
 
+
 class Readitlater(BasicNewsRecipe):
     title                 = 'Read It Later'
     __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar'
@@ -35,7 +36,7 @@ class Readitlater(BasicNewsRecipe):
     needs_subscription    = True
     mark_as_read_after_dl = False
     enhanced_version      = True
-    
+
     KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
     API_TEXT_INDEX        = 'https://text.readitlaterlist.com/'
     API_INDEX             = 'https://readitlaterlist.com/'
@@ -59,16 +60,17 @@ class Readitlater(BasicNewsRecipe):
     def get_auth_params(self):
         auth_params = 'apikey=' + self.KEY
         if self.username is not None:
-            auth_params += '&username=' + self.username 
+            auth_params += '&username=' + self.username
             if self.password is not None:
-                auth_params += '&password=' + self.password 
+                auth_params += '&password=' + self.password
         return auth_params
 
     def parse_index(self):
+        # WARNING: Pre-alpha API, I just figured out this calls params. Surprisingly worked! :)
         index = self.API_INDEX + 'v3/get?' + self.get_auth_params()
         index += '&state=queue'
-        index += '&count=' + str(self.max_articles_per_feed) 
-        index += '&sort=oldest' 
+        index += '&count=' + str(self.max_articles_per_feed)
+        index += '&sort=oldest'
 
         open_func = getattr(self.browser, 'open_novisit', self.browser.open)
         with closing(open_func(index)) as f:
@@ -77,10 +79,10 @@ class Readitlater(BasicNewsRecipe):
             raise RuntimeError('Could not fetch index!')
 
         json_obj = json.loads(results)
-        
+
         if len(json_obj['list']) >= self.minimum_articles:
             for item in json_obj['list'].iteritems():
-                # TODO: This URL should be modified by it's corresponding API call in a future. 
+                # TODO: This URL should be modified by it's corresponding API call in a future.
                 #       Actually is not possible to get the Article View potential throught an API call (12/04/2012)
                 if self.enhanced_version:
                     dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id']
@@ -88,16 +90,16 @@ class Readitlater(BasicNewsRecipe):
                     dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params()
                     dataurl += '&url=' + item[1]['url']
                 self.articles.append({
-                                 'title':item[1]['resolved_title'],
-                                 'date':item[1]['time_added'],
-                                 'url':dataurl,
-                                 'description':item[1]['item_id'],
-                                 'sort_id':int(item[1]['sort_id']),
-                                 'real_url':item[1]['given_url']
+                                 'title': item[1]['resolved_title'],
+                                 'date': item[1]['time_added'],
+                                 'url': dataurl,
+                                 'description': item[1]['item_id'],
+                                 'sort_id': int(item[1]['sort_id']),
+                                 'real_url': item[1]['given_url']
                             })
         else:
             raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
-        
+
         self.articles = sorted(self.articles, key=itemgetter('sort_id'))
         return [('Unread', self.articles)]
 
@@ -108,7 +110,7 @@ class Readitlater(BasicNewsRecipe):
             self.images = {}
             for image in json_obj['article']['images']:
                 self.images[image] = json_obj['article']['images'][image]['src']
-            title = '<h1>{title}</h1>'.format(title=json_obj['article']['title']) 
+            title = '<h1>{title}</h1>'.format(title=json_obj['article']['title'])
             link = '<p>Original: <a href="{url}">{url}</a></p>'.format(url=json_obj['article']['resolvedUrl'])
             html = link + title + json_obj['article']['article']
         else:
@@ -121,37 +123,37 @@ class Readitlater(BasicNewsRecipe):
             for key, url in self.images.iteritems():
                 imgtag = Tag(soup, 'img')
                 imgtag['src'] = url
-                div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
+                div = soup.find('div', attrs={'id': 'RIL_IMG_' + key})
                 div.insert(0, imgtag)
         return soup
 
     def cleanup(self):
         # From a list of urls, create a human-readable JSON string
         # suitable for passing to the ReadItLater SEND::READ method.
-        if self.mark_as_read_after_dl:        
+        if self.mark_as_read_after_dl:
             self.markAsRead(self.createMarkList(self.articles))
 
     def createMarkList(self, articles):
         urls = []
         for article in self.articles:
             urls.append(article['real_url'])
-        items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)]
+        items = ['"%d": {"url": "%s"}' % (n, u) for n, u in enumerate(urls)]
         s = '{\n %s\n}' % (',\n '.join(items),)
         return s
 
     def markAsRead(self, markList):
         url = self.API_INDEX + 'v2/send'
         values = {
-            'username' : self.username,
-            'password' : self.password,
-            'apikey' : self.KEY,
-            'read' : markList
+            'username': self.username,
+            'password': self.password,
+            'apikey': self.KEY,
+            'read': markList
             }
         data = urllib.urlencode(values)
-    
+
         try:
             print 'Calling ReadItLater API...'
-            request = urllib2.Request(url,data)
+            request = urllib2.Request(url, data)
             response = urllib2.urlopen(request)
             the_page = response.read()
             print 'response =', response.code

From 857ee6bc8192de5aac2eab03dd04ef669f102eb1 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 23 Apr 2012 01:28:10 +0200
Subject: [PATCH 15/36] Making berria recipe more PEP8 friendly

---
 recipes/berria.recipe | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
index 6d2b5e05ec..406a27e36c 100644
--- a/recipes/berria.recipe
+++ b/recipes/berria.recipe
@@ -6,12 +6,13 @@ www.berria.info
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class Berria(BasicNewsRecipe):
     title                 = 'Berria'
     __author__            = 'Alayn Gortazar'
     description           = 'Euskal Herriko euskarazko egunkaria'
     publisher             = 'Berria'
-    category              = 'news, politics, Basque Country'
+    category              = 'news, politics, sports, Basque Country'
     oldest_article        = 2
     max_articles_per_feed = 100
     no_stylesheets        = True
@@ -21,23 +22,23 @@ class Berria(BasicNewsRecipe):
     masthead_url          = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'
 
     keep_only_tags = [
-                        dict(id='goiburua')
-                        ,dict(name='div', attrs={'class':['ber_ikus']})
-                        ,dict(name='section', attrs={'class':'ber_ikus' })
+                        dict(id='goiburua'),
+                        dict(name='div', attrs={'class':['ber_ikus']}),
+                        dict(name='section', attrs={'class':'ber_ikus'})
                      ]
     remove_tags = [
-                        dict(name='a', attrs={'class':'iruzkinak'})
-                        ,dict(name='div', attrs={'class':'laguntzaileak'})
+                        dict(name='a', attrs={'class':'iruzkinak'}),
+                        dict(name='div', attrs={'class':'laguntzaileak'})
                   ]
 
     extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'
-     
+
     feeds = [
-              (u'Edizioa jarraia'  , u'http://berria.info/rss/ediziojarraia.xml')
-             ,(u'Iritzia'          , u'http://berria.info/rss/iritzia.xml'      )
-             ,(u'Euskal Herria'    , u'http://berria.info/rss/euskalherria.xml' )
-             ,(u'Ekonomia'          , u'http://berria.info/rss/ekonomia.xml'    )
-             ,(u'Mundua'          , u'http://berria.info/rss/mundua.xml'        )
-             ,(u'Kirola'           , u'http://berria.info/rss/kirola.xml'       )
-             ,(u'Plaza'            , u'http://berria.info/rss/plaza.xml'        )
+              (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
+              (u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
+              (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
+              (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
+              (u'Mundua', u'http://berria.info/rss/mundua.xml'),
+              (u'Kirola', u'http://berria.info/rss/kirola.xml'),
+              (u'Plaza', u'http://berria.info/rss/plaza.xml')
             ]

From a187febc8432a2dfa87bf90496c99b694fb5bb59 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 10:48:32 +0530
Subject: [PATCH 16/36] KF8: Fix handling of multi-level ToCs

---
 src/calibre/ebooks/mobi/writer8/main.py | 32 ++++++++++++++++---------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index cc2512549b..f929af80d4 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -297,7 +297,6 @@ class KF8Writer(object):
         self.chunk_records = ChunkIndex(self.chunk_table)()
         self.ncx_records = []
         toc = self.oeb.toc
-        max_depth = toc.depth()
         entries = []
         is_periodical = self.opts.mobi_periodical
         if toc.count() < 2:
@@ -307,26 +306,37 @@ class KF8Writer(object):
         # Flatten the ToC into a depth first list
         fl = toc.iter() if is_periodical else toc.iterdescendants()
         for i, item in enumerate(fl):
-            entry = {'index':i, 'depth': max_depth - item.depth() - (0 if
-                is_periodical else 1), 'href':item.href, 'label':(item.title or
-                    _('Unknown'))}
-            entries.append(entry)
-            for child in item:
-                child.ncx_parent = entry
+            entry = {'id': id(item), 'index': i, 'href':item.href,
+                    'label':(item.title or _('Unknown')),
+                    'children':[]}
+            entry['depth'] = getattr(item, 'ncx_hlvl', 0)
             p = getattr(item, 'ncx_parent', None)
             if p is not None:
-                entry['parent'] = p['index']
+                entry['parent_id'] = p
+            for child in item:
+                child.ncx_parent = entry['id']
+                child.ncx_hlvl = entry['depth'] + 1
+                entry['children'].append(id(child))
             if is_periodical:
                 if item.author:
                     entry['author'] = item.author
                 if item.description:
                     entry['description'] = item.description
+            entries.append(entry)
+
+        # The Kindle requires entries to be sorted by (depth, playorder)
+        entries.sort(key=lambda entry: (entry['depth'], entry['index']))
+        for i, entry in enumerate(entries):
+            entry['index'] = i
+        id_to_index = {entry['id']:entry['index'] for entry in entries}
 
         for entry in entries:
-            children = [e for e in entries if e.get('parent', -1) == entry['index']]
+            children = entry.pop('children')
             if children:
-                entry['first_child'] = children[0]['index']
-                entry['last_child'] = children[-1]['index']
+                entry['first_child'] = id_to_index[children[0]]
+                entry['last_child'] = id_to_index[children[-1]]
+            if 'parent_id' in entry:
+                entry['parent'] = id_to_index[entry.pop('parent_id')]
             href = entry.pop('href')
             href, frag = href.partition('#')[0::2]
             aid = self.id_map.get((href, frag), None)

From 8a3dedc7eb33ae54e4586f9301294fdf6fbb90d1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 11:55:11 +0530
Subject: [PATCH 17/36] ...

---
 src/calibre/ebooks/conversion/plugins/mobi_output.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index 4210f7223e..98a837e1a3 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -165,6 +165,8 @@ class MOBIOutput(OutputFormatPlugin):
         self.log, self.opts, self.oeb = log, opts, oeb
 
         mobi_type = tweaks.get('test_mobi_output_type', 'old')
+        if self.is_periodical:
+            mobi_type = 'old' # Amazon does not support KF8 periodicals
         create_kf8 = mobi_type in ('new', 'both')
 
         self.remove_html_cover()

From b13b7f8a504d9d95b42091c5f5faf0c71d0db3f9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 13:11:24 +0530
Subject: [PATCH 18/36] Fix Der Tagesspiegel

---
 recipes/tagesspiegel.recipe | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/recipes/tagesspiegel.recipe b/recipes/tagesspiegel.recipe
index 92d88d56ae..71191065f1 100644
--- a/recipes/tagesspiegel.recipe
+++ b/recipes/tagesspiegel.recipe
@@ -34,7 +34,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
     no_javascript = True
     remove_empty_feeds = True
     encoding = 'utf-8'
-    remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]
+    remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-colon'}, {'class':'hcf-date hcf-separate'}]
 
     def print_version(self, url):
         url = url.split('/')
@@ -51,6 +51,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
             return ''.join(div.findAll(text=True, recursive=False)).strip() if div is not None else None
 
         articles = {}
+        links = set()
         key = None
         ans = []
         maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
@@ -59,7 +60,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
 
              if div['class'] == 'hcf-header':
                  try:
-                     key = string.capwords(feed_title(div.em.a))
+                     key = string.capwords(feed_title(div.em))
                      articles[key] = []
                      ans.append(key)
                  except:
@@ -70,6 +71,12 @@ class TagesspiegelRSS(BasicNewsRecipe):
                  if not a:
                      continue
                  url = 'http://www.tagesspiegel.de' + a['href']
+
+                 # check for duplicates
+                 if url in links:
+                     continue	
+                 links.add(url)					 
+
                  title = self.tag_to_string(a, use_alt=True).strip()
                  description = ''
                  pubdate = strftime('%a, %d %b')

From 2951a9c696762aeabf17ad563bc043a37bb1ceab Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 13:19:14 +0530
Subject: [PATCH 19/36] KF8 debug: Dump the guide

---
 src/calibre/ebooks/mobi/debug/index.py | 24 ++++++++++++++++++++++++
 src/calibre/ebooks/mobi/debug/mobi8.py |  9 ++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/debug/index.py b/src/calibre/ebooks/mobi/debug/index.py
index 94f252e231..6065d12e5e 100644
--- a/src/calibre/ebooks/mobi/debug/index.py
+++ b/src/calibre/ebooks/mobi/debug/index.py
@@ -21,6 +21,8 @@ Elem = namedtuple('Chunk',
     'insert_pos toc_text file_number sequence_number start_pos '
     'length')
 
+GuideRef = namedtuple('GuideRef', 'type title pos_fid')
+
 def read_index(sections, idx, codec):
     table, cncx = OrderedDict(), CNCX([], codec)
 
@@ -124,6 +126,28 @@ class SECTIndex(Index):
                     )
                 )
 
+class GuideIndex(Index):
+
+    def __init__(self, guideidx, records, codec):
+        super(GuideIndex, self).__init__(guideidx, records, codec)
+        self.records = []
+
+        if self.table is not None:
+             for i, text in enumerate(self.table.iterkeys()):
+                tag_map = self.table[text]
+                if set(tag_map.iterkeys()) not in ({1, 6}, {1, 2, 3}):
+                    raise ValueError('Guide Index has unknown tags: %s'%
+                            tag_map)
+
+                title = self.cncx[tag_map[1][0]]
+                self.records.append(GuideRef(
+                    text,
+                    title,
+                    tag_map[6] if 6 in tag_map else (tag_map[2], tag_map[3])
+                    )
+                )
+
+
 class NCXIndex(Index):
 
     def __init__(self, ncxidx, records, codec):
diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index 4306d565e2..a91213f889 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -12,7 +12,8 @@ from itertools import izip
 
 from calibre import CurrentDir
 from calibre.ebooks.mobi.debug.headers import TextRecord
-from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
+from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
+        GuideIndex)
 from calibre.ebooks.mobi.utils import read_font_record
 from calibre.ebooks.mobi.debug import format_bytes
 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@@ -114,6 +115,8 @@ class MOBIFile(object):
                 self.header.encoding)
         self.ncx_index = NCXIndex(self.header.primary_index_record,
                 self.mf.records, self.header.encoding)
+        self.guide_index = GuideIndex(self.header.oth_idx, self.mf.records,
+                self.header.encoding)
 
     def build_files(self):
         text = self.raw_text
@@ -211,6 +214,10 @@ def inspect_mobi(mobi_file, ddir):
     with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
         fo.write(str(f.ncx_index).encode('utf-8'))
 
+    with open(os.path.join(ddir, 'guide.record'), 'wb') as fo:
+        fo.write(str(f.guide_index).encode('utf-8'))
+
+
     for part in f.files:
         part.dump(os.path.join(ddir, 'files'))
 

From 898cd84b726cc29832f7e541e328dc57dfaf3bf3 Mon Sep 17 00:00:00 2001
From: Alayn Gortazar <zutoin+launchpad@gmail.com>
Date: Mon, 23 Apr 2012 10:37:24 +0200
Subject: [PATCH 20/36] changing a dot

---
 recipes/berria.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/berria.recipe b/recipes/berria.recipe
index 406a27e36c..06f8344988 100644
--- a/recipes/berria.recipe
+++ b/recipes/berria.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2012, Alayn Gortazar <zutoin at gmail.com>'
+__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
 '''
 www.berria.info
 '''

From 9f7a30d3780fb4ca86c80dda9938dcfeee51644b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 14:39:57 +0530
Subject: [PATCH 21/36] KF8: Improved handling of startoffset

---
 src/calibre/ebooks/mobi/reader/mobi8.py | 25 +++++++++++++------------
 src/calibre/ebooks/mobi/writer8/exth.py | 11 ++++++++---
 src/calibre/ebooks/mobi/writer8/main.py | 10 +++++-----
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index bf068eb498..dcf2f998b2 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -109,7 +109,7 @@ class Mobi8Reader(object):
             table, cncx = read_index(self.kf8_sections, self.header.othidx,
                     self.header.codec)
             Item = namedtuple('Item',
-                'type title div_frag_num')
+                'type title pos_fid')
 
             for i, ref_type in enumerate(table.iterkeys()):
                 tag_map = table[ref_type]
@@ -119,7 +119,7 @@ class Mobi8Reader(object):
                 if 3 in tag_map.keys():
                     fileno  = tag_map[3][0]
                 if 6 in tag_map.keys():
-                    fileno = tag_map[6][0]
+                    fileno = tag_map[6]
                 self.guide.append(Item(ref_type.decode(self.header.codec),
                     title, fileno))
 
@@ -287,23 +287,24 @@ class Mobi8Reader(object):
 
     def create_guide(self):
         guide = Guide()
-        for ref_type, ref_title, fileno in self.guide:
+        has_start = False
+        for ref_type, ref_title, pos_fid in self.guide:
             try:
-                elem = self.elems[fileno]
-            except IndexError:
-                # Happens for thumbnailstandard in Amazon book samples
-                continue
-            fi = self.get_file_info(elem.insert_pos)
-            idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
-            linktgt = fi.filename
+                if len(pos_fid) != 2:
+                    continue
+            except TypeError:
+                continue # thumbnailstandard record, ignore it
+            linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
             if idtext:
                 linktgt += b'#' + idtext
-            g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu())
+            g = Guide.Reference(linktgt, os.getcwdu())
             g.title, g.type = ref_title, ref_type
+            if g.title == 'start' or g.type == 'text':
+                has_start = True
             guide.append(g)
 
         so = self.header.exth.start_offset
-        if so not in {None, NULL_INDEX}:
+        if so not in {None, NULL_INDEX} and not has_start:
             fi = self.get_file_info(so)
             if fi.filename is not None:
                 idtext = self.get_id_tag(so).decode(self.header.codec)
diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py
index 867e2c3112..b469c01d85 100644
--- a/src/calibre/ebooks/mobi/writer8/exth.py
+++ b/src/calibre/ebooks/mobi/writer8/exth.py
@@ -153,9 +153,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         nrecs += 1
 
     if start_offset is not None:
-        exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
-            start_offset))
-        nrecs += 1
+        try:
+            len(start_offset)
+        except TypeError:
+            start_offset = [start_offset]
+        for so in start_offset:
+            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
+                so))
+            nrecs += 1
 
     if num_of_resources is not None:
         exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index f929af80d4..a2148546f8 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -368,11 +368,11 @@ class KF8Writer(object):
             if aid is None:
                 continue
             pos, fid = self.aid_offset_map[aid]
-            if is_guide_ref_start(ref) and fid == 0:
-                # If fid != 0 then we cannot represent the start position as a
-                # single number in the EXTH header, so we do not write it to
-                # EXTH
-                self.start_offset = pos
+            if is_guide_ref_start(ref):
+                chunk = self.chunk_table[pos]
+                skel = [s for s in self.skel_table if s.file_number ==
+                        chunk.file_number][0]
+                self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
             self.guide_table.append(GuideRef(ref.title or
                 _('Unknown'), ref.type, (pos, fid)))
 

From 33bdde0edf4f9084c8ffe2bb276cf6caed312ff4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 15:10:35 +0530
Subject: [PATCH 22/36] ...

---
 src/calibre/ebooks/mobi/writer8/index.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py
index a3d5c6763f..c37afb81ff 100644
--- a/src/calibre/ebooks/mobi/writer8/index.py
+++ b/src/calibre/ebooks/mobi/writer8/index.py
@@ -284,7 +284,10 @@ class GuideIndex(Index):
 
 class NCXIndex(Index):
 
-    control_byte_count = 2
+    ''' The commented out parts have been seen in NCX indexes from MOBI 6
+    periodicals. Since we have no MOBI 8 periodicals to reverse engineer, leave
+    it for now. '''
+    # control_byte_count = 2
     tag_types = tuple(map(TagMeta, (
         ('offset',             1, 1, 1, 0),
         ('length',             2, 1, 2, 0),
@@ -295,12 +298,12 @@ class NCXIndex(Index):
         ('last_child',         23, 1, 64, 0),
         ('pos_fid',            6, 2, 128, 0),
         EndTagTable,
-        ('image',              69, 1, 1, 0),
-        ('description',        70, 1, 2, 0),
-        ('author',             71, 1, 4, 0),
-        ('caption',            72, 1, 8, 0),
-        ('attribution',        73, 1, 16, 0),
-        EndTagTable
+        # ('image',              69, 1, 1, 0),
+        # ('description',        70, 1, 2, 0),
+        # ('author',             71, 1, 4, 0),
+        # ('caption',            72, 1, 8, 0),
+        # ('attribution',        73, 1, 16, 0),
+        # EndTagTable
     )))
 
     def __init__(self, toc_table):

From b7be75ad8cdf50c4d31d60192aa2c333b37c083e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 15:50:44 +0530
Subject: [PATCH 23/36] ...

---
 src/calibre/gui2/complete.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index b5c1fc8b3e..fb1f39dfa3 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 
 
 from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt,
-        QApplication, QCompleter, QMetaObject)
+        QApplication, QCompleter, pyqtSignal)
 
 from calibre.utils.icu import sort_key, lower
 from calibre.gui2 import NONE
@@ -158,6 +158,8 @@ class MultiCompleteLineEdit(QLineEdit, LineEditECM):
 
 class MultiCompleteComboBox(EnComboBox):
 
+    clear_edit_text = pyqtSignal()
+
     def __init__(self, *args):
         EnComboBox.__init__(self, *args)
         self.setLineEdit(MultiCompleteLineEdit(self))
@@ -169,6 +171,8 @@ class MultiCompleteComboBox(EnComboBox):
         self.dummy_model = CompleteModel(self)
         c.setModel(self.dummy_model)
         self.lineEdit()._completer.setWidget(self)
+        self.clear_edit_text.connect(self.clearEditText,
+                type=Qt.QueuedConnection)
 
     def update_items_cache(self, complete_items):
         self.lineEdit().update_items_cache(complete_items)
@@ -191,8 +195,7 @@ class MultiCompleteComboBox(EnComboBox):
         what = unicode(what)
         le = self.lineEdit()
         if not what.strip():
-            QMetaObject.invokeMethod(self, 'clearEditText',
-                    Qt.QueuedConnection)
+            self.clear_edit_text.emit()
         else:
             self.setEditText(what)
             le.selectAll()

From 75a066b36421a13898f2a8da7ea90488d6864436 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 16:21:00 +0530
Subject: [PATCH 24/36] Show cover size in a tooltip in the conversion dialog

---
 src/calibre/gui2/convert/metadata.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py
index 80311502e8..68e90be9ef 100644
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@@ -95,9 +95,15 @@ class MetadataWidget(Widget, Ui_Form):
             if not pm.isNull():
                 self.cover.setPixmap(pm)
                 self.cover_data = cover
+                self.set_cover_tooltip(pm)
         else:
             self.cover.setPixmap(QPixmap(I('default_cover.png')))
+            self.cover.setToolTip(_('This book has no cover'))
 
+    def set_cover_tooltip(self, pm): # Set the cover widget's tooltip to the width x height (in pixels) of pixmap pm
+        tt = _('Cover size: %(width)d x %(height)d pixels') % dict(
+                width=pm.width(), height=pm.height())
+        self.cover.setToolTip(tt)
 
     def initialize_combos(self):
         self.initalize_authors()
@@ -205,6 +211,7 @@ class MetadataWidget(Widget, Ui_Form):
                     d.exec_()
                 else:
                     self.cover_path.setText(_file)
+                    self.set_cover_tooltip(pix)
                     self.cover.setPixmap(pix)
                     self.cover_changed = True
                     self.cpixmap = pix

From 6b412476ab3874ef3240c63a7340761e1d7441d2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 16:27:03 +0530
Subject: [PATCH 25/36] Cover Browser: Wrap the title on space only, not in
 between words. Fixes #986516 (Cover Browser - font size, divided words)

---
 src/calibre/gui2/pictureflow/pictureflow.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp
index 28c29ee2c7..88fff1fd2c 100644
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@@ -99,7 +99,7 @@ typedef unsigned short QRgb565;
 #define PFREAL_ONE (1 << PFREAL_SHIFT)
 #define PFREAL_HALF (PFREAL_ONE >> 1)
 
-#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextWrapAnywhere|Qt::TextHideMnemonic|Qt::AlignCenter)
+#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextHideMnemonic|Qt::AlignCenter)
 
 inline PFreal fmul(PFreal a, PFreal b)
 {

From fe1d9582886b704b380b63c972895b73a4fe1763 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 17:34:38 +0530
Subject: [PATCH 26/36] Fix #986958 (Tooltip not updating when paste cover)

---
 src/calibre/gui2/book_details.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py
index 4b12335fe3..cf5bfd14d3 100644
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@@ -334,6 +334,7 @@ class CoverView(QWidget): # {{{
             self.pixmap = pmap
             self.do_layout()
             self.update()
+            self.update_tooltip()
             if not config['disable_animations']:
                 self.animation.start()
             id_ = self.data.get('id', None)

From 0c929941281525c45b5c7e1c0d6c37409d708773 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 18:53:31 +0530
Subject: [PATCH 27/36] Fix #986658 (Calibre crashes when updating data on
 Metadata)

---
 src/calibre/gui2/actions/edit_metadata.py |   6 +-
 src/calibre/gui2/proceed.py               | 151 ++++++++++++++++++++++
 src/calibre/gui2/ui.py                    |   2 +
 3 files changed, 155 insertions(+), 4 deletions(-)
 create mode 100644 src/calibre/gui2/proceed.py

diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index a58bae25fd..21cba758e8 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -115,14 +115,12 @@ class EditMetadataAction(InterfaceAction):
                     ' "Show details" to see which books.')%num
 
         payload = (id_map, tdir, log_file, lm_map)
-        from calibre.gui2.dialogs.message_box import ProceedNotification
-        p = ProceedNotification(self.apply_downloaded_metadata,
+        self.gui.proceed_question(self.apply_downloaded_metadata,
                 payload, log_file,
                 _('Download log'), _('Download complete'), msg,
                 det_msg=det_msg, show_copy_button=show_copy_button,
                 cancel_callback=lambda x:self.cleanup_bulk_download(tdir),
-                parent=self.gui, log_is_file=True)
-        p.show()
+                log_is_file=True)
 
     def apply_downloaded_metadata(self, payload):
         good_ids, tdir, log_file, lm_map = payload
diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py
new file mode 100644
index 0000000000..433b365e35
--- /dev/null
+++ b/src/calibre/gui2/proceed.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from collections import namedtuple
+
+from PyQt4.Qt import (QDialog, Qt, QLabel, QGridLayout, QPixmap,
+        QDialogButtonBox, QApplication, QSize, pyqtSignal, QIcon,
+        QPlainTextEdit)
+
+from calibre.constants import __version__
+from calibre.gui2.dialogs.message_box import ViewLog
+
+Question = namedtuple('Question', 'payload callback cancel_callback '
+        'title msg html_log log_viewer_title log_is_file det_msg '
+        'show_copy_button') # One queued prompt; ProceedQuestion.accept()/reject() unpack the first three fields by position
+
+class ProceedQuestion(QDialog): # A single reusable Yes/No dialog that works through a list of pending Question tuples
+
+    ask_question = pyqtSignal(object, object) # Emitted by accept()/reject() as (callback, payload)
+
+    def __init__(self, parent): # Build all widgets once; per-question content is filled in by show_question()
+        QDialog.__init__(self, parent)
+        self.setAttribute(Qt.WA_DeleteOnClose, False) # The same dialog object is shown again for later questions
+        self.setWindowIcon(QIcon(I('dialog_question.png')))
+
+        self.questions = [] # Pending Question tuples; index 0 is the one currently displayed
+
+        self._l = l = QGridLayout(self)
+        self.setLayout(l)
+
+        self.icon_label = ic = QLabel(self)
+        ic.setPixmap(QPixmap(I('dialog_question.png')))
+        self.msg_label = msg = QLabel('some random filler text') # Placeholder; replaced by question.msg in show_question()
+        msg.setWordWrap(True)
+        ic.setMaximumWidth(110)
+        ic.setMaximumHeight(100)
+        ic.setScaledContents(True)
+        ic.setStyleSheet('QLabel { margin-right: 10px }')
+        self.bb = QDialogButtonBox(QDialogButtonBox.Yes|QDialogButtonBox.No)
+        self.bb.accepted.connect(self.accept)
+        self.bb.rejected.connect(self.reject)
+        self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
+        self.log_button.setIcon(QIcon(I('debug.png')))
+        self.log_button.clicked.connect(self.show_log)
+        self.copy_button = self.bb.addButton(_('&Copy to clipboard'),
+                self.bb.ActionRole)
+        self.copy_button.clicked.connect(self.copy_to_clipboard)
+        self.show_det_msg = _('Show &details') # The two labels the details toggle button alternates between
+        self.hide_det_msg = _('Hide &details')
+        self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)
+        self.det_msg_toggle.clicked.connect(self.toggle_det_msg)
+        self.det_msg_toggle.setToolTip(
+                _('Show detailed information about this error'))
+        self.det_msg = QPlainTextEdit(self)
+        self.det_msg.setReadOnly(True)
+        self.bb.button(self.bb.Yes).setDefault(True)
+
+        l.addWidget(ic, 0, 0, 1, 1) # Row 0: icon and message; row 1: details text; row 2: button box
+        l.addWidget(msg, 0, 1, 1, 1)
+        l.addWidget(self.det_msg, 1, 0, 1, 2)
+        l.addWidget(self.bb, 2, 0, 1, 2)
+
+        self.ask_question.connect(self.do_ask_question,
+                type=Qt.QueuedConnection) # Queued: do_ask_question is invoked from the event loop, not from inside emit()
+
+    def copy_to_clipboard(self, *args): # Copy calibre version, window title, message and details text to the clipboard
+        QApplication.clipboard().setText(
+                'calibre, version %s\n%s: %s\n\n%s' %
+                (__version__, unicode(self.windowTitle()),
+                    unicode(self.msg_label.text()),
+                    unicode(self.det_msg.toPlainText())))
+        self.copy_button.setText(_('Copied')) # NOTE(review): show_question() never restores the original label for later questions
+
+    def accept(self): # Connected to bb.accepted: drop the current question and queue its callback
+        if self.questions:
+            payload, callback, cancel_callback = self.questions[0][:3] # First three Question fields, in declaration order
+            self.questions = self.questions[1:]
+            self.ask_question.emit(callback, payload)
+        self.hide()
+
+    def reject(self): # Connected to bb.rejected: drop the current question and queue its cancel_callback
+        if self.questions:
+            payload, callback, cancel_callback = self.questions[0][:3]
+            self.questions = self.questions[1:]
+            self.ask_question.emit(cancel_callback, payload) # cancel_callback may be None; do_ask_question checks callable()
+        self.hide()
+
+    def do_ask_question(self, callback, payload): # Slot for ask_question: run the callback, then show the next pending question
+        if callable(callback):
+            callback(payload)
+        self.show_question()
+
+    def toggle_det_msg(self, *args): # Flip the details pane between shown and hidden
+        vis = unicode(self.det_msg_toggle.text()) == self.hide_det_msg # True when the button currently reads "Hide details"
+        self.det_msg_toggle.setText(self.show_det_msg if vis else
+                self.hide_det_msg)
+        self.det_msg.setVisible(not vis)
+        self.do_resize()
+
+    def do_resize(self): # Resize to the size hint plus 100px of width, capped at 500 in each dimension
+        sz = self.sizeHint() + QSize(100, 0)
+        sz.setWidth(min(500, sz.width()))
+        sz.setHeight(min(500, sz.height()))
+        self.resize(sz)
+
+    def show_question(self): # Populate the dialog from questions[0] and show it; does nothing while already visible
+        if self.isVisible(): return
+        if self.questions:
+            question = self.questions[0]
+            self.msg_label.setText(question.msg)
+            self.setWindowTitle(question.title)
+            self.log_button.setVisible(bool(question.html_log))
+            self.copy_button.setVisible(bool(question.show_copy_button))
+            self.det_msg.setPlainText(question.det_msg or '')
+            self.det_msg.setVisible(False) # Details always start collapsed for a new question
+            self.det_msg_toggle.setVisible(bool(question.det_msg))
+            self.det_msg_toggle.setText(self.show_det_msg)
+            self.bb.button(self.bb.Yes).setDefault(True)
+            self.do_resize()
+            self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
+            self.show()
+
+    def __call__(self, callback, payload, html_log, log_viewer_title, title,
+            msg, det_msg='', show_copy_button=False, cancel_callback=None,
+            log_is_file=False): # Queue a new question and show it if the dialog is not already visible
+        question = Question(payload, callback, cancel_callback, title, msg,
+                html_log, log_viewer_title, log_is_file, det_msg,
+                show_copy_button) # Note: Question's field order (payload first) differs from this signature
+        self.questions.append(question)
+        self.show_question()
+
+    def show_log(self): # Open the current question's log in a ViewLog dialog parented to this one
+        if self.questions:
+            q = self.questions[0]
+            log = q.html_log
+            if q.log_is_file: # html_log is then a path; its contents are read and decoded as UTF-8
+                with open(log, 'rb') as f:
+                    log = f.read().decode('utf-8')
+            self.log_viewer = ViewLog(q.log_viewer_title, log,
+                        parent=self)
+
+if __name__ == '__main__': # Manual check: run the dialog standalone, with no parent and no queued questions
+    app = QApplication([])
+    ProceedQuestion(None).exec_()
+
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index fa62dba4bc..84abda8f12 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -43,6 +43,7 @@ from calibre.gui2.tag_browser.ui import TagBrowserMixin
 from calibre.gui2.keyboard import Manager
 from calibre.gui2.auto_add import AutoAdder
 from calibre.library.sqlite import sqlite, DatabaseException
+from calibre.gui2.proceed import ProceedQuestion
 
 class Listener(Thread): # {{{
 
@@ -109,6 +110,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         MainWindow.__init__(self, opts, parent=parent, disable_automatic_gc=True)
         self.proceed_requested.connect(self.do_proceed,
                 type=Qt.QueuedConnection)
+        self.proceed_question = ProceedQuestion(self)
         self.keyboard = Manager(self)
         _gui = self
         self.opts = opts

From 4374e16bc8f6b90e7fc3bc9c8e2478a354e6a1b4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 23:48:14 +0530
Subject: [PATCH 28/36] tighten format_field_extended

---
 src/calibre/ebooks/metadata/book/base.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index ce80486af8..63d8ffacf4 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -650,11 +650,7 @@ class Metadata(object):
             res = self.get(key, None)
             cmeta = self.get_user_metadata(key, make_copy=False)
             name = unicode(cmeta['name'])
-            if cmeta['datatype'] != 'composite' and (res is None or res == ''):
-                return (name, res, None, None)
-            orig_res = res
-            cmeta = self.get_user_metadata(key, make_copy=False)
-            if res is None or res == '':
+            if res in {None, ''}:
                 return (name, res, None, None)
             orig_res = res
             datatype = cmeta['datatype']

From 5bac9086706e8f63076053d7ca0f4ece5e11da0d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 23 Apr 2012 23:55:48 +0530
Subject: [PATCH 29/36] ...

---
 src/calibre/ebooks/mobi/writer8/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index a2148546f8..4a54a73ca4 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -377,6 +377,7 @@ class KF8Writer(object):
                 _('Unknown'), ref.type, (pos, fid)))
 
         if self.guide_table:
+            self.guide_table.sort(key=lambda x:x.type)
             self.guide_records = GuideIndex(self.guide_table)()
 
 def create_kf8_book(oeb, opts, resources):

From 9ca2f906ba247bc4c1aa8e890255384bf3ab0bad Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 00:25:18 +0530
Subject: [PATCH 30/36] ...

---
 src/calibre/ebooks/mobi/writer8/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index 4a54a73ca4..19d7e390a9 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -377,7 +377,7 @@ class KF8Writer(object):
                 _('Unknown'), ref.type, (pos, fid)))
 
         if self.guide_table:
-            self.guide_table.sort(key=lambda x:x.type)
+            self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
             self.guide_records = GuideIndex(self.guide_table)()
 
 def create_kf8_book(oeb, opts, resources):

From 81a94b287b5fb7c8abbb0b68e0f2a3e5d32ad9c8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 06:50:12 +0530
Subject: [PATCH 31/36] oops, remove the mistaken tightening of
 format_field_extended

---
 src/calibre/ebooks/metadata/book/base.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 63d8ffacf4..ce80486af8 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -650,7 +650,11 @@ class Metadata(object):
             res = self.get(key, None)
             cmeta = self.get_user_metadata(key, make_copy=False)
             name = unicode(cmeta['name'])
-            if res in {None, ''}:
+            if cmeta['datatype'] != 'composite' and (res is None or res == ''):
+                return (name, res, None, None)
+            orig_res = res
+            cmeta = self.get_user_metadata(key, make_copy=False)
+            if res is None or res == '':
                 return (name, res, None, None)
             orig_res = res
             datatype = cmeta['datatype']

From 6b9fc3d0abbe088724e44c88cefac52f3965e799 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 09:03:45 +0530
Subject: [PATCH 32/36] ...

---
 src/calibre/ebooks/mobi/debug/headers.py |  4 ++--
 src/calibre/manual/conversion.rst        | 12 ++++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py
index 1d2c5b78b9..1799d403f1 100644
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@@ -327,7 +327,7 @@ class MOBIHeader(object): # {{{
             self.primary_index_record, = struct.unpack(b'>I',
                     self.raw[244:248])
 
-        if self.file_version >= 8:
+        if self.length >= 248:
             (self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
                     ) = struct.unpack_from(b'>4L', self.raw, 248)
             self.unknown9 = self.raw[264:self.length]
@@ -414,7 +414,7 @@ class MOBIHeader(object): # {{{
                     self.has_indexing_bytes, self.has_uncrossable_breaks ))
             ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
                 self.primary_index_record))
-        if self.file_version >= 8:
+        if self.length >= 248:
             i('Sections Index', self.sect_idx)
             i('SKEL Index', self.skel_idx)
             i('DATP Index', self.datp_idx)
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index c37c1eafdb..f6fe04dd90 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -573,6 +573,18 @@ There is a Word macro package that can automate the conversion of Word documents
 generating the Table of Contents much simpler. It is called BookCreator and is available for free
 at `mobileread <http://www.mobileread.com/forums/showthread.php?t=28313>`_.
 
+An easy way to generate a Table of Contents when converting a Word document is:
+
+ 1. Mark your Chapters and sub-Chapters in the doc file with one of the MS built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6'. 'Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2>, etc.
+
+ 2. Save the doc as Webpage-filtered (rather than Webpage) and import the html file into |app|
+
+ 3. When you convert in |app|, use the styles you chose in step 1 to set the box called 'Detect chapters at' on the Convert - Structure Detection page. For example:
+
+  * If you mark Chapters with style 'Heading 2', then set the 'Detect chapters at' box to ``//h:h2``. This will give you a proper external metadata TOC in the converted epub.
+  * A slightly more complex example: if your book has Sections and Chapters and you want a 2-level nested metadata TOC, mark the doc Sections with style 'Heading 2' and the Chapters with style 'Heading 3'. When you convert, set the 'Detect chapters at' box to ``//h:h2|//h:h3``. On the Convert - TOC page, set the 'Level 1 TOC' box to ``//h:h2`` and the 'Level 2 TOC' box to ``//h:h3``.
+
+
 Convert TXT documents
 ~~~~~~~~~~~~~~~~~~~~~~
 

From 6d3bd67c9336fe4517c395a3db7b92e1f84b860c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 09:33:37 +0530
Subject: [PATCH 33/36] Nicer dump headers for joint KF8 files

---
 src/calibre/ebooks/mobi/debug/headers.py | 128 ++++++++++++-----------
 1 file changed, 69 insertions(+), 59 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py
index 1799d403f1..77a31606e2 100644
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@@ -337,11 +337,12 @@ class MOBIHeader(object): # {{{
 
         # The following are all relative to the position of the header record
         # make them absolute for ease of debugging
-        for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
+        self.relative_records = {'sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
                 'meta_orth_indx', 'huffman_record_offset',
                 'first_non_book_record', 'datp_record_offset', 'fcis_number',
                 'flis_number', 'primary_index_record', 'fdst_idx',
-                'first_image_index'):
+                'first_image_index'}
+        for x in self.relative_records:
             if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
                 setattr(self, x, self.header_offset+getattr(self, x))
 
@@ -355,70 +356,79 @@ class MOBIHeader(object): # {{{
 
     def __str__(self):
         ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
+
         a = ans.append
-        i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
-        ans.append('Compression: %s'%self.compression)
-        ans.append('Unused: %r'%self.unused)
-        ans.append('Number of text records: %d'%self.number_of_text_records)
-        ans.append('Text record size: %d'%self.text_record_size)
-        ans.append('Encryption: %s'%self.encryption_type)
-        ans.append('Unknown: %r'%self.unknown)
-        ans.append('Identifier: %r'%self.identifier)
-        ans.append('Header length: %d'% self.length)
-        ans.append('Type: %s'%self.type)
-        ans.append('Encoding: %s'%self.encoding)
-        ans.append('UID: %r'%self.uid)
-        ans.append('File version: %d'%self.file_version)
-        i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
-        i('Meta Infl Index', self.meta_infl_indx)
-        ans.append('Secondary index record: %d (null val: %d)'%(
-            self.secondary_index_record, NULL_INDEX))
-        ans.append('Reserved: %r'%self.reserved)
-        ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
-            self.first_non_book_record))
-        ans.append('Full name offset: %d'%self.fullname_offset)
-        ans.append('Full name length: %d bytes'%self.fullname_length)
-        ans.append('Langcode: %r'%self.locale_raw)
-        ans.append('Language: %s'%self.language)
-        ans.append('Sub language: %s'%self.sublanguage)
-        ans.append('Input language: %r'%self.input_language)
-        ans.append('Output language: %r'%self.output_langauage)
-        ans.append('Min version: %d'%self.min_version)
-        ans.append('First Image index: %d'%self.first_image_index)
-        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
-        ans.append('Huffman record count: %d'%self.huffman_record_count)
-        ans.append('DATP record offset: %r'%self.datp_record_offset)
-        ans.append('DATP record count: %r'%self.datp_record_count)
-        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
+
+        def i(d, x): # Append "label: value" to the output, printing NULL when x equals NULL_INDEX
+            x = 'NULL' if x == NULL_INDEX else x
+            a('%s: %s'%(d, x))
+
+        def r(d, attr): # Report attribute attr; non-null attributes listed in relative_records show both absolute and relative values
+            x = getattr(self, attr)
+            if attr in self.relative_records and x != NULL_INDEX:
+                a('%s: Absolute: %d Relative: %d'%(d, x, x-self.header_offset)) # Relative value = x minus header_offset
+            else:
+                i(d, x)
+
+        a('Compression: %s'%self.compression)
+        a('Unused: %r'%self.unused)
+        a('Number of text records: %d'%self.number_of_text_records)
+        a('Text record size: %d'%self.text_record_size)
+        a('Encryption: %s'%self.encryption_type)
+        a('Unknown: %r'%self.unknown)
+        a('Identifier: %r'%self.identifier)
+        a('Header length: %d'% self.length)
+        a('Type: %s'%self.type)
+        a('Encoding: %s'%self.encoding)
+        a('UID: %r'%self.uid)
+        a('File version: %d'%self.file_version)
+        r('Meta Orth Index', 'meta_orth_indx')
+        r('Meta Infl Index', 'meta_infl_indx')
+        r('Secondary index record', 'secondary_index_record')
+        a('Reserved: %r'%self.reserved)
+        r('First non-book record', 'first_non_book_record')
+        a('Full name offset: %d'%self.fullname_offset)
+        a('Full name length: %d bytes'%self.fullname_length)
+        a('Langcode: %r'%self.locale_raw)
+        a('Language: %s'%self.language)
+        a('Sub language: %s'%self.sublanguage)
+        a('Input language: %r'%self.input_language)
+        a('Output language: %r'%self.output_langauage)
+        a('Min version: %d'%self.min_version)
+        r('First Image index', 'first_image_index')
+        r('Huffman record offset', 'huffman_record_offset')
+        a('Huffman record count: %d'%self.huffman_record_count)
+        r('DATP record offset', 'datp_record_offset')
+        a('DATP record count: %r'%self.datp_record_count)
+        a('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
         if self.has_drm_data:
-            ans.append('Unknown3: %r'%self.unknown3)
-            ans.append('DRM Offset: %s'%self.drm_offset)
-            ans.append('DRM Count: %s'%self.drm_count)
-            ans.append('DRM Size: %s'%self.drm_size)
-            ans.append('DRM Flags: %r'%self.drm_flags)
+            a('Unknown3: %r'%self.unknown3)
+            r('DRM Offset', 'drm_offset')
+            a('DRM Count: %s'%self.drm_count)
+            a('DRM Size: %s'%self.drm_size)
+            a('DRM Flags: %r'%self.drm_flags)
         if self.has_extra_data_flags:
-            ans.append('Unknown4: %r'%self.unknown4)
-            ans.append('FDST Index: %d'% self.fdst_idx)
-            ans.append('FDST Count: %d'% self.fdst_count)
-            ans.append('FCIS number: %d'% self.fcis_number)
-            ans.append('FCIS count: %d'% self.fcis_count)
-            ans.append('FLIS number: %d'% self.flis_number)
-            ans.append('FLIS count: %d'% self.flis_count)
-            ans.append('Unknown6: %r'% self.unknown6)
-            ans.append('SRCS record index: %d'%self.srcs_record_index)
-            ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
-            ans.append('Unknown7: %r'%self.unknown7)
-            ans.append(('Extra data flags: %s (has multibyte: %s) '
+            a('Unknown4: %r'%self.unknown4)
+            r('FDST Index', 'fdst_idx')
+            a('FDST Count: %d'% self.fdst_count)
+            r('FCIS number', 'fcis_number')
+            a('FCIS count: %d'% self.fcis_count)
+            r('FLIS number', 'flis_number')
+            a('FLIS count: %d'% self.flis_count)
+            a('Unknown6: %r'% self.unknown6)
+            r('SRCS record index', 'srcs_record_index')
+            a('Number of SRCS records?: %d'%self.num_srcs_records)
+            a('Unknown7: %r'%self.unknown7)
+            a(('Extra data flags: %s (has multibyte: %s) '
                 '(has indexing: %s) (has uncrossable breaks: %s)')%(
                     bin(self.extra_data_flags), self.has_multibytes,
                     self.has_indexing_bytes, self.has_uncrossable_breaks ))
-            ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
-                self.primary_index_record))
+            r('NCX index', 'primary_index_record')
         if self.length >= 248:
-            i('Sections Index', self.sect_idx)
-            i('SKEL Index', self.skel_idx)
-            i('DATP Index', self.datp_idx)
-            i('Other Index', self.oth_idx)
+            r('Sections Index', 'sect_idx')
+            r('SKEL Index', 'skel_idx')
+            r('DATP Index', 'datp_idx')
+            r('Other Index', 'oth_idx')
             if self.unknown9:
                 a('Unknown9: %r'%self.unknown9)
 

From 2f6a705e74620ec2f32aeedac0ac5876f498a334 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 12:27:39 +0530
Subject: [PATCH 34/36] ...

---
 src/calibre/devices/android/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 07be4e42c1..d162b6692a 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -29,7 +29,7 @@ class ANDROID(USBMS):
                        0xc86  : [0x100, 0x0227, 0x0226, 0x222],
                        0xc87  : [0x0100, 0x0227, 0x0226],
                        0xc8d  : [0x100, 0x0227, 0x0226, 0x222],
-                       0xc91  : [0x0100, 0x0227, 0x0226],
+                       0xc91  : [0x0100, 0x0227, 0x0226, 0x222],
                        0xc92  : [0x100, 0x0227, 0x0226, 0x222],
                        0xc97  : [0x100, 0x0227, 0x0226, 0x222],
                        0xc99  : [0x100, 0x0227, 0x0226, 0x222],

From ce4655ddd5fb692bb7be094743695103f4f5e2aa Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 18:00:19 +0530
Subject: [PATCH 35/36] KF8 Output: Implement joint mobi files with both MOBI 6
 and KF8 versions in the same file

---
 .../ebooks/conversion/plugins/mobi_output.py  |  8 ++-
 src/calibre/ebooks/mobi/debug/mobi8.py        |  2 +
 src/calibre/ebooks/mobi/writer2/main.py       | 68 ++++++++++++++++++-
 src/calibre/ebooks/mobi/writer8/exth.py       | 15 ++--
 src/calibre/ebooks/mobi/writer8/main.py       |  4 +-
 src/calibre/ebooks/mobi/writer8/mobi.py       | 51 ++++++++------
 6 files changed, 114 insertions(+), 34 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index 98a837e1a3..b73d6341f9 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -174,7 +174,8 @@ class MOBIOutput(OutputFormatPlugin):
                 add_fonts=create_kf8)
         self.check_for_periodical()
 
-        kf8 = self.create_kf8(resources) if create_kf8 else None
+        kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
+                ) if create_kf8 else None
         if mobi_type == 'new':
             kf8.write(output_path)
             self.extract_mobi(output_path, opts)
@@ -183,9 +184,10 @@ class MOBIOutput(OutputFormatPlugin):
         self.log('Creating MOBI 6 output')
         self.write_mobi(input_plugin, output_path, kf8, resources)
 
-    def create_kf8(self, resources):
+    def create_kf8(self, resources, for_joint=False):
         from calibre.ebooks.mobi.writer8.main import create_kf8_book
-        return create_kf8_book(self.oeb, self.opts, resources)
+        return create_kf8_book(self.oeb, self.opts, resources,
+                for_joint=for_joint)
 
     def write_mobi(self, input_plugin, output_path, kf8, resources):
         from calibre.ebooks.mobi.mobiml import MobiMLizer
diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index a91213f889..21ed11fc51 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -139,6 +139,8 @@ class MOBIFile(object):
             self.files.append(File(skel, skeleton, ftext, first_aid, sections))
 
     def dump_flows(self, ddir):
+        if self.fdst is None:
+            raise ValueError('This MOBI file has no FDST record')
         for i, x in enumerate(self.fdst.sections):
             start, end = x
             raw = self.raw_text[start:end]
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index a8fc37ff45..f064fd2625 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -23,6 +23,7 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer
 
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False
+NULL_INDEX = 0xffffffff
 
 class MobiWriter(object):
 
@@ -30,6 +31,7 @@ class MobiWriter(object):
         self.opts = opts
         self.resources = resources
         self.kf8 = kf8
+        self.for_joint = kf8 is not None
         self.write_page_breaks_after_item = write_page_breaks_after_item
         self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
         self.prefer_author_sort = opts.prefer_author_sort
@@ -61,7 +63,7 @@ class MobiWriter(object):
         self.stream = stream
         self.records = [None]
         self.generate_content()
-        self.generate_record0()
+        self.generate_joint_record0() if self.for_joint else self.generate_record0()
         self.write_header()
         self.write_content()
 
@@ -200,8 +202,6 @@ class MobiWriter(object):
         first_image_record = None
         if self.resources:
             used_images = self.serializer.used_images
-            if self.kf8 is not None:
-                used_images |= self.kf8.used_images
             first_image_record  = len(self.records)
             self.resources.serialize(self.records, used_images)
         last_content_record = len(self.records) - 1
@@ -365,6 +365,68 @@ class MobiWriter(object):
         self.records[0] = align_block(record0)
     # }}}
 
+    def generate_joint_record0(self): # {{{
+        ''' Append resource and KF8 records, then build the MOBI 6 record 0 '''
+        from calibre.ebooks.mobi.writer8.mobi import MOBIHeader, HEADER_FIELDS
+        from calibre.ebooks.mobi.writer8.exth import build_exth
+
+        # Insert resource records
+        first_image_record = None
+        old = len(self.records)
+        if self.resources:
+            used_images = self.serializer.used_images | self.kf8.used_images
+            first_image_record = len(self.records)
+            self.resources.serialize(self.records, used_images)
+        resource_record_count = len(self.records) - old
+
+        # Insert KF8 records
+        self.records.append(b'BOUNDARY')
+        kf8_header_index = len(self.records)
+        self.kf8.start_offset = (self.serializer.start_offset,
+                self.kf8.start_offset)
+        self.records.append(self.kf8.record0)
+        self.records.extend(self.kf8.records[1:])
+
+        first_image_record = first_image_record or len(self.records)
+
+        header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS}
+
+        # Now change the header fields that need to be different in the MOBI 6
+        # header
+        header_fields['first_resource_record'] = first_image_record
+        header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
+        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
+        header_fields['extra_data_flags'] = 0b11
+
+        for k, v in {'last_text_record':'last_text_record_idx',
+                'first_non_text_record':'first_non_text_record_idx',
+                'ncx_index':'primary_index_record_idx',
+                }.iteritems():
+            header_fields[k] = getattr(self, v)
+
+        for x in ('skel', 'chunk', 'guide'):
+            header_fields[x+'_index'] = NULL_INDEX
+
+        # Create the MOBI 6 EXTH
+        opts = self.opts
+        kuc = 0 if resource_record_count > 0 else None
+
+        header_fields['exth'] = build_exth(self.oeb.metadata,
+                prefer_author_sort=opts.prefer_author_sort,
+                is_periodical=opts.mobi_periodical,
+                share_not_sync=opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                num_of_resources=resource_record_count,
+                kf8_unknown_count=kuc, be_kindlegen2=True,
+                kf8_header_index=kf8_header_index,
+                start_offset=self.serializer.start_offset,
+                mobi_doctype=2)
+        self.records[0] = MOBIHeader(file_version=6)(**header_fields)
+
+    # }}}
+
     def write_header(self): # PalmDB header {{{
         '''
         Write the PalmDB header
diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py
index b469c01d85..4c0c769668 100644
--- a/src/calibre/ebooks/mobi/writer8/exth.py
+++ b/src/calibre/ebooks/mobi/writer8/exth.py
@@ -27,6 +27,7 @@ EXTH_CODES = {
     'source': 112,
     'versionnumber': 114,
     'startreading': 116,
+    'kf8_header_index': 121,
     'num_of_resources': 125,
     'kf8_unknown_count': 131,
     'coveroffset': 201,
@@ -41,7 +42,7 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
 def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         share_not_sync=True, cover_offset=None, thumbnail_offset=None,
         start_offset=None, mobi_doctype=2, num_of_resources=None,
-        kf8_unknown_count=0, be_kindlegen2=False):
+        kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None):
     exth = BytesIO()
     nrecs = 0
 
@@ -158,9 +159,15 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         except TypeError:
             start_offset = [start_offset]
         for so in start_offset:
-            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
-                so))
-            nrecs += 1
+            if so is not None:
+                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
+                    so))
+                nrecs += 1
+
+    if kf8_header_index is not None:
+        exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
+            kf8_header_index))
+        nrecs += 1
 
     if num_of_resources is not None:
         exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index 19d7e390a9..e35ab5e437 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -380,7 +380,7 @@ class KF8Writer(object):
             self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
             self.guide_records = GuideIndex(self.guide_table)()
 
-def create_kf8_book(oeb, opts, resources):
+def create_kf8_book(oeb, opts, resources, for_joint=False):
     writer = KF8Writer(oeb, opts, resources)
-    return KF8Book(writer)
+    return KF8Book(writer, for_joint=for_joint)
 
diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py
index 1bb83ad4b6..ff096f350b 100644
--- a/src/calibre/ebooks/mobi/writer8/mobi.py
+++ b/src/calibre/ebooks/mobi/writer8/mobi.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import time
+import time, random
 from struct import pack
 
 from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
@@ -25,8 +25,6 @@ class MOBIHeader(Header): # {{{
     the file.
     '''
 
-    FILE_VERSION = 8
-
     DEFINITION = '''
     # 0: Compression
     compression = DYN
@@ -63,7 +61,7 @@ class MOBIHeader(Header): # {{{
     encoding = 65001
 
     # 32: UID
-    uid = random.randint(0, 0xffffffff)
+    uid = DYN
 
     # 36: File version
     file_version = {file_version}
@@ -154,7 +152,7 @@ class MOBIHeader(Header): # {{{
     # 0b1 - extra multibyte bytes after text records
     # 0b10 - TBS indexing data (only used in MOBI 6)
     # 0b100 - uncrossable breaks only used in MOBI 6
-    extra_data_flags = 1
+    extra_data_flags = DYN
 
     # 244: KF8 Indices
     ncx_index = DYN
@@ -171,13 +169,18 @@ class MOBIHeader(Header): # {{{
 
     # Padding to allow amazon's DTP service to add data
     padding = zeroes(8192)
-    '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
+    '''
 
     SHORT_FIELDS = {'compression', 'last_text_record', 'record_size',
             'encryption_type', 'unused2'}
     ALIGN = True
     POSITIONS = {'title_offset':'full_title'}
 
+    def __init__(self, file_version=8):
+        self.DEFINITION = self.DEFINITION.format(file_version=file_version,
+                record_size=RECORD_SIZE)
+        super(MOBIHeader, self).__init__()
+
     def format_value(self, name, val):
         if name == 'compression':
             val = PALMDOC if val else UNCOMPRESSED
@@ -185,14 +188,20 @@ class MOBIHeader(Header): # {{{
 
 # }}}
 
-# Fields that need to be set in the MOBI Header are
+HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
+                    'first_non_text_record', 'title_length', 'language_code',
+                    'first_resource_record', 'exth_flags', 'fdst_record',
+                    'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
+                    'guide_index', 'exth', 'full_title', 'extra_data_flags',
+                    'uid'}
 
 class KF8Book(object):
 
-    def __init__(self, writer):
-        self.build_records(writer)
+    def __init__(self, writer, for_joint=False):
+        self.build_records(writer, for_joint)
+        self.used_images = writer.used_images
 
-    def build_records(self, writer):
+    def build_records(self, writer, for_joint):
         metadata = writer.oeb.metadata
         # The text records
         for x in ('last_text_record_idx', 'first_non_text_record_idx'):
@@ -222,8 +231,10 @@ class KF8Book(object):
         self.first_resource_record = NULL_INDEX
+        before = len(self.records) # Must be set even when there are no resources
         if resources.records:
             self.first_resource_record = len(self.records)
-            self.records.extend(resources.records)
-        self.num_of_resources = len(resources.records)
+            if not for_joint:
+                resources.serialize(self.records, writer.used_images)
+        self.num_of_resources = len(self.records) - before
 
         # FDST
         self.fdst_count = writer.fdst_count
@@ -233,12 +244,13 @@ class KF8Book(object):
         # EOF
         self.records.append(b'\xe9\x8e\r\n') # EOF record
 
-
         # Miscellaneous header fields
         self.compression = writer.compress
         self.book_type = 0x101 if writer.opts.mobi_periodical else 2
         self.full_title = utf8_text(unicode(metadata.title[0]))
         self.title_length = len(self.full_title)
+        self.extra_data_flags = 0b1
+        self.uid = random.randint(0, 0xffffffff)
 
         self.language_code = iana2mobi(str(metadata.language[0]))
         self.exth_flags = 0b1010000
@@ -248,14 +260,14 @@ class KF8Book(object):
         self.opts = writer.opts
         self.start_offset = writer.start_offset
         self.metadata = metadata
+        self.kuc = 0 if len(resources.records) > 0 else None
 
     @property
     def record0(self):
         ''' We generate the EXTH header and record0 dynamically, to allow other
-        code to customize various values after build_record() has been
+        code to customize various values after build_records() has been
         called'''
         opts = self.opts
-        kuc = 0 if self.num_of_resources > 0 else None
         self.exth = build_exth(self.metadata,
                 prefer_author_sort=opts.prefer_author_sort,
                 is_periodical=opts.mobi_periodical,
@@ -263,15 +275,10 @@ class KF8Book(object):
                 cover_offset=self.cover_offset,
                 thumbnail_offset=self.thumbnail_offset,
                 num_of_resources=self.num_of_resources,
-                kf8_unknown_count=kuc, be_kindlegen2=True,
+                kf8_unknown_count=self.kuc, be_kindlegen2=True,
                 start_offset=self.start_offset, mobi_doctype=self.book_type)
 
-        kwargs = {field:getattr(self, field) for field in
-                ('compression', 'text_length', 'last_text_record', 'book_type',
-                    'first_non_text_record', 'title_length', 'language_code',
-                    'first_resource_record', 'exth_flags', 'fdst_record',
-                    'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
-                    'guide_index', 'exth', 'full_title')}
+        kwargs = {field:getattr(self, field) for field in HEADER_FIELDS}
         return MOBIHeader()(**kwargs)
 
     def write(self, outpath):

From e9296b97637a40ab2e84253682c7a4a8a1961011 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 Apr 2012 23:26:53 +0530
Subject: [PATCH 36/36] ...

---
 src/calibre/gui2/dialogs/message_box.py |  7 +++++++
 src/calibre/gui2/proceed.py             | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py
index 64c8bf75ba..e15cd055b9 100644
--- a/src/calibre/gui2/dialogs/message_box.py
+++ b/src/calibre/gui2/dialogs/message_box.py
@@ -158,6 +158,13 @@ _proceed_memory = []
 
 class ProceedNotification(MessageBox): # {{{
 
+    '''
+    WARNING: This class is deprecated. Do not use it as some users have
+    reported crashes when closing the dialog box generated by this class.
+    Instead use: gui.proceed_question(...) The arguments are the same as for
+    this class.
+    '''
+
     def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
             det_msg='', show_copy_button=False, parent=None,
             cancel_callback=None, log_is_file=False):
diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py
index 433b365e35..422e1c0e0f 100644
--- a/src/calibre/gui2/proceed.py
+++ b/src/calibre/gui2/proceed.py
@@ -129,6 +129,25 @@ class ProceedQuestion(QDialog):
     def __call__(self, callback, payload, html_log, log_viewer_title, title,
             msg, det_msg='', show_copy_button=False, cancel_callback=None,
             log_is_file=False):
+        '''
+        A non modal popup that notifies the user that a background task has
+        been completed. This class guarantees that only a single popup is
+        visible at any one time. Other requests are queued and displayed after
+        the user dismisses the current popup.
+
+        :param callback: A callable that is called with payload if the user
+        asks to proceed. Note that this is always called in the GUI thread.
+        :param cancel_callback: A callable that is called with the payload if
+        the user asks not to proceed.
+        :param payload: Arbitrary object, passed to callback
+        :param html_log: An HTML or plain text log
+        :param log_viewer_title: The title for the log viewer window
+        :param title: The title for this popup
+        :param msg: The msg to display
+        :param det_msg: Detailed message
+        :param log_is_file: If True the html_log parameter is interpreted as
+        the path to a file on disk containing the log encoded with utf-8
+        '''
         question = Question(payload, callback, cancel_callback, title, msg,
                 html_log, log_viewer_title, log_is_file, det_msg,
                 show_copy_button)