From 2b826c4974152c707f6b3a54aa9310710f6769a1 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 9 Apr 2012 01:02:09 +0200 Subject: [PATCH 01/36] Improved Read It Later recipe, uses API to get articles feed, and new "Article View" data to get enhanced article content (with images) --- recipes/readitlater.recipe | 171 +++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 75 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 38f7ec1a9a..08196d3a3d 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,36 +1,39 @@ -""" +''' readitlaterlist.com -""" +''' __license__ = 'GPL v3' __copyright__ = ''' -2010, Darko Miletic -2011, Przemyslaw Kryger -2012, tBunnyMan +2011, Keith Callenberg +2012, Alayn Gortazar ''' -from calibre import strftime +from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag +import json +import urllib +import urllib2 - -class Readitlater(BasicNewsRecipe): - title = 'ReadItLater' - __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' - description = '''Personalized news feeds. Go to readitlaterlist.com to setup \ - up your news. This version displays pages of articles from \ - oldest to newest, with max & minimum counts, and marks articles \ - read after downloading.''' +class Readitlaterv2(BasicNewsRecipe): + title = 'Read It Later v2' + __author__ = 'Keith Callenberg' + description = '''Personalized news feeds. Go to readitlaterlist.com to + setup up your news. 
Fill in your account + username, and optionally you can add your password.''' publisher = 'readitlaterlist.com' category = 'news, custom' oldest_article = 7 - max_articles_per_feed = 50 - minimum_articles = 1 + max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False needs_subscription = True - INDEX = u'http://readitlaterlist.com' + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' + INDEX = 'https://readitlaterlist.com/' LOGIN = INDEX + u'/l' - readList = [] + articles = [] + + feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -43,66 +46,84 @@ class Readitlater(BasicNewsRecipe): br.submit() return br - def get_feeds(self): - self.report_progress(0, ('Fetching list of pages...')) - lfeeds = [] - i = 1 - feedurl = self.INDEX + u'/unread/1' - while True: - title = u'Unread articles, page ' + str(i) - lfeeds.insert(0, (title, feedurl)) - self.report_progress(0, ('Got ') + str(i) + (' pages')) - i += 1 - soup = self.index_to_soup(feedurl) - ritem = soup.find('a', attrs={'id':'next', 'class':'active'}) - if ritem is None: - break - feedurl = self.INDEX + ritem['href'] - return lfeeds + def parse_index(self): - totalfeeds = [] - articlesToGrab = self.max_articles_per_feed - lfeeds = self.get_feeds() - for feedobj in lfeeds: - if articlesToGrab < 1: - break - feedtitle, feedurl = feedobj - self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - ritem = soup.find('ul', attrs={'id':'list'}) - for item in reversed(ritem.findAll('li')): - if articlesToGrab < 1: - break - else: - articlesToGrab -= 1 - description = '' - atag = item.find('a', attrs={'class':'text'}) - if atag and atag.has_key('href'): - url = self.INDEX + atag['href'] - title = self.tag_to_string(item.div) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':description - }) - readLink = 
item.find('a', attrs={'class':'check'})['href'] - self.readList.append(readLink) - totalfeeds.append((feedtitle, articles)) - if len(self.readList) < self.minimum_articles: - raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") - return totalfeeds + index = self.INDEX + 'v2/get?' + index += 'apikey=' + self.KEY + index += '&username=' + self.username + '&password=' + self.password + index += '&state=unread' + index += '&count=' + str(self.max_articles_per_feed) - def mark_as_read(self, markList): - br = self.get_browser() - for link in markList: - url = self.INDEX + link - response = br.open(url) - response + open_func = getattr(self.browser, 'open_novisit', self.browser.open) + with closing(open_func(index)) as f: + results = f.read() + if not results: + raise RuntimeError('Could not fetch index!') + + json_obj = json.loads(results) + + if len(json_obj['list']) > 0: + for item in json_obj['list'].iteritems(): + dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id'] + self.articles.append({ + 'title':item[1]['title'], + 'date':item[1]['time_added'], + 'url':dataurl, + 'description':item[1]['item_id'], + 'real_url':item[1]['url'] + }) + return [('Unread', self.articles)] + + def preprocess_raw_html(self, raw_html, url): + # get article and image urls from json object + json_obj = json.loads(raw_html) + self.images = {} + for image in json_obj['article']['images']: + self.images[image] = json_obj['article']['images'][image]['src'] + return json_obj['article']['article'] + + def preprocess_html(self, soup): + # Insert images on RIL_IMG_# divs + for key, url in self.images.iteritems(): + tag = Tag(soup, 'img') + tag['src'] = url + div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div.insert(0, tag) + return soup def cleanup(self): - self.mark_as_read(self.readList) + # From a list of urls, create a human-readable JSON string + # suitable for passing to the ReadItLater SEND::READ method. 
+ + self.markAsRead(self.createMarkList(self.articles)) + + def createMarkList(self, articles): + urls = [] + for article in self.articles: + urls.append(article['real_url']) + items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)] + s = '{\n %s\n}' % (',\n '.join(items),) + return s + + def markAsRead(self, markList): + url = self.INDEX + 'v2/send' + values = { + 'username' : self.username, + 'password' : self.password, + 'apikey' : self.KEY, + 'read' : markList + } + data = urllib.urlencode(values) + + try: + print 'Calling ReadItLater API...' + request = urllib2.Request(url,data) + response = urllib2.urlopen(request) + the_page = response.read() + print 'response =', response.code + except urllib2.HTTPError as e: + print 'The server could not fulfill the request: ', e + except urllib2.URLError as e: + print 'The call to ReadItLater API failed:', e From b81deec83a040ab2645cd14017e69f92edc60410 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 16 Apr 2012 23:05:06 +0200 Subject: [PATCH 02/36] Added title to each article and minimum_recipes support --- recipes/readitlater.recipe | 42 +++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 08196d3a3d..53061dd72a 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -3,7 +3,10 @@ readitlaterlist.com ''' __license__ = 'GPL v3' __copyright__ = ''' +2010, Darko Miletic +2011, Przemyslaw Kryger 2011, Keith Callenberg +2012, tBunnyMan 2012, Alayn Gortazar ''' @@ -14,16 +17,17 @@ import json import urllib import urllib2 -class Readitlaterv2(BasicNewsRecipe): - title = 'Read It Later v2' - __author__ = 'Keith Callenberg' +class Readitlater(BasicNewsRecipe): + title = 'Read It Later' + __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar' description = '''Personalized news feeds. Go to readitlaterlist.com to setup up your news. 
Fill in your account username, and optionally you can add your password.''' publisher = 'readitlaterlist.com' category = 'news, custom' oldest_article = 7 - max_articles_per_feed = 100 + max_articles_per_feed = 50 + minimum_articles = 1 no_stylesheets = True use_embedded_content = False needs_subscription = True @@ -51,7 +55,10 @@ class Readitlaterv2(BasicNewsRecipe): def parse_index(self): index = self.INDEX + 'v2/get?' index += 'apikey=' + self.KEY - index += '&username=' + self.username + '&password=' + self.password + if self.username is not None: + index += '&username=' + self.username + if self.password is not None: + index += '&password=' + self.password index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) @@ -62,10 +69,12 @@ class Readitlaterv2(BasicNewsRecipe): raise RuntimeError('Could not fetch index!') json_obj = json.loads(results) - - if len(json_obj['list']) > 0: + + if len(json_obj['list']) >= self.minimum_articles: for item in json_obj['list'].iteritems(): - dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id'] + # TODO: This URL should be modified by it's corresponding API call in a future. + # Actually is not possible to get the Article View potential throught an API call (12/04/2012) + dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id'] self.articles.append({ 'title':item[1]['title'], 'date':item[1]['time_added'], @@ -73,6 +82,9 @@ class Readitlaterv2(BasicNewsRecipe): 'description':item[1]['item_id'], 'real_url':item[1]['url'] }) + else: + raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") + return [('Unread', self.articles)] def preprocess_raw_html(self, raw_html, url): @@ -81,23 +93,25 @@ class Readitlaterv2(BasicNewsRecipe): self.images = {} for image in json_obj['article']['images']: self.images[image] = json_obj['article']['images'][image]['src'] - return json_obj['article']['article'] + title = '

{title}

'.format(title=json_obj['article']['title']) + link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) + return link + title + json_obj['article']['article'] def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs for key, url in self.images.iteritems(): - tag = Tag(soup, 'img') - tag['src'] = url + imgtag = Tag(soup, 'img') + imgtag['src'] = url div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) - div.insert(0, tag) + div.insert(0, imgtag) return soup def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - self.markAsRead(self.createMarkList(self.articles)) - + #self.markAsRead(self.createMarkList(self.articles)) + return def createMarkList(self, articles): urls = [] From 56aec322cd7aca25ff550b532a1019d12d6cafeb Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:38:46 +0200 Subject: [PATCH 03/36] Added horizontal line between articles --- recipes/readitlater.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 53061dd72a..5e425b8b5f 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -95,7 +95,7 @@ class Readitlater(BasicNewsRecipe): self.images[image] = json_obj['article']['images'][image]['src'] title = '

{title}

'.format(title=json_obj['article']['title']) link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) - return link + title + json_obj['article']['article'] + return link + title + json_obj['article']['article'] + '
' def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs From 2a2ae6bb1403ba96999cb142e90f89c7f1606777 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:40:30 +0200 Subject: [PATCH 04/36] Added recipe for Berria --- recipes/berria.recipe | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 recipes/berria.recipe diff --git a/recipes/berria.recipe b/recipes/berria.recipe new file mode 100644 index 0000000000..240682231e --- /dev/null +++ b/recipes/berria.recipe @@ -0,0 +1,37 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Alayn Gortazar ' +''' +www.berria.info +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Berria(BasicNewsRecipe): + title = 'Berria' + __author__ = 'Alayn Gortazar' + description = 'Euskal Herriko euskarazko egunkaria' + publisher = 'Berria' + category = 'news, politics, Basque Country' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'eu' + remove_empty_feeds = True + masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png' + + keep_only_tags = [ + dict(id='goiburua') + ,dict(name='div', attrs={'class':'testua' }) + ] + remove_tags = [ + dict(name='a', attrs={'class':'iruzkinak'}) + ] + + feeds = [ + (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') +# ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) +# ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) +# ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) +# ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + ] From adf67292fb4641ed4ad10d21348fe6dfb749ce0b Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:42:11 +0200 Subject: [PATCH 05/36] Mark downloaded articles as read --- recipes/readitlater.recipe | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipes/readitlater.recipe 
b/recipes/readitlater.recipe index 5e425b8b5f..50c0cc27eb 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -110,8 +110,7 @@ class Readitlater(BasicNewsRecipe): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - #self.markAsRead(self.createMarkList(self.articles)) - return + self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] From 7bcb500a4766119cae3bea01fb58a77b202c9fd2 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 00:47:00 +0200 Subject: [PATCH 06/36] Improved Berria recipe visualization --- recipes/berria.recipe | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 240682231e..9d5bfe1585 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -22,16 +22,20 @@ class Berria(BasicNewsRecipe): keep_only_tags = [ dict(id='goiburua') + ,dict(name='div', attrs={'class':'burukoak'}) ,dict(name='div', attrs={'class':'testua' }) + ,dict(name='div', attrs={'class':'ber_ikus' }) ] remove_tags = [ dict(name='a', attrs={'class':'iruzkinak'}) ] + + extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}' feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') -# ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) -# ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) -# ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) -# ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) + ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) + ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) + ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) ] From 1111868a36c66e58ba7b02a06876fd0139dd0d8e Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 
13:20:27 +0200 Subject: [PATCH 07/36] Improved Berria recipe styles --- recipes/berria.recipe | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 9d5bfe1585..d987e1224b 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -22,15 +22,15 @@ class Berria(BasicNewsRecipe): keep_only_tags = [ dict(id='goiburua') - ,dict(name='div', attrs={'class':'burukoak'}) - ,dict(name='div', attrs={'class':'testua' }) - ,dict(name='div', attrs={'class':'ber_ikus' }) + ,dict(name='div', attrs={'class':['ber_ikus']}) + ,dict(name='section', attrs={'class':'ber_ikus' }) ] remove_tags = [ dict(name='a', attrs={'class':'iruzkinak'}) + ,dict(name='div', attrs={'class':'laguntzaileak'}) ] - extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}' + extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}' feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') From dda955e67c15baec96482d1e17fe79057b6a27dd Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 13:49:25 +0200 Subject: [PATCH 08/36] Added correct feed url's to Berria recipe --- recipes/berria.recipe | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index d987e1224b..6d2b5e05ec 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -34,8 +34,10 @@ class Berria(BasicNewsRecipe): feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') - ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) + ,(u'Euskal Herria' , u'http://berria.info/rss/euskalherria.xml' ) + ,(u'Ekonomia' , u'http://berria.info/rss/ekonomia.xml' ) + ,(u'Mundua' , 
u'http://berria.info/rss/mundua.xml' ) ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) ] From f9817538923c9d929d3da6193187b46f470d6f85 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 23:06:32 +0200 Subject: [PATCH 09/36] Migrating to getpocket.com --- recipes/readitlater.recipe | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 50c0cc27eb..ec0b9c83b7 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -32,7 +32,7 @@ class Readitlater(BasicNewsRecipe): use_embedded_content = False needs_subscription = True KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' - INDEX = 'https://readitlaterlist.com/' + INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' articles = [] @@ -109,7 +109,6 @@ class Readitlater(BasicNewsRecipe): def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. 
- self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): From de81f45215f18feb3c98338e8abd8a1f90535379 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Thu, 19 Apr 2012 01:25:40 +0200 Subject: [PATCH 10/36] Added "Enhanced version" option to read it later recipe --- recipes/readitlater.recipe | 72 +++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index ec0b9c83b7..c9d39e9082 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -32,33 +32,38 @@ class Readitlater(BasicNewsRecipe): use_embedded_content = False needs_subscription = True KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' + API_TEXT_INDEX = 'https://text.readitlaterlist.com/' + API_INDEX = 'https://readitlaterlist.com/' INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' + enhanced_version = True - articles = [] + articles = [] feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() - if self.username is not None: - br.open(self.LOGIN) - br.select_form(nr=0) - br['feed_id'] = self.username - if self.password is not None: - br['password'] = self.password - br.submit() + if self.enhanced_version: + if self.username is not None: + br.open(self.LOGIN) + br.select_form(nr=0) + br['feed_id'] = self.username + if self.password is not None: + br['password'] = self.password + br.submit() return br - + def get_auth_params(self): + auth_params = 'apikey=' + self.KEY + if self.username is not None: + auth_params += '&username=' + self.username + if self.password is not None: + auth_params += '&password=' + self.password + return auth_params def parse_index(self): - index = self.INDEX + 'v2/get?' - index += 'apikey=' + self.KEY - if self.username is not None: - index += '&username=' + self.username - if self.password is not None: - index += '&password=' + self.password + index = self.API_INDEX + 'v2/get?' 
+ self.get_auth_params() index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) @@ -74,7 +79,11 @@ class Readitlater(BasicNewsRecipe): for item in json_obj['list'].iteritems(): # TODO: This URL should be modified by it's corresponding API call in a future. # Actually is not possible to get the Article View potential throught an API call (12/04/2012) - dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id'] + if self.enhanced_version: + dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id'] + else: + dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() + dataurl += '&url=' + item[1]['url'] self.articles.append({ 'title':item[1]['title'], 'date':item[1]['time_added'], @@ -89,21 +98,26 @@ class Readitlater(BasicNewsRecipe): def preprocess_raw_html(self, raw_html, url): # get article and image urls from json object - json_obj = json.loads(raw_html) - self.images = {} - for image in json_obj['article']['images']: - self.images[image] = json_obj['article']['images'][image]['src'] - title = '

{title}

'.format(title=json_obj['article']['title']) - link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) - return link + title + json_obj['article']['article'] + '
' + if self.enhanced_version: + json_obj = json.loads(raw_html) + self.images = {} + for image in json_obj['article']['images']: + self.images[image] = json_obj['article']['images'][image]['src'] + title = '

{title}

'.format(title=json_obj['article']['title']) + link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) + html = link + title + json_obj['article']['article'] + else: + html = raw_html + return html + '
' def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs - for key, url in self.images.iteritems(): - imgtag = Tag(soup, 'img') - imgtag['src'] = url - div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) - div.insert(0, imgtag) + if self.enhanced_version: + for key, url in self.images.iteritems(): + imgtag = Tag(soup, 'img') + imgtag['src'] = url + div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div.insert(0, imgtag) return soup def cleanup(self): @@ -120,7 +134,7 @@ class Readitlater(BasicNewsRecipe): return s def markAsRead(self, markList): - url = self.INDEX + 'v2/send' + url = self.API_INDEX + 'v2/send' values = { 'username' : self.username, 'password' : self.password, From b32e6085768ae0659b6878ef31ba6d1c25145685 Mon Sep 17 00:00:00 2001 From: Lee Date: Sat, 21 Apr 2012 16:29:22 +0800 Subject: [PATCH 11/36] attempt to eliminate the general problem of italicize matching things inside of tags, headers, etc --- src/calibre/ebooks/conversion/utils.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index acfa80e877..2c1a5cd4d3 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -148,6 +148,7 @@ class HeuristicProcessor(object): return wordcount.words def markup_italicis(self, html): + self.log.debug("\n\n\nitalicize debugging \n\n\n") ITALICIZE_WORDS = [ 'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.', 'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.', @@ -156,28 +157,30 @@ class HeuristicProcessor(object): ] ITALICIZE_STYLE_PATS = [ - ur'(?msu)(?<=[\s>"“\'‘])_(?P[^_]+)_', - ur'(?msu)(?<=[\s>"“\'‘])/(?P[^/\*><]+)/', + ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P[^\*_]+)/\*_', ur'(?msu)(?<=[\s>"“\'‘])~~(?P[^~]+)~~', - ur'(?msu)(?<=[\s>"“\'‘])\*(?P[^\*]+)\*', - ur'(?msu)(?<=[\s>"“\'‘])~(?P[^~]+)~', ur'(?msu)(?<=[\s>"“\'‘])_/(?P[^/_]+)/_', 
ur'(?msu)(?<=[\s>"“\'‘])_\*(?P[^\*_]+)\*_', ur'(?msu)(?<=[\s>"“\'‘])\*/(?P[^/\*]+)/\*', - ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P[^\*_]+)/\*_', ur'(?msu)(?<=[\s>"“\'‘])/:(?P[^:/]+):/', ur'(?msu)(?<=[\s>"“\'‘])\|:(?P[^:\|]+):\|', + ur'(?msu)(?<=[\s>"“\'‘])\*(?P[^\*]+)\*', + ur'(?msu)(?<=[\s>"“\'‘])~(?P[^~]+)~', + ur'(?msu)(?<=[\s>"“\'‘])/(?P[^/\*><]+)/', + ur'(?msu)(?<=[\s>"“\'‘])_(?P[^_]+)_' ] for word in ITALICIZE_WORDS: html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '%s' % word, html) - def sub(mo): - return '%s'%mo.group('words') - + search_text = re.sub(r'(?s)]*>.*?', '', html) + search_text = re.sub(r'<[^>]*>', '', search_text) for pat in ITALICIZE_STYLE_PATS: - html = re.sub(pat, sub, html) - + for match in re.finditer(pat, search_text): + ital_string = str(match.group('words')) + #self.log.debug("italicising "+str(match.group(0))+" with "+ital_string+"") + html = re.sub(re.escape(str(match.group(0))), '%s' % ital_string, html) + return html def markup_chapters(self, html, wordcount, blanks_between_paragraphs): From 43ada84eef994851a79e80fe00bb5a43408fd043 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 00:48:28 +0200 Subject: [PATCH 12/36] Oldest to newest order. 
Added time to cover --- recipes/readitlater.recipe | 43 ++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index c9d39e9082..9cda772354 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -10,9 +10,11 @@ __copyright__ = ''' 2012, Alayn Gortazar ''' +from operator import itemgetter from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +from calibre import strftime import json import urllib import urllib2 @@ -31,16 +33,16 @@ class Readitlater(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False needs_subscription = True + mark_as_read_after_dl = False + enhanced_version = True + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' - API_TEXT_INDEX = 'https://text.readitlaterlist.com/' + API_TEXT_INDEX = 'https://text.readitlaterlist.com/' API_INDEX = 'https://readitlaterlist.com/' INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' - enhanced_version = True articles = [] - - feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -63,9 +65,10 @@ class Readitlater(BasicNewsRecipe): return auth_params def parse_index(self): - index = self.API_INDEX + 'v2/get?' + self.get_auth_params() + index = self.API_INDEX + 'v3/get?' + self.get_auth_params() index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) + index += '&sort=oldest' open_func = getattr(self.browser, 'open_novisit', self.browser.open) with closing(open_func(index)) as f: @@ -85,15 +88,17 @@ class Readitlater(BasicNewsRecipe): dataurl = self.API_TEXT_INDEX + 'v2/text?' 
+ self.get_auth_params() dataurl += '&url=' + item[1]['url'] self.articles.append({ - 'title':item[1]['title'], + 'title':item[1]['resolved_title'], 'date':item[1]['time_added'], 'url':dataurl, 'description':item[1]['item_id'], - 'real_url':item[1]['url'] + 'sort_id':int(item[1]['sort_id']), + 'real_url':item[1]['given_url'] }) else: raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") - + + self.articles = sorted(self.articles, key=itemgetter('sort_id')) return [('Unread', self.articles)] def preprocess_raw_html(self, raw_html, url): @@ -123,7 +128,8 @@ class Readitlater(BasicNewsRecipe): def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - self.markAsRead(self.createMarkList(self.articles)) + if self.mark_as_read_after_dl: + self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] @@ -153,3 +159,22 @@ class Readitlater(BasicNewsRecipe): print 'The server could not fulfill the request: ', e except urllib2.URLError as e: print 'The call to ReadItLater API failed:', e + + def default_cover(self, cover_file): + ''' + Create a generic cover for recipes that don't have a cover + This override adds time to the cover + ''' + try: + from calibre.ebooks import calibre_cover + title = self.title if isinstance(self.title, unicode) else \ + self.title.decode(preferred_encoding, 'replace') + date = strftime(self.timefmt) + time = strftime('[%I:%M %p]') + img_data = calibre_cover(title, date, time) + cover_file.write(img_data) + cover_file.flush() + except: + self.log.exception('Failed to generate default cover') + return False + return True From 6185fa15528f487366fd9f48d1d9f90e684f21c4 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 00:55:24 +0200 Subject: [PATCH 13/36] Changing 'unread' state with 'queue' --- recipes/readitlater.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 9cda772354..26dbe5baa7 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -66,7 +66,7 @@ class Readitlater(BasicNewsRecipe): def parse_index(self): index = self.API_INDEX + 'v3/get?' + self.get_auth_params() - index += '&state=unread' + index += '&state=queue' index += '&count=' + str(self.max_articles_per_feed) index += '&sort=oldest' From 211ff892b235f1c6d56d88df61870293f902686c Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 01:17:10 +0200 Subject: [PATCH 14/36] Making code more PEP8 friendly --- recipes/readitlater.recipe | 54 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 26dbe5baa7..e1c622ee0d 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,7 +1,7 @@ ''' readitlaterlist.com ''' -__license__ = 'GPL v3' +__license__ = 'GPL v3' __copyright__ = ''' 2010, Darko Miletic 2011, Przemyslaw Kryger @@ -10,7 +10,7 @@ __copyright__ = ''' 2012, Alayn Gortazar ''' -from operator import itemgetter +from operator import itemgetter from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag @@ -19,6 +19,7 @@ import json import urllib import urllib2 + class Readitlater(BasicNewsRecipe): title = 'Read It Later' __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar' @@ -35,7 +36,7 @@ class Readitlater(BasicNewsRecipe): needs_subscription = True mark_as_read_after_dl = False enhanced_version = True - + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' API_TEXT_INDEX = 'https://text.readitlaterlist.com/' API_INDEX = 'https://readitlaterlist.com/' @@ -59,16 +60,17 @@ class Readitlater(BasicNewsRecipe): def get_auth_params(self): auth_params = 'apikey=' + self.KEY if self.username is not None: - auth_params += '&username=' + 
self.username + auth_params += '&username=' + self.username if self.password is not None: - auth_params += '&password=' + self.password + auth_params += '&password=' + self.password return auth_params def parse_index(self): + # WARNING: Pre-alpha API, I just figured out this calls params. Surprisingly worked! :) index = self.API_INDEX + 'v3/get?' + self.get_auth_params() index += '&state=queue' - index += '&count=' + str(self.max_articles_per_feed) - index += '&sort=oldest' + index += '&count=' + str(self.max_articles_per_feed) + index += '&sort=oldest' open_func = getattr(self.browser, 'open_novisit', self.browser.open) with closing(open_func(index)) as f: @@ -77,10 +79,10 @@ class Readitlater(BasicNewsRecipe): raise RuntimeError('Could not fetch index!') json_obj = json.loads(results) - + if len(json_obj['list']) >= self.minimum_articles: for item in json_obj['list'].iteritems(): - # TODO: This URL should be modified by it's corresponding API call in a future. + # TODO: This URL should be modified by it's corresponding API call in a future. # Actually is not possible to get the Article View potential throught an API call (12/04/2012) if self.enhanced_version: dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id'] @@ -88,16 +90,16 @@ class Readitlater(BasicNewsRecipe): dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() dataurl += '&url=' + item[1]['url'] self.articles.append({ - 'title':item[1]['resolved_title'], - 'date':item[1]['time_added'], - 'url':dataurl, - 'description':item[1]['item_id'], - 'sort_id':int(item[1]['sort_id']), - 'real_url':item[1]['given_url'] + 'title': item[1]['resolved_title'], + 'date': item[1]['time_added'], + 'url': dataurl, + 'description': item[1]['item_id'], + 'sort_id': int(item[1]['sort_id']), + 'real_url': item[1]['given_url'] }) else: raise Exception("Not enough articles in RIL! 
Change minimum_articles or add more.") - + self.articles = sorted(self.articles, key=itemgetter('sort_id')) return [('Unread', self.articles)] @@ -108,7 +110,7 @@ class Readitlater(BasicNewsRecipe): self.images = {} for image in json_obj['article']['images']: self.images[image] = json_obj['article']['images'][image]['src'] - title = '

{title}

'.format(title=json_obj['article']['title']) + title = '

{title}

'.format(title=json_obj['article']['title']) link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) html = link + title + json_obj['article']['article'] else: @@ -121,37 +123,37 @@ class Readitlater(BasicNewsRecipe): for key, url in self.images.iteritems(): imgtag = Tag(soup, 'img') imgtag['src'] = url - div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div = soup.find('div', attrs={'id': 'RIL_IMG_' + key}) div.insert(0, imgtag) return soup def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - if self.mark_as_read_after_dl: + if self.mark_as_read_after_dl: self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] for article in self.articles: urls.append(article['real_url']) - items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)] + items = ['"%d": {"url": "%s"}' % (n, u) for n, u in enumerate(urls)] s = '{\n %s\n}' % (',\n '.join(items),) return s def markAsRead(self, markList): url = self.API_INDEX + 'v2/send' values = { - 'username' : self.username, - 'password' : self.password, - 'apikey' : self.KEY, - 'read' : markList + 'username': self.username, + 'password': self.password, + 'apikey': self.KEY, + 'read': markList } data = urllib.urlencode(values) - + try: print 'Calling ReadItLater API...' 
- request = urllib2.Request(url,data) + request = urllib2.Request(url, data) response = urllib2.urlopen(request) the_page = response.read() print 'response =', response.code From 857ee6bc8192de5aac2eab03dd04ef669f102eb1 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 01:28:10 +0200 Subject: [PATCH 15/36] Making berria recipe more PEP8 friendly --- recipes/berria.recipe | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 6d2b5e05ec..406a27e36c 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -6,12 +6,13 @@ www.berria.info from calibre.web.feeds.news import BasicNewsRecipe + class Berria(BasicNewsRecipe): title = 'Berria' __author__ = 'Alayn Gortazar' description = 'Euskal Herriko euskarazko egunkaria' publisher = 'Berria' - category = 'news, politics, Basque Country' + category = 'news, politics, sports, Basque Country' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True @@ -21,23 +22,23 @@ class Berria(BasicNewsRecipe): masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png' keep_only_tags = [ - dict(id='goiburua') - ,dict(name='div', attrs={'class':['ber_ikus']}) - ,dict(name='section', attrs={'class':'ber_ikus' }) + dict(id='goiburua'), + dict(name='div', attrs={'class':['ber_ikus']}), + dict(name='section', attrs={'class':'ber_ikus'}) ] remove_tags = [ - dict(name='a', attrs={'class':'iruzkinak'}) - ,dict(name='div', attrs={'class':'laguntzaileak'}) + dict(name='a', attrs={'class':'iruzkinak'}), + dict(name='div', attrs={'class':'laguntzaileak'}) ] extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}' - + feeds = [ - (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') - ,(u'Iritzia' , 
u'http://berria.info/rss/iritzia.xml' ) - ,(u'Euskal Herria' , u'http://berria.info/rss/euskalherria.xml' ) - ,(u'Ekonomia' , u'http://berria.info/rss/ekonomia.xml' ) - ,(u'Mundua' , u'http://berria.info/rss/mundua.xml' ) - ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) - ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'), + (u'Iritzia', u'http://berria.info/rss/iritzia.xml'), + (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'), + (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'), + (u'Mundua', u'http://berria.info/rss/mundua.xml'), + (u'Kirola', u'http://berria.info/rss/kirola.xml'), + (u'Plaza', u'http://berria.info/rss/plaza.xml') ] From a187febc8432a2dfa87bf90496c99b694fb5bb59 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 10:48:32 +0530 Subject: [PATCH 16/36] KF8: Fix handling of multi-level ToCs --- src/calibre/ebooks/mobi/writer8/main.py | 32 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index cc2512549b..f929af80d4 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -297,7 +297,6 @@ class KF8Writer(object): self.chunk_records = ChunkIndex(self.chunk_table)() self.ncx_records = [] toc = self.oeb.toc - max_depth = toc.depth() entries = [] is_periodical = self.opts.mobi_periodical if toc.count() < 2: @@ -307,26 +306,37 @@ class KF8Writer(object): # Flatten the ToC into a depth first list fl = toc.iter() if is_periodical else toc.iterdescendants() for i, item in enumerate(fl): - entry = {'index':i, 'depth': max_depth - item.depth() - (0 if - is_periodical else 1), 'href':item.href, 'label':(item.title or - _('Unknown'))} - entries.append(entry) - for child in item: - child.ncx_parent = entry + entry = {'id': id(item), 'index': i, 'href':item.href, + 'label':(item.title or 
_('Unknown')), + 'children':[]} + entry['depth'] = getattr(item, 'ncx_hlvl', 0) p = getattr(item, 'ncx_parent', None) if p is not None: - entry['parent'] = p['index'] + entry['parent_id'] = p + for child in item: + child.ncx_parent = entry['id'] + child.ncx_hlvl = entry['depth'] + 1 + entry['children'].append(id(child)) if is_periodical: if item.author: entry['author'] = item.author if item.description: entry['description'] = item.description + entries.append(entry) + + # The Kindle requires entries to be sorted by (depth, playorder) + entries.sort(key=lambda entry: (entry['depth'], entry['index'])) + for i, entry in enumerate(entries): + entry['index'] = i + id_to_index = {entry['id']:entry['index'] for entry in entries} for entry in entries: - children = [e for e in entries if e.get('parent', -1) == entry['index']] + children = entry.pop('children') if children: - entry['first_child'] = children[0]['index'] - entry['last_child'] = children[-1]['index'] + entry['first_child'] = id_to_index[children[0]] + entry['last_child'] = id_to_index[children[-1]] + if 'parent_id' in entry: + entry['parent'] = id_to_index[entry.pop('parent_id')] href = entry.pop('href') href, frag = href.partition('#')[0::2] aid = self.id_map.get((href, frag), None) From 8a3dedc7eb33ae54e4586f9301294fdf6fbb90d1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 11:55:11 +0530 Subject: [PATCH 17/36] ... 
--- src/calibre/ebooks/conversion/plugins/mobi_output.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index 4210f7223e..98a837e1a3 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -165,6 +165,8 @@ class MOBIOutput(OutputFormatPlugin): self.log, self.opts, self.oeb = log, opts, oeb mobi_type = tweaks.get('test_mobi_output_type', 'old') + if self.is_periodical: + mobi_type = 'old' # Amazon does not support KF8 periodicals create_kf8 = mobi_type in ('new', 'both') self.remove_html_cover() From b13b7f8a504d9d95b42091c5f5faf0c71d0db3f9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 13:11:24 +0530 Subject: [PATCH 18/36] Fix Der Tagesspiegel --- recipes/tagesspiegel.recipe | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/recipes/tagesspiegel.recipe b/recipes/tagesspiegel.recipe index 92d88d56ae..71191065f1 100644 --- a/recipes/tagesspiegel.recipe +++ b/recipes/tagesspiegel.recipe @@ -34,7 +34,7 @@ class TagesspiegelRSS(BasicNewsRecipe): no_javascript = True remove_empty_feeds = True encoding = 'utf-8' - remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}] + remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-colon'}, {'class':'hcf-date hcf-separate'}] def print_version(self, url): url = url.split('/') @@ -51,6 +51,7 @@ class TagesspiegelRSS(BasicNewsRecipe): return ''.join(div.findAll(text=True, recursive=False)).strip() if div is not None else None articles = {} + links = set() key = None ans = [] maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')}) @@ -59,7 +60,7 @@ class TagesspiegelRSS(BasicNewsRecipe): if div['class'] == 'hcf-header': try: - key = string.capwords(feed_title(div.em.a)) + key = string.capwords(feed_title(div.em)) articles[key] = 
[] ans.append(key) except: @@ -70,6 +71,12 @@ class TagesspiegelRSS(BasicNewsRecipe): if not a: continue url = 'http://www.tagesspiegel.de' + a['href'] + + # check for duplicates + if url in links: + continue + links.add(url) + title = self.tag_to_string(a, use_alt=True).strip() description = '' pubdate = strftime('%a, %d %b') From 2951a9c696762aeabf17ad563bc043a37bb1ceab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 13:19:14 +0530 Subject: [PATCH 19/36] KF8 debug: Dump the guide --- src/calibre/ebooks/mobi/debug/index.py | 24 ++++++++++++++++++++++++ src/calibre/ebooks/mobi/debug/mobi8.py | 9 ++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/debug/index.py b/src/calibre/ebooks/mobi/debug/index.py index 94f252e231..6065d12e5e 100644 --- a/src/calibre/ebooks/mobi/debug/index.py +++ b/src/calibre/ebooks/mobi/debug/index.py @@ -21,6 +21,8 @@ Elem = namedtuple('Chunk', 'insert_pos toc_text file_number sequence_number start_pos ' 'length') +GuideRef = namedtuple('GuideRef', 'type title pos_fid') + def read_index(sections, idx, codec): table, cncx = OrderedDict(), CNCX([], codec) @@ -124,6 +126,28 @@ class SECTIndex(Index): ) ) +class GuideIndex(Index): + + def __init__(self, guideidx, records, codec): + super(GuideIndex, self).__init__(guideidx, records, codec) + self.records = [] + + if self.table is not None: + for i, text in enumerate(self.table.iterkeys()): + tag_map = self.table[text] + if set(tag_map.iterkeys()) not in ({1, 6}, {1, 2, 3}): + raise ValueError('Guide Index has unknown tags: %s'% + tag_map) + + title = self.cncx[tag_map[1][0]] + self.records.append(GuideRef( + text, + title, + tag_map[6] if 6 in tag_map else (tag_map[2], tag_map[3]) + ) + ) + + class NCXIndex(Index): def __init__(self, ncxidx, records, codec): diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index 4306d565e2..a91213f889 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ 
b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -12,7 +12,8 @@ from itertools import izip from calibre import CurrentDir from calibre.ebooks.mobi.debug.headers import TextRecord -from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex) +from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex, + GuideIndex) from calibre.ebooks.mobi.utils import read_font_record from calibre.ebooks.mobi.debug import format_bytes from calibre.ebooks.mobi.reader.headers import NULL_INDEX @@ -114,6 +115,8 @@ class MOBIFile(object): self.header.encoding) self.ncx_index = NCXIndex(self.header.primary_index_record, self.mf.records, self.header.encoding) + self.guide_index = GuideIndex(self.header.oth_idx, self.mf.records, + self.header.encoding) def build_files(self): text = self.raw_text @@ -211,6 +214,10 @@ def inspect_mobi(mobi_file, ddir): with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo: fo.write(str(f.ncx_index).encode('utf-8')) + with open(os.path.join(ddir, 'guide.record'), 'wb') as fo: + fo.write(str(f.guide_index).encode('utf-8')) + + for part in f.files: part.dump(os.path.join(ddir, 'files')) From 898cd84b726cc29832f7e541e328dc57dfaf3bf3 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 10:37:24 +0200 Subject: [PATCH 20/36] changing a dot --- recipes/berria.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 406a27e36c..06f8344988 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2012, Alayn Gortazar ' +__copyright__ = '2012, Alayn Gortazar ' ''' www.berria.info ''' From 9f7a30d3780fb4ca86c80dda9938dcfeee51644b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 14:39:57 +0530 Subject: [PATCH 21/36] KF8: Improved handling of startoffset --- src/calibre/ebooks/mobi/reader/mobi8.py | 25 +++++++++++++------------ src/calibre/ebooks/mobi/writer8/exth.py | 11 
++++++++--- src/calibre/ebooks/mobi/writer8/main.py | 10 +++++----- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index bf068eb498..dcf2f998b2 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -109,7 +109,7 @@ class Mobi8Reader(object): table, cncx = read_index(self.kf8_sections, self.header.othidx, self.header.codec) Item = namedtuple('Item', - 'type title div_frag_num') + 'type title pos_fid') for i, ref_type in enumerate(table.iterkeys()): tag_map = table[ref_type] @@ -119,7 +119,7 @@ class Mobi8Reader(object): if 3 in tag_map.keys(): fileno = tag_map[3][0] if 6 in tag_map.keys(): - fileno = tag_map[6][0] + fileno = tag_map[6] self.guide.append(Item(ref_type.decode(self.header.codec), title, fileno)) @@ -287,23 +287,24 @@ class Mobi8Reader(object): def create_guide(self): guide = Guide() - for ref_type, ref_title, fileno in self.guide: + has_start = False + for ref_type, ref_title, pos_fid in self.guide: try: - elem = self.elems[fileno] - except IndexError: - # Happens for thumbnailstandard in Amazon book samples - continue - fi = self.get_file_info(elem.insert_pos) - idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec) - linktgt = fi.filename + if len(pos_fid) != 2: + continue + except TypeError: + continue # thumbnailstandard record, ignore it + linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid) if idtext: linktgt += b'#' + idtext - g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu()) + g = Guide.Reference(linktgt, os.getcwdu()) g.title, g.type = ref_title, ref_type + if g.title == 'start' or g.type == 'text': + has_start = True guide.append(g) so = self.header.exth.start_offset - if so not in {None, NULL_INDEX}: + if so not in {None, NULL_INDEX} and not has_start: fi = self.get_file_info(so) if fi.filename is not None: idtext = self.get_id_tag(so).decode(self.header.codec) diff 
--git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py index 867e2c3112..b469c01d85 100644 --- a/src/calibre/ebooks/mobi/writer8/exth.py +++ b/src/calibre/ebooks/mobi/writer8/exth.py @@ -153,9 +153,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False, nrecs += 1 if start_offset is not None: - exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, - start_offset)) - nrecs += 1 + try: + len(start_offset) + except TypeError: + start_offset = [start_offset] + for so in start_offset: + exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, + so)) + nrecs += 1 if num_of_resources is not None: exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12, diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index f929af80d4..a2148546f8 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -368,11 +368,11 @@ class KF8Writer(object): if aid is None: continue pos, fid = self.aid_offset_map[aid] - if is_guide_ref_start(ref) and fid == 0: - # If fid != 0 then we cannot represent the start position as a - # single number in the EXTH header, so we do not write it to - # EXTH - self.start_offset = pos + if is_guide_ref_start(ref): + chunk = self.chunk_table[pos] + skel = [s for s in self.skel_table if s.file_number == + chunk.file_number][0] + self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid self.guide_table.append(GuideRef(ref.title or _('Unknown'), ref.type, (pos, fid))) From 33bdde0edf4f9084c8ffe2bb276cf6caed312ff4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 15:10:35 +0530 Subject: [PATCH 22/36] ... 
--- src/calibre/ebooks/mobi/writer8/index.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer8/index.py b/src/calibre/ebooks/mobi/writer8/index.py index a3d5c6763f..c37afb81ff 100644 --- a/src/calibre/ebooks/mobi/writer8/index.py +++ b/src/calibre/ebooks/mobi/writer8/index.py @@ -284,7 +284,10 @@ class GuideIndex(Index): class NCXIndex(Index): - control_byte_count = 2 + ''' The commented out parts have been seen in NCX indexes from MOBI 6 + periodicals. Since we have no MOBI 8 periodicals to reverse engineer, leave + it for now. ''' + # control_byte_count = 2 tag_types = tuple(map(TagMeta, ( ('offset', 1, 1, 1, 0), ('length', 2, 1, 2, 0), @@ -295,12 +298,12 @@ class NCXIndex(Index): ('last_child', 23, 1, 64, 0), ('pos_fid', 6, 2, 128, 0), EndTagTable, - ('image', 69, 1, 1, 0), - ('description', 70, 1, 2, 0), - ('author', 71, 1, 4, 0), - ('caption', 72, 1, 8, 0), - ('attribution', 73, 1, 16, 0), - EndTagTable + # ('image', 69, 1, 1, 0), + # ('description', 70, 1, 2, 0), + # ('author', 71, 1, 4, 0), + # ('caption', 72, 1, 8, 0), + # ('attribution', 73, 1, 16, 0), + # EndTagTable ))) def __init__(self, toc_table): From b7be75ad8cdf50c4d31d60192aa2c333b37c083e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 15:50:44 +0530 Subject: [PATCH 23/36] ... 
--- src/calibre/gui2/complete.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py index b5c1fc8b3e..fb1f39dfa3 100644 --- a/src/calibre/gui2/complete.py +++ b/src/calibre/gui2/complete.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt, - QApplication, QCompleter, QMetaObject) + QApplication, QCompleter, pyqtSignal) from calibre.utils.icu import sort_key, lower from calibre.gui2 import NONE @@ -158,6 +158,8 @@ class MultiCompleteLineEdit(QLineEdit, LineEditECM): class MultiCompleteComboBox(EnComboBox): + clear_edit_text = pyqtSignal() + def __init__(self, *args): EnComboBox.__init__(self, *args) self.setLineEdit(MultiCompleteLineEdit(self)) @@ -169,6 +171,8 @@ class MultiCompleteComboBox(EnComboBox): self.dummy_model = CompleteModel(self) c.setModel(self.dummy_model) self.lineEdit()._completer.setWidget(self) + self.clear_edit_text.connect(self.clearEditText, + type=Qt.QueuedConnection) def update_items_cache(self, complete_items): self.lineEdit().update_items_cache(complete_items) @@ -191,8 +195,7 @@ class MultiCompleteComboBox(EnComboBox): what = unicode(what) le = self.lineEdit() if not what.strip(): - QMetaObject.invokeMethod(self, 'clearEditText', - Qt.QueuedConnection) + self.clear_edit_text.emit() else: self.setEditText(what) le.selectAll() From 75a066b36421a13898f2a8da7ea90488d6864436 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 16:21:00 +0530 Subject: [PATCH 24/36] Show cover size in a tooltip in the conversion dialog --- src/calibre/gui2/convert/metadata.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py index 80311502e8..68e90be9ef 100644 --- a/src/calibre/gui2/convert/metadata.py +++ b/src/calibre/gui2/convert/metadata.py @@ -95,9 +95,15 @@ class MetadataWidget(Widget, Ui_Form): if not pm.isNull(): 
self.cover.setPixmap(pm) self.cover_data = cover + self.set_cover_tooltip(pm) else: self.cover.setPixmap(QPixmap(I('default_cover.png'))) + self.cover.setToolTip(_('This book has no cover')) + def set_cover_tooltip(self, pm): + tt = _('Cover size: %(width)d x %(height)d pixels') % dict( + width=pm.width(), height=pm.height()) + self.cover.setToolTip(tt) def initialize_combos(self): self.initalize_authors() @@ -205,6 +211,7 @@ class MetadataWidget(Widget, Ui_Form): d.exec_() else: self.cover_path.setText(_file) + self.set_cover_tooltip(pix) self.cover.setPixmap(pix) self.cover_changed = True self.cpixmap = pix From 6b412476ab3874ef3240c63a7340761e1d7441d2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 16:27:03 +0530 Subject: [PATCH 25/36] Cover Browser: Wrap the title on space only, not in between words. Fixes #986516 (Cover Browser - font size, divided words) --- src/calibre/gui2/pictureflow/pictureflow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp index 28c29ee2c7..88fff1fd2c 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.cpp +++ b/src/calibre/gui2/pictureflow/pictureflow.cpp @@ -99,7 +99,7 @@ typedef unsigned short QRgb565; #define PFREAL_ONE (1 << PFREAL_SHIFT) #define PFREAL_HALF (PFREAL_ONE >> 1) -#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextWrapAnywhere|Qt::TextHideMnemonic|Qt::AlignCenter) +#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextHideMnemonic|Qt::AlignCenter) inline PFreal fmul(PFreal a, PFreal b) { From fe1d9582886b704b380b63c972895b73a4fe1763 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 17:34:38 +0530 Subject: [PATCH 26/36] Fix #986958 (Tooltip not updating when paste cover) --- src/calibre/gui2/book_details.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 4b12335fe3..cf5bfd14d3 100644 --- 
a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -334,6 +334,7 @@ class CoverView(QWidget): # {{{ self.pixmap = pmap self.do_layout() self.update() + self.update_tooltip() if not config['disable_animations']: self.animation.start() id_ = self.data.get('id', None) From 0c929941281525c45b5c7e1c0d6c37409d708773 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 18:53:31 +0530 Subject: [PATCH 27/36] Fix #986658 (Calibre crashes when updating data on Metadata) --- src/calibre/gui2/actions/edit_metadata.py | 6 +- src/calibre/gui2/proceed.py | 151 ++++++++++++++++++++++ src/calibre/gui2/ui.py | 2 + 3 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 src/calibre/gui2/proceed.py diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index a58bae25fd..21cba758e8 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -115,14 +115,12 @@ class EditMetadataAction(InterfaceAction): ' "Show details" to see which books.')%num payload = (id_map, tdir, log_file, lm_map) - from calibre.gui2.dialogs.message_box import ProceedNotification - p = ProceedNotification(self.apply_downloaded_metadata, + self.gui.proceed_question(self.apply_downloaded_metadata, payload, log_file, _('Download log'), _('Download complete'), msg, det_msg=det_msg, show_copy_button=show_copy_button, cancel_callback=lambda x:self.cleanup_bulk_download(tdir), - parent=self.gui, log_is_file=True) - p.show() + log_is_file=True) def apply_downloaded_metadata(self, payload): good_ids, tdir, log_file, lm_map = payload diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py new file mode 100644 index 0000000000..433b365e35 --- /dev/null +++ b/src/calibre/gui2/proceed.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + 
+__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from collections import namedtuple + +from PyQt4.Qt import (QDialog, Qt, QLabel, QGridLayout, QPixmap, + QDialogButtonBox, QApplication, QSize, pyqtSignal, QIcon, + QPlainTextEdit) + +from calibre.constants import __version__ +from calibre.gui2.dialogs.message_box import ViewLog + +Question = namedtuple('Question', 'payload callback cancel_callback ' + 'title msg html_log log_viewer_title log_is_file det_msg ' + 'show_copy_button') + +class ProceedQuestion(QDialog): + + ask_question = pyqtSignal(object, object) + + def __init__(self, parent): + QDialog.__init__(self, parent) + self.setAttribute(Qt.WA_DeleteOnClose, False) + self.setWindowIcon(QIcon(I('dialog_question.png'))) + + self.questions = [] + + self._l = l = QGridLayout(self) + self.setLayout(l) + + self.icon_label = ic = QLabel(self) + ic.setPixmap(QPixmap(I('dialog_question.png'))) + self.msg_label = msg = QLabel('some random filler text') + msg.setWordWrap(True) + ic.setMaximumWidth(110) + ic.setMaximumHeight(100) + ic.setScaledContents(True) + ic.setStyleSheet('QLabel { margin-right: 10px }') + self.bb = QDialogButtonBox(QDialogButtonBox.Yes|QDialogButtonBox.No) + self.bb.accepted.connect(self.accept) + self.bb.rejected.connect(self.reject) + self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole) + self.log_button.setIcon(QIcon(I('debug.png'))) + self.log_button.clicked.connect(self.show_log) + self.copy_button = self.bb.addButton(_('&Copy to clipboard'), + self.bb.ActionRole) + self.copy_button.clicked.connect(self.copy_to_clipboard) + self.show_det_msg = _('Show &details') + self.hide_det_msg = _('Hide &details') + self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole) + self.det_msg_toggle.clicked.connect(self.toggle_det_msg) + self.det_msg_toggle.setToolTip( + _('Show detailed information about this error')) + self.det_msg = QPlainTextEdit(self) + 
self.det_msg.setReadOnly(True) + self.bb.button(self.bb.Yes).setDefault(True) + + l.addWidget(ic, 0, 0, 1, 1) + l.addWidget(msg, 0, 1, 1, 1) + l.addWidget(self.det_msg, 1, 0, 1, 2) + l.addWidget(self.bb, 2, 0, 1, 2) + + self.ask_question.connect(self.do_ask_question, + type=Qt.QueuedConnection) + + def copy_to_clipboard(self, *args): + QApplication.clipboard().setText( + 'calibre, version %s\n%s: %s\n\n%s' % + (__version__, unicode(self.windowTitle()), + unicode(self.msg_label.text()), + unicode(self.det_msg.toPlainText()))) + self.copy_button.setText(_('Copied')) + + def accept(self): + if self.questions: + payload, callback, cancel_callback = self.questions[0][:3] + self.questions = self.questions[1:] + self.ask_question.emit(callback, payload) + self.hide() + + def reject(self): + if self.questions: + payload, callback, cancel_callback = self.questions[0][:3] + self.questions = self.questions[1:] + self.ask_question.emit(cancel_callback, payload) + self.hide() + + def do_ask_question(self, callback, payload): + if callable(callback): + callback(payload) + self.show_question() + + def toggle_det_msg(self, *args): + vis = unicode(self.det_msg_toggle.text()) == self.hide_det_msg + self.det_msg_toggle.setText(self.show_det_msg if vis else + self.hide_det_msg) + self.det_msg.setVisible(not vis) + self.do_resize() + + def do_resize(self): + sz = self.sizeHint() + QSize(100, 0) + sz.setWidth(min(500, sz.width())) + sz.setHeight(min(500, sz.height())) + self.resize(sz) + + def show_question(self): + if self.isVisible(): return + if self.questions: + question = self.questions[0] + self.msg_label.setText(question.msg) + self.setWindowTitle(question.title) + self.log_button.setVisible(bool(question.html_log)) + self.copy_button.setVisible(bool(question.show_copy_button)) + self.det_msg.setPlainText(question.det_msg or '') + self.det_msg.setVisible(False) + self.det_msg_toggle.setVisible(bool(question.det_msg)) + self.det_msg_toggle.setText(self.show_det_msg) + 
self.bb.button(self.bb.Yes).setDefault(True) + self.do_resize() + self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason) + self.show() + + def __call__(self, callback, payload, html_log, log_viewer_title, title, + msg, det_msg='', show_copy_button=False, cancel_callback=None, + log_is_file=False): + question = Question(payload, callback, cancel_callback, title, msg, + html_log, log_viewer_title, log_is_file, det_msg, + show_copy_button) + self.questions.append(question) + self.show_question() + + def show_log(self): + if self.questions: + q = self.questions[0] + log = q.html_log + if q.log_is_file: + with open(log, 'rb') as f: + log = f.read().decode('utf-8') + self.log_viewer = ViewLog(q.log_viewer_title, log, + parent=self) + +if __name__ == '__main__': + app = QApplication([]) + ProceedQuestion(None).exec_() + diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index fa62dba4bc..84abda8f12 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -43,6 +43,7 @@ from calibre.gui2.tag_browser.ui import TagBrowserMixin from calibre.gui2.keyboard import Manager from calibre.gui2.auto_add import AutoAdder from calibre.library.sqlite import sqlite, DatabaseException +from calibre.gui2.proceed import ProceedQuestion class Listener(Thread): # {{{ @@ -109,6 +110,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ MainWindow.__init__(self, opts, parent=parent, disable_automatic_gc=True) self.proceed_requested.connect(self.do_proceed, type=Qt.QueuedConnection) + self.proceed_question = ProceedQuestion(self) self.keyboard = Manager(self) _gui = self self.opts = opts From 4374e16bc8f6b90e7fc3bc9c8e2478a354e6a1b4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 23:48:14 +0530 Subject: [PATCH 28/36] tighten format_field_extended --- src/calibre/ebooks/metadata/book/base.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py 
b/src/calibre/ebooks/metadata/book/base.py index ce80486af8..63d8ffacf4 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -650,11 +650,7 @@ class Metadata(object): res = self.get(key, None) cmeta = self.get_user_metadata(key, make_copy=False) name = unicode(cmeta['name']) - if cmeta['datatype'] != 'composite' and (res is None or res == ''): - return (name, res, None, None) - orig_res = res - cmeta = self.get_user_metadata(key, make_copy=False) - if res is None or res == '': + if res in {None, ''}: return (name, res, None, None) orig_res = res datatype = cmeta['datatype'] From 5bac9086706e8f63076053d7ca0f4ece5e11da0d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Apr 2012 23:55:48 +0530 Subject: [PATCH 29/36] ... --- src/calibre/ebooks/mobi/writer8/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index a2148546f8..4a54a73ca4 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -377,6 +377,7 @@ class KF8Writer(object): _('Unknown'), ref.type, (pos, fid))) if self.guide_table: + self.guide_table.sort(key=lambda x:x.type) self.guide_records = GuideIndex(self.guide_table)() def create_kf8_book(oeb, opts, resources): From 9ca2f906ba247bc4c1aa8e890255384bf3ab0bad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 00:25:18 +0530 Subject: [PATCH 30/36] ... 
--- src/calibre/ebooks/mobi/writer8/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index 4a54a73ca4..19d7e390a9 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -377,7 +377,7 @@ class KF8Writer(object): _('Unknown'), ref.type, (pos, fid))) if self.guide_table: - self.guide_table.sort(key=lambda x:x.type) + self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle self.guide_records = GuideIndex(self.guide_table)() def create_kf8_book(oeb, opts, resources): From 81a94b287b5fb7c8abbb0b68e0f2a3e5d32ad9c8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 06:50:12 +0530 Subject: [PATCH 31/36] oops, remove the mistaken tightening of gotmat_field_extended --- src/calibre/ebooks/metadata/book/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 63d8ffacf4..ce80486af8 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -650,7 +650,11 @@ class Metadata(object): res = self.get(key, None) cmeta = self.get_user_metadata(key, make_copy=False) name = unicode(cmeta['name']) - if res in {None, ''}: + if cmeta['datatype'] != 'composite' and (res is None or res == ''): + return (name, res, None, None) + orig_res = res + cmeta = self.get_user_metadata(key, make_copy=False) + if res is None or res == '': return (name, res, None, None) orig_res = res datatype = cmeta['datatype'] From 6b9fc3d0abbe088724e44c88cefac52f3965e799 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 09:03:45 +0530 Subject: [PATCH 32/36] ... 
--- src/calibre/ebooks/mobi/debug/headers.py | 4 ++-- src/calibre/manual/conversion.rst | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py index 1d2c5b78b9..1799d403f1 100644 --- a/src/calibre/ebooks/mobi/debug/headers.py +++ b/src/calibre/ebooks/mobi/debug/headers.py @@ -327,7 +327,7 @@ class MOBIHeader(object): # {{{ self.primary_index_record, = struct.unpack(b'>I', self.raw[244:248]) - if self.file_version >= 8: + if self.length >= 248: (self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx ) = struct.unpack_from(b'>4L', self.raw, 248) self.unknown9 = self.raw[264:self.length] @@ -414,7 +414,7 @@ class MOBIHeader(object): # {{{ self.has_indexing_bytes, self.has_uncrossable_breaks )) ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX, self.primary_index_record)) - if self.file_version >= 8: + if self.length >= 248: i('Sections Index', self.sect_idx) i('SKEL Index', self.skel_idx) i('DATP Index', self.datp_idx) diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index c37c1eafdb..f6fe04dd90 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -573,6 +573,18 @@ There is a Word macro package that can automate the conversion of Word documents generating the Table of Contents much simpler. It is called BookCreator and is available for free at `mobileread `_. +An easy way to generate a Table of Contents when converting a Word document is: + + 1. Mark your Chapters and sub-Chapters in the doc file with one of the MS built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6'. 'Heading 1' equates to the HTML tag

<h1>, 'Heading 2' to <h2>
etc + + 2. Save the doc as Webpage-filtered (rather than Webpage) and import the html file into |app| + + 3. When you convert in |app| you use what you did in step 1 to set the box called 'Detect chapters at' on the Convert - Structure Detection page. For example: + + * If you mark Chapters with style 'Heading 2' then set the 'Detect chapters at' box to //h:h2 This will give you a proper external metadata TOC in the converted epub. + * A slightly more complex example...if your book has Sections and Chapters and you want a 2-level nested metadata TOC. Mark the doc Sections with style 'Heading 2' and the Chapters with style 'Heading 3'. When you convert set the 'Detect chapters at' box to //h:h2|//h:h3. On the Convert - TOC page set the 'Level 1 TOC' box to //h:h2 and the 'Level 2 TOC' box to //h:h3. + + Convert TXT documents ~~~~~~~~~~~~~~~~~~~~~~ From 6d3bd67c9336fe4517c395a3db7b92e1f84b860c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 09:33:37 +0530 Subject: [PATCH 33/36] Nicer dump headers for joint KF8 files --- src/calibre/ebooks/mobi/debug/headers.py | 128 ++++++++++++----------- 1 file changed, 69 insertions(+), 59 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py index 1799d403f1..77a31606e2 100644 --- a/src/calibre/ebooks/mobi/debug/headers.py +++ b/src/calibre/ebooks/mobi/debug/headers.py @@ -337,11 +337,12 @@ class MOBIHeader(object): # {{{ # The following are all relative to the position of the header record # make them absolute for ease of debugging - for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx', + self.relative_records = {'sect_idx', 'skel_idx', 'datp_idx', 'oth_idx', 'meta_orth_indx', 'huffman_record_offset', 'first_non_book_record', 'datp_record_offset', 'fcis_number', 'flis_number', 'primary_index_record', 'fdst_idx', - 'first_image_index'): + 'first_image_index'} + for x in self.relative_records: if hasattr(self, x) and getattr(self, x) != NULL_INDEX: 
setattr(self, x, self.header_offset+getattr(self, x)) @@ -355,70 +356,79 @@ class MOBIHeader(object): # {{{ def __str__(self): ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20] + a = ans.append - i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x)) - ans.append('Compression: %s'%self.compression) - ans.append('Unused: %r'%self.unused) - ans.append('Number of text records: %d'%self.number_of_text_records) - ans.append('Text record size: %d'%self.text_record_size) - ans.append('Encryption: %s'%self.encryption_type) - ans.append('Unknown: %r'%self.unknown) - ans.append('Identifier: %r'%self.identifier) - ans.append('Header length: %d'% self.length) - ans.append('Type: %s'%self.type) - ans.append('Encoding: %s'%self.encoding) - ans.append('UID: %r'%self.uid) - ans.append('File version: %d'%self.file_version) - i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx) - i('Meta Infl Index', self.meta_infl_indx) - ans.append('Secondary index record: %d (null val: %d)'%( - self.secondary_index_record, NULL_INDEX)) - ans.append('Reserved: %r'%self.reserved) - ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX, - self.first_non_book_record)) - ans.append('Full name offset: %d'%self.fullname_offset) - ans.append('Full name length: %d bytes'%self.fullname_length) - ans.append('Langcode: %r'%self.locale_raw) - ans.append('Language: %s'%self.language) - ans.append('Sub language: %s'%self.sublanguage) - ans.append('Input language: %r'%self.input_language) - ans.append('Output language: %r'%self.output_langauage) - ans.append('Min version: %d'%self.min_version) - ans.append('First Image index: %d'%self.first_image_index) - ans.append('Huffman record offset: %d'%self.huffman_record_offset) - ans.append('Huffman record count: %d'%self.huffman_record_count) - ans.append('DATP record offset: %r'%self.datp_record_offset) - ans.append('DATP record count: %r'%self.datp_record_count) - ans.append('EXTH flags: %s 
(%s)'%(bin(self.exth_flags)[2:], self.has_exth)) + + def i(d, x): + x = 'NULL' if x == NULL_INDEX else x + a('%s: %s'%(d, x)) + + def r(d, attr): + x = getattr(self, attr) + if attr in self.relative_records and x != NULL_INDEX: + a('%s: Absolute: %d Relative: %d'%(d, x, x-self.header_offset)) + else: + i(d, x) + + a('Compression: %s'%self.compression) + a('Unused: %r'%self.unused) + a('Number of text records: %d'%self.number_of_text_records) + a('Text record size: %d'%self.text_record_size) + a('Encryption: %s'%self.encryption_type) + a('Unknown: %r'%self.unknown) + a('Identifier: %r'%self.identifier) + a('Header length: %d'% self.length) + a('Type: %s'%self.type) + a('Encoding: %s'%self.encoding) + a('UID: %r'%self.uid) + a('File version: %d'%self.file_version) + r('Meta Orth Index', 'meta_orth_indx') + r('Meta Infl Index', 'meta_infl_indx') + r('Secondary index record', 'secondary_index_record') + a('Reserved: %r'%self.reserved) + r('First non-book record', 'first_non_book_record') + a('Full name offset: %d'%self.fullname_offset) + a('Full name length: %d bytes'%self.fullname_length) + a('Langcode: %r'%self.locale_raw) + a('Language: %s'%self.language) + a('Sub language: %s'%self.sublanguage) + a('Input language: %r'%self.input_language) + a('Output language: %r'%self.output_langauage) + a('Min version: %d'%self.min_version) + r('First Image index', 'first_image_index') + r('Huffman record offset', 'huffman_record_offset') + a('Huffman record count: %d'%self.huffman_record_count) + r('DATP record offset', 'datp_record_offset') + a('DATP record count: %r'%self.datp_record_count) + a('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth)) if self.has_drm_data: - ans.append('Unknown3: %r'%self.unknown3) - ans.append('DRM Offset: %s'%self.drm_offset) - ans.append('DRM Count: %s'%self.drm_count) - ans.append('DRM Size: %s'%self.drm_size) - ans.append('DRM Flags: %r'%self.drm_flags) + a('Unknown3: %r'%self.unknown3) + r('DRM Offset', 'drm_offset') + a('DRM 
Count: %s'%self.drm_count) + a('DRM Size: %s'%self.drm_size) + a('DRM Flags: %r'%self.drm_flags) if self.has_extra_data_flags: - ans.append('Unknown4: %r'%self.unknown4) - ans.append('FDST Index: %d'% self.fdst_idx) - ans.append('FDST Count: %d'% self.fdst_count) - ans.append('FCIS number: %d'% self.fcis_number) - ans.append('FCIS count: %d'% self.fcis_count) - ans.append('FLIS number: %d'% self.flis_number) - ans.append('FLIS count: %d'% self.flis_count) - ans.append('Unknown6: %r'% self.unknown6) - ans.append('SRCS record index: %d'%self.srcs_record_index) - ans.append('Number of SRCS records?: %d'%self.num_srcs_records) - ans.append('Unknown7: %r'%self.unknown7) - ans.append(('Extra data flags: %s (has multibyte: %s) ' + a('Unknown4: %r'%self.unknown4) + r('FDST Index', 'fdst_idx') + a('FDST Count: %d'% self.fdst_count) + r('FCIS number', 'fcis_number') + a('FCIS count: %d'% self.fcis_count) + r('FLIS number', 'flis_number') + a('FLIS count: %d'% self.flis_count) + a('Unknown6: %r'% self.unknown6) + r('SRCS record index', 'srcs_record_index') + a('Number of SRCS records?: %d'%self.num_srcs_records) + a('Unknown7: %r'%self.unknown7) + a(('Extra data flags: %s (has multibyte: %s) ' '(has indexing: %s) (has uncrossable breaks: %s)')%( bin(self.extra_data_flags), self.has_multibytes, self.has_indexing_bytes, self.has_uncrossable_breaks )) - ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX, - self.primary_index_record)) + r('NCX index', 'primary_index_record') if self.length >= 248: - i('Sections Index', self.sect_idx) - i('SKEL Index', self.skel_idx) - i('DATP Index', self.datp_idx) - i('Other Index', self.oth_idx) + r('Sections Index', 'sect_idx') + r('SKEL Index', 'skel_idx') + r('DATP Index', 'datp_idx') + r('Other Index', 'oth_idx') if self.unknown9: a('Unknown9: %r'%self.unknown9) From 2f6a705e74620ec2f32aeedac0ac5876f498a334 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 12:27:39 +0530 Subject: [PATCH 34/36] ... 
--- src/calibre/devices/android/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 07be4e42c1..d162b6692a 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -29,7 +29,7 @@ class ANDROID(USBMS): 0xc86 : [0x100, 0x0227, 0x0226, 0x222], 0xc87 : [0x0100, 0x0227, 0x0226], 0xc8d : [0x100, 0x0227, 0x0226, 0x222], - 0xc91 : [0x0100, 0x0227, 0x0226], + 0xc91 : [0x0100, 0x0227, 0x0226, 0x222], 0xc92 : [0x100, 0x0227, 0x0226, 0x222], 0xc97 : [0x100, 0x0227, 0x0226, 0x222], 0xc99 : [0x100, 0x0227, 0x0226, 0x222], From ce4655ddd5fb692bb7be094743695103f4f5e2aa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 18:00:19 +0530 Subject: [PATCH 35/36] KF8 Output: Implement joint mobi files with both MOBI 6 and KF8 versions in the same file --- .../ebooks/conversion/plugins/mobi_output.py | 8 ++- src/calibre/ebooks/mobi/debug/mobi8.py | 2 + src/calibre/ebooks/mobi/writer2/main.py | 68 ++++++++++++++++++- src/calibre/ebooks/mobi/writer8/exth.py | 15 ++-- src/calibre/ebooks/mobi/writer8/main.py | 4 +- src/calibre/ebooks/mobi/writer8/mobi.py | 51 ++++++++------ 6 files changed, 114 insertions(+), 34 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index 98a837e1a3..b73d6341f9 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -174,7 +174,8 @@ class MOBIOutput(OutputFormatPlugin): add_fonts=create_kf8) self.check_for_periodical() - kf8 = self.create_kf8(resources) if create_kf8 else None + kf8 = self.create_kf8(resources, for_joint=mobi_type=='both' + ) if create_kf8 else None if mobi_type == 'new': kf8.write(output_path) self.extract_mobi(output_path, opts) @@ -183,9 +184,10 @@ class MOBIOutput(OutputFormatPlugin): self.log('Creating MOBI 6 output') 
self.write_mobi(input_plugin, output_path, kf8, resources) - def create_kf8(self, resources): + def create_kf8(self, resources, for_joint=False): from calibre.ebooks.mobi.writer8.main import create_kf8_book - return create_kf8_book(self.oeb, self.opts, resources) + return create_kf8_book(self.oeb, self.opts, resources, + for_joint=for_joint) def write_mobi(self, input_plugin, output_path, kf8, resources): from calibre.ebooks.mobi.mobiml import MobiMLizer diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index a91213f889..21ed11fc51 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -139,6 +139,8 @@ class MOBIFile(object): self.files.append(File(skel, skeleton, ftext, first_aid, sections)) def dump_flows(self, ddir): + if self.fdst is None: + raise ValueError('This MOBI file has no FDST record') for i, x in enumerate(self.fdst.sections): start, end = x raw = self.raw_text[start:end] diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index a8fc37ff45..f064fd2625 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -23,6 +23,7 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer # Disabled as I dont care about uncrossable breaks WRITE_UNCROSSABLE_BREAKS = False +NULL_INDEX = 0xffffffff class MobiWriter(object): @@ -30,6 +31,7 @@ class MobiWriter(object): self.opts = opts self.resources = resources self.kf8 = kf8 + self.for_joint = kf8 is not None self.write_page_breaks_after_item = write_page_breaks_after_item self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC self.prefer_author_sort = opts.prefer_author_sort @@ -61,7 +63,7 @@ class MobiWriter(object): self.stream = stream self.records = [None] self.generate_content() - self.generate_record0() + self.generate_joint_record0() if self.for_joint else self.generate_record0() self.write_header() self.write_content() @@ 
-200,8 +202,6 @@ class MobiWriter(object): first_image_record = None if self.resources: used_images = self.serializer.used_images - if self.kf8 is not None: - used_images |= self.kf8.used_images first_image_record = len(self.records) self.resources.serialize(self.records, used_images) last_content_record = len(self.records) - 1 @@ -365,6 +365,68 @@ class MobiWriter(object): self.records[0] = align_block(record0) # }}} + def generate_joint_record0(self): # {{{ + from calibre.ebooks.mobi.writer8.mobi import (MOBIHeader, + HEADER_FIELDS) + from calibre.ebooks.mobi.writer8.exth import build_exth + + # Insert resource records + first_image_record = None + old = len(self.records) + if self.resources: + used_images = self.serializer.used_images | self.kf8.used_images + first_image_record = len(self.records) + self.resources.serialize(self.records, used_images) + resource_record_count = len(self.records) - old + + # Insert KF8 records + self.records.append(b'BOUNDARY') + kf8_header_index = len(self.records) + self.kf8.start_offset = (self.serializer.start_offset, + self.kf8.start_offset) + self.records.append(self.kf8.record0) + self.records.extend(self.kf8.records[1:]) + + first_image_record if first_image_record else len(self.records) + + header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS} + + # Now change the header fields that need to be different in the MOBI 6 + # header + header_fields['first_resource_record'] = first_image_record + header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this + header_fields['fdst_record'] = NULL_INDEX + header_fields['fdst_count'] = 1 # Why not 0? 
Kindlegen uses 1 + header_fields['extra_data_flags'] = 0b11 + + for k, v in {'last_text_record':'last_text_record_idx', + 'first_non_text_record':'first_non_text_record_idx', + 'ncx_index':'primary_index_record_idx', + }.iteritems(): + header_fields[k] = getattr(self, v) + + for x in ('skel', 'chunk', 'guide'): + header_fields[x+'_index'] = NULL_INDEX + + # Create the MOBI 6 EXTH + opts = self.opts + kuc = 0 if resource_record_count > 0 else None + + header_fields['exth'] = build_exth(self.oeb.metadata, + prefer_author_sort=opts.prefer_author_sort, + is_periodical=opts.mobi_periodical, + share_not_sync=opts.share_not_sync, + cover_offset=self.cover_offset, + thumbnail_offset=self.thumbnail_offset, + num_of_resources=resource_record_count, + kf8_unknown_count=kuc, be_kindlegen2=True, + kf8_header_index=kf8_header_index, + start_offset=self.serializer.start_offset, + mobi_doctype=2) + self.records[0] = MOBIHeader(file_version=6)(**header_fields) + + # }}} + def write_header(self): # PalmDB header {{{ ''' Write the PalmDB header diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py index b469c01d85..4c0c769668 100644 --- a/src/calibre/ebooks/mobi/writer8/exth.py +++ b/src/calibre/ebooks/mobi/writer8/exth.py @@ -27,6 +27,7 @@ EXTH_CODES = { 'source': 112, 'versionnumber': 114, 'startreading': 116, + 'kf8_header_index': 121, 'num_of_resources': 125, 'kf8_unknown_count': 131, 'coveroffset': 201, @@ -41,7 +42,7 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') def build_exth(metadata, prefer_author_sort=False, is_periodical=False, share_not_sync=True, cover_offset=None, thumbnail_offset=None, start_offset=None, mobi_doctype=2, num_of_resources=None, - kf8_unknown_count=0, be_kindlegen2=False): + kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None): exth = BytesIO() nrecs = 0 @@ -158,9 +159,15 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False, except TypeError: start_offset = [start_offset] for so in 
start_offset: - exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, - so)) - nrecs += 1 + if so is not None: + exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, + so)) + nrecs += 1 + + if kf8_header_index is not None: + exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12, + kf8_header_index)) + nrecs += 1 if num_of_resources is not None: exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12, diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index 19d7e390a9..e35ab5e437 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -380,7 +380,7 @@ class KF8Writer(object): self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle self.guide_records = GuideIndex(self.guide_table)() -def create_kf8_book(oeb, opts, resources): +def create_kf8_book(oeb, opts, resources, for_joint=False): writer = KF8Writer(oeb, opts, resources) - return KF8Book(writer) + return KF8Book(writer, for_joint=for_joint) diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py index 1bb83ad4b6..ff096f350b 100644 --- a/src/calibre/ebooks/mobi/writer8/mobi.py +++ b/src/calibre/ebooks/mobi/writer8/mobi.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import time +import time, random from struct import pack from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text @@ -25,8 +25,6 @@ class MOBIHeader(Header): # {{{ the file. 
''' - FILE_VERSION = 8 - DEFINITION = ''' # 0: Compression compression = DYN @@ -63,7 +61,7 @@ class MOBIHeader(Header): # {{{ encoding = 65001 # 32: UID - uid = random.randint(0, 0xffffffff) + uid = DYN # 36: File version file_version = {file_version} @@ -154,7 +152,7 @@ class MOBIHeader(Header): # {{{ # 0b1 - extra multibyte bytes after text records # 0b10 - TBS indexing data (only used in MOBI 6) # 0b100 - uncrossable breaks only used in MOBI 6 - extra_data_flags = 1 + extra_data_flags = DYN # 244: KF8 Indices ncx_index = DYN @@ -171,13 +169,18 @@ class MOBIHeader(Header): # {{{ # Padding to allow amazon's DTP service to add data padding = zeroes(8192) - '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION) + ''' SHORT_FIELDS = {'compression', 'last_text_record', 'record_size', 'encryption_type', 'unused2'} ALIGN = True POSITIONS = {'title_offset':'full_title'} + def __init__(self, file_version=8): + self.DEFINITION = self.DEFINITION.format(file_version=file_version, + record_size=RECORD_SIZE) + super(MOBIHeader, self).__init__() + def format_value(self, name, val): if name == 'compression': val = PALMDOC if val else UNCOMPRESSED @@ -185,14 +188,20 @@ class MOBIHeader(Header): # {{{ # }}} -# Fields that need to be set in the MOBI Header are +HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type', + 'first_non_text_record', 'title_length', 'language_code', + 'first_resource_record', 'exth_flags', 'fdst_record', + 'fdst_count', 'ncx_index', 'chunk_index', 'skel_index', + 'guide_index', 'exth', 'full_title', 'extra_data_flags', + 'uid'} class KF8Book(object): - def __init__(self, writer): - self.build_records(writer) + def __init__(self, writer, for_joint=False): + self.build_records(writer, for_joint) + self.used_images = writer.used_images - def build_records(self, writer): + def build_records(self, writer, for_joint): metadata = writer.oeb.metadata # The text records for x in ('last_text_record_idx', 'first_non_text_record_idx'): 
@@ -222,8 +231,10 @@ class KF8Book(object): self.first_resource_record = NULL_INDEX if resources.records: self.first_resource_record = len(self.records) - self.records.extend(resources.records) - self.num_of_resources = len(resources.records) + before = len(self.records) + if not for_joint: + resources.serialize(self.records, writer.used_images) + self.num_of_resources = len(self.records) - before # FDST self.fdst_count = writer.fdst_count @@ -233,12 +244,13 @@ class KF8Book(object): # EOF self.records.append(b'\xe9\x8e\r\n') # EOF record - # Miscellaneous header fields self.compression = writer.compress self.book_type = 0x101 if writer.opts.mobi_periodical else 2 self.full_title = utf8_text(unicode(metadata.title[0])) self.title_length = len(self.full_title) + self.extra_data_flags = 0b1 + self.uid = random.randint(0, 0xffffffff) self.language_code = iana2mobi(str(metadata.language[0])) self.exth_flags = 0b1010000 @@ -248,14 +260,14 @@ class KF8Book(object): self.opts = writer.opts self.start_offset = writer.start_offset self.metadata = metadata + self.kuc = 0 if len(resources.records) > 0 else None @property def record0(self): ''' We generate the EXTH header and record0 dynamically, to allow other - code to customize various values after build_record() has been + code to customize various values after build_records() has been called''' opts = self.opts - kuc = 0 if self.num_of_resources > 0 else None self.exth = build_exth(self.metadata, prefer_author_sort=opts.prefer_author_sort, is_periodical=opts.mobi_periodical, @@ -263,15 +275,10 @@ class KF8Book(object): cover_offset=self.cover_offset, thumbnail_offset=self.thumbnail_offset, num_of_resources=self.num_of_resources, - kf8_unknown_count=kuc, be_kindlegen2=True, + kf8_unknown_count=self.kuc, be_kindlegen2=True, start_offset=self.start_offset, mobi_doctype=self.book_type) - kwargs = {field:getattr(self, field) for field in - ('compression', 'text_length', 'last_text_record', 'book_type', - 
'first_non_text_record', 'title_length', 'language_code', - 'first_resource_record', 'exth_flags', 'fdst_record', - 'fdst_count', 'ncx_index', 'chunk_index', 'skel_index', - 'guide_index', 'exth', 'full_title')} + kwargs = {field:getattr(self, field) for field in HEADER_FIELDS} return MOBIHeader()(**kwargs) def write(self, outpath): From e9296b97637a40ab2e84253682c7a4a8a1961011 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Apr 2012 23:26:53 +0530 Subject: [PATCH 36/36] ... --- src/calibre/gui2/dialogs/message_box.py | 7 +++++++ src/calibre/gui2/proceed.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py index 64c8bf75ba..e15cd055b9 100644 --- a/src/calibre/gui2/dialogs/message_box.py +++ b/src/calibre/gui2/dialogs/message_box.py @@ -158,6 +158,13 @@ _proceed_memory = [] class ProceedNotification(MessageBox): # {{{ + ''' + WARNING: This class is deprecated. DO not use it as some users ahve + reported crashes when closing the dialog box generated by this class. + Instead use: gui.proceed_question(...) The arguments are the same as for + this class. + ''' + def __init__(self, callback, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, parent=None, cancel_callback=None, log_is_file=False): diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py index 433b365e35..422e1c0e0f 100644 --- a/src/calibre/gui2/proceed.py +++ b/src/calibre/gui2/proceed.py @@ -129,6 +129,25 @@ class ProceedQuestion(QDialog): def __call__(self, callback, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, cancel_callback=None, log_is_file=False): + ''' + A non modal popup that notifies the user that a background task has + been completed. This class guarantees that onlya single popup is + visible at any one time. Other requests are queued and displayed after + the user dismisses the current popup. 
+ + :param callback: A callable that is called with payload if the user + asks to proceed. Note that this is always called in the GUI thread. + :param cancel_callback: A callable that is called with the payload if + the user asks not to proceed. + :param payload: Arbitrary object, passed to callback + :param html_log: An HTML or plain text log + :param log_viewer_title: The title for the log viewer window + :param title: The title for this popup + :param msg: The msg to display + :param det_msg: Detailed message + :param log_is_file: If True the html_log parameter is interpreted as + the path to a file on disk containing the log encoded with utf-8 + ''' question = Question(payload, callback, cancel_callback, title, msg, html_log, log_viewer_title, log_is_file, det_msg, show_copy_button)