From 6e1fc23c47ebd487bd926049705cc82e8e0e4a46 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jan 2010 10:58:00 -0700 Subject: [PATCH 01/16] Improved recipe for Wired Magazine --- resources/recipes/wired.recipe | 123 +++++++++++++++++++++++-------- src/calibre/ebooks/pdf/reflow.py | 24 ++++++ 2 files changed, 116 insertions(+), 31 deletions(-) diff --git a/resources/recipes/wired.recipe b/resources/recipes/wired.recipe index fcc2494850..e7395a9ada 100644 --- a/resources/recipes/wired.recipe +++ b/resources/recipes/wired.recipe @@ -1,44 +1,105 @@ -#!/usr/bin/env python + __license__ = 'GPL v3' -__docformat__ = 'restructuredtext en' - +__copyright__ = '2010, Darko Miletic ' +''' +www.wired.com +''' +import re +from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class Wired(BasicNewsRecipe): + title = 'Wired Magazine' + __author__ = 'Darko Miletic' + description = 'Gaming news' + publisher = 'Conde Nast Digital' + category = 'news, games, IT, gadgets' + oldest_article = 32 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + language = 'en' + extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} ' + index = 'http://www.wired.com/magazine/' - title = 'Wired.com' - __author__ = 'Kovid Goyal' - description = 'Technology news' - timefmt = ' [%Y%b%d %H%M]' - language = 'en' + preprocess_regexps = [(re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '')] + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - no_stylesheets = True + keep_only_tags = [dict(name='div', attrs={'class':'post'})] + remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'}) + remove_tags = [ + dict(name=['object','embed','iframe','link']) + ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']}) + ] - remove_tags_before = dict(name='div', id='content') - remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar', - 'footer', 'advertisement', 'blog_subscription_unit', - 'brightcove_component']), - {'class':'entryActions'}, - dict(name=['noscript', 'script'])] - feeds = [ - ('Top News', 'http://feeds.wired.com/wired/index'), - ('Culture', 'http://feeds.wired.com/wired/culture'), - ('Software', 'http://feeds.wired.com/wired/software'), - ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'), - ('Gadgets', 'http://feeds.wired.com/wired/gadgets'), - ('Cars', 'http://feeds.wired.com/wired/cars'), - ('Entertainment', 'http://feeds.wired.com/wired/entertainment'), - ('Gaming', 'http://feeds.wired.com/wired/gaming'), - ('Science', 'http://feeds.wired.com/wired/science'), - ('Med Tech', 'http://feeds.wired.com/wired/medtech'), - ('Politics', 'http://feeds.wired.com/wired/politics'), - ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'), - ('Commentary', 'http://feeds.wired.com/wired/commentary'), - ] + #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )] + + def parse_index(self): + totalfeeds = [] + + soup = self.index_to_soup(self.index) + features = soup.find('div',attrs={'id':'my-glider'}) + if features: + farticles = [] + for item in features.findAll('div',attrs={'class':'section'}): + divurl = item.find('div',attrs={'class':'feature-header'}) + divdesc = item.find('div',attrs={'class':'feature-text'}) + url = 'http://www.wired.com' + divurl.a['href'] + title = self.tag_to_string(divurl.a) + description = self.tag_to_string(divdesc) + date = strftime(self.timefmt) + farticles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append(('Featured Articles', farticles)) + #department feeds + departments = ['rants','start','test','play','found'] + dept = soup.find('div',attrs={'id':'magazine-departments'}) + if dept: + for ditem in departments: + darticles = [] + department = dept.find('div',attrs={'id':'department-'+ditem}) + if department: + for item in department.findAll('div'): + description = '' + feed_link = item.find('a') + if feed_link and feed_link.has_key('href'): + url = feed_link['href'] + title = self.tag_to_string(feed_link) + date = strftime(self.timefmt) + darticles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append((ditem.capitalize(), darticles)) + return totalfeeds + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.index) + cover_item = soup.find('div',attrs={'class':'spread-image'}) + if cover_item: + cover_url = 'http://www.wired.com' + cover_item.a.img['src'] + return cover_url def print_version(self, url): - return url.replace('http://www.wired.com/', 'http://www.wired.com/print/') + return url.rstrip('/') + '/all/1' + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 92a0ceebe1..3fce8a41f8 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -28,6 +28,7 @@ class Column(object): self.left = self.right = self.top = self.bottom = 0 self.width = self.height = 0 self.elements = [] + self.average_line_separation = 0 def add(self, elem): if elem in self.elements: return @@ -49,8 +50,17 @@ class Column(object): return elem.left > self.left - self.HFUZZ*self.width and \ elem.right < self.right + self.HFUZZ*self.width + def collect_stats(self): + if len(self.elements) > 1: + gaps = [self.elements[i+1].top - self.elements[i].bottom for i in + range(len(0, len(self.elements)-1))] + self.average_line_separation = sum(gaps)/len(gaps) + class Element(object): + def __init__(self): + self.starts_paragraph = False + def __eq__(self, other): return self.id == other.id @@ -60,6 +70,7 @@ class Element(object): class Image(Element): def __init__(self, img, opts, log, idc): + Element.__init__(self) self.opts, self.log = opts, log self.id = idc.next() self.top, self.left, self.width, self.height, self.iwidth, self.iheight = \ @@ -71,6 +82,7 @@ class Image(Element): class Text(Element): def __init__(self, text, font_map, opts, log, idc): + Element.__init__(self) self.id = idc.next() self.opts, self.log = opts, log self.font_map = font_map @@ -174,6 +186,12 @@ class Region(object): def is_empty(self): return len(self.elements) == 0 + def collect_stats(self): + for column in self.column: + column.collect_stats() + self.average_line_separation = sum([x.average_line_separation for x in + self.columns])/float(len(self.columns)) + class Page(object): @@ -298,6 +316,11 @@ class Page(object): x_interval.intersection(h_interval).width <= 0: yield y + def second_pass(self): + 'Locate paragraph boundaries in each column' + for region in self.regions: + region.collect_stats() + class PDFDocument(object): @@ -327,6 +350,7 @@ class PDFDocument(object): for page in self.pages: page.document_font_stats = self.font_size_stats page.first_pass() + page.second_pass() def collect_font_statistics(self): self.font_size_stats = {} From b0edb9896c00b29b0a7b8fc11637208f3e050625 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 23 Jan 2010 13:14:41 -0500 Subject: [PATCH 02/16] Fix bug #4646: Support eReader PDB with 116 Byte header. --- src/calibre/ebooks/pdb/ereader/reader.py | 2 +- src/calibre/ebooks/pdb/ereader/reader202.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index ad1df98793..71ba3efdc6 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -20,7 +20,7 @@ class Reader(FormatReader): if record0_size == 132: self.reader = Reader132(header, stream, log, options) - elif record0_size == 202: + elif record0_size in (116, 202): self.reader = Reader202(header, stream, log, options) else: raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size) diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py index a674c5bf60..590301b433 100644 --- a/src/calibre/ebooks/pdb/ereader/reader202.py +++ b/src/calibre/ebooks/pdb/ereader/reader202.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- ''' -Read content from ereader pdb file with a 202 byte header created by Makebook. +Read content from ereader pdb file with a 116 and 202 byte header created by Makebook. ''' __license__ = 'GPL v3' __copyright__ = '2009, John Schember <john@nachtimwald.com>' @@ -44,7 +44,7 @@ class Reader202(FormatReader): self.header_record = HeaderRecord(self.section_data(0)) - if self.header_record.version != 4: + if self.header_record.version not in (2, 4): raise EreaderError('Unknown book version %i.' % self.header_record.version) from calibre.ebooks.metadata.pdb import get_metadata From feb58a884650e0088cb51293cdf64180af09378b Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 23 Jan 2010 11:20:34 -0700 Subject: [PATCH 03/16] ... --- src/calibre/ebooks/pdf/reflow.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 3fce8a41f8..73178f5621 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -192,6 +192,16 @@ class Region(object): self.average_line_separation = sum([x.average_line_separation for x in self.columns])/float(len(self.columns)) + def __iter__(self): + for x in self.columns: + yield x + + def detect_paragraphs(self): + first = True + for col in self: + col.detect_paragraphs(self.average_line_separation, first) + first = False + class Page(object): @@ -203,6 +213,8 @@ class Page(object): # for them to be considered to be part of the same text fragment LINE_FACTOR = 0.4 + # Multiplies the average line height when determining row height + # of a particular element to detect columns. YFUZZ = 1.5 @@ -305,7 +317,7 @@ class Page(object): def find_elements_in_row_of(self, x): interval = Interval(x.top, - x.top + self.YFUZZ*(1+self.average_text_height)) + x.top + self.YFUZZ*(self.average_text_height)) h_interval = Interval(x.left, x.right) for y in self.elements[x.idx:x.idx+15]: if y is not x: @@ -320,6 +332,7 @@ class Page(object): 'Locate paragraph boundaries in each column' for region in self.regions: region.collect_stats() + region.detect_paragraphs() class PDFDocument(object): From 44129f0d1f083988e79e532d3cac637aac3fa141 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 23 Jan 2010 11:53:24 -0700 Subject: [PATCH 04/16] Fix #4653 (Typo in metadata for new quick start guide) and strip img tags with src=# in epub output as they cause ADE to freeze --- src/calibre/ebooks/epub/output.py | 3 ++- src/calibre/gui2/ui.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index bffc24ac91..8e9c9efea9 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -268,7 +268,8 @@ class EPUBOutput(OutputFormatPlugin): # remove <img> tags with empty src elements bad = [] for x in XPath('//h:img')(body): - if not x.get('src', '').strip(): + src = x.get('src', '').strip() + if src in ('', '#'): bad.append(x) for img in bad: img.getparent().remove(img) diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index b132e368ee..714b2c3a27 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -522,7 +522,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): from calibre.ebooks.metadata import MetaInformation mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember']) mi.author_sort = 'Schember, John' - mi.comments = "A guide to get you up an running with calibre" + mi.comments = "A guide to get you up and running with calibre" mi.publisher = 'calibre' self.library_view.model().add_books([P('quick_start.epub')], ['epub'], [mi]) From f1d81044505a97529bdf36cbd02d29c1f04bfdbb Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 23 Jan 2010 14:55:28 -0700 Subject: [PATCH 05/16] New recipe for Neowin by Darko Miletic --- resources/images/news/neowin.png | Bin 0 -> 1068 bytes resources/recipes/neowin.recipe | 40 +++++++++++ src/calibre/ebooks/pdf/reflow.py | 110 ++++++++++++++++++------------- 3 files changed, 104 insertions(+), 46 deletions(-) create mode 100644 resources/images/news/neowin.png create mode 100644 resources/recipes/neowin.recipe diff --git a/resources/images/news/neowin.png b/resources/images/news/neowin.png new file mode 100644 index 0000000000000000000000000000000000000000..5aee949c0bb7e844b4b47169f3302776087be6d4 GIT binary patch literal 1068 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87??vnT^vI!PRCA;&WJ8$I9C6^*z=sH-(<Di z%S&>1<?T(`x;X1%WJ{O%9W@KrE`KE+7YRF_gW~P}zm$Rn;<=3W@U;7PnP0Q}q3~#z zw|Bbsqgy_2b9ODu-2A1isA|&WIs5rjlYiOP^4ss5E<gW4{lp-?8@u0WcWXy0XRE5O z)#&mtYn`?`XriaF#2+UP!)kr~cOv({Z@oSH-0t|l7WG+=9{Cz?&w3pCnt`QRD^@lz zu42>fUzd#8k2CH1H%;KsUTcM=R~q%}XP@tvtN$zAQ!wLL1+!D=_hYvF*9$jn&ux33 z{iyk}`|;q;7b^sk!Wj#+?^*9&XKqztaP+3JzR=5ePPsh`+m%<ewTks69G#t!>;2($ zPUFY<V)NEs{qvnML^Jh&x$x%aR=1K?WwO1<+x)`jf=?gA!G<;mMwTFr1uK5+JO9Cf zdwch6{<-$%X6v4WnZ@n>^SVeuj$3)Di`U7CcHNiHmw)`SQO@Z5_KQ<;R8m?mI4inn zoLcy=Y~7hVAz@+W@t4=dMOXQ(I%vyWC+jNM;I&`XBh4pGDA(t&;G<(somUU>z2&uP zo+-BPucMLec9S)!;@xl4G#GwZOqKc{A~H*NY73tsqjF*EvV|65cT%2Jbt-E*-nekl zWa>}*>%viM<HS8gHEr$Y-FnidEb*%IQ^BnT_gR=L3|M0KsvVD(_GGDD67T3Gzx@%X z?p2|vpV^t(Gxw$zKYYY>vEkI!KYY?NEtWP)v`e${GA#-bOcKBS(eaAr^w@7()vs-R zSCUbrSe^ai)*q>*F`R;i1>0BOn61~k==388<No7X5%1WZcC`yMWw{<Ll&P{>o2ff@ zi`~upiF41Sg`4JA&+cL~UEdeJHgMbOC!frM&$6liikT!<Vd|NAIIZ5D`S-20o4w-I zjK5#;ct0b&tia3lPu9$+qepeEb40gCoPIWQ_ogH5_t!tHN|3PHwXXO}=f3k*zm2A| zIMk)yjEU8+|NG5-dVg&Ey`>v9oemfXc5Pa|{axURe4{(*pYF|8&Y1OU^ZVcDmtLN0 zU4A|!Cf-!`x_$9Kr*BV=ES(dj^Y)Q9XG4=p5^F`thR}IcKO6mf7kqLvntAKzdFl5{ zUfpWvmYyoS|Nq{P9_tde{|f5b_Uyy0h0GgY_KAJ|+>q#d@AQ9e!=-83#i!gAfw@h! z#5JNMC9x#cD!C{XNHG{07#ipr80s1shZveznHpIcTId>>TNxOf*)TH!MMG|WN@iLm fiUw0F6H6;26Nm-@tBpT_8W=oX{an^LB{Ts5fE3tc literal 0 HcmV?d00001 diff --git a/resources/recipes/neowin.recipe b/resources/recipes/neowin.recipe new file mode 100644 index 0000000000..9f5a669a75 --- /dev/null +++ b/resources/recipes/neowin.recipe @@ -0,0 +1,40 @@ + +from calibre.web.feeds.news import BasicNewsRecipe + +class Neowin(BasicNewsRecipe): + title = u'Neowin.net' + oldest_article = 5 + language = 'en' + description = 'News from IT' + publisher = 'Neowin' + category = 'news, IT, Microsoft, Apple, hardware, software, games' + __author__ = 'Darko Miletic' + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf8' + + conversion_options = { + 'tags' : category + ,'language' : language + ,'comments' : description + ,'publisher' : publisher + } + + keep_only_tags = [dict(name='div', attrs={'id':'article'})] + remove_tags_after = dict(name='div', attrs={'id':'tag-bar'}) + + remove_tags = [ + dict(name=['base','object','link','iframe']) + ,dict(name='div', attrs={'id':'tag-bar'}) + ] + + feeds = [ + (u'Software' , u'http://www.neowin.net/news/rss/software' ) + ,(u'Gaming' , u'http://www.neowin.net/news/rss/gaming' ) + ,(u'Microsoft', u'http://www.neowin.net/news/rss/microsoft') + ,(u'Apple' , u'http://www.neowin.net/news/rss/apple' ) + ,(u'Editorial', u'http://www.neowin.net/news/rss/editorial') + ] + def image_url_processor(cls, baseurl, url): + return url + diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 73178f5621..1a0e5e0dcb 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -18,48 +18,11 @@ class Font(object): self.color = spec.get('color') self.family = spec.get('family') -class Column(object): - - # A column contains an element is the element bulges out to - # the left or the right by at most HFUZZ*col width. - HFUZZ = 0.2 - - def __init__(self): - self.left = self.right = self.top = self.bottom = 0 - self.width = self.height = 0 - self.elements = [] - self.average_line_separation = 0 - - def add(self, elem): - if elem in self.elements: return - self.elements.append(elem) - self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom)) - self.top = self.elements[0].top - self.bottom = self.elements[-1].bottom - self.left, self.right = sys.maxint, 0 - for x in self: - self.left = min(self.left, x.left) - self.right = max(self.right, x.right) - self.width, self.height = self.right-self.left, self.bottom-self.top - - def __iter__(self): - for x in self.elements: - yield x - - def contains(self, elem): - return elem.left > self.left - self.HFUZZ*self.width and \ - elem.right < self.right + self.HFUZZ*self.width - - def collect_stats(self): - if len(self.elements) > 1: - gaps = [self.elements[i+1].top - self.elements[i].bottom for i in - range(len(0, len(self.elements)-1))] - self.average_line_separation = sum(gaps)/len(gaps) - class Element(object): def __init__(self): - self.starts_paragraph = False + self.starts_block = None + self.block_style = None def __eq__(self, other): return self.id == other.id @@ -152,6 +115,61 @@ class Interval(object): def __hash__(self): return hash('(%f,%f)'%self.left, self.right) +class Column(object): + + # A column contains an element is the element bulges out to + # the left or the right by at most HFUZZ*col width. + HFUZZ = 0.2 + + + def __init__(self): + self.left = self.right = self.top = self.bottom = 0 + self.width = self.height = 0 + self.elements = [] + self.average_line_separation = 0 + + def add(self, elem): + if elem in self.elements: return + self.elements.append(elem) + self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom)) + self.top = self.elements[0].top + self.bottom = self.elements[-1].bottom + self.left, self.right = sys.maxint, 0 + for x in self: + self.left = min(self.left, x.left) + self.right = max(self.right, x.right) + self.width, self.height = self.right-self.left, self.bottom-self.top + + def __iter__(self): + for x in self.elements: + yield x + + def contains(self, elem): + return elem.left > self.left - self.HFUZZ*self.width and \ + elem.right < self.right + self.HFUZZ*self.width + + def collect_stats(self): + if len(self.elements) > 1: + gaps = [self.elements[i+1].top - self.elements[i].bottom for i in + range(len(0, len(self.elements)-1))] + self.average_line_separation = sum(gaps)/len(gaps) + for i, elem in enumerate(self.elements): + left_margin = elem.left - self.left + elem.indent_fraction = left_margin/self.width + elem.width_fraction = elem.width/self.width + if i == 0: + elem.top_gap = None + else: + elem.top_gap = self.elements[i-1].bottom - elem.top + + def previous_element(self, idx): + if idx == 0: + return None + return self.elements[idx-1] + + + + class Region(object): def __init__(self): @@ -168,6 +186,7 @@ class Region(object): self.columns[i].add(elem) def contains(self, columns): + # TODO: handle unbalanced columns if not self.columns: return True if len(columns) != len(self.columns): @@ -187,7 +206,7 @@ class Region(object): return len(self.elements) == 0 def collect_stats(self): - for column in self.column: + for column in self.columns: column.collect_stats() self.average_line_separation = sum([x.average_line_separation for x in self.columns])/float(len(self.columns)) @@ -196,11 +215,10 @@ class Region(object): for x in self.columns: yield x - def detect_paragraphs(self): - first = True - for col in self: - col.detect_paragraphs(self.average_line_separation, first) - first = False + def linearize(self): + self.elements = [] + for x in self.columns: + self.elements.extend(x) class Page(object): @@ -332,7 +350,7 @@ class Page(object): 'Locate paragraph boundaries in each column' for region in self.regions: region.collect_stats() - region.detect_paragraphs() + region.linearize() class PDFDocument(object): From 839b5618cbe69d148e368b80304d6e35dbea1970 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 23 Jan 2010 17:41:11 -0700 Subject: [PATCH 06/16] Improved recipe for Common Dreams --- resources/recipes/common_dreams.recipe | 32 +++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/resources/recipes/common_dreams.recipe b/resources/recipes/common_dreams.recipe index b662cc3ee0..5443b5890b 100644 --- a/resources/recipes/common_dreams.recipe +++ b/resources/recipes/common_dreams.recipe @@ -2,17 +2,37 @@ from calibre.web.feeds.news import BasicNewsRecipe class CommonDreams(BasicNewsRecipe): + # Identify the recipe + title = u'Common Dreams' description = u'Progressive news and views' __author__ = u'XanthanGum' language = 'en' + + # Format the text + + extra_css = ''' + body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} + h1{font-size: xx-large;} + h2{font-size: large;} + ''' + # Pick no article older than seven days and limit the number of articles per feed to 100 + oldest_article = 7 max_articles_per_feed = 100 - feeds = [ - (u'Common Dreams Headlines', - u'http://www.commondreams.org/feed/headlines_rss'), - (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'), - (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss') - ] + # Remove everything before the article + + remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'}) + + # Remove everything after the article + + remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'}) + + # Identify the news feeds + + feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'), + (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'), + (u'Views', u'http://www.commondreams.org/feed/views_rss'), + (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')] From 561b9c9113d05ad7440a7ae5e182def74ec61a49 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sun, 24 Jan 2010 09:30:33 -0500 Subject: [PATCH 07/16] PMLML: add missing argurment to Stylizer object. --- src/calibre/ebooks/pml/pmlml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index e3609fcddb..7427a77c2f 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -138,7 +138,7 @@ class PMLMLizer(object): text = [u''] for item in self.oeb_book.spine: self.log.debug('Converting %s to PML markup...' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) text.append(self.add_page_anchor(item)) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) return ''.join(text) From abf95b3511fd29d190787db241ad4605c7729ad3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 09:06:56 -0700 Subject: [PATCH 08/16] Fix #4649 (Sport Illustrated Recipe) --- resources/recipes/sportsillustrated.recipe | 75 ++++++++++++++-------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe index 0dbae1ebc0..dd1df16ac7 100644 --- a/resources/recipes/sportsillustrated.recipe +++ b/resources/recipes/sportsillustrated.recipe @@ -1,6 +1,5 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup -#from random import randint from urllib import quote class SportsIllustratedRecipe(BasicNewsRecipe) : @@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : __license__ = 'GPL v3' language = 'en' description = 'Sports Illustrated' - version = 1 + version = 3 title = u'Sports Illustrated' no_stylesheets = True remove_javascript = True - #template_css = '' use_embedded_content = False INDEX = 'http://sportsillustrated.cnn.com/' @@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : def parse_index(self): answer = [] soup = self.index_to_soup(self.INDEX) - # Find the link to the current issue on the front page. + # Find the link to the current issue on the front page. SI Cover cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'}) if cover: currentIssue = cover.parent['href'] if currentIssue: # Open the index of current issue + index = self.index_to_soup(currentIssue) + self.log('\tLooking for current issue in: ' + currentIssue) + # Now let us see if they updated their frontpage + nav = index.find('div', attrs = {'class': 'siv_trav_top'}) + if nav: + img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'}) + if img: + parent = img.parent + if parent.name == 'a': + # They didn't update their frontpage; Load the next issue from here + href = self.INDEX + parent['href'] + index = self.index_to_soup(href) + self.log('\tLooking for current issue in: ' + href) + + if index.find('div', 'siv_noArticleMessage'): + nav = index.find('div', attrs = {'class': 'siv_trav_top'}) + if nav: + # Their frontpage points to an issue without any articles; Use the previous issue + img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'}) + if img: + parent = img.parent + if parent.name == 'a': + href = self.INDEX + parent['href'] + index = self.index_to_soup(href) + self.log('\tLooking for current issue in: ' + href) + # Find all articles. list = index.find('div', attrs = {'class' : 'siv_artList'}) @@ -69,31 +93,26 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : def preprocess_html(self, soup): header = soup.find('div', attrs = {'class' : 'siv_artheader'}) - if header: - # It's an article, prepare a container for the content - homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>') - body = homeMadeSoup.find('body') + homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>') + body = homeMadeSoup.body - # Find the date, title and byline - temp = header.find('td', attrs = {'class' : 'title'}) - if temp : - date = temp.find('div', attrs = {'class' : 'date'}) - if date: - body.append(date) - if temp.h1: - body.append(temp.h1) - if temp.h2 : - body.append(temp.h2) - byline = temp.find('div', attrs = {'class' : 'byline'}) - if byline: - body.append(byline) + # Find the date, title and byline + temp = header.find('td', attrs = {'class' : 'title'}) + if temp : + date = temp.find('div', attrs = {'class' : 'date'}) + if date: + body.append(date) + if temp.h1: + body.append(temp.h1) + if temp.h2 : + body.append(temp.h2) + byline = temp.find('div', attrs = {'class' : 'byline'}) + if byline: + body.append(byline) - # Find the content - for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) : - body.append(para) + # Find the content + for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) : + body.append(para) - return homeMadeSoup - else : - # It's a TOC, just return the whole lot - return soup + return homeMadeSoup From 3a6fb236460ed0063c43501f5f5b0085e1217fbd Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 09:48:03 -0700 Subject: [PATCH 09/16] Update catalog timestamp/pubdate when re-generating --- src/calibre/library/catalog.py | 46 ++++++++++++++++---------------- src/calibre/library/database2.py | 7 ++++- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index bc6826baf4..7a1272f59d 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1,4 +1,4 @@ -import pickle, os, re, shutil, htmlentitydefs +import os, re, shutil, htmlentitydefs from xml.sax.saxutils import escape @@ -412,11 +412,11 @@ class EPUB_MOBI(CatalogPlugin): # Convert the upper 3 numbers - thousandsNumber if thousandsNumber: if number > 1099 and number < 2000: - resultString = '%s %s' % (self.lessThanTwenty[number/100], + resultString = '%s %s' % (self.lessThanTwenty[number/100], self.stringFromInt(number % 100)) self.text = resultString.strip().capitalize() return - else: + else: thousandsString = self.stringFromInt(thousandsNumber) # Concatenate the strings @@ -509,10 +509,10 @@ class EPUB_MOBI(CatalogPlugin): self.__thumbs = None self.__title = opts.catalog_title self.__verbose = opts.verbose - + if self.verbose: print "CatalogBuilder(): Generating %s for %s" % (self.opts.fmt, self.opts.output_profile) - + # Accessors ''' @dynamic_property @@ -522,7 +522,7 @@ class EPUB_MOBI(CatalogPlugin): def fset(self, val): self.__ = val return property(fget=fget, fset=fset) - ''' + ''' @dynamic_property def authors(self): @@ -538,7 +538,7 @@ class EPUB_MOBI(CatalogPlugin): def fset(self, val): self.__basename = val return property(fget=fget, fset=fset) - @dynamic_property + @dynamic_property def booksByAuthor(self): def fget(self): return self.__booksByAuthor @@ -878,7 +878,7 @@ class EPUB_MOBI(CatalogPlugin): if self.verbose: print "fetchBooksByTitle(): %d books" % len(self.booksByTitle) for title in self.booksByTitle: - print (u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8') + print (u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8') print def fetchBooksByAuthor(self): @@ -927,10 +927,10 @@ class EPUB_MOBI(CatalogPlugin): if self.verbose: print "\nfetchBooksByauthor(): %d unique authors" % len(unique_authors) for author in unique_authors: - print (u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20], - author[2])).encode('utf-8') + print (u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20], + author[2])).encode('utf-8') print - + self.authors = unique_authors def generateHTMLDescriptions(self): @@ -1676,7 +1676,7 @@ class EPUB_MOBI(CatalogPlugin): cmTag['name'] = "author" cmTag.insert(0, NavigableString(self.formatNCXText(book['author']))) navPointVolumeTag.insert(2, cmTag) - + # Add the description tag if book['short_description']: cmTag = Tag(ncx_soup, '%s' % 'calibre:meta') @@ -1816,7 +1816,7 @@ class EPUB_MOBI(CatalogPlugin): nptc += 1 # Create an NCX article entry for each populated author index letter - # Loop over the sorted_authors list, find start of each letter, + # Loop over the sorted_authors list, find start of each letter, # add description_preview_count artists # self.authors[0]:friendly [1]:author_sort [2]:book_count master_author_list = [] @@ -1868,7 +1868,7 @@ class EPUB_MOBI(CatalogPlugin): contentTag['src'] = "%s#%sauthors" % (HTML_file, authors_by_letter[1]) navPointByLetterTag.insert(1,contentTag) - + if self.generateForKindle: cmTag = Tag(soup, '%s' % 'calibre:meta') cmTag['name'] = "description" @@ -1903,7 +1903,7 @@ class EPUB_MOBI(CatalogPlugin): file_ID = file_ID.replace(" ","") navPointTag['id'] = "%s-ID" % file_ID navPointTag['playOrder'] = self.playOrder - #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder) + #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder) self.playOrder += 1 navLabelTag = Tag(ncx_soup, 'navLabel') textTag = Tag(ncx_soup, 'text') @@ -1943,19 +1943,19 @@ class EPUB_MOBI(CatalogPlugin): cmTag = Tag(ncx_soup, '%s' % 'calibre:meta') cmTag['name'] = "author" # First - Last author - + if len(genre['titles_spanned']) > 1 : author_range = "%s - %s" % (genre['titles_spanned'][0][0], genre['titles_spanned'][1][0]) else : author_range = "%s" % (genre['titles_spanned'][0][0]) - + cmTag.insert(0, NavigableString(author_range)) navPointVolumeTag.insert(2, cmTag) - + # Build the description tag cmTag = Tag(ncx_soup, '%s' % 'calibre:meta') cmTag['name'] = "description" - + if False: # Form 1: Titles spanned if len(genre['titles_spanned']) > 1: @@ -1971,7 +1971,7 @@ class EPUB_MOBI(CatalogPlugin): titles = sorted(titles, key=lambda x:(self.generateSortTitle(x),self.generateSortTitle(x))) titles_list = self.generateShortDescription(" • ".join(titles)) cmTag.insert(0, NavigableString(self.formatNCXText(titles_list))) - + navPointVolumeTag.insert(3, cmTag) # Add this volume to the section tag @@ -2065,11 +2065,11 @@ class EPUB_MOBI(CatalogPlugin): markerTags = [] markerTags.extend(self.opts.exclude_tags.split(',')) markerTags.extend(self.opts.note_tag.split(',')) - markerTags.extend(self.opts.read_tag.split(',')) + markerTags.extend(self.opts.read_tag.split(',')) return markerTags def filterDbTags(self, tags): - # Remove the special marker tags from the database's tag list, + # Remove the special marker tags from the database's tag list, # return sorted list of tags representing valid genres filtered_tags = [] @@ -2082,7 +2082,7 @@ class EPUB_MOBI(CatalogPlugin): if re.search(self.opts.exclude_genre, tag): #print "skipping %s" % tag continue - + filtered_tags.append(tag) filtered_tags.sort() diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 7b0f7a083e..1fdb4ef9a9 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1415,9 +1415,10 @@ class LibraryDatabase2(LibraryDatabase): if matches: tag_matches = self.data.get_matches('tags', _('Catalog')) matches = matches.intersection(tag_matches) - db_id = None + db_id, existing = None, False if matches: db_id = list(matches)[0] + existing = True if db_id is None: obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)', (title, 'calibre')) @@ -1433,6 +1434,10 @@ class LibraryDatabase2(LibraryDatabase): if not hasattr(path, 'read'): stream.close() self.conn.commit() + if existing: + t = datetime.utcnow() + self.set_timestamp(db_id, t, notify=False) + self.set_pubdate(db_id, t, notify=False) self.data.refresh_ids(self, [db_id]) # Needed to update format list and size return db_id From 6b59d8aa966b5831df0673689af574b8a69ce924 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 11:25:05 -0700 Subject: [PATCH 10/16] Content server: Mobile page breaks if library contains empty books. Now fixed. --- src/calibre/library/catalog.py | 1 - src/calibre/library/server.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 7a1272f59d..ea0a9005d2 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1410,7 +1410,6 @@ class EPUB_MOBI(CatalogPlugin): else: #if verbose: print "generating new thumbnail for %s" % title['title'] self.generateThumbnail(title, image_dir, thumb_file) - else: # Use default cover if self.verbose: print "no cover available for %s, will use default" % \ diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index 00eea78589..a1c8aec0bd 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -714,6 +714,10 @@ class LibraryServer(object): book, books = MarkupTemplate(self.MOBILE_BOOK), [] for record in items[(start-1):(start-1)+num]: + if record[13] is None: + record[13] = '' + if record[6] is None: + record[6] = 0 aus = record[2] if record[2] else __builtin__._('Unknown') authors = '|'.join([i.replace('|', ',') for i in aus.split(',')]) record[10] = fmt_sidx(float(record[10])) From 0b4de383306480d753be5be3fd01bca090c1dbab Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 11:34:42 -0700 Subject: [PATCH 11/16] Speed up generation of thumbnails for catalog --- src/calibre/library/catalog.py | 49 +++++++++++++++++----------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index ea0a9005d2..f6d8017156 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -751,7 +751,9 @@ class EPUB_MOBI(CatalogPlugin): self.generateHTMLByTags() if getattr(self.reporter, 'cancel_requested', False): return 1 - self.generateThumbnails() + from calibre.utils.PythonMagickWand import ImageMagick + with ImageMagick(): + self.generateThumbnails() if getattr(self.reporter, 'cancel_requested', False): return 1 self.generateOPF() @@ -2343,29 +2345,28 @@ class EPUB_MOBI(CatalogPlugin): def generateThumbnail(self, title, image_dir, thumb_file): import calibre.utils.PythonMagickWand as pw - with pw.ImageMagick(): - try: - img = pw.NewMagickWand() - if img < 0: - raise RuntimeError('generate_thumbnail(): Cannot create wand') - # Read the cover - if not pw.MagickReadImage(img, - title['cover'].encode(filesystem_encoding)): - print 'Failed to read cover image from: %s' % title['cover'] - raise IOError - thumb = pw.CloneMagickWand(img) - if thumb < 0: - print 'generate_thumbnail(): Cannot clone cover' - raise RuntimeError - # img, width, height - pw.MagickThumbnailImage(thumb, 75, 100) - pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file)) - pw.DestroyMagickWand(thumb) - pw.DestroyMagickWand(img) - except IOError: - print "generate_thumbnail() IOError with %s" % title['title'] - except RuntimeError: - print "generate_thumbnail() RuntimeError with %s" % title['title'] + try: + img = pw.NewMagickWand() + if img < 0: + raise RuntimeError('generate_thumbnail(): Cannot create wand') + # Read the cover + if not pw.MagickReadImage(img, + title['cover'].encode(filesystem_encoding)): + print 'Failed to read cover image from: %s' % title['cover'] + raise IOError + thumb = pw.CloneMagickWand(img) + if thumb < 0: + print 'generate_thumbnail(): Cannot clone cover' + raise RuntimeError + # img, width, height + pw.MagickThumbnailImage(thumb, 75, 100) + pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file)) + pw.DestroyMagickWand(thumb) + pw.DestroyMagickWand(img) + except IOError: + print "generate_thumbnail() IOError with %s" % title['title'] + except RuntimeError: + print "generate_thumbnail() RuntimeError with %s" % title['title'] def processSpecialTags(self, tags, this_title, opts): tag_list = [] From cb753ba603bcea1f7bbd26c62b865a2996b01b16 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 11:36:10 -0700 Subject: [PATCH 12/16] Fix #4664 (conversion of a comic collection fails if the comics.txt contains a blank line) --- src/calibre/ebooks/comic/input.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index 85590a7bae..122f61e45a 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -340,6 +340,9 @@ class ComicInput(InputFormatPlugin): %stream.name) for line in open('comics.txt', 'rb').read().decode('utf-8').splitlines(): + line = line.strip() + if not line: + continue fname, title = line.partition(':')[0], line.partition(':')[-1] fname = os.path.join(tdir, *fname.split('/')) if not title: From 988d944e748df7e3a2db7c151be3046d00ea12f9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 11:43:10 -0700 Subject: [PATCH 13/16] Fix #4662 (Metadata->Add format file dialog does not include .cbc format as acceptable) --- src/calibre/ebooks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index a9985e6480..d5651568fa 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -24,7 +24,7 @@ class DRMError(ValueError): BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc', - 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip', + 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml'] class HTMLRenderer(object): From b37e606d86d46f4654a8266b1bd66bbd10c4c812 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 11:46:59 -0700 Subject: [PATCH 14/16] ... --- src/calibre/ebooks/pdf/reflow.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 1a0e5e0dcb..3fef8c30ce 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -40,6 +40,8 @@ class Image(Element): map(float, map(img.get, ('top', 'left', 'rwidth', 'rheight', 'iwidth', 'iheight'))) self.src = img.get('src') + self.bottom = self.top + self.height + self.right = self.left + self.width class Text(Element): @@ -151,7 +153,7 @@ class Column(object): def collect_stats(self): if len(self.elements) > 1: gaps = [self.elements[i+1].top - self.elements[i].bottom for i in - range(len(0, len(self.elements)-1))] + range(0, len(self.elements)-1)] self.average_line_separation = sum(gaps)/len(gaps) for i, elem in enumerate(self.elements): left_margin = elem.left - self.left @@ -203,7 +205,7 @@ class Region(object): @property def is_empty(self): - return len(self.elements) == 0 + return len(self.columns) == 0 def collect_stats(self): for column in self.columns: @@ -311,10 +313,10 @@ class Page(object): columns = self.sort_into_columns(x, elems) processed.update(elems) if not current_region.contains(columns): - self.regions.append(self.current_region) + self.regions.append(current_region) current_region = Region() current_region.add(columns) - if not self.current_region.is_empty(): + if not current_region.is_empty: self.regions.append(current_region) def sort_into_columns(self, elem, neighbors): From 8b4cb3417b78523cd70848a84f4370ff73f3a2f6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 12:07:33 -0700 Subject: [PATCH 15/16] ... --- src/calibre/devices/blackberry/driver.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py index ec8a7e8f49..6a9bb84431 100644 --- a/src/calibre/devices/blackberry/driver.py +++ b/src/calibre/devices/blackberry/driver.py @@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS class BLACKBERRY(USBMS): name = 'Blackberry Device Interface' + gui_name = 'Blackberry' description = _('Communicate with the Blackberry smart phone.') author = _('Kovid Goyal') - supported_platforms = ['windows', 'linux'] + supported_platforms = ['windows', 'linux', 'osx'] # Ordered list of supported formats FORMATS = ['mobi', 'prc'] VENDOR_ID = [0x0fca] PRODUCT_ID = [0x8004, 0x0004] - BCD = [0x0200, 0x0107, 0x0201] + BCD = [0x0200, 0x0107, 0x0210] VENDOR_NAME = 'RIM' WINDOWS_MAIN_MEM = 'BLACKBERRY_SD' - #OSX_MAIN_MEM = 'Kindle Internal Storage Media' - MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card' - EBOOK_DIR_MAIN = 'ebooks' + EBOOK_DIR_MAIN = 'eBooks' SUPPORTS_SUB_DIRS = True From 1acc7f0f6adde1423b67bc35a721e8bbcc461f4d Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sun, 24 Jan 2010 13:09:36 -0700 Subject: [PATCH 16/16] ... --- src/calibre/utils/ipc/job.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index 79db972008..458d5adb8a 100644 --- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -52,8 +52,10 @@ class BaseJob(object): else: self._status_text = _('Error') if self.failed else _('Finished') if DEBUG: - prints('Job:', self.id, self.description, 'finished') - prints('\t'.join(self.details.splitlines(True))) + prints('Job:', self.id, self.description, 'finished', + safe_encode=True) + prints('\t'.join(self.details.splitlines(True)), + safe_encode=True) if not self._done_called: self._done_called = True try: