From 6e1fc23c47ebd487bd926049705cc82e8e0e4a46 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 23 Jan 2010 10:58:00 -0700
Subject: [PATCH 01/16] Improved recipe for Wired Magazine

---
 resources/recipes/wired.recipe   | 123 +++++++++++++++++++++++--------
 src/calibre/ebooks/pdf/reflow.py |  24 ++++++
 2 files changed, 116 insertions(+), 31 deletions(-)
diff --git a/resources/recipes/wired.recipe b/resources/recipes/wired.recipe
index fcc2494850..e7395a9ada 100644
--- a/resources/recipes/wired.recipe
+++ b/resources/recipes/wired.recipe
@@ -1,44 +1,105 @@
-#!/usr/bin/env  python
+
 __license__   = 'GPL v3'
-__docformat__ = 'restructuredtext en'
-
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.wired.com
+'''
 
+import re
+from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Wired(BasicNewsRecipe):
+    title                 = 'Wired Magazine'
+    __author__            = 'Darko Miletic'
+    description           = 'Gaming news'
+    publisher             = 'Conde Nast Digital'
+    category              = 'news, games, IT, gadgets'
+    oldest_article        = 32
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'en'
+    extra_css             = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
+    index                 = 'http://www.wired.com/magazine/'
 
-    title = 'Wired.com'
-    __author__ = 'Kovid Goyal'
-    description = 'Technology news'
-    timefmt  = ' [%Y%b%d  %H%M]'
-    language = 'en'
+    preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
 
-    no_stylesheets = True
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+    remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
+    remove_tags = [
+                     dict(name=['object','embed','iframe','link'])
+                    ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
+                  ]
 
-    remove_tags_before = dict(name='div', id='content')
-    remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
-        'footer', 'advertisement', 'blog_subscription_unit',
-        'brightcove_component']),
-        {'class':'entryActions'},
-        dict(name=['noscript', 'script'])]
 
-    feeds = [
-        ('Top News', 'http://feeds.wired.com/wired/index'),
-        ('Culture', 'http://feeds.wired.com/wired/culture'),
-        ('Software', 'http://feeds.wired.com/wired/software'),
-        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
-        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
-        ('Cars', 'http://feeds.wired.com/wired/cars'),
-        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
-        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
-        ('Science', 'http://feeds.wired.com/wired/science'),
-        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
-        ('Politics', 'http://feeds.wired.com/wired/politics'),
-        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
-        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
-        ]
+    #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
+
+    def parse_index(self):
+        totalfeeds = []
+
+        soup = self.index_to_soup(self.index)
+        features = soup.find('div',attrs={'id':'my-glider'})
+        if features:
+           farticles = []
+           for item in features.findAll('div',attrs={'class':'section'}):
+               divurl = item.find('div',attrs={'class':'feature-header'})
+               divdesc = item.find('div',attrs={'class':'feature-text'})
+               url   = 'http://www.wired.com' + divurl.a['href']
+               title = self.tag_to_string(divurl.a)
+               description = self.tag_to_string(divdesc)
+               date  = strftime(self.timefmt)
+               farticles.append({
+                                  'title'      :title
+                                 ,'date'       :date
+                                 ,'url'        :url
+                                 ,'description':description
+                                })
+           totalfeeds.append(('Featured Articles', farticles))
+        #department feeds
+        departments = ['rants','start','test','play','found']
+        dept = soup.find('div',attrs={'id':'magazine-departments'})
+        if dept:
+            for ditem in departments:
+                darticles = []
+                department = dept.find('div',attrs={'id':'department-'+ditem})
+                if department:
+                    for item in department.findAll('div'):
+                        description = ''
+                        feed_link = item.find('a')
+                        if feed_link and feed_link.has_key('href'):
+                            url   = feed_link['href']
+                            title = self.tag_to_string(feed_link)
+                            date  = strftime(self.timefmt)
+                            darticles.append({
+                                              'title'      :title
+                                             ,'date'       :date
+                                             ,'url'        :url
+                                             ,'description':description
+                                            })
+                    totalfeeds.append((ditem.capitalize(), darticles))
+        return totalfeeds
+
+    def get_cover_url(self):
+        cover_url = None
+        soup = self.index_to_soup(self.index)
+        cover_item = soup.find('div',attrs={'class':'spread-image'})
+        if cover_item:
+           cover_url = 'http://www.wired.com' + cover_item.a.img['src']
+        return cover_url
 
     def print_version(self, url):
-        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
+        return url.rstrip('/') + '/all/1'
 
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
 
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 92a0ceebe1..3fce8a41f8 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -28,6 +28,7 @@ class Column(object):
         self.left = self.right = self.top = self.bottom = 0
         self.width = self.height = 0
         self.elements = []
+        self.average_line_separation = 0
 
     def add(self, elem):
         if elem in self.elements: return
@@ -49,8 +50,17 @@ class Column(object):
         return elem.left > self.left - self.HFUZZ*self.width and \
                elem.right < self.right + self.HFUZZ*self.width
 
+    def collect_stats(self):
+        if len(self.elements) > 1:
+            gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
+                    range(len(0, len(self.elements)-1))]
+            self.average_line_separation = sum(gaps)/len(gaps)
+
 class Element(object):
 
+    def __init__(self):
+        self.starts_paragraph = False
+
     def __eq__(self, other):
         return self.id == other.id
 
@@ -60,6 +70,7 @@ class Element(object):
 class Image(Element):
 
     def __init__(self, img, opts, log, idc):
+        Element.__init__(self)
         self.opts, self.log = opts, log
         self.id = idc.next()
         self.top, self.left, self.width, self.height, self.iwidth, self.iheight = \
@@ -71,6 +82,7 @@ class Image(Element):
 class Text(Element):
 
     def __init__(self, text, font_map, opts, log, idc):
+        Element.__init__(self)
         self.id = idc.next()
         self.opts, self.log = opts, log
         self.font_map = font_map
@@ -174,6 +186,12 @@ class Region(object):
     def is_empty(self):
         return len(self.elements) == 0
 
+    def collect_stats(self):
+        for column in self.column:
+            column.collect_stats()
+        self.average_line_separation = sum([x.average_line_separation for x in
+            self.columns])/float(len(self.columns))
+
 
 class Page(object):
 
@@ -298,6 +316,11 @@ class Page(object):
                     x_interval.intersection(h_interval).width <= 0:
                     yield y
 
+    def second_pass(self):
+        'Locate paragraph boundaries in each column'
+        for region in self.regions:
+            region.collect_stats()
+
 
 class PDFDocument(object):
 
@@ -327,6 +350,7 @@ class PDFDocument(object):
         for page in self.pages:
             page.document_font_stats = self.font_size_stats
             page.first_pass()
+            page.second_pass()
 
     def collect_font_statistics(self):
         self.font_size_stats = {}

From b0edb9896c00b29b0a7b8fc11637208f3e050625 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 23 Jan 2010 13:14:41 -0500
Subject: [PATCH 02/16] Fix bug #4646: Support eReader PDB with 116 Byte
 header.

---
 src/calibre/ebooks/pdb/ereader/reader.py    | 2 +-
 src/calibre/ebooks/pdb/ereader/reader202.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py
index ad1df98793..71ba3efdc6 100644
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@@ -20,7 +20,7 @@ class Reader(FormatReader):
 
         if record0_size == 132:
             self.reader = Reader132(header, stream, log, options)
-        elif record0_size == 202:
+        elif record0_size in (116, 202):
             self.reader = Reader202(header, stream, log, options)
         else:
             raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py
index a674c5bf60..590301b433 100644
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 '''
-Read content from ereader pdb file with a 202 byte header created by Makebook.
+Read content from ereader pdb file with a 116 and 202 byte header created by Makebook.
 '''
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -44,7 +44,7 @@ class Reader202(FormatReader):
 
         self.header_record = HeaderRecord(self.section_data(0))
 
-        if self.header_record.version != 4:
+        if self.header_record.version not in (2, 4):
             raise EreaderError('Unknown book version %i.' % self.header_record.version)
 
         from calibre.ebooks.metadata.pdb import get_metadata

From feb58a884650e0088cb51293cdf64180af09378b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 23 Jan 2010 11:20:34 -0700
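Subject: [PATCH 03/16] PDF reflow: add Region column iteration and a
 detect_paragraphs pass; drop the +1 from the row-height fuzz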

---
 src/calibre/ebooks/pdf/reflow.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 3fce8a41f8..73178f5621 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -192,6 +192,16 @@ class Region(object):
         self.average_line_separation = sum([x.average_line_separation for x in
             self.columns])/float(len(self.columns))
 
+    def __iter__(self):
+        for x in self.columns:
+            yield x
+
+    def detect_paragraphs(self):
+        first = True
+        for col in self:
+            col.detect_paragraphs(self.average_line_separation, first)
+            first = False
+
 
 class Page(object):
 
@@ -203,6 +213,8 @@ class Page(object):
     # for them to be considered to be part of the same text fragment
     LINE_FACTOR = 0.4
 
+    # Multiplies the average line height when determining row height
+    # of a particular element to detect columns.
     YFUZZ = 1.5
 
 
@@ -305,7 +317,7 @@ class Page(object):
 
     def find_elements_in_row_of(self, x):
         interval = Interval(x.top,
-                x.top + self.YFUZZ*(1+self.average_text_height))
+                x.top + self.YFUZZ*(self.average_text_height))
         h_interval = Interval(x.left, x.right)
         for y in self.elements[x.idx:x.idx+15]:
             if y is not x:
@@ -320,6 +332,7 @@ class Page(object):
         'Locate paragraph boundaries in each column'
         for region in self.regions:
             region.collect_stats()
+            region.detect_paragraphs()
 
 
 class PDFDocument(object):

From 44129f0d1f083988e79e532d3cac637aac3fa141 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 23 Jan 2010 11:53:24 -0700
Subject: [PATCH 04/16] Fix #4653 (Typo in metadata for new quick start guide)
 and strip img tags with src=# in epub output as they cause ADE to freeze

---
 src/calibre/ebooks/epub/output.py | 3 ++-
 src/calibre/gui2/ui.py            | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index bffc24ac91..8e9c9efea9 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -268,7 +268,8 @@ class EPUBOutput(OutputFormatPlugin):
                 # remove <img> tags with empty src elements
                 bad = []
                 for x in XPath('//h:img')(body):
-                    if not x.get('src', '').strip():
+                    src = x.get('src', '').strip()
+                    if src in ('', '#'):
                         bad.append(x)
                 for img in bad:
                     img.getparent().remove(img)
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index b132e368ee..714b2c3a27 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -522,7 +522,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
             from calibre.ebooks.metadata import MetaInformation
             mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
             mi.author_sort = 'Schember, John'
-            mi.comments = "A guide to get you up an running with calibre"
+            mi.comments = "A guide to get you up and running with calibre"
             mi.publisher = 'calibre'
             self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
                     [mi])

From f1d81044505a97529bdf36cbd02d29c1f04bfdbb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 23 Jan 2010 14:55:28 -0700
Subject: [PATCH 05/16] New recipe for Neowin by Darko Miletic

---
 resources/images/news/neowin.png | Bin 0 -> 1068 bytes
 resources/recipes/neowin.recipe  |  40 +++++++++++
 src/calibre/ebooks/pdf/reflow.py | 110 ++++++++++++++++++-------------
 3 files changed, 104 insertions(+), 46 deletions(-)
 create mode 100644 resources/images/news/neowin.png
 create mode 100644 resources/recipes/neowin.recipe

diff --git a/resources/images/news/neowin.png b/resources/images/news/neowin.png
new file mode 100644
index 0000000000000000000000000000000000000000..5aee949c0bb7e844b4b47169f3302776087be6d4
GIT binary patch
literal 1068
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87??vnT^vI!PRCA;&WJ8$I9C6^*z=sH-(<Di
z%S&>1<?T(`x;X1%WJ{O%9W@KrE`KE+7YRF_gW~P}zm$Rn;<=3W@U;7PnP0Q}q3~#z
zw|Bbsqgy_2b9ODu-2A1isA|&WIs5rjlYiOP^4ss5E<gW4{lp-?8@u0WcWXy0XRE5O
z)#&mtYn`?`XriaF#2+UP!)kr~cOv({Z@oSH-0t|l7WG+=9{Cz?&w3pCnt`QRD^@lz
zu42>fUzd#8k2CH1H%;KsUTcM=R~q%}XP@tvtN$zAQ!wLL1+!D=_hYvF*9$jn&ux33
z{iyk}`|;q;7b^sk!Wj#+?^*9&XKqztaP+3JzR=5ePPsh`+m%<ewTks69G#t!>;2($
zPUFY<V)NEs{qvnML^Jh&x$x%aR=1K?WwO1<+x)`jf=?gA!G<;mMwTFr1uK5+JO9Cf
zdwch6{<-$%X6v4WnZ@n>^SVeuj$3)Di`U7CcHNiHmw)`SQO@Z5_KQ<;R8m?mI4inn
zoLcy=Y~7hVAz@+W@t4=dMOXQ(I%vyWC+jNM;I&`XBh4pGDA(t&;G<(somUU>z2&uP
zo+-BPucMLec9S)!;@xl4G#GwZOqKc{A~H*NY73tsqjF*EvV|65cT%2Jbt-E*-nekl
zWa>}*>%viM<HS8gHEr$Y-FnidEb*%IQ^BnT_gR=L3|M0KsvVD(_GGDD67T3Gzx@%X
z?p2|vpV^t(Gxw$zKYYY>vEkI!KYY?NEtWP)v`e${GA#-bOcKBS(eaAr^w@7()vs-R
zSCUbrSe^ai)*q>*F`R;i1>0BOn61~k==388<No7X5%1WZcC`yMWw{<Ll&P{>o2ff@
zi`~upiF41Sg`4JA&+cL~UEdeJHgMbOC!frM&$6liikT!<Vd|NAIIZ5D`S-20o4w-I
zjK5#;ct0b&tia3lPu9$+qepeEb40gCoPIWQ_ogH5_t!tHN|3PHwXXO}=f3k*zm2A|
zIMk)yjEU8+|NG5-dVg&Ey`>v9oemfXc5Pa|{axURe4{(*pYF|8&Y1OU^ZVcDmtLN0
zU4A|!Cf-!`x_$9Kr*BV=ES(dj^Y)Q9XG4=p5^F`thR}IcKO6mf7kqLvntAKzdFl5{
zUfpWvmYyoS|Nq{P9_tde{|f5b_Uyy0h0GgY_KAJ|+>q#d@AQ9e!=-83#i!gAfw@h!
z#5JNMC9x#cD!C{XNHG{07#ipr80s1shZveznHpIcTId>>TNxOf*)TH!MMG|WN@iLm
fiUw0F6H6;26Nm-@tBpT_8W=oX{an^LB{Ts5fE3tc

literal 0
HcmV?d00001

diff --git a/resources/recipes/neowin.recipe b/resources/recipes/neowin.recipe
new file mode 100644
index 0000000000..9f5a669a75
--- /dev/null
+++ b/resources/recipes/neowin.recipe
@@ -0,0 +1,40 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Neowin(BasicNewsRecipe):
+    title                 = u'Neowin.net'
+    oldest_article        = 5
+    language              = 'en'
+    description           = 'News from IT'
+    publisher             = 'Neowin'
+    category              = 'news, IT, Microsoft, Apple, hardware, software, games'
+    __author__            = 'Darko Miletic'
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf8'
+
+    conversion_options = {
+                             'tags'      : category
+                            ,'language'  : language
+                            ,'comments'  : description
+                            ,'publisher' : publisher
+                         }
+
+    keep_only_tags     = [dict(name='div', attrs={'id':'article'})]
+    remove_tags_after = dict(name='div', attrs={'id':'tag-bar'})
+
+    remove_tags        = [
+                            dict(name=['base','object','link','iframe'])
+                           ,dict(name='div', attrs={'id':'tag-bar'})
+                         ]
+
+    feeds          = [
+                        (u'Software' , u'http://www.neowin.net/news/rss/software' )
+                       ,(u'Gaming'   , u'http://www.neowin.net/news/rss/gaming'   )
+                       ,(u'Microsoft', u'http://www.neowin.net/news/rss/microsoft')
+                       ,(u'Apple'    , u'http://www.neowin.net/news/rss/apple'    )
+                       ,(u'Editorial', u'http://www.neowin.net/news/rss/editorial')
+                     ]
+    def image_url_processor(cls, baseurl, url):
+        return url
+
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 73178f5621..1a0e5e0dcb 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -18,48 +18,11 @@ class Font(object):
         self.color = spec.get('color')
         self.family = spec.get('family')
 
-class Column(object):
-
-    # A column contains an element is the element bulges out to
-    # the left or the right by at most HFUZZ*col width.
-    HFUZZ = 0.2
-
-    def __init__(self):
-        self.left = self.right = self.top = self.bottom = 0
-        self.width = self.height = 0
-        self.elements = []
-        self.average_line_separation = 0
-
-    def add(self, elem):
-        if elem in self.elements: return
-        self.elements.append(elem)
-        self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
-        self.top = self.elements[0].top
-        self.bottom = self.elements[-1].bottom
-        self.left, self.right = sys.maxint, 0
-        for x in self:
-            self.left = min(self.left, x.left)
-            self.right = max(self.right, x.right)
-        self.width, self.height = self.right-self.left, self.bottom-self.top
-
-    def __iter__(self):
-        for x in self.elements:
-            yield x
-
-    def contains(self, elem):
-        return elem.left > self.left - self.HFUZZ*self.width and \
-               elem.right < self.right + self.HFUZZ*self.width
-
-    def collect_stats(self):
-        if len(self.elements) > 1:
-            gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
-                    range(len(0, len(self.elements)-1))]
-            self.average_line_separation = sum(gaps)/len(gaps)
-
 class Element(object):
 
     def __init__(self):
-        self.starts_paragraph = False
+        self.starts_block = None
+        self.block_style = None
 
     def __eq__(self, other):
         return self.id == other.id
@@ -152,6 +115,61 @@ class Interval(object):
     def __hash__(self):
         return hash('(%f,%f)'%self.left, self.right)
 
+class Column(object):
+
+    # A column contains an element is the element bulges out to
+    # the left or the right by at most HFUZZ*col width.
+    HFUZZ = 0.2
+
+
+    def __init__(self):
+        self.left = self.right = self.top = self.bottom = 0
+        self.width = self.height = 0
+        self.elements = []
+        self.average_line_separation = 0
+
+    def add(self, elem):
+        if elem in self.elements: return
+        self.elements.append(elem)
+        self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
+        self.top = self.elements[0].top
+        self.bottom = self.elements[-1].bottom
+        self.left, self.right = sys.maxint, 0
+        for x in self:
+            self.left = min(self.left, x.left)
+            self.right = max(self.right, x.right)
+        self.width, self.height = self.right-self.left, self.bottom-self.top
+
+    def __iter__(self):
+        for x in self.elements:
+            yield x
+
+    def contains(self, elem):
+        return elem.left > self.left - self.HFUZZ*self.width and \
+               elem.right < self.right + self.HFUZZ*self.width
+
+    def collect_stats(self):
+        if len(self.elements) > 1:
+            gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
+                    range(len(0, len(self.elements)-1))]
+            self.average_line_separation = sum(gaps)/len(gaps)
+        for i, elem in enumerate(self.elements):
+            left_margin = elem.left - self.left
+            elem.indent_fraction = left_margin/self.width
+            elem.width_fraction = elem.width/self.width
+            if i == 0:
+                elem.top_gap = None
+            else:
+                elem.top_gap = self.elements[i-1].bottom - elem.top
+
+    def previous_element(self, idx):
+        if idx == 0:
+            return None
+        return self.elements[idx-1]
+
+
+
+
 class Region(object):
 
     def __init__(self):
@@ -168,6 +186,7 @@ class Region(object):
                     self.columns[i].add(elem)
 
     def contains(self, columns):
+        # TODO: handle unbalanced columns
         if not self.columns:
             return True
         if len(columns) != len(self.columns):
@@ -187,7 +206,7 @@ class Region(object):
         return len(self.elements) == 0
 
     def collect_stats(self):
-        for column in self.column:
+        for column in self.columns:
             column.collect_stats()
         self.average_line_separation = sum([x.average_line_separation for x in
             self.columns])/float(len(self.columns))
@@ -196,11 +215,10 @@ class Region(object):
         for x in self.columns:
             yield x
 
-    def detect_paragraphs(self):
-        first = True
-        for col in self:
-            col.detect_paragraphs(self.average_line_separation, first)
-            first = False
+    def linearize(self):
+        self.elements = []
+        for x in self.columns:
+            self.elements.extend(x)
 
 
 class Page(object):
@@ -332,7 +350,7 @@ class Page(object):
         'Locate paragraph boundaries in each column'
         for region in self.regions:
             region.collect_stats()
-            region.detect_paragraphs()
+            region.linearize()
 
 
 class PDFDocument(object):

From 839b5618cbe69d148e368b80304d6e35dbea1970 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 23 Jan 2010 17:41:11 -0700
Subject: [PATCH 06/16] Improved recipe for Common Dreams

---
 resources/recipes/common_dreams.recipe | 32 +++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/resources/recipes/common_dreams.recipe b/resources/recipes/common_dreams.recipe
index b662cc3ee0..5443b5890b 100644
--- a/resources/recipes/common_dreams.recipe
+++ b/resources/recipes/common_dreams.recipe
@@ -2,17 +2,37 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class CommonDreams(BasicNewsRecipe):
+    # Identify the recipe
+    
     title          = u'Common Dreams'
     description    = u'Progressive news and views'
     __author__     = u'XanthanGum'
     language = 'en'
+    
+    # Format the text
+    
+    extra_css = '''
+                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+                 h1{font-size: xx-large;}
+                 h2{font-size: large;}
+                '''
 
+    # Pick no article older than seven days and limit the number of articles per feed to 100
+    
     oldest_article = 7
     max_articles_per_feed = 100
     
-    feeds          = [
-                       (u'Common Dreams Headlines', 
-                       u'http://www.commondreams.org/feed/headlines_rss'), 
-                       (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'), 
-                       (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
-                       ]
+    # Remove everything before the article
+    
+    remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
+    
+    # Remove everything after the article
+    
+    remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
+    
+    # Identify the news feeds
+    
+    feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
+             (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'), 
+             (u'Views', u'http://www.commondreams.org/feed/views_rss'), 
+             (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]

From 561b9c9113d05ad7440a7ae5e182def74ec61a49 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 24 Jan 2010 09:30:33 -0500
Subject: [PATCH 07/16] PMLML: add missing argument to Stylizer object.

---
 src/calibre/ebooks/pml/pmlml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index e3609fcddb..7427a77c2f 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -138,7 +138,7 @@ class PMLMLizer(object):
         text = [u'']
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to PML markup...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             text.append(self.add_page_anchor(item))
             text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
         return ''.join(text)

From abf95b3511fd29d190787db241ad4605c7729ad3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 09:06:56 -0700
Subject: [PATCH 08/16] Fix #4649 (Sport Illustrated Recipe)

---
 resources/recipes/sportsillustrated.recipe | 75 ++++++++++++++--------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe
index 0dbae1ebc0..dd1df16ac7 100644
--- a/resources/recipes/sportsillustrated.recipe
+++ b/resources/recipes/sportsillustrated.recipe
@@ -1,6 +1,5 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-#from random import randint
 from urllib import quote
 
 class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
     __license__ = 'GPL v3'
     language = 'en'
     description = 'Sports Illustrated'
-    version = 1
+    version = 3
     title          = u'Sports Illustrated'
 
     no_stylesheets = True
     remove_javascript = True
-    #template_css = ''
     use_embedded_content   = False
 
     INDEX = 'http://sportsillustrated.cnn.com/'
@@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
     def parse_index(self):
         answer = []
         soup = self.index_to_soup(self.INDEX)
-        # Find the link to the current issue on the front page.
+        # Find the link to the current issue on the front page. SI Cover
         cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
         if cover:
             currentIssue = cover.parent['href']
             if currentIssue:
                 # Open the index of current issue
+
                 index = self.index_to_soup(currentIssue)
+                self.log('\tLooking for current issue in: ' + currentIssue)
+                # Now let us see if they updated their frontpage
+                nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+                if nav:
+                    img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
+                    if img:
+                        parent = img.parent
+                        if parent.name == 'a':
+                            # They didn't update their frontpage; Load the next issue from here
+                            href = self.INDEX + parent['href']
+                            index = self.index_to_soup(href)
+                            self.log('\tLooking for current issue in: ' + href)
+
+                if index.find('div', 'siv_noArticleMessage'):
+                    nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+                    if nav:
+                    # Their frontpage points to an issue without any articles; Use the previous issue
+                        img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
+                        if img:
+                            parent = img.parent
+                            if parent.name == 'a':
+                                href = self.INDEX + parent['href']
+                                index = self.index_to_soup(href)
+                                self.log('\tLooking for current issue in: ' + href)
+
 
                 # Find all articles.
                 list = index.find('div', attrs = {'class' : 'siv_artList'})
@@ -69,31 +93,26 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 
     def preprocess_html(self, soup):
         header = soup.find('div', attrs = {'class' : 'siv_artheader'})
-        if header:
-            # It's an article, prepare a container for the content
-            homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
-            body = homeMadeSoup.find('body')
+        homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
+        body = homeMadeSoup.body
 
-            # Find the date, title and byline
-            temp = header.find('td', attrs = {'class' : 'title'})
-            if temp :
-                date = temp.find('div', attrs = {'class' : 'date'})
-                if date:
-                    body.append(date)
-                if temp.h1:
-                    body.append(temp.h1)
-                if temp.h2 :
-                    body.append(temp.h2)
-                byline = temp.find('div', attrs = {'class' : 'byline'})
-                if byline:
-                    body.append(byline)
+        # Find the date, title and byline
+        temp = header.find('td', attrs = {'class' : 'title'})
+        if temp :
+            date = temp.find('div', attrs = {'class' : 'date'})
+            if date:
+                body.append(date)
+            if temp.h1:
+                body.append(temp.h1)
+            if temp.h2 :
+                body.append(temp.h2)
+            byline = temp.find('div', attrs = {'class' : 'byline'})
+            if byline:
+                body.append(byline)
 
-            # Find the content
-            for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
-                body.append(para)
+        # Find the content
+        for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
+            body.append(para)
 
-            return homeMadeSoup
-        else :
-            # It's a TOC, just return the whole lot
-            return soup
+        return homeMadeSoup
 

From 3a6fb236460ed0063c43501f5f5b0085e1217fbd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 09:48:03 -0700
Subject: [PATCH 09/16] Update catalog timestamp/pubdate when re-generating

---
 src/calibre/library/catalog.py   | 46 ++++++++++++++++----------------
 src/calibre/library/database2.py |  7 ++++-
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index bc6826baf4..7a1272f59d 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1,4 +1,4 @@
-import pickle, os, re, shutil, htmlentitydefs
+import os, re, shutil, htmlentitydefs
 
 from xml.sax.saxutils import escape
 
@@ -412,11 +412,11 @@ class EPUB_MOBI(CatalogPlugin):
                     # Convert the upper 3 numbers - thousandsNumber
                     if thousandsNumber:
                         if number > 1099 and number < 2000:
-                            resultString = '%s %s' % (self.lessThanTwenty[number/100], 
+                            resultString = '%s %s' % (self.lessThanTwenty[number/100],
                                                      self.stringFromInt(number % 100))
                             self.text = resultString.strip().capitalize()
                             return
-                        else:  
+                        else:
                             thousandsString = self.stringFromInt(thousandsNumber)
 
                     # Concatenate the strings
@@ -509,10 +509,10 @@ class EPUB_MOBI(CatalogPlugin):
             self.__thumbs = None
             self.__title = opts.catalog_title
             self.__verbose = opts.verbose
-            
+
             if self.verbose:
                 print "CatalogBuilder(): Generating %s for %s" % (self.opts.fmt, self.opts.output_profile)
-            
+
         # Accessors
         '''
         @dynamic_property
@@ -522,7 +522,7 @@ class EPUB_MOBI(CatalogPlugin):
             def fset(self, val):
                 self.__ = val
             return property(fget=fget, fset=fset)
-        '''        
+        '''
 
         @dynamic_property
         def authors(self):
@@ -538,7 +538,7 @@ class EPUB_MOBI(CatalogPlugin):
             def fset(self, val):
                 self.__basename = val
             return property(fget=fget, fset=fset)
-        @dynamic_property        
+        @dynamic_property
         def booksByAuthor(self):
             def fget(self):
                 return self.__booksByAuthor
@@ -878,7 +878,7 @@ class EPUB_MOBI(CatalogPlugin):
             if self.verbose:
                 print "fetchBooksByTitle(): %d books" % len(self.booksByTitle)
                 for title in self.booksByTitle:
-                    print (u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8')                    
+                    print (u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8')
                 print
 
         def fetchBooksByAuthor(self):
@@ -927,10 +927,10 @@ class EPUB_MOBI(CatalogPlugin):
             if self.verbose:
                 print "\nfetchBooksByauthor(): %d unique authors" % len(unique_authors)
                 for author in unique_authors:
-                    print (u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],  
-                       author[2])).encode('utf-8')                    
+                    print (u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
+                       author[2])).encode('utf-8')
                 print
-                    
+
             self.authors = unique_authors
 
         def generateHTMLDescriptions(self):
@@ -1676,7 +1676,7 @@ class EPUB_MOBI(CatalogPlugin):
                     cmTag['name'] = "author"
                     cmTag.insert(0, NavigableString(self.formatNCXText(book['author'])))
                     navPointVolumeTag.insert(2, cmTag)
-    
+
                     # Add the description tag
                     if book['short_description']:
                         cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
@@ -1816,7 +1816,7 @@ class EPUB_MOBI(CatalogPlugin):
             nptc += 1
 
             # Create an NCX article entry for each populated author index letter
-            # Loop over the sorted_authors list, find start of each letter, 
+            # Loop over the sorted_authors list, find start of each letter,
             # add description_preview_count artists
             # self.authors[0]:friendly [1]:author_sort [2]:book_count
             master_author_list = []
@@ -1868,7 +1868,7 @@ class EPUB_MOBI(CatalogPlugin):
                 contentTag['src'] = "%s#%sauthors" % (HTML_file, authors_by_letter[1])
 
                 navPointByLetterTag.insert(1,contentTag)
-                
+
                 if self.generateForKindle:
                     cmTag = Tag(soup, '%s' % 'calibre:meta')
                     cmTag['name'] = "description"
@@ -1903,7 +1903,7 @@ class EPUB_MOBI(CatalogPlugin):
             file_ID = file_ID.replace(" ","")
             navPointTag['id'] = "%s-ID" % file_ID
             navPointTag['playOrder'] = self.playOrder
-            #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)                        
+            #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
             self.playOrder += 1
             navLabelTag = Tag(ncx_soup, 'navLabel')
             textTag = Tag(ncx_soup, 'text')
@@ -1943,19 +1943,19 @@ class EPUB_MOBI(CatalogPlugin):
                     cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
                     cmTag['name'] = "author"
                     # First - Last author
-    
+
                     if len(genre['titles_spanned']) > 1 :
                         author_range = "%s - %s" % (genre['titles_spanned'][0][0], genre['titles_spanned'][1][0])
                     else :
                         author_range = "%s" % (genre['titles_spanned'][0][0])
-    
+
                     cmTag.insert(0, NavigableString(author_range))
                     navPointVolumeTag.insert(2, cmTag)
-    
+
                     # Build the description tag
                     cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
                     cmTag['name'] = "description"
-    
+
                     if False:
                         # Form 1: Titles spanned
                         if len(genre['titles_spanned']) > 1:
@@ -1971,7 +1971,7 @@ class EPUB_MOBI(CatalogPlugin):
                         titles = sorted(titles, key=lambda x:(self.generateSortTitle(x),self.generateSortTitle(x)))
                         titles_list = self.generateShortDescription(" &bull; ".join(titles))
                         cmTag.insert(0, NavigableString(self.formatNCXText(titles_list)))
-    
+
                     navPointVolumeTag.insert(3, cmTag)
 
                 # Add this volume to the section tag
@@ -2065,11 +2065,11 @@ class EPUB_MOBI(CatalogPlugin):
             markerTags = []
             markerTags.extend(self.opts.exclude_tags.split(','))
             markerTags.extend(self.opts.note_tag.split(','))
-            markerTags.extend(self.opts.read_tag.split(','))            
+            markerTags.extend(self.opts.read_tag.split(','))
             return markerTags
 
         def filterDbTags(self, tags):
-            # Remove the special marker tags from the database's tag list, 
+            # Remove the special marker tags from the database's tag list,
             # return sorted list of tags representing valid genres
 
             filtered_tags = []
@@ -2082,7 +2082,7 @@ class EPUB_MOBI(CatalogPlugin):
                 if re.search(self.opts.exclude_genre, tag):
                     #print "skipping %s" % tag
                     continue
-                
+
                 filtered_tags.append(tag)
 
             filtered_tags.sort()
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 7b0f7a083e..1fdb4ef9a9 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1415,9 +1415,10 @@ class LibraryDatabase2(LibraryDatabase):
         if matches:
             tag_matches = self.data.get_matches('tags', _('Catalog'))
             matches = matches.intersection(tag_matches)
-        db_id = None
+        db_id, existing = None, False
         if matches:
             db_id = list(matches)[0]
+            existing = True
         if db_id is None:
             obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
                                 (title, 'calibre'))
@@ -1433,6 +1434,10 @@ class LibraryDatabase2(LibraryDatabase):
         if not hasattr(path, 'read'):
             stream.close()
         self.conn.commit()
+        if existing:
+            t = datetime.utcnow()
+            self.set_timestamp(db_id, t, notify=False)
+            self.set_pubdate(db_id, t, notify=False)
         self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
         return db_id
 

From 6b59d8aa966b5831df0673689af574b8a69ce924 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 11:25:05 -0700
Subject: [PATCH 10/16] Content server: Mobile page breaks if library contains
 empty books. Now fixed.

---
 src/calibre/library/catalog.py | 1 -
 src/calibre/library/server.py  | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 7a1272f59d..ea0a9005d2 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1410,7 +1410,6 @@ class EPUB_MOBI(CatalogPlugin):
                     else:
                         #if verbose: print "generating new thumbnail for %s" % title['title']
                         self.generateThumbnail(title, image_dir, thumb_file)
-
                 else:
                     # Use default cover
                     if self.verbose: print "no cover available for %s, will use default" % \
diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py
index 00eea78589..a1c8aec0bd 100644
--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@@ -714,6 +714,10 @@ class LibraryServer(object):
 
         book, books = MarkupTemplate(self.MOBILE_BOOK), []
         for record in items[(start-1):(start-1)+num]:
+            if record[13] is None:
+                record[13] = ''
+            if record[6] is None:
+                record[6] = 0
             aus = record[2] if record[2] else __builtin__._('Unknown')
             authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
             record[10] = fmt_sidx(float(record[10]))

From 0b4de383306480d753be5be3fd01bca090c1dbab Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 11:34:42 -0700
Subject: [PATCH 11/16] Speed up generation of thumbnails for catalog

---
 src/calibre/library/catalog.py | 49 +++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index ea0a9005d2..f6d8017156 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -751,7 +751,9 @@ class EPUB_MOBI(CatalogPlugin):
             self.generateHTMLByTags()
 
             if getattr(self.reporter, 'cancel_requested', False): return 1
-            self.generateThumbnails()
+            from calibre.utils.PythonMagickWand import ImageMagick
+            with ImageMagick():
+                self.generateThumbnails()
 
             if getattr(self.reporter, 'cancel_requested', False): return 1
             self.generateOPF()
@@ -2343,29 +2345,28 @@ class EPUB_MOBI(CatalogPlugin):
 
         def generateThumbnail(self, title, image_dir, thumb_file):
             import calibre.utils.PythonMagickWand as pw
-            with pw.ImageMagick():
-                try:
-                    img = pw.NewMagickWand()
-                    if img < 0:
-                        raise RuntimeError('generate_thumbnail(): Cannot create wand')
-                    # Read the cover
-                    if not pw.MagickReadImage(img,
-                            title['cover'].encode(filesystem_encoding)):
-                        print 'Failed to read cover image from: %s' % title['cover']
-                        raise IOError
-                    thumb = pw.CloneMagickWand(img)
-                    if thumb < 0:
-                        print 'generate_thumbnail(): Cannot clone cover'
-                        raise RuntimeError
-                    # img, width, height
-                    pw.MagickThumbnailImage(thumb, 75, 100)
-                    pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
-                    pw.DestroyMagickWand(thumb)
-                    pw.DestroyMagickWand(img)
-                except IOError:
-                    print "generate_thumbnail() IOError with %s" % title['title']
-                except RuntimeError:
-                    print "generate_thumbnail() RuntimeError with %s" % title['title']
+            try:
+                img = pw.NewMagickWand()
+                if img < 0:
+                    raise RuntimeError('generate_thumbnail(): Cannot create wand')
+                # Read the cover
+                if not pw.MagickReadImage(img,
+                        title['cover'].encode(filesystem_encoding)):
+                    print 'Failed to read cover image from: %s' % title['cover']
+                    raise IOError
+                thumb = pw.CloneMagickWand(img)
+                if thumb < 0:
+                    print 'generate_thumbnail(): Cannot clone cover'
+                    raise RuntimeError
+                # img, width, height
+                pw.MagickThumbnailImage(thumb, 75, 100)
+                pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
+                pw.DestroyMagickWand(thumb)
+                pw.DestroyMagickWand(img)
+            except IOError:
+                print "generate_thumbnail() IOError with %s" % title['title']
+            except RuntimeError:
+                print "generate_thumbnail() RuntimeError with %s" % title['title']
 
         def processSpecialTags(self, tags, this_title, opts):
             tag_list = []

From cb753ba603bcea1f7bbd26c62b865a2996b01b16 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 11:36:10 -0700
Subject: [PATCH 12/16] Fix #4664 (conversion of a comic collection fails if
 the comics.txt contains a blank line)

---
 src/calibre/ebooks/comic/input.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 85590a7bae..122f61e45a 100755
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -340,6 +340,9 @@ class ComicInput(InputFormatPlugin):
                         %stream.name)
             for line in open('comics.txt',
                     'rb').read().decode('utf-8').splitlines():
+                line = line.strip()
+                if not line:
+                    continue
                 fname, title = line.partition(':')[0], line.partition(':')[-1]
                 fname = os.path.join(tdir, *fname.split('/'))
                 if not title:

From 988d944e748df7e3a2db7c151be3046d00ea12f9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 11:43:10 -0700
Subject: [PATCH 13/16] Fix #4662 (Metadata->Add format file dialog does not
 include .cbc format as acceptable)

---
 src/calibre/ebooks/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index a9985e6480..d5651568fa 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -24,7 +24,7 @@ class DRMError(ValueError):
 
 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
                    'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc',
-                   'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
+                   'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                    'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml']
 
 class HTMLRenderer(object):

From b37e606d86d46f4654a8266b1bd66bbd10c4c812 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 11:46:59 -0700
Subject: [PATCH 14/16] PDF reflow: set Image bottom/right; fix Column stats range and Region handling

---
 src/calibre/ebooks/pdf/reflow.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 1a0e5e0dcb..3fef8c30ce 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -40,6 +40,8 @@ class Image(Element):
           map(float, map(img.get, ('top', 'left', 'rwidth', 'rheight', 'iwidth',
               'iheight')))
         self.src = img.get('src')
+        self.bottom = self.top + self.height
+        self.right = self.left + self.width
 
 
 class Text(Element):
@@ -151,7 +153,7 @@ class Column(object):
     def collect_stats(self):
         if len(self.elements) > 1:
             gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
-                    range(len(0, len(self.elements)-1))]
+                    range(0, len(self.elements)-1)]
             self.average_line_separation = sum(gaps)/len(gaps)
         for i, elem in enumerate(self.elements):
             left_margin = elem.left - self.left
@@ -203,7 +205,7 @@ class Region(object):
 
     @property
     def is_empty(self):
-        return len(self.elements) == 0
+        return len(self.columns) == 0
 
     def collect_stats(self):
         for column in self.columns:
@@ -311,10 +313,10 @@ class Page(object):
             columns = self.sort_into_columns(x, elems)
             processed.update(elems)
             if not current_region.contains(columns):
-                self.regions.append(self.current_region)
+                self.regions.append(current_region)
                 current_region = Region()
             current_region.add(columns)
-        if not self.current_region.is_empty():
+        if not current_region.is_empty:
             self.regions.append(current_region)
 
     def sort_into_columns(self, elem, neighbors):

From 8b4cb3417b78523cd70848a84f4370ff73f3a2f6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 12:07:33 -0700
Subject: [PATCH 15/16] Blackberry driver: add gui_name and OS X support; update BCD and eBooks dir

---
 src/calibre/devices/blackberry/driver.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
index ec8a7e8f49..6a9bb84431 100644
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS
 class BLACKBERRY(USBMS):
 
     name           = 'Blackberry Device Interface'
+    gui_name       = 'Blackberry'
     description    = _('Communicate with the Blackberry smart phone.')
     author         = _('Kovid Goyal')
-    supported_platforms = ['windows', 'linux']
+    supported_platforms = ['windows', 'linux', 'osx']
 
     # Ordered list of supported formats
     FORMATS     = ['mobi', 'prc']
 
     VENDOR_ID   = [0x0fca]
     PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0201]
+    BCD         = [0x0200, 0x0107, 0x0210]
 
     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
 
-    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
-
     MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry SD Card'
 
-    EBOOK_DIR_MAIN = 'ebooks'
+    EBOOK_DIR_MAIN = 'eBooks'
     SUPPORTS_SUB_DIRS = True

From 1acc7f0f6adde1423b67bc35a721e8bbcc461f4d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 13:09:36 -0700
Subject: [PATCH 16/16] Job debug output: pass safe_encode=True to prints

---
 src/calibre/utils/ipc/job.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py
index 79db972008..458d5adb8a 100644
--- a/src/calibre/utils/ipc/job.py
+++ b/src/calibre/utils/ipc/job.py
@@ -52,8 +52,10 @@ class BaseJob(object):
             else:
                 self._status_text = _('Error') if self.failed else _('Finished')
             if DEBUG:
-                prints('Job:', self.id, self.description, 'finished')
-                prints('\t'.join(self.details.splitlines(True)))
+                prints('Job:', self.id, self.description, 'finished',
+                        safe_encode=True)
+                prints('\t'.join(self.details.splitlines(True)),
+                        safe_encode=True)
             if not self._done_called:
                 self._done_called = True
                 try: