GwR cleanup, rendering default cover against white bg

2025-07-09 03:04:10 -04:00 · 2010-01-24 16:07:01 -07:00 · 2010-01-24 16:07:01 -07:00 · a15b9745e9
commit a15b9745e9
parent d4dd72d683 1acc7f0f6a
19 changed files with 422 additions and 280 deletions
--- a/resources/catalog/mastheadImage.gif
+++ b/resources/catalog/mastheadImage.gif
--- a/resources/images/news/neowin.png
+++ b/resources/images/news/neowin.png
--- a/resources/recipes/common_dreams.recipe
+++ b/resources/recipes/common_dreams.recipe
@ -2,17 +2,37 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class CommonDreams(BasicNewsRecipe):
+    # Identify the recipe
+    
    title          = u'Common Dreams'
    description    = u'Progressive news and views'
    __author__     = u'XanthanGum'
    language = 'en'
    
+    # Format the text
+    
+    extra_css = '''
+                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+                 h1{font-size: xx-large;}
+                 h2{font-size: large;}
+                '''
+
+    # Pick no article older than seven days and limit the number of articles per feed to 100
+    
    oldest_article = 7
    max_articles_per_feed = 100
    
-    feeds          = [
-                       (u'Common Dreams Headlines', 
-                       u'http://www.commondreams.org/feed/headlines_rss'), 
-                       (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'), 
-                       (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
-                       ]
+    # Remove everything before the article
+    
+    remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
+    
+    # Remove everything after the article
+    
+    remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
+    
+    # Identify the news feeds
+    
+    feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
+             (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'), 
+             (u'Views', u'http://www.commondreams.org/feed/views_rss'), 
+             (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]
--- a/resources/recipes/neowin.recipe
+++ b/resources/recipes/neowin.recipe
@ -0,0 +1,40 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Neowin(BasicNewsRecipe):  # news recipe for Neowin.net, configured through class attributes
+    title                 = u'Neowin.net'
+    oldest_article        = 5  # article age limit; presumably in days -- confirm against BasicNewsRecipe
+    language              = 'en'
+    description           = 'News from IT'
+    publisher             = 'Neowin'
+    category              = 'news, IT, Microsoft, Apple, hardware, software, games'
+    __author__            = 'Darko Miletic'
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    # The attributes defined above are reused as the tags/language/comments/publisher values.
+    conversion_options = {
+                             'tags'      : category
+                            ,'language'  : language
+                            ,'comments'  : description
+                            ,'publisher' : publisher
+                         }
+    # Page clean-up rules; how each is applied is decided by BasicNewsRecipe (not shown here) -- TODO confirm.
+    keep_only_tags     = [dict(name='div', attrs={'id':'article'})]
+    remove_tags_after = dict(name='div', attrs={'id':'tag-bar'})
+    # Note that div#tag-bar is both the remove_tags_after marker and listed in remove_tags itself.
+    remove_tags        = [
+                            dict(name=['base','object','link','iframe'])
+                           ,dict(name='div', attrs={'id':'tag-bar'})
+                         ]
+    # One (section title, RSS URL) pair per Neowin news section.
+    feeds          = [
+                        (u'Software' , u'http://www.neowin.net/news/rss/software' )
+                       ,(u'Gaming'   , u'http://www.neowin.net/news/rss/gaming'   )
+                       ,(u'Microsoft', u'http://www.neowin.net/news/rss/microsoft')
+                       ,(u'Apple'    , u'http://www.neowin.net/news/rss/apple'    )
+                       ,(u'Editorial', u'http://www.neowin.net/news/rss/editorial')
+                     ]
+    def image_url_processor(cls, baseurl, url):  # returns url unchanged; baseurl is ignored
+        return url  # NOTE(review): first parameter is named cls but no @classmethod decorator is present here
+
--- a/resources/recipes/sportsillustrated.recipe
+++ b/resources/recipes/sportsillustrated.recipe
@ -1,6 +1,5 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-#from random import randint
 from urllib import quote

 class SportsIllustratedRecipe(BasicNewsRecipe) :
@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
    __license__ = 'GPL v3'
    language = 'en'
    description = 'Sports Illustrated'
-    version = 1
+    version = 3
    title          = u'Sports Illustrated'

    no_stylesheets = True
    remove_javascript = True
-    #template_css = ''
    use_embedded_content   = False

    INDEX = 'http://sportsillustrated.cnn.com/'
@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
    def parse_index(self):
        answer = []
        soup = self.index_to_soup(self.INDEX)
-        # Find the link to the current issue on the front page.
+        # Find the link to the current issue on the front page. SI Cover
        cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
        if cover:
            currentIssue = cover.parent['href']
            if currentIssue:
                # Open the index of current issue
+
                index = self.index_to_soup(currentIssue)
+                self.log('\tLooking for current issue in: ' + currentIssue)
+                # Now let us see if they updated their frontpage
+                nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+                if nav:
+                    img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
+                    if img:
+                        parent = img.parent
+                        if parent.name == 'a':
+                            # They didn't update their frontpage; Load the next issue from here
+                            href = self.INDEX + parent['href']
+                            index = self.index_to_soup(href)
+                            self.log('\tLooking for current issue in: ' + href)
+
+                if index.find('div', 'siv_noArticleMessage'):
+                    nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+                    if nav:
+                    # Their frontpage points to an issue without any articles; Use the previous issue
+                        img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
+                        if img:
+                            parent = img.parent
+                            if parent.name == 'a':
+                                href = self.INDEX + parent['href']
+                                index = self.index_to_soup(href)
+                                self.log('\tLooking for current issue in: ' + href)
+

                # Find all articles.
                list = index.find('div', attrs = {'class' : 'siv_artList'})
@ -69,10 +93,8 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :

    def preprocess_html(self, soup):
        header = soup.find('div', attrs = {'class' : 'siv_artheader'})
-        if header:
-            # It's an article, prepare a container for the content
        homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
-            body = homeMadeSoup.find('body')
+        body = homeMadeSoup.body

        # Find the date, title and byline
        temp = header.find('td', attrs = {'class' : 'title'})
@ -93,7 +115,4 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
            body.append(para)

        return homeMadeSoup
-        else :
-            # It's a TOC, just return the whole lot
-            return soup

--- a/resources/recipes/wired.recipe
+++ b/resources/recipes/wired.recipe
@ -1,44 +1,105 @@
-#!/usr/bin/env  python
+
 __license__   = 'GPL v3'
-__docformat__ = 'restructuredtext en'
-
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.wired.com
+'''

+import re
+from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class Wired(BasicNewsRecipe):
-
-    title = 'Wired.com'
-    __author__ = 'Kovid Goyal'
-    description = 'Technology news'
-    timefmt  = ' [%Y%b%d  %H%M]'
-    language = 'en'
-
+    title                 = 'Wired Magazine'
+    __author__            = 'Darko Miletic'
+    description           = 'Gaming news'
+    publisher             = 'Conde Nast Digital'
+    category              = 'news, games, IT, gadgets'
+    oldest_article        = 32
+    max_articles_per_feed = 100
    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'en'
+    extra_css             = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
+    index                 = 'http://www.wired.com/magazine/'

-    remove_tags_before = dict(name='div', id='content')
-    remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
-        'footer', 'advertisement', 'blog_subscription_unit',
-        'brightcove_component']),
-        {'class':'entryActions'},
-        dict(name=['noscript', 'script'])]
+    preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }

-    feeds = [
-        ('Top News', 'http://feeds.wired.com/wired/index'),
-        ('Culture', 'http://feeds.wired.com/wired/culture'),
-        ('Software', 'http://feeds.wired.com/wired/software'),
-        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
-        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
-        ('Cars', 'http://feeds.wired.com/wired/cars'),
-        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
-        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
-        ('Science', 'http://feeds.wired.com/wired/science'),
-        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
-        ('Politics', 'http://feeds.wired.com/wired/politics'),
-        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
-        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+    remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
+    remove_tags = [
+                     dict(name=['object','embed','iframe','link'])
+                    ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
                  ]

+
+    #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
+
+    def parse_index(self):  # build [(section title, [article dict, ...]), ...] from the magazine index page
+        totalfeeds = []
+        # Fetch and parse the page at self.index once; both sections below read from this soup.
+        soup = self.index_to_soup(self.index)
+        features = soup.find('div',attrs={'id':'my-glider'})
+        if features:
+           farticles = []
+           for item in features.findAll('div',attrs={'class':'section'}):
+               divurl = item.find('div',attrs={'class':'feature-header'})
+               divdesc = item.find('div',attrs={'class':'feature-text'})
+               url   = 'http://www.wired.com' + divurl.a['href']  # href is prefixed with the site host here
+               title = self.tag_to_string(divurl.a)
+               description = self.tag_to_string(divdesc)
+               date  = strftime(self.timefmt)  # no date is read from the page; the current time is formatted instead
+               farticles.append({
+                                  'title'      :title
+                                 ,'date'       :date
+                                 ,'url'        :url
+                                 ,'description':description
+                                })
+           totalfeeds.append(('Featured Articles', farticles))
+        # Department feeds: one section per department div found inside div#magazine-departments
+        departments = ['rants','start','test','play','found']
+        dept = soup.find('div',attrs={'id':'magazine-departments'})
+        if dept:
+            for ditem in departments:
+                darticles = []
+                department = dept.find('div',attrs={'id':'department-'+ditem})
+                if department:
+                    for item in department.findAll('div'):
+                        description = ''
+                        feed_link = item.find('a')
+                        if feed_link and feed_link.has_key('href'):  # divs without a usable link are skipped
+                            url   = feed_link['href']  # used as-is, unlike the featured articles above
+                            title = self.tag_to_string(feed_link)
+                            date  = strftime(self.timefmt)
+                            darticles.append({
+                                              'title'      :title
+                                             ,'date'       :date
+                                             ,'url'        :url
+                                             ,'description':description
+                                            })
+                    totalfeeds.append((ditem.capitalize(), darticles))  # appended even when darticles is empty
+        return totalfeeds
+
+    def get_cover_url(self):  # return the cover image URL from the index page, or None when no cover div is found
+        cover_url = None
+        soup = self.index_to_soup(self.index)  # the index page is fetched again here
+        cover_item = soup.find('div',attrs={'class':'spread-image'})
+        if cover_item:
+           cover_url = 'http://www.wired.com' + cover_item.a.img['src']  # src is prefixed with the site host
+        return cover_url
+
    def print_version(self, url):
-        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
+        return url.rstrip('/') + '/all/1'

+    def preprocess_html(self, soup):  # strip inline styling, then hand the same soup back
+        for item in soup.findAll(style=True):  # every tag that carries a style attribute
+            del item['style']
+        return soup

--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS
 class BLACKBERRY(USBMS):

    name           = 'Blackberry Device Interface'
+    gui_name       = 'Blackberry'
    description    = _('Communicate with the Blackberry smart phone.')
    author         = _('Kovid Goyal')
-    supported_platforms = ['windows', 'linux']
+    supported_platforms = ['windows', 'linux', 'osx']

    # Ordered list of supported formats
    FORMATS     = ['mobi', 'prc']

    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0201]
+    BCD         = [0x0200, 0x0107, 0x0210]

    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

-    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
-
    MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry SD Card'

-    EBOOK_DIR_MAIN = 'ebooks'
+    EBOOK_DIR_MAIN = 'eBooks'
    SUPPORTS_SUB_DIRS = True
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -24,7 +24,7 @@ class DRMError(ValueError):

 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
                   'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc',
-                   'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
+                   'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml']

 class HTMLRenderer(object):
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -340,6 +340,9 @@ class ComicInput(InputFormatPlugin):
                        %stream.name)
            for line in open('comics.txt',
                    'rb').read().decode('utf-8').splitlines():
+                line = line.strip()
+                if not line:
+                    continue
                fname, title = line.partition(':')[0], line.partition(':')[-1]
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -268,7 +268,8 @@ class EPUBOutput(OutputFormatPlugin):
                # remove <img> tags with empty src elements
                bad = []
                for x in XPath('//h:img')(body):
-                    if not x.get('src', '').strip():
+                    src = x.get('src', '').strip()
+                    if src in ('', '#'):
                        bad.append(x)
                for img in bad:
                    img.getparent().remove(img)
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@ -20,7 +20,7 @@ class Reader(FormatReader):

        if record0_size == 132:
            self.reader = Reader132(header, stream, log, options)
-        elif record0_size == 202:
+        elif record0_size in (116, 202):
            self.reader = Reader202(header, stream, log, options)
        else:
            raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 '''
-Read content from ereader pdb file with a 202 byte header created by Makebook.
+Read content from ereader pdb file with a 116 or 202 byte header created by Makebook.
 '''
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -44,7 +44,7 @@ class Reader202(FormatReader):

        self.header_record = HeaderRecord(self.section_data(0))

-        if self.header_record.version != 4:
+        if self.header_record.version not in (2, 4):
            raise EreaderError('Unknown book version %i.' % self.header_record.version)

        from calibre.ebooks.metadata.pdb import get_metadata
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -18,38 +18,11 @@ class Font(object):
        self.color = spec.get('color')
        self.family = spec.get('family')

-class Column(object):
-
-    # A column contains an element is the element bulges out to
-    # the left or the right by at most HFUZZ*col width.
-    HFUZZ = 0.2
+class Element(object):

    def __init__(self):
-        self.left = self.right = self.top = self.bottom = 0
-        self.width = self.height = 0
-        self.elements = []
-
-    def add(self, elem):
-        if elem in self.elements: return
-        self.elements.append(elem)
-        self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
-        self.top = self.elements[0].top
-        self.bottom = self.elements[-1].bottom
-        self.left, self.right = sys.maxint, 0
-        for x in self:
-            self.left = min(self.left, x.left)
-            self.right = max(self.right, x.right)
-        self.width, self.height = self.right-self.left, self.bottom-self.top
-
-    def __iter__(self):
-        for x in self.elements:
-            yield x
-
-    def contains(self, elem):
-        return elem.left > self.left - self.HFUZZ*self.width and \
-               elem.right < self.right + self.HFUZZ*self.width
-
-class Element(object):
+        self.starts_block = None
+        self.block_style = None

    def __eq__(self, other):
        return self.id == other.id
@ -60,17 +33,21 @@ class Element(object):
 class Image(Element):

    def __init__(self, img, opts, log, idc):
+        Element.__init__(self)
        self.opts, self.log = opts, log
        self.id = idc.next()
        self.top, self.left, self.width, self.height, self.iwidth, self.iheight = \
          map(float, map(img.get, ('top', 'left', 'rwidth', 'rheight', 'iwidth',
              'iheight')))
        self.src = img.get('src')
+        self.bottom = self.top + self.height
+        self.right = self.left + self.width


 class Text(Element):

    def __init__(self, text, font_map, opts, log, idc):
+        Element.__init__(self)
        self.id = idc.next()
        self.opts, self.log = opts, log
        self.font_map = font_map
@ -140,6 +117,61 @@ class Interval(object):
    def __hash__(self):
        # Hash the formatted (left, right) pair. Both values must be passed to
        # the % operator as one tuple: the previous form formatted a two-slot
        # string with self.left alone and handed hash() two arguments, so it
        # raised TypeError on every call.
        return hash('(%f,%f)'%(self.left, self.right))

+class Column(object):
+    # A list of page elements kept sorted by bottom edge, together with their bounding box.
+    # A column contains an element if the element bulges out to
+    # the left or the right by at most HFUZZ*col width.
+    HFUZZ = 0.2
+
+    # Start empty: zero-sized bounding box, no elements, line separation of 0.
+    def __init__(self):
+        self.left = self.right = self.top = self.bottom = 0
+        self.width = self.height = 0
+        self.elements = []
+        self.average_line_separation = 0
+    # add(): insert elem unless an equal element is already present, then recompute the bounding box.
+    def add(self, elem):
+        if elem in self.elements: return
+        self.elements.append(elem)
+        self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))  # keep elements ordered by bottom edge
+        self.top = self.elements[0].top
+        self.bottom = self.elements[-1].bottom
+        self.left, self.right = sys.maxint, 0  # reset, then take min/max over every element
+        for x in self:
+            self.left = min(self.left, x.left)
+            self.right = max(self.right, x.right)
+        self.width, self.height = self.right-self.left, self.bottom-self.top
+    # Iterating a column yields its elements in stored (bottom-sorted) order.
+    def __iter__(self):
+        for x in self.elements:
+            yield x
+    # contains(): True when elem's left and right edges lie strictly inside the column widened by HFUZZ*width per side.
+    def contains(self, elem):
+        return elem.left > self.left - self.HFUZZ*self.width and \
+               elem.right < self.right + self.HFUZZ*self.width
+    # collect_stats(): compute the mean vertical gap and annotate each element with column-relative measurements.
+    def collect_stats(self):
+        if len(self.elements) > 1:  # a gap needs at least two elements; otherwise the value from __init__ stays
+            gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
+                    range(0, len(self.elements)-1)]
+            self.average_line_separation = sum(gaps)/len(gaps)
+        for i, elem in enumerate(self.elements):
+            left_margin = elem.left - self.left
+            elem.indent_fraction = left_margin/self.width  # NOTE(review): raises ZeroDivisionError if self.width is 0
+            elem.width_fraction = elem.width/self.width
+            if i == 0:
+                elem.top_gap = None  # the first element has no predecessor
+            else:
+                elem.top_gap = self.elements[i-1].bottom - elem.top  # NOTE(review): opposite sign to the gaps computed above -- confirm intended
+    # previous_element(): the element stored just before index idx, or None for the first one.
+    def previous_element(self, idx):
+        if idx == 0:
+            return None
+        return self.elements[idx-1]
+
+
+
+
 class Region(object):

    def __init__(self):
@ -156,6 +188,7 @@ class Region(object):
                    self.columns[i].add(elem)

    def contains(self, columns):
+        # TODO: handle unbalanced columns
        if not self.columns:
            return True
        if len(columns) != len(self.columns):
@ -172,7 +205,22 @@ class Region(object):

    @property
    def is_empty(self):
-        return len(self.elements) == 0
+        return len(self.columns) == 0
+
+    def collect_stats(self):  # refresh every column's statistics, then average their line separations
+        for column in self.columns:
+            column.collect_stats()
+        self.average_line_separation = sum([x.average_line_separation for x in
+            self.columns])/float(len(self.columns))  # NOTE(review): raises ZeroDivisionError when self.columns is empty
+
+    def __iter__(self):  # iterating a region yields its columns in stored order
+        for x in self.columns:
+            yield x
+
+    def linearize(self):  # flatten the region into self.elements, one column after another
+        self.elements = []  # any previous contents are discarded
+        for x in self.columns:
+            self.elements.extend(x)  # extend() consumes whatever iterating the column yields


 class Page(object):
@ -185,6 +233,8 @@ class Page(object):
    # for them to be considered to be part of the same text fragment
    LINE_FACTOR = 0.4

+    # Multiplies the average line height when determining row height
+    # of a particular element to detect columns.
    YFUZZ = 1.5


@ -263,10 +313,10 @@ class Page(object):
            columns = self.sort_into_columns(x, elems)
            processed.update(elems)
            if not current_region.contains(columns):
-                self.regions.append(self.current_region)
+                self.regions.append(current_region)
                current_region = Region()
            current_region.add(columns)
-        if not self.current_region.is_empty():
+        if not current_region.is_empty:
            self.regions.append(current_region)

    def sort_into_columns(self, elem, neighbors):
@ -287,7 +337,7 @@ class Page(object):

    def find_elements_in_row_of(self, x):
        interval = Interval(x.top,
-                x.top + self.YFUZZ*(1+self.average_text_height))
+                x.top + self.YFUZZ*(self.average_text_height))
        h_interval = Interval(x.left, x.right)
        for y in self.elements[x.idx:x.idx+15]:
            if y is not x:
@ -298,6 +348,12 @@ class Page(object):
                    x_interval.intersection(h_interval).width <= 0:
                    yield y

+    def second_pass(self):  # NOTE(review): the body below only gathers statistics and flattens each region; no boundary detection is visible here
+        'Locate paragraph boundaries in each column'
+        for region in self.regions:
+            region.collect_stats()  # per-column and per-region line-separation statistics
+            region.linearize()  # build region.elements from its columns
+

 class PDFDocument(object):

@ -327,6 +383,7 @@ class PDFDocument(object):
        for page in self.pages:
            page.document_font_stats = self.font_size_stats
            page.first_pass()
+            page.second_pass()

    def collect_font_statistics(self):
        self.font_size_stats = {}
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -138,7 +138,7 @@ class PMLMLizer(object):
        text = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to PML markup...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            text.append(self.add_page_anchor(item))
            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
        return ''.join(text)
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -522,7 +522,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            from calibre.ebooks.metadata import MetaInformation
            mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
            mi.author_sort = 'Schember, John'
-            mi.comments = "A guide to get you up an running with calibre"
+            mi.comments = "A guide to get you up and running with calibre"
            mi.publisher = 'calibre'
            self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
                    [mi])
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1,8 +1,10 @@
-import pickle, os, re, shutil, htmlentitydefs
+import os, re, shutil, htmlentitydefs

 from collections import namedtuple
 from xml.sax.saxutils import escape

+from PyQt4.Qt import *
+
 from calibre import filesystem_encoding
 from calibre.customize import CatalogPlugin
 from calibre.customize.conversion import OptionRecommendation, DummyReporter
@ -493,7 +495,6 @@ class EPUB_MOBI(CatalogPlugin):
                                       self.opts.output_profile.startswith("kindle")) else False
            self.__genres = None
            self.__htmlFileList = []
-            self.__libraryPath = self.fetchLibraryPath()
            self.__markerTags = self.getMarkerTags()
            self.__ncxSoup = None
            self.__playOrder = 1
@ -507,9 +508,10 @@ class EPUB_MOBI(CatalogPlugin):
            self.__title = opts.catalog_title
            self.__verbose = opts.verbose

-            if self.verbose:
-                self.opts.log.info("CatalogBuilder(): Generating %s for %s" % (self.opts.fmt, self.opts.output_profile))
-            
+            self.opts.log.info("CatalogBuilder(): Generating %s %s"% \
+                                (self.opts.fmt, 
+                                 "for %s" % self.opts.output_profile if self.opts.output_profile \
+                                  else ''))
        # Accessors
        '''
        @dynamic_property
@ -748,6 +750,8 @@ class EPUB_MOBI(CatalogPlugin):
            self.generateHTMLByTags()

            if getattr(self.reporter, 'cancel_requested', False): return 1
+            from calibre.utils.PythonMagickWand import ImageMagick
+            with ImageMagick():
                self.generateThumbnails()

            if getattr(self.reporter, 'cancel_requested', False): return 1
@ -794,9 +798,7 @@ class EPUB_MOBI(CatalogPlugin):

        def fetchBooksByTitle(self):

-            result = self.updateProgressFullStep("fetchBooksByTitle()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("fetchBooksByTitle()"))

            # Get the database as a dictionary
            # Sort by title
@ -873,7 +875,7 @@ class EPUB_MOBI(CatalogPlugin):
            # Re-sort based on title_sort
            self.booksByTitle = sorted(titles,
                                 key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper()))
-            if self.verbose:
+            if False and self.verbose:
                self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle))
                for title in self.booksByTitle:
                    self.opts.log.info((u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8'))
@ -881,9 +883,7 @@ class EPUB_MOBI(CatalogPlugin):
        def fetchBooksByAuthor(self):
            # Generate a list of titles sorted by author from the database

-            result = self.updateProgressFullStep("fetchBooksByAuthor()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("fetchBooksByAuthor()"))

            # Sort titles case-insensitive
            self.booksByAuthor = sorted(self.booksByTitle,
@ -927,19 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
                unique_authors.append((current_author[0], current_author[1].title(),
                                       books_by_current_author))

-            if self.verbose:
+            if False and self.verbose:
                self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
                for author in unique_authors:
                    self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],  
                       author[2])).encode('utf-8'))               
-                    
            self.authors = unique_authors

        def generateHTMLDescriptions(self):
            # Write each title to a separate HTML file in contentdir
-            result = self.updateProgressFullStep("generateHTMLDescriptions()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateHTMLDescriptions()"))

            for (title_num, title) in enumerate(self.booksByTitle):
                if False:
@ -1066,9 +1063,7 @@ class EPUB_MOBI(CatalogPlugin):
        def generateHTMLByTitle(self):
            # Write books by title A-Z to HTML file

-            result = self.updateProgressFullStep("generateHTMLByTitle()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateHTMLByTitle()"))

            soup = self.generateHTMLEmptyHeader("Books By Alpha Title")
            body = soup.find('body')
@ -1170,9 +1165,8 @@ class EPUB_MOBI(CatalogPlugin):

        def generateHTMLByAuthor(self):
            # Write books by author A-Z
-            result = self.updateProgressFullStep("generateHTMLByAuthor()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateHTMLByAuthor()"))
+
            friendly_name = "By Author"

            soup = self.generateHTMLEmptyHeader(friendly_name)
@ -1303,9 +1297,7 @@ class EPUB_MOBI(CatalogPlugin):
            # Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ...
            # Note that special tags - ~+*[] -  have already been filtered from books[]

-            result = self.updateProgressFullStep("generateHTMLByTags()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateHTMLByTags()"))

            # Filter out REMOVE_TAGS, sort
            filtered_tags = self.filterDbTags(self.db.all_tags())
@ -1313,13 +1305,11 @@ class EPUB_MOBI(CatalogPlugin):
            # Extract books matching filtered_tags
            genre_list = []
            for tag in filtered_tags:
-                if False : print "searching for %s" % tag
                tag_list = {}
                tag_list['tag'] = tag
                tag_list['books'] = []
                for book in self.booksByAuthor:
                    if 'tags' in book and tag in book['tags']:
-                        if False: print "\t %s" % (book['title'])
                        this_book = {}
                        this_book['author'] = book['author']
                        this_book['title'] = book['title']
@ -1336,12 +1326,10 @@ class EPUB_MOBI(CatalogPlugin):
            # genre_list = [ [tag_list], [tag_list] ...]
            master_genre_list = []
            for (index, genre) in enumerate(genre_list):
-                if False : print "genre: %s" % genre['tag']

                # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
                authors = []
                for book in genre['books']:
-                    #print "\t %s - %s" % (book['title'], book['author'])
                    authors.append((book['author'],book['author_sort']))

                # authors[] contains a list of all book authors, with multiple entries for multiple books by author
@ -1384,9 +1372,7 @@ class EPUB_MOBI(CatalogPlugin):
            # Generate a thumbnail per cover.  If a current thumbnail exists, skip
            # If a cover doesn't exist, use default
            # Return list of active thumbs
-            result = self.updateProgressFullStep("generateThumbnails()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateThumbnails()"))

            thumbs = ['thumbnail_default.jpg']

@ -1398,7 +1384,6 @@ class EPUB_MOBI(CatalogPlugin):
                        i/float(len(self.booksByTitle)))
                # Check to see if source file exists
                if 'cover' in title and os.path.isfile(title['cover']):
-                    # print "cover found for %s" % title['title']
                    # Add the thumb spec to thumbs[]
                    thumbs.append("thumbnail_%d.jpg" % int(title['id']))

@ -1415,11 +1400,10 @@ class EPUB_MOBI(CatalogPlugin):
                           self.generateThumbnail(title, image_dir, thumb_file)
                    else:
                        self.generateThumbnail(title, image_dir, thumb_file)
-
                else:
                    # Use default cover
                    if self.verbose: 
-                        self.opts.log.warn(" no cover available for %s, will use default" % \
+                        self.opts.log.warn(" using default cover for '%s'" % \
                        (title['title']))
                    # Check to make sure default is current
                    # Check to see if thumbnail exists
@ -1428,16 +1412,16 @@ class EPUB_MOBI(CatalogPlugin):

                    # Init Qt for image conversion
                    from calibre.gui2 import is_ok_to_use_qt
-                    is_ok_to_use_qt()
-                    from PyQt4.QtGui import QImage
-
-                    # I() fetches path to resource, e.g. I('book.svg') returns:
-                    # /Applications/calibre.app/Contents/Resources/resources/images/book.svg
-                    # Convert .svg to .jpg
-                    default_cover = I('book.svg')
-                    cover_img = QImage()
-                    cover_img.load(default_cover)
-                    cover_img.save(cover, "PNG", -1)
+                    if is_ok_to_use_qt():
+                        # Render default book image against white bg
+                        i = QImage(I('book.svg'))
+                        i2 = QImage(i.size(),QImage.Format_ARGB32_Premultiplied )
+                        i2.fill(QColor(Qt.white).rgb())
+                        p = QPainter()
+                        p.begin(i2)
+                        p.drawImage(0, 0, i)
+                        p.end()
+                        i2.save(cover, "PNG", -1)
    
                        if os.path.isfile(thumb_fp):
                            # Check to see if default cover is newer than thumbnail
@ -1453,18 +1437,18 @@ class EPUB_MOBI(CatalogPlugin):
                                self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
                        else:
                            if self.verbose: 
-                            self.opts.log.info(" generating new thumbnail_default.jpg")
+                                self.opts.log.info(" generating default cover thumbnail")
                            #title['cover'] = "%s/DefaultCover.jpg" % self.catalogPath
                            title['cover'] = cover
                            self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
+                    else:
+                        self.opts.log.error("Not OK to use PyQt, can't create default thumbnail")
                        
            self.thumbs = thumbs

        def generateOPF(self):

-            result = self.updateProgressFullStep("generateOPF()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateOPF()"))

            header = '''
                <?xml version="1.0" encoding="UTF-8"?>
@ -1596,9 +1580,7 @@ class EPUB_MOBI(CatalogPlugin):

        def generateNCXHeader(self):

-            result = self.updateProgressFullStep("generateNCXHeader()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateNCXHeader()"))

            header = '''
                <?xml version="1.0" encoding="utf-8"?>
@ -1613,7 +1595,6 @@ class EPUB_MOBI(CatalogPlugin):
            navPointTag['class'] = "periodical"
            navPointTag['id'] = "title"
            navPointTag['playOrder'] = self.playOrder
-            #print "generateNCXHeader(periodical): self.playOrder: %d" % self.playOrder
            self.playOrder += 1
            navLabelTag = Tag(soup, 'navLabel')
            textTag = Tag(soup, 'text')
@ -1635,9 +1616,7 @@ class EPUB_MOBI(CatalogPlugin):

        def generateNCXDescriptions(self, tocTitle):

-            result = self.updateProgressFullStep("generateNCXDescriptions()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateNCXDescriptions()"))

            # --- Construct the 'Books by Title' section ---
            ncx_soup = self.ncxSoup
@ -1649,7 +1628,6 @@ class EPUB_MOBI(CatalogPlugin):
            navPointTag['class'] = "section"
            navPointTag['id'] = "bytitle-ID"
            navPointTag['playOrder'] = self.playOrder
-            #print "generateNCXDescriptions(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
            self.playOrder += 1
            navLabelTag = Tag(ncx_soup, 'navLabel')
            textTag = Tag(ncx_soup, 'text')
@ -1669,7 +1647,6 @@ class EPUB_MOBI(CatalogPlugin):
                navPointVolumeTag['class'] = "article"
                navPointVolumeTag['id'] = "book%dID" % int(book['id'])
                navPointVolumeTag['playOrder'] = self.playOrder
-                #print "generateNCXDescriptions(article): self.playOrder: %d" % self.playOrder
                self.playOrder += 1
                navLabelTag = Tag(ncx_soup, "navLabel")
                textTag = Tag(ncx_soup, "text")
@ -1707,9 +1684,7 @@ class EPUB_MOBI(CatalogPlugin):

        def generateNCXByTitle(self, tocTitle):

-            result = self.updateProgressFullStep("generateNCXByTitle()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateNCXByTitle()"))

            soup = self.ncxSoup
            output = "ByAlphaTitle"
@ -1721,7 +1696,6 @@ class EPUB_MOBI(CatalogPlugin):
            navPointTag['class'] = "section"
            navPointTag['id'] = "byalphatitle-ID"
            navPointTag['playOrder'] = self.playOrder
-            #print "generateNCXByTitle(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
            self.playOrder += 1
            navLabelTag = Tag(soup, 'navLabel')
            textTag = Tag(soup, 'text')
@ -1799,9 +1773,7 @@ class EPUB_MOBI(CatalogPlugin):

        def generateNCXByAuthor(self, tocTitle):

-            result = self.updateProgressFullStep("generateNCXByAuthor()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateNCXByAuthor()"))

            soup = self.ncxSoup
            HTML_file = "content/ByAlphaAuthor.html"
@ -1844,7 +1816,7 @@ class EPUB_MOBI(CatalogPlugin):
                        author_list += " &hellip;"

                    author_list = self.formatNCXText(author_list)
-                    if self.verbose:
+                    if False and self.verbose:
                        self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
                    master_author_list.append((author_list, current_letter))

@ -1860,7 +1832,7 @@ class EPUB_MOBI(CatalogPlugin):
            if len(current_author_list) == self.descriptionClip:
                author_list += " &hellip;"
            author_list = self.formatNCXText(author_list)
-            if self.verbose:
+            if False and self.verbose:
                self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
            master_author_list.append((author_list, current_letter))

@ -1902,9 +1874,7 @@ class EPUB_MOBI(CatalogPlugin):
            # Add each genre as an article
            # 'tag', 'file', 'authors'

-            result = self.updateProgressFullStep("generateNCXByTags()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("generateNCXByTags()"))

            ncx_soup = self.ncxSoup
            body = ncx_soup.find("navPoint")
@ -1917,7 +1887,6 @@ class EPUB_MOBI(CatalogPlugin):
            file_ID = file_ID.replace(" ","")
            navPointTag['id'] = "%s-ID" % file_ID
            navPointTag['playOrder'] = self.playOrder
-            #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)                        
            self.playOrder += 1
            navLabelTag = Tag(ncx_soup, 'navLabel')
            textTag = Tag(ncx_soup, 'text')
@ -1939,7 +1908,6 @@ class EPUB_MOBI(CatalogPlugin):
                navPointVolumeTag['class'] = "article"
                navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag']
                navPointVolumeTag['playOrder'] = self.playOrder
-                #print "generateNCXByTags(article '%s'): self.playOrder: %d" % (genre['tag'], self.playOrder)
                self.playOrder += 1
                navLabelTag = Tag(ncx_soup, "navLabel")
                textTag = Tag(ncx_soup, "text")
@ -2000,34 +1968,12 @@ class EPUB_MOBI(CatalogPlugin):

        def writeNCX(self):

-            result = self.updateProgressFullStep("writeNCX()")
-            if self.verbose:
-                self.opts.log.info(result)
+            self.opts.log.info(self.updateProgressFullStep("writeNCX()"))
+
            outfile = open("%s/%s.ncx" % (self.catalogPath, self.basename), 'w')
            outfile.write(self.ncxSoup.prettify())

        # Helpers
-        def contents(self, element, title, key=None):
-            content = None
-
-            if element is None:
-                return None
-
-            # Some elements seem to have \n fields
-            for node in element:
-                if node == "\n":
-                    continue
-                else:
-                    content = node
-            # Special handling for '&amp;' in 'cover'
-            if key == 'cover' and re.search('&amp;',content):
-                content = re.sub('&amp;','&',content)
-
-            if content:
-                return unicode(content)
-            else:
-                return None
-
        def convertHTMLEntities(self, s):
            matches = re.findall("&#\d+;", s)
            if len(matches) > 0:
@ -2057,24 +2003,16 @@ class EPUB_MOBI(CatalogPlugin):
            self.cleanUp()

            if not os.path.isdir(catalogPath):
-                #if self.verbose: print " creating %s" % catalogPath
                os.makedirs(catalogPath)

            # Create /content and /images
            content_path = catalogPath + "/content"
            if not os.path.isdir(content_path):
-                #if self.verbose: print " creating %s" % content_path
                os.makedirs(content_path)
            images_path = catalogPath + "/images"
            if not os.path.isdir(images_path):
-                #if self.verbose: print " creating %s" % images_path
                os.makedirs(images_path)

-        def fetchLibraryPath(self):
-            # Return a path to the current library
-            from calibre.utils.config import prefs
-            return prefs['library_path']
-
        def getMarkerTags(self):
            ''' Return a list of special marker tags to be excluded from genre list '''
            markerTags = []
@ -2089,13 +2027,9 @@ class EPUB_MOBI(CatalogPlugin):

            filtered_tags = []
            for tag in tags:
-                # Check the leading character
                if tag[0] in self.markerTags:
-                    #print "skipping %s" % tag
                    continue
-                # Check the exclude_genre pattern
                if re.search(self.opts.exclude_genre, tag):
-                    #print "skipping %s" % tag
                    continue

                filtered_tags.append(tag)
@ -2340,11 +2274,9 @@ class EPUB_MOBI(CatalogPlugin):
            # Convert numbers to strings, ignore leading stop words
            # The 21-Day Consciousness Cleanse

-            if False: print "generate_sort_title(%s)" % title
            title_words = title.split(' ')
            if title_words[0].lower() in ['the','a','an']:
                stop_word = title_words.pop(0)
-                if False : print "removing stop word '%s'" % stop_word

            # Scan for numbers in each word clump
            translated = []
@ -2359,19 +2291,18 @@ class EPUB_MOBI(CatalogPlugin):

        def generateThumbnail(self, title, image_dir, thumb_file):
            import calibre.utils.PythonMagickWand as pw
-            with pw.ImageMagick():
            try:
                img = pw.NewMagickWand()
                if img < 0:
-                        raise RuntimeError('generate_thumbnail(): Cannot create wand')
+                    raise RuntimeError('generateThumbnail(): Cannot create wand')
                # Read the cover
                if not pw.MagickReadImage(img,
                        title['cover'].encode(filesystem_encoding)):
-                        self.opts.log.info('Failed to read cover image from: %s' % title['cover'])
+                    self.opts.log.error('generateThumbnail(): Failed to read cover image from: %s' % title['cover'])
                    raise IOError
                thumb = pw.CloneMagickWand(img)
                if thumb < 0:
-                        self.opts.log.info('generateThumbnail(): Cannot clone cover')
+                    self.opts.log.error('generateThumbnail(): Cannot clone cover')
                    raise RuntimeError
                # img, width, height
                pw.MagickThumbnailImage(thumb, 75, 100)
@ -2379,9 +2310,9 @@ class EPUB_MOBI(CatalogPlugin):
                pw.DestroyMagickWand(thumb)
                pw.DestroyMagickWand(img)
            except IOError:
-                    self.opts.log.info("generateThumbnail() IOError with %s" % title['title'])
+                self.opts.log.error("generateThumbnail(): IOError with %s" % title['title'])
            except RuntimeError:
-                    self.opts.log.info("generateThumbnail() RuntimeError with %s" % title['title'])
+                self.opts.log.error("generateThumbnail(): RuntimeError with %s" % title['title'])

        def processSpecialTags(self, tags, this_title, opts):
            tag_list = []
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1415,9 +1415,10 @@ class LibraryDatabase2(LibraryDatabase):
        if matches:
            tag_matches = self.data.get_matches('tags', _('Catalog'))
            matches = matches.intersection(tag_matches)
-        db_id = None
+        db_id, existing = None, False
        if matches:
            db_id = list(matches)[0]
+            existing = True
        if db_id is None:
            obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
                                (title, 'calibre'))
@ -1433,6 +1434,10 @@ class LibraryDatabase2(LibraryDatabase):
        if not hasattr(path, 'read'):
            stream.close()
        self.conn.commit()
+        if existing:
+            t = datetime.utcnow()
+            self.set_timestamp(db_id, t, notify=False)
+            self.set_pubdate(db_id, t, notify=False)
        self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
        return db_id

--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@ -714,6 +714,10 @@ class LibraryServer(object):

        book, books = MarkupTemplate(self.MOBILE_BOOK), []
        for record in items[(start-1):(start-1)+num]:
+            if record[13] is None:
+                record[13] = ''
+            if record[6] is None:
+                record[6] = 0
            aus = record[2] if record[2] else __builtin__._('Unknown')
            authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
            record[10] = fmt_sidx(float(record[10]))
--- a/src/calibre/utils/ipc/job.py
+++ b/src/calibre/utils/ipc/job.py
@ -52,8 +52,10 @@ class BaseJob(object):
            else:
                self._status_text = _('Error') if self.failed else _('Finished')
            if DEBUG:
-                prints('Job:', self.id, self.description, 'finished')
-                prints('\t'.join(self.details.splitlines(True)))
+                prints('Job:', self.id, self.description, 'finished',
+                        safe_encode=True)
+                prints('\t'.join(self.details.splitlines(True)),
+                        safe_encode=True)
            if not self._done_called:
                self._done_called = True
                try: