diff --git a/resources/catalog/mastheadImage.gif b/resources/catalog/mastheadImage.gif
index 1f2287a857..06340a09a9 100644
Binary files a/resources/catalog/mastheadImage.gif and b/resources/catalog/mastheadImage.gif differ
diff --git a/resources/images/news/neowin.png b/resources/images/news/neowin.png
new file mode 100644
index 0000000000..5aee949c0b
Binary files /dev/null and b/resources/images/news/neowin.png differ
diff --git a/resources/recipes/common_dreams.recipe b/resources/recipes/common_dreams.recipe
index b662cc3ee0..5443b5890b 100644
--- a/resources/recipes/common_dreams.recipe
+++ b/resources/recipes/common_dreams.recipe
@@ -2,17 +2,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
+ # Identify the recipe
+
title = u'Common Dreams'
description = u'Progressive news and views'
__author__ = u'XanthanGum'
language = 'en'
+
+ # Format the text
+
+ extra_css = '''
+ body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+ h1{font-size: xx-large;}
+ h2{font-size: large;}
+ '''
+ # Pick no article older than seven days and limit the number of articles per feed to 100
+
oldest_article = 7
max_articles_per_feed = 100
- feeds = [
- (u'Common Dreams Headlines',
- u'http://www.commondreams.org/feed/headlines_rss'),
- (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'),
- (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
- ]
+ # Remove everything before the article
+
+ remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
+
+ # Remove everything after the article
+
+ remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
+
+ # Identify the news feeds
+
+ feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
+ (u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'),
+ (u'Views', u'http://www.commondreams.org/feed/views_rss'),
+ (u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]
diff --git a/resources/recipes/neowin.recipe b/resources/recipes/neowin.recipe
new file mode 100644
index 0000000000..9f5a669a75
--- /dev/null
+++ b/resources/recipes/neowin.recipe
@@ -0,0 +1,40 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Neowin(BasicNewsRecipe):
+ title = u'Neowin.net'
+ oldest_article = 5
+ language = 'en'
+ description = 'News from IT'
+ publisher = 'Neowin'
+ category = 'news, IT, Microsoft, Apple, hardware, software, games'
+ __author__ = 'Darko Miletic'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ encoding = 'utf8'
+
+ conversion_options = {
+ 'tags' : category
+ ,'language' : language
+ ,'comments' : description
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+ remove_tags_after = dict(name='div', attrs={'id':'tag-bar'})
+
+ remove_tags = [
+ dict(name=['base','object','link','iframe'])
+ ,dict(name='div', attrs={'id':'tag-bar'})
+ ]
+
+ feeds = [
+ (u'Software' , u'http://www.neowin.net/news/rss/software' )
+ ,(u'Gaming' , u'http://www.neowin.net/news/rss/gaming' )
+ ,(u'Microsoft', u'http://www.neowin.net/news/rss/microsoft')
+ ,(u'Apple' , u'http://www.neowin.net/news/rss/apple' )
+ ,(u'Editorial', u'http://www.neowin.net/news/rss/editorial')
+ ]
+ def image_url_processor(cls, baseurl, url):
+ return url
+
diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe
index 0dbae1ebc0..dd1df16ac7 100644
--- a/resources/recipes/sportsillustrated.recipe
+++ b/resources/recipes/sportsillustrated.recipe
@@ -1,6 +1,5 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
-#from random import randint
from urllib import quote
class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
language = 'en'
description = 'Sports Illustrated'
- version = 1
+ version = 3
title = u'Sports Illustrated'
no_stylesheets = True
remove_javascript = True
- #template_css = ''
use_embedded_content = False
INDEX = 'http://sportsillustrated.cnn.com/'
@@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def parse_index(self):
answer = []
soup = self.index_to_soup(self.INDEX)
- # Find the link to the current issue on the front page.
+ # Find the link to the current issue on the front page. SI Cover
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
if cover:
currentIssue = cover.parent['href']
if currentIssue:
# Open the index of current issue
+
index = self.index_to_soup(currentIssue)
+ self.log('\tLooking for current issue in: ' + currentIssue)
+ # Now let us see if they updated their frontpage
+ nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+ if nav:
+ img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
+ if img:
+ parent = img.parent
+ if parent.name == 'a':
+ # They didn't update their frontpage; Load the next issue from here
+ href = self.INDEX + parent['href']
+ index = self.index_to_soup(href)
+ self.log('\tLooking for current issue in: ' + href)
+
+ if index.find('div', 'siv_noArticleMessage'):
+ nav = index.find('div', attrs = {'class': 'siv_trav_top'})
+ if nav:
+ # Their frontpage points to an issue without any articles; Use the previous issue
+ img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
+ if img:
+ parent = img.parent
+ if parent.name == 'a':
+ href = self.INDEX + parent['href']
+ index = self.index_to_soup(href)
+ self.log('\tLooking for current issue in: ' + href)
+
# Find all articles.
list = index.find('div', attrs = {'class' : 'siv_artList'})
@@ -69,31 +93,26 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def preprocess_html(self, soup):
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
- if header:
- # It's an article, prepare a container for the content
- homeMadeSoup = BeautifulSoup('
')
- body = homeMadeSoup.find('body')
+ homeMadeSoup = BeautifulSoup('')
+ body = homeMadeSoup.body
- # Find the date, title and byline
- temp = header.find('td', attrs = {'class' : 'title'})
- if temp :
- date = temp.find('div', attrs = {'class' : 'date'})
- if date:
- body.append(date)
- if temp.h1:
- body.append(temp.h1)
- if temp.h2 :
- body.append(temp.h2)
- byline = temp.find('div', attrs = {'class' : 'byline'})
- if byline:
- body.append(byline)
+ # Find the date, title and byline
+ temp = header.find('td', attrs = {'class' : 'title'})
+ if temp :
+ date = temp.find('div', attrs = {'class' : 'date'})
+ if date:
+ body.append(date)
+ if temp.h1:
+ body.append(temp.h1)
+ if temp.h2 :
+ body.append(temp.h2)
+ byline = temp.find('div', attrs = {'class' : 'byline'})
+ if byline:
+ body.append(byline)
- # Find the content
- for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
- body.append(para)
+ # Find the content
+ for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
+ body.append(para)
- return homeMadeSoup
- else :
- # It's a TOC, just return the whole lot
- return soup
+ return homeMadeSoup
diff --git a/resources/recipes/wired.recipe b/resources/recipes/wired.recipe
index fcc2494850..e7395a9ada 100644
--- a/resources/recipes/wired.recipe
+++ b/resources/recipes/wired.recipe
@@ -1,44 +1,105 @@
-#!/usr/bin/env python
+
__license__ = 'GPL v3'
-__docformat__ = 'restructuredtext en'
-
+__copyright__ = '2010, Darko Miletic '
+'''
+www.wired.com
+'''
+import re
+from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe):
+ title = 'Wired Magazine'
+ __author__ = 'Darko Miletic'
+ description = 'Gaming news'
+ publisher = 'Conde Nast Digital'
+ category = 'news, games, IT, gadgets'
+ oldest_article = 32
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ encoding = 'utf-8'
+ use_embedded_content = False
+ language = 'en'
+ extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
+ index = 'http://www.wired.com/magazine/'
- title = 'Wired.com'
- __author__ = 'Kovid Goyal'
- description = 'Technology news'
- timefmt = ' [%Y%b%d %H%M]'
- language = 'en'
+ preprocess_regexps = [(re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '')]
+ conversion_options = {
+ 'comments' : description  # option name is 'comments' (as in the Neowin recipe); 'comment' is not a conversion option
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ }
- no_stylesheets = True
+ keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+ remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
+ remove_tags = [
+ dict(name=['object','embed','iframe','link'])
+ ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
+ ]
- remove_tags_before = dict(name='div', id='content')
- remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
- 'footer', 'advertisement', 'blog_subscription_unit',
- 'brightcove_component']),
- {'class':'entryActions'},
- dict(name=['noscript', 'script'])]
- feeds = [
- ('Top News', 'http://feeds.wired.com/wired/index'),
- ('Culture', 'http://feeds.wired.com/wired/culture'),
- ('Software', 'http://feeds.wired.com/wired/software'),
- ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
- ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
- ('Cars', 'http://feeds.wired.com/wired/cars'),
- ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
- ('Gaming', 'http://feeds.wired.com/wired/gaming'),
- ('Science', 'http://feeds.wired.com/wired/science'),
- ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
- ('Politics', 'http://feeds.wired.com/wired/politics'),
- ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
- ('Commentary', 'http://feeds.wired.com/wired/commentary'),
- ]
+ #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
+
+ def parse_index(self):
+ totalfeeds = []
+
+ soup = self.index_to_soup(self.index)
+ features = soup.find('div',attrs={'id':'my-glider'})
+ if features:
+ farticles = []
+ for item in features.findAll('div',attrs={'class':'section'}):
+ divurl = item.find('div',attrs={'class':'feature-header'})
+ divdesc = item.find('div',attrs={'class':'feature-text'})
+ url = 'http://www.wired.com' + divurl.a['href']
+ title = self.tag_to_string(divurl.a)
+ description = self.tag_to_string(divdesc)
+ date = strftime(self.timefmt)
+ farticles.append({
+ 'title' :title
+ ,'date' :date
+ ,'url' :url
+ ,'description':description
+ })
+ totalfeeds.append(('Featured Articles', farticles))
+ #department feeds
+ departments = ['rants','start','test','play','found']
+ dept = soup.find('div',attrs={'id':'magazine-departments'})
+ if dept:
+ for ditem in departments:
+ darticles = []
+ department = dept.find('div',attrs={'id':'department-'+ditem})
+ if department:
+ for item in department.findAll('div'):
+ description = ''
+ feed_link = item.find('a')
+ if feed_link and feed_link.has_key('href'):
+ url = feed_link['href']
+ title = self.tag_to_string(feed_link)
+ date = strftime(self.timefmt)
+ darticles.append({
+ 'title' :title
+ ,'date' :date
+ ,'url' :url
+ ,'description':description
+ })
+ totalfeeds.append((ditem.capitalize(), darticles))
+ return totalfeeds
+
+ def get_cover_url(self):
+ cover_url = None
+ soup = self.index_to_soup(self.index)
+ cover_item = soup.find('div',attrs={'class':'spread-image'})
+ if cover_item:
+ cover_url = 'http://www.wired.com' + cover_item.a.img['src']
+ return cover_url
def print_version(self, url):
- return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
+ return url.rstrip('/') + '/all/1'
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ return soup
diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
index ec8a7e8f49..6a9bb84431 100644
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS
class BLACKBERRY(USBMS):
name = 'Blackberry Device Interface'
+ gui_name = 'Blackberry'
description = _('Communicate with the Blackberry smart phone.')
author = _('Kovid Goyal')
- supported_platforms = ['windows', 'linux']
+ supported_platforms = ['windows', 'linux', 'osx']
# Ordered list of supported formats
FORMATS = ['mobi', 'prc']
VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004]
- BCD = [0x0200, 0x0107, 0x0201]
+ BCD = [0x0200, 0x0107, 0x0210]
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
- #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
-
MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card'
- EBOOK_DIR_MAIN = 'ebooks'
+ EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index a9985e6480..d5651568fa 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -24,7 +24,7 @@ class DRMError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc',
- 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
+ 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml']
class HTMLRenderer(object):
diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 85590a7bae..122f61e45a 100755
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -340,6 +340,9 @@ class ComicInput(InputFormatPlugin):
%stream.name)
for line in open('comics.txt',
'rb').read().decode('utf-8').splitlines():
+ line = line.strip()
+ if not line:
+ continue
fname, title = line.partition(':')[0], line.partition(':')[-1]
fname = os.path.join(tdir, *fname.split('/'))
if not title:
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index bffc24ac91..8e9c9efea9 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -268,7 +268,8 @@ class EPUBOutput(OutputFormatPlugin):
 # remove <img> tags with empty src elements
bad = []
for x in XPath('//h:img')(body):
- if not x.get('src', '').strip():
+ src = x.get('src', '').strip()
+ if src in ('', '#'):
bad.append(x)
for img in bad:
img.getparent().remove(img)
diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py
index ad1df98793..71ba3efdc6 100644
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@@ -20,7 +20,7 @@ class Reader(FormatReader):
if record0_size == 132:
self.reader = Reader132(header, stream, log, options)
- elif record0_size == 202:
+ elif record0_size in (116, 202):
self.reader = Reader202(header, stream, log, options)
else:
raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py
index a674c5bf60..590301b433 100644
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
'''
-Read content from ereader pdb file with a 202 byte header created by Makebook.
+Read content from ereader pdb file with a 116 or 202 byte header created by Makebook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember '
@@ -44,7 +44,7 @@ class Reader202(FormatReader):
self.header_record = HeaderRecord(self.section_data(0))
- if self.header_record.version != 4:
+ if self.header_record.version not in (2, 4):
raise EreaderError('Unknown book version %i.' % self.header_record.version)
from calibre.ebooks.metadata.pdb import get_metadata
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 92a0ceebe1..3fef8c30ce 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -18,38 +18,11 @@ class Font(object):
self.color = spec.get('color')
self.family = spec.get('family')
-class Column(object):
-
- # A column contains an element is the element bulges out to
- # the left or the right by at most HFUZZ*col width.
- HFUZZ = 0.2
+class Element(object):
def __init__(self):
- self.left = self.right = self.top = self.bottom = 0
- self.width = self.height = 0
- self.elements = []
-
- def add(self, elem):
- if elem in self.elements: return
- self.elements.append(elem)
- self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
- self.top = self.elements[0].top
- self.bottom = self.elements[-1].bottom
- self.left, self.right = sys.maxint, 0
- for x in self:
- self.left = min(self.left, x.left)
- self.right = max(self.right, x.right)
- self.width, self.height = self.right-self.left, self.bottom-self.top
-
- def __iter__(self):
- for x in self.elements:
- yield x
-
- def contains(self, elem):
- return elem.left > self.left - self.HFUZZ*self.width and \
- elem.right < self.right + self.HFUZZ*self.width
-
-class Element(object):
+ self.starts_block = None
+ self.block_style = None
def __eq__(self, other):
return self.id == other.id
@@ -60,17 +33,21 @@ class Element(object):
class Image(Element):
def __init__(self, img, opts, log, idc):
+ Element.__init__(self)
self.opts, self.log = opts, log
self.id = idc.next()
self.top, self.left, self.width, self.height, self.iwidth, self.iheight = \
map(float, map(img.get, ('top', 'left', 'rwidth', 'rheight', 'iwidth',
'iheight')))
self.src = img.get('src')
+ self.bottom = self.top + self.height
+ self.right = self.left + self.width
class Text(Element):
def __init__(self, text, font_map, opts, log, idc):
+ Element.__init__(self)
self.id = idc.next()
self.opts, self.log = opts, log
self.font_map = font_map
@@ -140,6 +117,61 @@ class Interval(object):
def __hash__(self):
return hash('(%f,%f)'%self.left, self.right)
+class Column(object):
+
+ # A column contains an element if the element bulges out to
+ # the left or the right by at most HFUZZ*col width.
+ HFUZZ = 0.2
+
+
+ def __init__(self):
+ self.left = self.right = self.top = self.bottom = 0
+ self.width = self.height = 0
+ self.elements = []
+ self.average_line_separation = 0
+
+ def add(self, elem):
+ if elem in self.elements: return
+ self.elements.append(elem)
+ self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
+ self.top = self.elements[0].top
+ self.bottom = self.elements[-1].bottom
+ self.left, self.right = sys.maxint, 0
+ for x in self:
+ self.left = min(self.left, x.left)
+ self.right = max(self.right, x.right)
+ self.width, self.height = self.right-self.left, self.bottom-self.top
+
+ def __iter__(self):
+ for x in self.elements:
+ yield x
+
+ def contains(self, elem):
+ return elem.left > self.left - self.HFUZZ*self.width and \
+ elem.right < self.right + self.HFUZZ*self.width
+
+ def collect_stats(self):
+ if len(self.elements) > 1:
+ gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
+ range(0, len(self.elements)-1)]
+ self.average_line_separation = sum(gaps)/len(gaps)
+ for i, elem in enumerate(self.elements):
+ left_margin = elem.left - self.left
+ elem.indent_fraction = left_margin/self.width
+ elem.width_fraction = elem.width/self.width
+ if i == 0:
+ elem.top_gap = None
+ else:
+ elem.top_gap = self.elements[i-1].bottom - elem.top
+
+ def previous_element(self, idx):
+ if idx == 0:
+ return None
+ return self.elements[idx-1]
+
+
+
+
class Region(object):
def __init__(self):
@@ -156,6 +188,7 @@ class Region(object):
self.columns[i].add(elem)
def contains(self, columns):
+ # TODO: handle unbalanced columns
if not self.columns:
return True
if len(columns) != len(self.columns):
@@ -172,7 +205,22 @@ class Region(object):
@property
def is_empty(self):
- return len(self.elements) == 0
+ return len(self.columns) == 0
+
+ def collect_stats(self):
+ for column in self.columns:
+ column.collect_stats()
+ self.average_line_separation = sum([x.average_line_separation for x in
+ self.columns])/float(len(self.columns))
+
+ def __iter__(self):
+ for x in self.columns:
+ yield x
+
+ def linearize(self):
+ self.elements = []
+ for x in self.columns:
+ self.elements.extend(x)
class Page(object):
@@ -185,6 +233,8 @@ class Page(object):
# for them to be considered to be part of the same text fragment
LINE_FACTOR = 0.4
+ # Multiplies the average line height when determining row height
+ # of a particular element to detect columns.
YFUZZ = 1.5
@@ -263,10 +313,10 @@ class Page(object):
columns = self.sort_into_columns(x, elems)
processed.update(elems)
if not current_region.contains(columns):
- self.regions.append(self.current_region)
+ self.regions.append(current_region)
current_region = Region()
current_region.add(columns)
- if not self.current_region.is_empty():
+ if not current_region.is_empty:
self.regions.append(current_region)
def sort_into_columns(self, elem, neighbors):
@@ -287,7 +337,7 @@ class Page(object):
def find_elements_in_row_of(self, x):
interval = Interval(x.top,
- x.top + self.YFUZZ*(1+self.average_text_height))
+ x.top + self.YFUZZ*(self.average_text_height))
h_interval = Interval(x.left, x.right)
for y in self.elements[x.idx:x.idx+15]:
if y is not x:
@@ -298,6 +348,12 @@ class Page(object):
x_interval.intersection(h_interval).width <= 0:
yield y
+ def second_pass(self):
+ 'Locate paragraph boundaries in each column'
+ for region in self.regions:
+ region.collect_stats()
+ region.linearize()
+
class PDFDocument(object):
@@ -327,6 +383,7 @@ class PDFDocument(object):
for page in self.pages:
page.document_font_stats = self.font_size_stats
page.first_pass()
+ page.second_pass()
def collect_font_statistics(self):
self.font_size_stats = {}
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index e3609fcddb..7427a77c2f 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -138,7 +138,7 @@ class PMLMLizer(object):
text = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to PML markup...' % item.href)
- stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+ stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(text)
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index b132e368ee..714b2c3a27 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -522,7 +522,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
mi.author_sort = 'Schember, John'
- mi.comments = "A guide to get you up an running with calibre"
+ mi.comments = "A guide to get you up and running with calibre"
mi.publisher = 'calibre'
self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
[mi])
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index afb727cc7d..f74fcbb391 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1,8 +1,10 @@
-import pickle, os, re, shutil, htmlentitydefs
+import os, re, shutil, htmlentitydefs
from collections import namedtuple
from xml.sax.saxutils import escape
+from PyQt4.Qt import *
+
from calibre import filesystem_encoding
from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
@@ -409,11 +411,11 @@ class EPUB_MOBI(CatalogPlugin):
# Convert the upper 3 numbers - thousandsNumber
if thousandsNumber:
if number > 1099 and number < 2000:
- resultString = '%s %s' % (self.lessThanTwenty[number/100],
+ resultString = '%s %s' % (self.lessThanTwenty[number/100],
self.stringFromInt(number % 100))
self.text = resultString.strip().capitalize()
return
- else:
+ else:
thousandsString = self.stringFromInt(thousandsNumber)
# Concatenate the strings
@@ -493,7 +495,6 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.output_profile.startswith("kindle")) else False
self.__genres = None
self.__htmlFileList = []
- self.__libraryPath = self.fetchLibraryPath()
self.__markerTags = self.getMarkerTags()
self.__ncxSoup = None
self.__playOrder = 1
@@ -506,10 +507,11 @@ class EPUB_MOBI(CatalogPlugin):
self.__thumbs = None
self.__title = opts.catalog_title
self.__verbose = opts.verbose
-
- if self.verbose:
- self.opts.log.info("CatalogBuilder(): Generating %s for %s" % (self.opts.fmt, self.opts.output_profile))
-
+
+ self.opts.log.info("CatalogBuilder(): Generating %s %s"% \
+ (self.opts.fmt,
+ "for %s" % self.opts.output_profile if self.opts.output_profile \
+ else ''))
# Accessors
'''
@dynamic_property
@@ -519,7 +521,7 @@ class EPUB_MOBI(CatalogPlugin):
def fset(self, val):
self.__ = val
return property(fget=fget, fset=fset)
- '''
+ '''
@dynamic_property
def authors(self):
@@ -535,7 +537,7 @@ class EPUB_MOBI(CatalogPlugin):
def fset(self, val):
self.__basename = val
return property(fget=fget, fset=fset)
- @dynamic_property
+ @dynamic_property
def booksByAuthor(self):
def fget(self):
return self.__booksByAuthor
@@ -748,7 +750,9 @@ class EPUB_MOBI(CatalogPlugin):
self.generateHTMLByTags()
if getattr(self.reporter, 'cancel_requested', False): return 1
- self.generateThumbnails()
+ from calibre.utils.PythonMagickWand import ImageMagick
+ with ImageMagick():
+ self.generateThumbnails()
if getattr(self.reporter, 'cancel_requested', False): return 1
self.generateOPF()
@@ -794,9 +798,7 @@ class EPUB_MOBI(CatalogPlugin):
def fetchBooksByTitle(self):
- result = self.updateProgressFullStep("fetchBooksByTitle()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("fetchBooksByTitle()"))
# Get the database as a dictionary
# Sort by title
@@ -873,7 +875,7 @@ class EPUB_MOBI(CatalogPlugin):
# Re-sort based on title_sort
self.booksByTitle = sorted(titles,
key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper()))
- if self.verbose:
+ if False and self.verbose:
self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle))
for title in self.booksByTitle:
self.opts.log.info((u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8'))
@@ -881,9 +883,7 @@ class EPUB_MOBI(CatalogPlugin):
def fetchBooksByAuthor(self):
# Generate a list of titles sorted by author from the database
- result = self.updateProgressFullStep("fetchBooksByAuthor()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("fetchBooksByAuthor()"))
# Sort titles case-insensitive
self.booksByAuthor = sorted(self.booksByTitle,
@@ -927,19 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
unique_authors.append((current_author[0], current_author[1].title(),
books_by_current_author))
- if self.verbose:
+ if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8'))
-
self.authors = unique_authors
def generateHTMLDescriptions(self):
# Write each title to a separate HTML file in contentdir
- result = self.updateProgressFullStep("generateHTMLDescriptions()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateHTMLDescriptions()"))
for (title_num, title) in enumerate(self.booksByTitle):
if False:
@@ -1066,9 +1063,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateHTMLByTitle(self):
# Write books by title A-Z to HTML file
- result = self.updateProgressFullStep("generateHTMLByTitle()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateHTMLByTitle()"))
soup = self.generateHTMLEmptyHeader("Books By Alpha Title")
body = soup.find('body')
@@ -1170,9 +1165,8 @@ class EPUB_MOBI(CatalogPlugin):
def generateHTMLByAuthor(self):
# Write books by author A-Z
- result = self.updateProgressFullStep("generateHTMLByAuthor()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateHTMLByAuthor()"))
+
friendly_name = "By Author"
soup = self.generateHTMLEmptyHeader(friendly_name)
@@ -1303,9 +1297,7 @@ class EPUB_MOBI(CatalogPlugin):
# Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ...
# Note that special tags - ~+*[] - have already been filtered from books[]
- result = self.updateProgressFullStep("generateHTMLByTags()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateHTMLByTags()"))
# Filter out REMOVE_TAGS, sort
filtered_tags = self.filterDbTags(self.db.all_tags())
@@ -1313,13 +1305,11 @@ class EPUB_MOBI(CatalogPlugin):
# Extract books matching filtered_tags
genre_list = []
for tag in filtered_tags:
- if False : print "searching for %s" % tag
tag_list = {}
tag_list['tag'] = tag
tag_list['books'] = []
for book in self.booksByAuthor:
if 'tags' in book and tag in book['tags']:
- if False: print "\t %s" % (book['title'])
this_book = {}
this_book['author'] = book['author']
this_book['title'] = book['title']
@@ -1336,12 +1326,10 @@ class EPUB_MOBI(CatalogPlugin):
# genre_list = [ [tag_list], [tag_list] ...]
master_genre_list = []
for (index, genre) in enumerate(genre_list):
- if False : print "genre: %s" % genre['tag']
# Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
authors = []
for book in genre['books']:
- #print "\t %s - %s" % (book['title'], book['author'])
authors.append((book['author'],book['author_sort']))
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
@@ -1384,9 +1372,7 @@ class EPUB_MOBI(CatalogPlugin):
# Generate a thumbnail per cover. If a current thumbnail exists, skip
# If a cover doesn't exist, use default
# Return list of active thumbs
- result = self.updateProgressFullStep("generateThumbnails()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateThumbnails()"))
thumbs = ['thumbnail_default.jpg']
@@ -1398,7 +1384,6 @@ class EPUB_MOBI(CatalogPlugin):
i/float(len(self.booksByTitle)))
# Check to see if source file exists
if 'cover' in title and os.path.isfile(title['cover']):
- # print "cover found for %s" % title['title']
# Add the thumb spec to thumbs[]
thumbs.append("thumbnail_%d.jpg" % int(title['id']))
@@ -1415,11 +1400,10 @@ class EPUB_MOBI(CatalogPlugin):
self.generateThumbnail(title, image_dir, thumb_file)
else:
self.generateThumbnail(title, image_dir, thumb_file)
-
else:
# Use default cover
if self.verbose:
- self.opts.log.warn(" no cover available for %s, will use default" % \
+ self.opts.log.warn(" using default cover for '%s'" % \
(title['title']))
# Check to make sure default is current
# Check to see if thumbnail exists
@@ -1428,43 +1412,43 @@ class EPUB_MOBI(CatalogPlugin):
# Init Qt for image conversion
from calibre.gui2 import is_ok_to_use_qt
- is_ok_to_use_qt()
- from PyQt4.QtGui import QImage
-
- # I() fetches path to resource, e.g. I('book.svg') returns:
- # /Applications/calibre.app/Contents/Resources/resources/images/book.svg
- # Convert .svg to .jpg
- default_cover = I('book.svg')
- cover_img = QImage()
- cover_img.load(default_cover)
- cover_img.save(cover, "PNG", -1)
-
- if os.path.isfile(thumb_fp):
- # Check to see if default cover is newer than thumbnail
- # os.path.getmtime() = modified time
- # os.path.ctime() = creation time
- cover_timestamp = os.path.getmtime(cover)
- thumb_timestamp = os.path.getmtime(thumb_fp)
- if thumb_timestamp < cover_timestamp:
+ if is_ok_to_use_qt():
+ # Render default book image against white bg
+ i = QImage(I('book.svg'))
+ i2 = QImage(i.size(),QImage.Format_ARGB32_Premultiplied )
+ i2.fill(QColor(Qt.white).rgb())
+ p = QPainter()
+ p.begin(i2)
+ p.drawImage(0, 0, i)
+ p.end()
+ i2.save(cover, "PNG", -1)
+
+ if os.path.isfile(thumb_fp):
+ # Check to see if default cover is newer than thumbnail
+ # os.path.getmtime() = modified time
+ # os.path.getctime() = creation time (metadata-change time on Unix)
+ cover_timestamp = os.path.getmtime(cover)
+ thumb_timestamp = os.path.getmtime(thumb_fp)
+ if thumb_timestamp < cover_timestamp:
+ if self.verbose:
+ self.opts.log.info("updating thumbnail_default for %s" % title['title'])
+ #title['cover'] = "%s/DefaultCover.jpg" % self.catalogPath
+ title['cover'] = cover
+ self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
+ else:
if self.verbose:
- self.opts.log.info("updating thumbnail_default for %s" % title['title'])
+ self.opts.log.info(" generating default cover thumbnail")
#title['cover'] = "%s/DefaultCover.jpg" % self.catalogPath
title['cover'] = cover
self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
else:
- if self.verbose:
- self.opts.log.info(" generating new thumbnail_default.jpg")
- #title['cover'] = "%s/DefaultCover.jpg" % self.catalogPath
- title['cover'] = cover
- self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
-
+ self.opts.log.error("Not OK to use PyQt, can't create default thumbnail")
+
self.thumbs = thumbs
def generateOPF(self):
- result = self.updateProgressFullStep("generateOPF()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateOPF()"))
header = '''
@@ -1596,9 +1580,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXHeader(self):
- result = self.updateProgressFullStep("generateNCXHeader()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateNCXHeader()"))
header = '''
@@ -1613,7 +1595,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "periodical"
navPointTag['id'] = "title"
navPointTag['playOrder'] = self.playOrder
- #print "generateNCXHeader(periodical): self.playOrder: %d" % self.playOrder
self.playOrder += 1
navLabelTag = Tag(soup, 'navLabel')
textTag = Tag(soup, 'text')
@@ -1635,9 +1616,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXDescriptions(self, tocTitle):
- result = self.updateProgressFullStep("generateNCXDescriptions()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateNCXDescriptions()"))
# --- Construct the 'Books by Title' section ---
ncx_soup = self.ncxSoup
@@ -1649,7 +1628,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "section"
navPointTag['id'] = "bytitle-ID"
navPointTag['playOrder'] = self.playOrder
- #print "generateNCXDescriptions(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text')
@@ -1669,7 +1647,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "book%dID" % int(book['id'])
navPointVolumeTag['playOrder'] = self.playOrder
- #print "generateNCXDescriptions(article): self.playOrder: %d" % self.playOrder
self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text")
@@ -1687,7 +1664,7 @@ class EPUB_MOBI(CatalogPlugin):
cmTag['name'] = "author"
cmTag.insert(0, NavigableString(self.formatNCXText(book['author'])))
navPointVolumeTag.insert(2, cmTag)
-
+
# Add the description tag
if book['short_description']:
cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
@@ -1707,9 +1684,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXByTitle(self, tocTitle):
- result = self.updateProgressFullStep("generateNCXByTitle()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateNCXByTitle()"))
soup = self.ncxSoup
output = "ByAlphaTitle"
@@ -1721,7 +1696,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "section"
navPointTag['id'] = "byalphatitle-ID"
navPointTag['playOrder'] = self.playOrder
- #print "generateNCXByTitle(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(soup, 'navLabel')
textTag = Tag(soup, 'text')
@@ -1799,9 +1773,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXByAuthor(self, tocTitle):
- result = self.updateProgressFullStep("generateNCXByAuthor()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateNCXByAuthor()"))
soup = self.ncxSoup
HTML_file = "content/ByAlphaAuthor.html"
@@ -1829,7 +1801,7 @@ class EPUB_MOBI(CatalogPlugin):
nptc += 1
# Create an NCX article entry for each populated author index letter
- # Loop over the sorted_authors list, find start of each letter,
+ # Loop over the sorted_authors list, find start of each letter,
# add description_preview_count artists
# self.authors[0]:friendly [1]:author_sort [2]:book_count
master_author_list = []
@@ -1844,7 +1816,7 @@ class EPUB_MOBI(CatalogPlugin):
author_list += " …"
author_list = self.formatNCXText(author_list)
- if self.verbose:
+ if False and self.verbose:
self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
master_author_list.append((author_list, current_letter))
@@ -1860,7 +1832,7 @@ class EPUB_MOBI(CatalogPlugin):
if len(current_author_list) == self.descriptionClip:
author_list += " …"
author_list = self.formatNCXText(author_list)
- if self.verbose:
+ if False and self.verbose:
self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
master_author_list.append((author_list, current_letter))
@@ -1881,7 +1853,7 @@ class EPUB_MOBI(CatalogPlugin):
contentTag['src'] = "%s#%sauthors" % (HTML_file, authors_by_letter[1])
navPointByLetterTag.insert(1,contentTag)
-
+
if self.generateForKindle:
cmTag = Tag(soup, '%s' % 'calibre:meta')
cmTag['name'] = "description"
@@ -1902,9 +1874,7 @@ class EPUB_MOBI(CatalogPlugin):
# Add each genre as an article
# 'tag', 'file', 'authors'
- result = self.updateProgressFullStep("generateNCXByTags()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("generateNCXByTags()"))
ncx_soup = self.ncxSoup
body = ncx_soup.find("navPoint")
@@ -1917,11 +1887,10 @@ class EPUB_MOBI(CatalogPlugin):
file_ID = file_ID.replace(" ","")
navPointTag['id'] = "%s-ID" % file_ID
navPointTag['playOrder'] = self.playOrder
- #print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text')
- # textTag.insert(0, NavigableString('%s (%d)' % (section_title, len(genre_list))))
+ # textTag.insert(0, NavigableString('%s (%d)' % (section_title, len(genre_list))))
textTag.insert(0, NavigableString('%s' % tocTitle))
navLabelTag.insert(0, textTag)
nptc = 0
@@ -1939,7 +1908,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag']
navPointVolumeTag['playOrder'] = self.playOrder
- #print "generateNCXByTags(article '%s'): self.playOrder: %d" % (genre['tag'], self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text")
@@ -1957,19 +1925,19 @@ class EPUB_MOBI(CatalogPlugin):
cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
cmTag['name'] = "author"
# First - Last author
-
+
if len(genre['titles_spanned']) > 1 :
author_range = "%s - %s" % (genre['titles_spanned'][0][0], genre['titles_spanned'][1][0])
else :
author_range = "%s" % (genre['titles_spanned'][0][0])
-
+
cmTag.insert(0, NavigableString(author_range))
navPointVolumeTag.insert(2, cmTag)
-
+
# Build the description tag
cmTag = Tag(ncx_soup, '%s' % 'calibre:meta')
cmTag['name'] = "description"
-
+
if False:
# Form 1: Titles spanned
if len(genre['titles_spanned']) > 1:
@@ -1985,7 +1953,7 @@ class EPUB_MOBI(CatalogPlugin):
titles = sorted(titles, key=lambda x:(self.generateSortTitle(x),self.generateSortTitle(x)))
titles_list = self.generateShortDescription(" • ".join(titles))
cmTag.insert(0, NavigableString(self.formatNCXText(titles_list)))
-
+
navPointVolumeTag.insert(3, cmTag)
# Add this volume to the section tag
@@ -2000,34 +1968,12 @@ class EPUB_MOBI(CatalogPlugin):
def writeNCX(self):
- result = self.updateProgressFullStep("writeNCX()")
- if self.verbose:
- self.opts.log.info(result)
+ self.opts.log.info(self.updateProgressFullStep("writeNCX()"))
+
outfile = open("%s/%s.ncx" % (self.catalogPath, self.basename), 'w')
outfile.write(self.ncxSoup.prettify())
# Helpers
- def contents(self, element, title, key=None):
- content = None
-
- if element is None:
- return None
-
- # Some elements seem to have \n fields
- for node in element:
- if node == "\n":
- continue
- else:
- content = node
- # Special handling for '&' in 'cover'
- if key == 'cover' and re.search('&',content):
- content = re.sub('&','&',content)
-
- if content:
- return unicode(content)
- else:
- return None
-
def convertHTMLEntities(self, s):
matches = re.findall("\d+;", s)
if len(matches) > 0:
@@ -2057,47 +2003,35 @@ class EPUB_MOBI(CatalogPlugin):
self.cleanUp()
if not os.path.isdir(catalogPath):
- #if self.verbose: print " creating %s" % catalogPath
os.makedirs(catalogPath)
# Create /content and /images
content_path = catalogPath + "/content"
if not os.path.isdir(content_path):
- #if self.verbose: print " creating %s" % content_path
os.makedirs(content_path)
images_path = catalogPath + "/images"
if not os.path.isdir(images_path):
- #if self.verbose: print " creating %s" % images_path
os.makedirs(images_path)
- def fetchLibraryPath(self):
- # Return a path to the current library
- from calibre.utils.config import prefs
- return prefs['library_path']
-
def getMarkerTags(self):
''' Return a list of special marker tags to be excluded from genre list '''
markerTags = []
markerTags.extend(self.opts.exclude_tags.split(','))
markerTags.extend(self.opts.note_tag.split(','))
- markerTags.extend(self.opts.read_tag.split(','))
+ markerTags.extend(self.opts.read_tag.split(','))
return markerTags
def filterDbTags(self, tags):
- # Remove the special marker tags from the database's tag list,
+ # Remove the special marker tags from the database's tag list,
# return sorted list of tags representing valid genres
filtered_tags = []
for tag in tags:
- # Check the leading character
if tag[0] in self.markerTags:
- #print "skipping %s" % tag
continue
- # Check the exclude_genre pattern
if re.search(self.opts.exclude_genre, tag):
- #print "skipping %s" % tag
continue
-
+
filtered_tags.append(tag)
filtered_tags.sort()
@@ -2340,11 +2274,9 @@ class EPUB_MOBI(CatalogPlugin):
# Convert numbers to strings, ignore leading stop words
# The 21-Day Consciousness Cleanse
- if False: print "generate_sort_title(%s)" % title
title_words = title.split(' ')
if title_words[0].lower() in ['the','a','an']:
stop_word = title_words.pop(0)
- if False : print "removing stop word '%s'" % stop_word
# Scan for numbers in each word clump
translated = []
@@ -2359,29 +2291,28 @@ class EPUB_MOBI(CatalogPlugin):
def generateThumbnail(self, title, image_dir, thumb_file):
import calibre.utils.PythonMagickWand as pw
- with pw.ImageMagick():
- try:
- img = pw.NewMagickWand()
- if img < 0:
- raise RuntimeError('generate_thumbnail(): Cannot create wand')
- # Read the cover
- if not pw.MagickReadImage(img,
- title['cover'].encode(filesystem_encoding)):
- self.opts.log.info('Failed to read cover image from: %s' % title['cover'])
- raise IOError
- thumb = pw.CloneMagickWand(img)
- if thumb < 0:
- self.opts.log.info('generateThumbnail(): Cannot clone cover')
- raise RuntimeError
- # img, width, height
- pw.MagickThumbnailImage(thumb, 75, 100)
- pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
- pw.DestroyMagickWand(thumb)
- pw.DestroyMagickWand(img)
- except IOError:
- self.opts.log.info("generateThumbnail() IOError with %s" % title['title'])
- except RuntimeError:
- self.opts.log.info("generateThumbnail() RuntimeError with %s" % title['title'])
+ try:
+ img = pw.NewMagickWand()
+ if img < 0:
+ raise RuntimeError('generateThumbnail(): Cannot create wand')
+ # Read the cover
+ if not pw.MagickReadImage(img,
+ title['cover'].encode(filesystem_encoding)):
+ self.opts.log.error('generateThumbnail(): Failed to read cover image from: %s' % title['cover'])
+ raise IOError
+ thumb = pw.CloneMagickWand(img)
+ if thumb < 0:
+ self.opts.log.error('generateThumbnail(): Cannot clone cover')
+ raise RuntimeError
+ # img, width, height
+ pw.MagickThumbnailImage(thumb, 75, 100)
+ pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
+ pw.DestroyMagickWand(thumb)
+ pw.DestroyMagickWand(img)
+ except IOError:
+ self.opts.log.error("generateThumbnail(): IOError with %s" % title['title'])
+ except RuntimeError:
+ self.opts.log.error("generateThumbnail(): RuntimeError with %s" % title['title'])
def processSpecialTags(self, tags, this_title, opts):
tag_list = []
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 7b0f7a083e..1fdb4ef9a9 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1415,9 +1415,10 @@ class LibraryDatabase2(LibraryDatabase):
if matches:
tag_matches = self.data.get_matches('tags', _('Catalog'))
matches = matches.intersection(tag_matches)
- db_id = None
+ db_id, existing = None, False
if matches:
db_id = list(matches)[0]
+ existing = True
if db_id is None:
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
(title, 'calibre'))
@@ -1433,6 +1434,10 @@ class LibraryDatabase2(LibraryDatabase):
if not hasattr(path, 'read'):
stream.close()
self.conn.commit()
+ if existing:
+ t = datetime.utcnow()
+ self.set_timestamp(db_id, t, notify=False)
+ self.set_pubdate(db_id, t, notify=False)
self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
return db_id
diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py
index 00eea78589..a1c8aec0bd 100644
--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@@ -714,6 +714,10 @@ class LibraryServer(object):
book, books = MarkupTemplate(self.MOBILE_BOOK), []
for record in items[(start-1):(start-1)+num]:
+ if record[13] is None:
+ record[13] = ''
+ if record[6] is None:
+ record[6] = 0
aus = record[2] if record[2] else __builtin__._('Unknown')
authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
record[10] = fmt_sidx(float(record[10]))
diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py
index 79db972008..458d5adb8a 100644
--- a/src/calibre/utils/ipc/job.py
+++ b/src/calibre/utils/ipc/job.py
@@ -52,8 +52,10 @@ class BaseJob(object):
else:
self._status_text = _('Error') if self.failed else _('Finished')
if DEBUG:
- prints('Job:', self.id, self.description, 'finished')
- prints('\t'.join(self.details.splitlines(True)))
+ prints('Job:', self.id, self.description, 'finished',
+ safe_encode=True)
+ prints('\t'.join(self.details.splitlines(True)),
+ safe_encode=True)
if not self._done_called:
self._done_called = True
try: