GwR cleanup, rendering default cover against white bg

This commit is contained in:
GRiker 2010-01-24 16:07:01 -07:00
commit a15b9745e9
19 changed files with 422 additions and 280 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

View File

@ -2,17 +2,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CommonDreams(BasicNewsRecipe):
# Identify the recipe
title = u'Common Dreams'
description = u'Progressive news and views'
__author__ = u'XanthanGum'
language = 'en'
# Format the text
extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h1{font-size: xx-large;}
h2{font-size: large;}
'''
# Pick no article older than seven days and limit the number of articles per feed to 100
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'Common Dreams Headlines',
u'http://www.commondreams.org/feed/headlines_rss'),
(u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'),
(u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
]
# Remove everything before the article
remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})
# Remove everything after the article
remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})
# Identify the news feeds
feeds = [(u'Headlines', u'http://www.commondreams.org/feed/headlines_rss'),
(u'Further News Articles', u'http://www.commondreams.org/feed/further_rss'),
(u'Views', u'http://www.commondreams.org/feed/views_rss'),
(u'Progressive Newswire', u'http://www.commondreams.org/feed/newswire_rss')]

View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Neowin(BasicNewsRecipe):
    """News recipe for Neowin.net, assembled from its per-section RSS feeds."""

    # Descriptive metadata for the recipe.
    title = u'Neowin.net'
    __author__ = 'Darko Miletic'
    description = 'News from IT'
    publisher = 'Neowin'
    category = 'news, IT, Microsoft, Apple, hardware, software, games'
    language = 'en'

    # Fetch settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf8'

    # Conversion options are built from the metadata declared above, so they
    # must stay below those assignments.
    conversion_options = {
        'tags': category,
        'language': language,
        'comments': description,
        'publisher': publisher,
    }

    # Page clean-up rules: keep the div with id "article", and remove the
    # div with id "tag-bar" along with a handful of non-content tags.
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags_after = dict(name='div', attrs={'id': 'tag-bar'})
    remove_tags = [
        dict(name=['base', 'object', 'link', 'iframe']),
        dict(name='div', attrs={'id': 'tag-bar'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Software', u'http://www.neowin.net/news/rss/software'),
        (u'Gaming', u'http://www.neowin.net/news/rss/gaming'),
        (u'Microsoft', u'http://www.neowin.net/news/rss/microsoft'),
        (u'Apple', u'http://www.neowin.net/news/rss/apple'),
        (u'Editorial', u'http://www.neowin.net/news/rss/editorial'),
    ]

    def image_url_processor(cls, baseurl, url):
        """Return ``url`` unchanged; ``baseurl`` is ignored."""
        return url

View File

@ -1,6 +1,5 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
#from random import randint
from urllib import quote
class SportsIllustratedRecipe(BasicNewsRecipe) :
@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
language = 'en'
description = 'Sports Illustrated'
version = 1
version = 3
title = u'Sports Illustrated'
no_stylesheets = True
remove_javascript = True
#template_css = ''
use_embedded_content = False
INDEX = 'http://sportsillustrated.cnn.com/'
@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def parse_index(self):
answer = []
soup = self.index_to_soup(self.INDEX)
# Find the link to the current issue on the front page.
# Find the link to the current issue on the front page. SI Cover
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
if cover:
currentIssue = cover.parent['href']
if currentIssue:
# Open the index of current issue
index = self.index_to_soup(currentIssue)
self.log('\tLooking for current issue in: ' + currentIssue)
# Now let us see if they updated their frontpage
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
if nav:
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
if img:
parent = img.parent
if parent.name == 'a':
# They didn't update their frontpage; Load the next issue from here
href = self.INDEX + parent['href']
index = self.index_to_soup(href)
self.log('\tLooking for current issue in: ' + href)
if index.find('div', 'siv_noArticleMessage'):
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
if nav:
# Their frontpage points to an issue without any articles; Use the previous issue
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
if img:
parent = img.parent
if parent.name == 'a':
href = self.INDEX + parent['href']
index = self.index_to_soup(href)
self.log('\tLooking for current issue in: ' + href)
# Find all articles.
list = index.find('div', attrs = {'class' : 'siv_artList'})
@ -69,10 +93,8 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
def preprocess_html(self, soup):
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
if header:
# It's an article, prepare a container for the content
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
body = homeMadeSoup.find('body')
body = homeMadeSoup.body
# Find the date, title and byline
temp = header.find('td', attrs = {'class' : 'title'})
@ -93,7 +115,4 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
body.append(para)
return homeMadeSoup
else :
# It's a TOC, just return the whole lot
return soup

View File

@ -1,44 +1,105 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe):
title = 'Wired.com'
__author__ = 'Kovid Goyal'
description = 'Technology news'
timefmt = ' [%Y%b%d %H%M]'
language = 'en'
title = 'Wired Magazine'
__author__ = 'Darko Miletic'
description = 'Gaming news'
publisher = 'Conde Nast Digital'
category = 'news, games, IT, gadgets'
oldest_article = 32
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
index = 'http://www.wired.com/magazine/'
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
remove_tags = [
dict(name=['object','embed','iframe','link'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
]
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
def parse_index(self):
    """Build the feed list by scraping the magazine index page.

    The page at ``self.index`` is fetched once.  The sections found inside
    the ``my-glider`` div become a 'Featured Articles' feed, and each known
    department found inside the ``magazine-departments`` div becomes a feed
    named after the department.

    Returns a list of ``(feed title, articles)`` tuples; each article is a
    dict with the keys ``title``, ``date``, ``url`` and ``description``.
    """
    totalfeeds = []
    soup = self.index_to_soup(self.index)

    features = soup.find('div', attrs={'id': 'my-glider'})
    if features:
        farticles = []
        for item in features.findAll('div', attrs={'class': 'section'}):
            divurl = item.find('div', attrs={'class': 'feature-header'})
            link = divurl.a if divurl is not None else None
            # Skip sections without a usable header link instead of letting
            # one malformed section abort the whole download; the department
            # loop below applies the same tolerance.
            if link is None or not link.has_key('href'):
                continue
            divdesc = item.find('div', attrs={'class': 'feature-text'})
            farticles.append({
                'title': self.tag_to_string(link),
                'date': strftime(self.timefmt),
                'url': 'http://www.wired.com' + link['href'],
                'description': self.tag_to_string(divdesc),
            })
        totalfeeds.append(('Featured Articles', farticles))

    # department feeds
    departments = ['rants', 'start', 'test', 'play', 'found']
    dept = soup.find('div', attrs={'id': 'magazine-departments'})
    if dept:
        for ditem in departments:
            darticles = []
            department = dept.find('div', attrs={'id': 'department-' + ditem})
            if department:
                for item in department.findAll('div'):
                    description = ''
                    feed_link = item.find('a')
                    if feed_link and feed_link.has_key('href'):
                        darticles.append({
                            'title': self.tag_to_string(feed_link),
                            'date': strftime(self.timefmt),
                            'url': feed_link['href'],
                            'description': description,
                        })
                # NOTE(review): a feed is emitted only for departments that
                # are present on the page -- confirm this nesting against the
                # original file.
                totalfeeds.append((ditem.capitalize(), darticles))
    return totalfeeds
def get_cover_url(self):
    """Return the absolute cover image URL from the index page, or None.

    None is returned when the page has no div with class ``spread-image``.
    """
    page = self.index_to_soup(self.index)
    spread = page.find('div', attrs={'class': 'spread-image'})
    if not spread:
        return None
    # The image path found on the page is prefixed with the site root.
    return 'http://www.wired.com' + spread.a.img['src']
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
return url.rstrip('/') + '/all/1'
def preprocess_html(self, soup):
    """Remove the inline ``style`` attribute from every tag that has one.

    The soup is modified in place and the same object is returned.
    """
    styled_tags = soup.findAll(style=True)
    for tag in styled_tags:
        del tag['style']
    return soup

View File

@ -9,23 +9,22 @@ from calibre.devices.usbms.driver import USBMS
class BLACKBERRY(USBMS):
name = 'Blackberry Device Interface'
gui_name = 'Blackberry'
description = _('Communicate with the Blackberry smart phone.')
author = _('Kovid Goyal')
supported_platforms = ['windows', 'linux']
supported_platforms = ['windows', 'linux', 'osx']
# Ordered list of supported formats
FORMATS = ['mobi', 'prc']
VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004]
BCD = [0x0200, 0x0107, 0x0201]
BCD = [0x0200, 0x0107, 0x0210]
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
#OSX_MAIN_MEM = 'Kindle Internal Storage Media'
MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card'
EBOOK_DIR_MAIN = 'ebooks'
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True

View File

@ -24,7 +24,7 @@ class DRMError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml']
class HTMLRenderer(object):

View File

@ -340,6 +340,9 @@ class ComicInput(InputFormatPlugin):
%stream.name)
for line in open('comics.txt',
'rb').read().decode('utf-8').splitlines():
line = line.strip()
if not line:
continue
fname, title = line.partition(':')[0], line.partition(':')[-1]
fname = os.path.join(tdir, *fname.split('/'))
if not title:

View File

@ -268,7 +268,8 @@ class EPUBOutput(OutputFormatPlugin):
# remove <img> tags with empty src elements
bad = []
for x in XPath('//h:img')(body):
if not x.get('src', '').strip():
src = x.get('src', '').strip()
if src in ('', '#'):
bad.append(x)
for img in bad:
img.getparent().remove(img)

View File

@ -20,7 +20,7 @@ class Reader(FormatReader):
if record0_size == 132:
self.reader = Reader132(header, stream, log, options)
elif record0_size == 202:
elif record0_size in (116, 202):
self.reader = Reader202(header, stream, log, options)
else:
raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
'''
Read content from ereader pdb file with a 202 byte header created by Makebook.
Read content from ereader pdb files with a 116 or 202 byte header created by Makebook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -44,7 +44,7 @@ class Reader202(FormatReader):
self.header_record = HeaderRecord(self.section_data(0))
if self.header_record.version != 4:
if self.header_record.version not in (2, 4):
raise EreaderError('Unknown book version %i.' % self.header_record.version)
from calibre.ebooks.metadata.pdb import get_metadata

View File

@ -18,38 +18,11 @@ class Font(object):
self.color = spec.get('color')
self.family = spec.get('family')
class Column(object):
# A column contains an element if the element bulges out to
# the left or the right by at most HFUZZ*col width.
HFUZZ = 0.2
class Element(object):
def __init__(self):
self.left = self.right = self.top = self.bottom = 0
self.width = self.height = 0
self.elements = []
def add(self, elem):
if elem in self.elements: return
self.elements.append(elem)
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
self.top = self.elements[0].top
self.bottom = self.elements[-1].bottom
self.left, self.right = sys.maxint, 0
for x in self:
self.left = min(self.left, x.left)
self.right = max(self.right, x.right)
self.width, self.height = self.right-self.left, self.bottom-self.top
def __iter__(self):
for x in self.elements:
yield x
def contains(self, elem):
return elem.left > self.left - self.HFUZZ*self.width and \
elem.right < self.right + self.HFUZZ*self.width
class Element(object):
self.starts_block = None
self.block_style = None
def __eq__(self, other):
return self.id == other.id
@ -60,17 +33,21 @@ class Element(object):
class Image(Element):
    """Page element built from an image node's attributes."""

    def __init__(self, img, opts, log, idc):
        """Read geometry and source from ``img``.

        ``img`` is queried with ``get`` for the attributes ``top``, ``left``,
        ``rwidth``, ``rheight``, ``iwidth``, ``iheight`` (all converted to
        float) and ``src``.  ``idc`` supplies the element id via ``next()``.
        """
        Element.__init__(self)
        self.opts, self.log = opts, log
        self.id = idc.next()

        geometry_keys = ('top', 'left', 'rwidth', 'rheight', 'iwidth',
                         'iheight')
        raw_values = [img.get(key) for key in geometry_keys]
        (self.top, self.left, self.width, self.height,
         self.iwidth, self.iheight) = [float(value) for value in raw_values]

        self.src = img.get('src')
        # Derive the far edges from the origin and the rendered size.
        self.bottom = self.top + self.height
        self.right = self.left + self.width
class Text(Element):
def __init__(self, text, font_map, opts, log, idc):
Element.__init__(self)
self.id = idc.next()
self.opts, self.log = opts, log
self.font_map = font_map
@ -140,6 +117,61 @@ class Interval(object):
def __hash__(self):
    """Return a hash derived from the ``left`` and ``right`` endpoints."""
    # Both endpoints must be passed to the format operator as one tuple.
    # Without the parentheses only self.left is formatted (a TypeError for a
    # two-slot format string) and hash() is handed two arguments.
    return hash('(%f,%f)' % (self.left, self.right))
class Column(object):
    """A group of page elements kept sorted by their bottom edge.

    The column tracks the bounding box of its elements (``left``, ``right``,
    ``top``, ``bottom``, ``width``, ``height``) and, after
    :meth:`collect_stats`, the average vertical gap between consecutive
    elements.
    """

    # A column contains an element if the element bulges out to
    # the left or the right by at most HFUZZ*col width.
    HFUZZ = 0.2

    def __init__(self):
        self.left = self.right = self.top = self.bottom = 0
        self.width = self.height = 0
        self.elements = []
        self.average_line_separation = 0

    def add(self, elem):
        """Add ``elem`` (duplicates are ignored) and refresh the bounding box."""
        if elem in self.elements:
            return
        self.elements.append(elem)
        # A key function gives the same stable ordering by bottom edge as the
        # former cmp= comparator, and also works where cmp= is unavailable.
        self.elements.sort(key=lambda x: x.bottom)
        self.top = self.elements[0].top
        self.bottom = self.elements[-1].bottom
        # elements is never empty here, so no sys.maxint sentinel is needed.
        self.left = min(x.left for x in self.elements)
        # The right edge never drops below 0, as with the previous
        # accumulator that started at 0.
        self.right = max(0, max(x.right for x in self.elements))
        self.width, self.height = self.right-self.left, self.bottom-self.top

    def __iter__(self):
        for x in self.elements:
            yield x

    def contains(self, elem):
        """Return True if ``elem`` lies within the column, allowing HFUZZ slack."""
        return elem.left > self.left - self.HFUZZ*self.width and \
                elem.right < self.right + self.HFUZZ*self.width

    def collect_stats(self):
        """Compute the average line separation and per-element layout stats.

        Every element receives ``indent_fraction`` and ``width_fraction``
        (relative to the column width) and ``top_gap``, the vertical distance
        from the previous element's bottom to this element's top (``None``
        for the first element).
        """
        if len(self.elements) > 1:
            gaps = [self.elements[i+1].top - self.elements[i].bottom for i in
                    range(0, len(self.elements)-1)]
            self.average_line_separation = sum(gaps)/len(gaps)
        for i, elem in enumerate(self.elements):
            left_margin = elem.left - self.left
            elem.indent_fraction = left_margin/self.width
            elem.width_fraction = elem.width/self.width
            if i == 0:
                elem.top_gap = None
            else:
                # Same orientation as the gaps list above (top minus the
                # previous bottom), so top_gap is directly comparable with
                # average_line_separation.
                elem.top_gap = elem.top - self.elements[i-1].bottom

    def previous_element(self, idx):
        """Return the element before index ``idx``, or None for the first."""
        if idx == 0:
            return None
        return self.elements[idx-1]
class Region(object):
def __init__(self):
@ -156,6 +188,7 @@ class Region(object):
self.columns[i].add(elem)
def contains(self, columns):
# TODO: handle unbalanced columns
if not self.columns:
return True
if len(columns) != len(self.columns):
@ -172,7 +205,22 @@ class Region(object):
@property
def is_empty(self):
return len(self.elements) == 0
return len(self.columns) == 0
def collect_stats(self):
    """Collect statistics for every column, then average their line separation.

    Stores the mean of the columns' ``average_line_separation`` values on
    ``self.average_line_separation``.
    """
    columns = self.columns
    for col in columns:
        col.collect_stats()
    separations = [col.average_line_separation for col in columns]
    self.average_line_separation = sum(separations)/float(len(columns))
def __iter__(self):
    """Yield the columns one at a time, in stored order."""
    for column in self.columns:
        yield column
def linearize(self):
    """Flatten the columns into ``self.elements``, column by column."""
    self.elements = []
    collected = self.elements
    for column in self.columns:
        for element in column:
            collected.append(element)
class Page(object):
@ -185,6 +233,8 @@ class Page(object):
# for them to be considered to be part of the same text fragment
LINE_FACTOR = 0.4
# Multiplies the average line height when determining row height
# of a particular element to detect columns.
YFUZZ = 1.5
@ -263,10 +313,10 @@ class Page(object):
columns = self.sort_into_columns(x, elems)
processed.update(elems)
if not current_region.contains(columns):
self.regions.append(self.current_region)
self.regions.append(current_region)
current_region = Region()
current_region.add(columns)
if not self.current_region.is_empty():
if not current_region.is_empty:
self.regions.append(current_region)
def sort_into_columns(self, elem, neighbors):
@ -287,7 +337,7 @@ class Page(object):
def find_elements_in_row_of(self, x):
interval = Interval(x.top,
x.top + self.YFUZZ*(1+self.average_text_height))
x.top + self.YFUZZ*(self.average_text_height))
h_interval = Interval(x.left, x.right)
for y in self.elements[x.idx:x.idx+15]:
if y is not x:
@ -298,6 +348,12 @@ class Page(object):
x_interval.intersection(h_interval).width <= 0:
yield y
def second_pass(self):
    """Locate paragraph boundaries in each column.

    For every region, in order, statistics are collected first and the
    region is linearized immediately afterwards.
    """
    for current in self.regions:
        current.collect_stats()
        current.linearize()
class PDFDocument(object):
@ -327,6 +383,7 @@ class PDFDocument(object):
for page in self.pages:
page.document_font_stats = self.font_size_stats
page.first_pass()
page.second_pass()
def collect_font_statistics(self):
self.font_size_stats = {}

View File

@ -138,7 +138,7 @@ class PMLMLizer(object):
text = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to PML markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(text)

View File

@ -522,7 +522,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
mi.author_sort = 'Schember, John'
mi.comments = "A guide to get you up an running with calibre"
mi.comments = "A guide to get you up and running with calibre"
mi.publisher = 'calibre'
self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
[mi])

View File

@ -1,8 +1,10 @@
import pickle, os, re, shutil, htmlentitydefs
import os, re, shutil, htmlentitydefs
from collections import namedtuple
from xml.sax.saxutils import escape
from PyQt4.Qt import *
from calibre import filesystem_encoding
from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
@ -493,7 +495,6 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.output_profile.startswith("kindle")) else False
self.__genres = None
self.__htmlFileList = []
self.__libraryPath = self.fetchLibraryPath()
self.__markerTags = self.getMarkerTags()
self.__ncxSoup = None
self.__playOrder = 1
@ -507,9 +508,10 @@ class EPUB_MOBI(CatalogPlugin):
self.__title = opts.catalog_title
self.__verbose = opts.verbose
if self.verbose:
self.opts.log.info("CatalogBuilder(): Generating %s for %s" % (self.opts.fmt, self.opts.output_profile))
self.opts.log.info("CatalogBuilder(): Generating %s %s"% \
(self.opts.fmt,
"for %s" % self.opts.output_profile if self.opts.output_profile \
else ''))
# Accessors
'''
@dynamic_property
@ -748,6 +750,8 @@ class EPUB_MOBI(CatalogPlugin):
self.generateHTMLByTags()
if getattr(self.reporter, 'cancel_requested', False): return 1
from calibre.utils.PythonMagickWand import ImageMagick
with ImageMagick():
self.generateThumbnails()
if getattr(self.reporter, 'cancel_requested', False): return 1
@ -794,9 +798,7 @@ class EPUB_MOBI(CatalogPlugin):
def fetchBooksByTitle(self):
result = self.updateProgressFullStep("fetchBooksByTitle()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("fetchBooksByTitle()"))
# Get the database as a dictionary
# Sort by title
@ -873,7 +875,7 @@ class EPUB_MOBI(CatalogPlugin):
# Re-sort based on title_sort
self.booksByTitle = sorted(titles,
key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper()))
if self.verbose:
if False and self.verbose:
self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle))
for title in self.booksByTitle:
self.opts.log.info((u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8'))
@ -881,9 +883,7 @@ class EPUB_MOBI(CatalogPlugin):
def fetchBooksByAuthor(self):
# Generate a list of titles sorted by author from the database
result = self.updateProgressFullStep("fetchBooksByAuthor()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("fetchBooksByAuthor()"))
# Sort titles case-insensitive
self.booksByAuthor = sorted(self.booksByTitle,
@ -927,19 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
unique_authors.append((current_author[0], current_author[1].title(),
books_by_current_author))
if self.verbose:
if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8'))
self.authors = unique_authors
def generateHTMLDescriptions(self):
# Write each title to a separate HTML file in contentdir
result = self.updateProgressFullStep("generateHTMLDescriptions()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateHTMLDescriptions()"))
for (title_num, title) in enumerate(self.booksByTitle):
if False:
@ -1066,9 +1063,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateHTMLByTitle(self):
# Write books by title A-Z to HTML file
result = self.updateProgressFullStep("generateHTMLByTitle()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateHTMLByTitle()"))
soup = self.generateHTMLEmptyHeader("Books By Alpha Title")
body = soup.find('body')
@ -1170,9 +1165,8 @@ class EPUB_MOBI(CatalogPlugin):
def generateHTMLByAuthor(self):
# Write books by author A-Z
result = self.updateProgressFullStep("generateHTMLByAuthor()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateHTMLByAuthor()"))
friendly_name = "By Author"
soup = self.generateHTMLEmptyHeader(friendly_name)
@ -1303,9 +1297,7 @@ class EPUB_MOBI(CatalogPlugin):
# Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ...
# Note that special tags - ~+*[] - have already been filtered from books[]
result = self.updateProgressFullStep("generateHTMLByTags()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateHTMLByTags()"))
# Filter out REMOVE_TAGS, sort
filtered_tags = self.filterDbTags(self.db.all_tags())
@ -1313,13 +1305,11 @@ class EPUB_MOBI(CatalogPlugin):
# Extract books matching filtered_tags
genre_list = []
for tag in filtered_tags:
if False : print "searching for %s" % tag
tag_list = {}
tag_list['tag'] = tag
tag_list['books'] = []
for book in self.booksByAuthor:
if 'tags' in book and tag in book['tags']:
if False: print "\t %s" % (book['title'])
this_book = {}
this_book['author'] = book['author']
this_book['title'] = book['title']
@ -1336,12 +1326,10 @@ class EPUB_MOBI(CatalogPlugin):
# genre_list = [ [tag_list], [tag_list] ...]
master_genre_list = []
for (index, genre) in enumerate(genre_list):
if False : print "genre: %s" % genre['tag']
# Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
authors = []
for book in genre['books']:
#print "\t %s - %s" % (book['title'], book['author'])
authors.append((book['author'],book['author_sort']))
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
@ -1384,9 +1372,7 @@ class EPUB_MOBI(CatalogPlugin):
# Generate a thumbnail per cover. If a current thumbnail exists, skip
# If a cover doesn't exist, use default
# Return list of active thumbs
result = self.updateProgressFullStep("generateThumbnails()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateThumbnails()"))
thumbs = ['thumbnail_default.jpg']
@ -1398,7 +1384,6 @@ class EPUB_MOBI(CatalogPlugin):
i/float(len(self.booksByTitle)))
# Check to see if source file exists
if 'cover' in title and os.path.isfile(title['cover']):
# print "cover found for %s" % title['title']
# Add the thumb spec to thumbs[]
thumbs.append("thumbnail_%d.jpg" % int(title['id']))
@ -1415,11 +1400,10 @@ class EPUB_MOBI(CatalogPlugin):
self.generateThumbnail(title, image_dir, thumb_file)
else:
self.generateThumbnail(title, image_dir, thumb_file)
else:
# Use default cover
if self.verbose:
self.opts.log.warn(" no cover available for %s, will use default" % \
self.opts.log.warn(" using default cover for '%s'" % \
(title['title']))
# Check to make sure default is current
# Check to see if thumbnail exists
@ -1428,16 +1412,16 @@ class EPUB_MOBI(CatalogPlugin):
# Init Qt for image conversion
from calibre.gui2 import is_ok_to_use_qt
is_ok_to_use_qt()
from PyQt4.QtGui import QImage
# I() fetches path to resource, e.g. I('book.svg') returns:
# /Applications/calibre.app/Contents/Resources/resources/images/book.svg
# Convert .svg to .jpg
default_cover = I('book.svg')
cover_img = QImage()
cover_img.load(default_cover)
cover_img.save(cover, "PNG", -1)
if is_ok_to_use_qt():
# Render default book image against white bg
i = QImage(I('book.svg'))
i2 = QImage(i.size(),QImage.Format_ARGB32_Premultiplied )
i2.fill(QColor(Qt.white).rgb())
p = QPainter()
p.begin(i2)
p.drawImage(0, 0, i)
p.end()
i2.save(cover, "PNG", -1)
if os.path.isfile(thumb_fp):
# Check to see if default cover is newer than thumbnail
@ -1453,18 +1437,18 @@ class EPUB_MOBI(CatalogPlugin):
self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
else:
if self.verbose:
self.opts.log.info(" generating new thumbnail_default.jpg")
self.opts.log.info(" generating default cover thumbnail")
#title['cover'] = "%s/DefaultCover.jpg" % self.catalogPath
title['cover'] = cover
self.generateThumbnail(title, image_dir, "thumbnail_default.jpg")
else:
self.opts.log.error("Not OK to use PyQt, can't create default thumbnail")
self.thumbs = thumbs
def generateOPF(self):
result = self.updateProgressFullStep("generateOPF()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateOPF()"))
header = '''
<?xml version="1.0" encoding="UTF-8"?>
@ -1596,9 +1580,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXHeader(self):
result = self.updateProgressFullStep("generateNCXHeader()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateNCXHeader()"))
header = '''
<?xml version="1.0" encoding="utf-8"?>
@ -1613,7 +1595,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "periodical"
navPointTag['id'] = "title"
navPointTag['playOrder'] = self.playOrder
#print "generateNCXHeader(periodical): self.playOrder: %d" % self.playOrder
self.playOrder += 1
navLabelTag = Tag(soup, 'navLabel')
textTag = Tag(soup, 'text')
@ -1635,9 +1616,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXDescriptions(self, tocTitle):
result = self.updateProgressFullStep("generateNCXDescriptions()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateNCXDescriptions()"))
# --- Construct the 'Books by Title' section ---
ncx_soup = self.ncxSoup
@ -1649,7 +1628,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "section"
navPointTag['id'] = "bytitle-ID"
navPointTag['playOrder'] = self.playOrder
#print "generateNCXDescriptions(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text')
@ -1669,7 +1647,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "book%dID" % int(book['id'])
navPointVolumeTag['playOrder'] = self.playOrder
#print "generateNCXDescriptions(article): self.playOrder: %d" % self.playOrder
self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text")
@ -1707,9 +1684,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXByTitle(self, tocTitle):
result = self.updateProgressFullStep("generateNCXByTitle()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateNCXByTitle()"))
soup = self.ncxSoup
output = "ByAlphaTitle"
@ -1721,7 +1696,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag['class'] = "section"
navPointTag['id'] = "byalphatitle-ID"
navPointTag['playOrder'] = self.playOrder
#print "generateNCXByTitle(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(soup, 'navLabel')
textTag = Tag(soup, 'text')
@ -1799,9 +1773,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateNCXByAuthor(self, tocTitle):
result = self.updateProgressFullStep("generateNCXByAuthor()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateNCXByAuthor()"))
soup = self.ncxSoup
HTML_file = "content/ByAlphaAuthor.html"
@ -1844,7 +1816,7 @@ class EPUB_MOBI(CatalogPlugin):
author_list += " &hellip;"
author_list = self.formatNCXText(author_list)
if self.verbose:
if False and self.verbose:
self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
master_author_list.append((author_list, current_letter))
@ -1860,7 +1832,7 @@ class EPUB_MOBI(CatalogPlugin):
if len(current_author_list) == self.descriptionClip:
author_list += " &hellip;"
author_list = self.formatNCXText(author_list)
if self.verbose:
if False and self.verbose:
self.opts.log.info(" adding '%s' to master_author_list" % current_letter)
master_author_list.append((author_list, current_letter))
@ -1902,9 +1874,7 @@ class EPUB_MOBI(CatalogPlugin):
# Add each genre as an article
# 'tag', 'file', 'authors'
result = self.updateProgressFullStep("generateNCXByTags()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("generateNCXByTags()"))
ncx_soup = self.ncxSoup
body = ncx_soup.find("navPoint")
@ -1917,7 +1887,6 @@ class EPUB_MOBI(CatalogPlugin):
file_ID = file_ID.replace(" ","")
navPointTag['id'] = "%s-ID" % file_ID
navPointTag['playOrder'] = self.playOrder
#print "generateNCXByTags(section '%s'): self.playOrder: %d" % (tocTitle, self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text')
@ -1939,7 +1908,6 @@ class EPUB_MOBI(CatalogPlugin):
navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag']
navPointVolumeTag['playOrder'] = self.playOrder
#print "generateNCXByTags(article '%s'): self.playOrder: %d" % (genre['tag'], self.playOrder)
self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text")
@ -2000,34 +1968,12 @@ class EPUB_MOBI(CatalogPlugin):
def writeNCX(self):
result = self.updateProgressFullStep("writeNCX()")
if self.verbose:
self.opts.log.info(result)
self.opts.log.info(self.updateProgressFullStep("writeNCX()"))
outfile = open("%s/%s.ncx" % (self.catalogPath, self.basename), 'w')
outfile.write(self.ncxSoup.prettify())
# Helpers
def contents(self, element, title, key=None):
content = None
if element is None:
return None
# Some elements seem to have \n fields
for node in element:
if node == "\n":
continue
else:
content = node
# Special handling for '&amp;' in 'cover'
if key == 'cover' and re.search('&amp;',content):
content = re.sub('&amp;','&',content)
if content:
return unicode(content)
else:
return None
def convertHTMLEntities(self, s):
matches = re.findall("&#\d+;", s)
if len(matches) > 0:
@ -2057,24 +2003,16 @@ class EPUB_MOBI(CatalogPlugin):
self.cleanUp()
if not os.path.isdir(catalogPath):
#if self.verbose: print " creating %s" % catalogPath
os.makedirs(catalogPath)
# Create /content and /images
content_path = catalogPath + "/content"
if not os.path.isdir(content_path):
#if self.verbose: print " creating %s" % content_path
os.makedirs(content_path)
images_path = catalogPath + "/images"
if not os.path.isdir(images_path):
#if self.verbose: print " creating %s" % images_path
os.makedirs(images_path)
def fetchLibraryPath(self):
    """Return a path to the current library.

    The value is read from the 'library_path' entry of calibre's ``prefs``.
    """
    # Imported at call time, as before, rather than at module load.
    from calibre.utils.config import prefs as calibre_prefs

    library_path = calibre_prefs['library_path']
    return library_path
def getMarkerTags(self):
''' Return a list of special marker tags to be excluded from genre list '''
markerTags = []
@ -2089,13 +2027,9 @@ class EPUB_MOBI(CatalogPlugin):
filtered_tags = []
for tag in tags:
# Check the leading character
if tag[0] in self.markerTags:
#print "skipping %s" % tag
continue
# Check the exclude_genre pattern
if re.search(self.opts.exclude_genre, tag):
#print "skipping %s" % tag
continue
filtered_tags.append(tag)
@ -2340,11 +2274,9 @@ class EPUB_MOBI(CatalogPlugin):
# Convert numbers to strings, ignore leading stop words
# The 21-Day Consciousness Cleanse
if False: print "generate_sort_title(%s)" % title
title_words = title.split(' ')
if title_words[0].lower() in ['the','a','an']:
stop_word = title_words.pop(0)
if False : print "removing stop word '%s'" % stop_word
# Scan for numbers in each word clump
translated = []
@ -2359,19 +2291,18 @@ class EPUB_MOBI(CatalogPlugin):
def generateThumbnail(self, title, image_dir, thumb_file):
import calibre.utils.PythonMagickWand as pw
with pw.ImageMagick():
try:
img = pw.NewMagickWand()
if img < 0:
raise RuntimeError('generate_thumbnail(): Cannot create wand')
raise RuntimeError('generateThumbnail(): Cannot create wand')
# Read the cover
if not pw.MagickReadImage(img,
title['cover'].encode(filesystem_encoding)):
self.opts.log.info('Failed to read cover image from: %s' % title['cover'])
self.opts.log.error('generateThumbnail(): Failed to read cover image from: %s' % title['cover'])
raise IOError
thumb = pw.CloneMagickWand(img)
if thumb < 0:
self.opts.log.info('generateThumbnail(): Cannot clone cover')
self.opts.log.error('generateThumbnail(): Cannot clone cover')
raise RuntimeError
# img, width, height
pw.MagickThumbnailImage(thumb, 75, 100)
@ -2379,9 +2310,9 @@ class EPUB_MOBI(CatalogPlugin):
pw.DestroyMagickWand(thumb)
pw.DestroyMagickWand(img)
except IOError:
self.opts.log.info("generateThumbnail() IOError with %s" % title['title'])
self.opts.log.error("generateThumbnail(): IOError with %s" % title['title'])
except RuntimeError:
self.opts.log.info("generateThumbnail() RuntimeError with %s" % title['title'])
self.opts.log.error("generateThumbnail(): RuntimeError with %s" % title['title'])
def processSpecialTags(self, tags, this_title, opts):
tag_list = []

View File

@ -1415,9 +1415,10 @@ class LibraryDatabase2(LibraryDatabase):
if matches:
tag_matches = self.data.get_matches('tags', _('Catalog'))
matches = matches.intersection(tag_matches)
db_id = None
db_id, existing = None, False
if matches:
db_id = list(matches)[0]
existing = True
if db_id is None:
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
(title, 'calibre'))
@ -1433,6 +1434,10 @@ class LibraryDatabase2(LibraryDatabase):
if not hasattr(path, 'read'):
stream.close()
self.conn.commit()
if existing:
t = datetime.utcnow()
self.set_timestamp(db_id, t, notify=False)
self.set_pubdate(db_id, t, notify=False)
self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
return db_id

View File

@ -714,6 +714,10 @@ class LibraryServer(object):
book, books = MarkupTemplate(self.MOBILE_BOOK), []
for record in items[(start-1):(start-1)+num]:
if record[13] is None:
record[13] = ''
if record[6] is None:
record[6] = 0
aus = record[2] if record[2] else __builtin__._('Unknown')
authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
record[10] = fmt_sidx(float(record[10]))

View File

@ -52,8 +52,10 @@ class BaseJob(object):
else:
self._status_text = _('Error') if self.failed else _('Finished')
if DEBUG:
prints('Job:', self.id, self.description, 'finished')
prints('\t'.join(self.details.splitlines(True)))
prints('Job:', self.id, self.description, 'finished',
safe_encode=True)
prints('\t'.join(self.details.splitlines(True)),
safe_encode=True)
if not self._done_called:
self._done_called = True
try: