Sync to trunk.

commit 555382a567
Author: John Schember
Date:   2011-04-24 13:17:18 -04:00

23 changed files with 996 additions and 143 deletions

View File

@@ -1,6 +1,6 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
@@ -18,11 +18,18 @@ class Clarin(BasicNewsRecipe):
     use_embedded_content = False
     no_stylesheets = True
     encoding = 'utf8'
+    delay = 1
     language = 'es_AR'
     publication_type = 'newspaper'
     INDEX = 'http://www.clarin.com'
     masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} '
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        h2{font-family: Georgia,serif; font-size: xx-large}
+        .hora{font-weight:bold}
+        .hd p{font-size: small}
+        .nombre-autor{color: #0F325A}
+        """
     conversion_options = {
         'comment' : description
@@ -31,7 +38,9 @@ class Clarin(BasicNewsRecipe):
        , 'language' : language
        }
     keep_only_tags = [dict(attrs={'class':['hd','mt']})]
+    remove_tags = [dict(name=['meta','base','link'])]
+    remove_attributes = ['lang','_mce_bogus']
     feeds = [
         (u'Pagina principal', u'http://www.clarin.com/rss/' )
@@ -47,6 +56,10 @@ class Clarin(BasicNewsRecipe):
        ,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
        ]
+    def get_article_url(self, article):
+        return article.get('guid', None)
+
     def print_version(self, url):
         return url + '?print=1'

View File

@@ -53,6 +53,7 @@ class FinancialTimes(BasicNewsRecipe):
     feeds = [
         (u'UK' , u'http://www.ft.com/rss/home/uk' )
        ,(u'US' , u'http://www.ft.com/rss/home/us' )
+       ,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
        ,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
        ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
        ]

View File

@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 staradvertiser.com
 '''
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Starbulletin(BasicNewsRecipe):
     title = 'Honolulu Star Advertiser'
     __author__ = 'Darko Miletic'
-    description = "Latest national and local Hawaii sports news"
+    description = 'Latest national and local Hawaii sports news'
     publisher = 'Honolulu Star-Advertiser'
     category = 'news, Honolulu, Hawaii'
     oldest_article = 2
@@ -19,7 +19,13 @@ class Starbulletin(BasicNewsRecipe):
     use_embedded_content = False
     encoding = 'utf8'
     publication_type = 'newspaper'
-    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} h1,.brown,.postCredit{color: #663300} .storyDeck{font-size: 1.2em; font-weight: bold} '
+    masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
+    extra_css = """
+        body{font-family: Verdana,Arial,Helvetica,sans-serif}
+        h1,.brown,.postCredit{color: #663300}
+        .storyDeck{font-size: 1.2em; font-weight: bold}
+        img{display: block}
+        """
     conversion_options = {
         'comment' : description
@@ -28,14 +34,16 @@ class Starbulletin(BasicNewsRecipe):
        , 'language' : language
        , 'linearize_tables' : True
        }
-    remove_tags_before = dict(attrs={'id':'storyTitle'})
-    remove_tags_after = dict(name='div',attrs={'class':'storytext'})
+    keep_only_tags = [
+        dict(attrs={'id':'storyTitle'})
+       ,dict(attrs={'class':['storyDeck','postCredit']})
+       ,dict(name='span',attrs={'class':'brown'})
+       ,dict(name='div',attrs={'class':'storytext'})
+       ]
     remove_tags = [
-        dict(name=['object','link','script','span'])
-       ,dict(attrs={'class':'insideStoryImage'})
+        dict(name=['object','link','script','span','meta','base','iframe'])
+       ,dict(attrs={'class':['insideStoryImage','insideStoryAd']})
        ,dict(attrs={'name':'fb_share'})
-       ,dict(name='div',attrs={'class':'storytext'})
        ]
     feeds = [
@@ -47,3 +55,24 @@ class Starbulletin(BasicNewsRecipe):
        ,(u'Business' , u'http://www.staradvertiser.com/business/index.rss' )
        ,(u'Travel' , u'http://www.staradvertiser.com/travel/index.rss' )
        ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup

Binary file not shown.

View File

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 '''
-Read meta information from eReader pdb files.
+Read meta information from pdb files.
 '''
 __license__ = 'GPL v3'
@@ -13,10 +13,12 @@ import re
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
+from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
 MREADER = {
     'PNPdPPrs' : get_eReader,
     'PNRdPPrs' : get_eReader,
+    'DataPlkr' : get_plucker,
 }
 from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
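
For orientation, a minimal sketch of how a dispatch table like MREADER is typically consumed (the helper below is illustrative, and it assumes PdbHeaderReader exposes the PDB identity string as `ident`, as the surrounding module does):

    def read_pdb_metadata(stream, extract_cover=True):
        # Identify the PDB flavour from its header, then hand off to the
        # matching metadata reader (eReader or, after this commit, Plucker).
        pheader = PdbHeaderReader(stream)
        reader = MREADER.get(pheader.ident, None)
        if reader is None:
            raise ValueError('Unsupported PDB type: %r' % pheader.ident)
        stream.seek(0)
        return reader(stream, extract_cover)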

View File

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
'''
Read meta information from Plucker pdb files.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct
from datetime import datetime
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb.plucker.reader import SectionHeader, DATATYPE_METADATA, \
MIBNUM_TO_NAME
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break
    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in xrange(record_count):
        type, = struct.unpack('>H', section_data[2+adv:4+adv])
        length, = struct.unpack('>H', section_data[4+adv:6+adv])
        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])
        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
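
A small usage sketch for the reader above (the file name is hypothetical); in calibre itself this function is reached through the MREADER table in the previous file:

    # Hypothetical: parse a Plucker .pdb file directly and inspect the result.
    with open('some_plucker_book.pdb', 'rb') as stream:
        mi = get_metadata(stream, extract_cover=False)
        print(mi.title)
        print(mi.authors)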

View File

@@ -412,7 +412,7 @@ def identify(log, abort, # {{{
     if msprefs['txt_comments']:
         for r in results:
-            if r.plugin.has_html_comments and r.comments:
+            if r.identify_plugin.has_html_comments and r.comments:
                 r.comments = html2text(r.comments)
     max_tags = msprefs['max_tags']

View File

@@ -206,6 +206,7 @@ class OverDrive(Source):
             xref_q = '+'.join(title_tokens)
         #log.error('Initial query is %s'%initial_q)
         #log.error('Cross reference query is %s'%xref_q)
+
         q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
         query = '{"szKeyword":"'+initial_q+'"}'
@@ -229,34 +230,42 @@ class OverDrive(Source):
             if int(m.group('displayrecords')) >= 1:
                 results = True
             elif int(m.group('totalrecords')) >= 1:
+                if int(m.group('totalrecords')) >= 100:
+                    if xref_q.find('+') != -1:
+                        xref_tokens = xref_q.split('+')
+                        xref_q = xref_tokens[0]
+                        #log.error('xref_q is '+xref_q)
+                    else:
+                        xref_q = ''
+                else:
                     xref_q = ''
                 q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
             elif int(m.group('totalrecords')) == 0:
                 return ''

-        return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
+        return self.sort_ovrdrv_results(raw, log, title, title_tokens, author, author_tokens)

-    def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+    def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
         close_matches = []
         raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
         results = json.loads(raw)
-        #print results
+        #log.error('raw results are:'+str(results))
         # The search results are either from a keyword search or a multi-format list from a single ID,
         # sort through the results for closest match/format
         if results:
             for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                     thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                     availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-                #print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
+                #log.error("this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series)
                 if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
-                    #print "overdrive id is not None, searching based on format type priority"
+                    #log.error('overdrive id is not None, searching based on format type priority')
                     return self.format_results(reserveid, od_title, subtitle, series, publisher,
                             creators, thumbimage, worldcatlink, formatid)
                 else:
-                    creators = creators.split(', ')
+                    if creators:
+                        creators = creators.split(', ')
                     # if an exact match in a preferred format occurs
-                    if (author and creators[0] == author[0]) and od_title == title and int(formatid) in [1, 50, 410, 900] and thumbimage:
+                    if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
                         return self.format_results(reserveid, od_title, subtitle, series, publisher,
                                 creators, thumbimage, worldcatlink, formatid)
                     else:
@@ -282,6 +291,10 @@ class OverDrive(Source):
                             close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
                         else:
                             close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+                    elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+                        close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
         if close_matches:
             return close_matches[0]
         else:
@@ -289,7 +302,7 @@ class OverDrive(Source):
         else:
             return ''

-    def overdrive_get_record(self, br, q, ovrdrv_id):
+    def overdrive_get_record(self, br, log, q, ovrdrv_id):
         search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
         results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
@@ -311,7 +324,7 @@ class OverDrive(Source):
         raw = str(list(raw))
         clean_cj = mechanize.CookieJar()
         br.set_cookiejar(clean_cj)
-        return self.sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
+        return self.sort_ovrdrv_results(raw, log, None, None, None, ovrdrv_id)

     def find_ovrdrv_data(self, br, log, title, author, isbn, ovrdrv_id=None):
@@ -319,7 +332,7 @@ class OverDrive(Source):
         if ovrdrv_id is None:
             return self.overdrive_search(br, log, q, title, author)
         else:
-            return self.overdrive_get_record(br, q, ovrdrv_id)
+            return self.overdrive_get_record(br, log, q, ovrdrv_id)

View File

@ -1,75 +0,0 @@
'''
Device profiles.
'''
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
from itertools import izip
FONT_SIZES = [('xx-small', 1),
('x-small', None),
('small', 2),
('medium', 3),
('large', 4),
('x-large', 5),
('xx-large', 6),
(None, 7)]
class Profile(object):
def __init__(self, width, height, dpi, fbase, fsizes):
self.width = (float(width) / dpi) * 72.
self.height = (float(height) / dpi) * 72.
self.dpi = float(dpi)
self.fbase = float(fbase)
self.fsizes = []
for (name, num), size in izip(FONT_SIZES, fsizes):
self.fsizes.append((name, num, float(size)))
self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
PROFILES = {
'PRS505':
Profile(width=584, height=754, dpi=168.451, fbase=12,
fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),
'MSReader':
Profile(width=480, height=652, dpi=96, fbase=13,
fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),
# Not really, but let's pretend
'Mobipocket':
Profile(width=600, height=800, dpi=96, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
# No clue on usable screen size; DPI should be good
'HanlinV3':
Profile(width=584, height=754, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'CybookG3':
Profile(width=600, height=800, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Kindle':
Profile(width=525, height=640, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
'Browser':
Profile(width=800, height=600, dpi=100.0, fbase=12,
fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
}
class Context(object):
PROFILES = PROFILES
def __init__(self, source, dest):
if source in PROFILES:
source = PROFILES[source]
if dest in PROFILES:
dest = PROFILES[dest]
self.source = source
self.dest = dest

View File

@@ -21,7 +21,6 @@ from calibre import force_unicode
 from calibre.ebooks import unit_convert
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
-from calibre.ebooks.oeb.profile import PROFILES
 cssutils.log.setLevel(logging.WARN)
@@ -123,10 +122,10 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
     STYLESHEETS = WeakKeyDictionary()

-    def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
+    def __init__(self, tree, path, oeb, opts, profile=None,
                  extra_css='', user_css=''):
         self.oeb, self.opts = oeb, opts
-        self.profile = profile
+        self.profile = opts.input_profile
         self.logger = oeb.logger
         item = oeb.manifest.hrefs[path]
         basename = os.path.basename(path)

View File

@@ -12,6 +12,7 @@ from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
+from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader

 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
@@ -19,6 +20,7 @@ FORMAT_READERS = {
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
     '.pdfADBE': pdf_reader,
+    'DataPlkr': plucker_reader,
 }

 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -37,6 +39,7 @@ IDENTITY_TO_NAME = {
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
     '.pdfADBE': 'Adobe Reader',
+    'DataPlkr': 'Plucker',

     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
@@ -50,7 +53,6 @@ IDENTITY_TO_NAME = {
     'DATALSdb': 'LIST',
     'Mdb1Mdb1': 'MobileDB',
     'BOOKMOBI': 'MobiPocket',
-    'DataPlkr': 'Plucker',
     'DataSprd': 'QuickSheet',
     'SM01SMem': 'SuperMemo',
     'TEXtTlDc': 'TealDoc',

View File

@ -0,0 +1,764 @@
# -*- coding: utf-8 -*-
#from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
import zlib
from collections import OrderedDict
from calibre import CurrentDir
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ptempfile import TemporaryFile
from calibre.utils.magick import Image, create_canvas
DATATYPE_PHTML = 0
DATATYPE_PHTML_COMPRESSED = 1
DATATYPE_TBMP = 2
DATATYPE_TBMP_COMPRESSED = 3
DATATYPE_MAILTO = 4
DATATYPE_LINK_INDEX = 5
DATATYPE_LINKS = 6
DATATYPE_LINKS_COMPRESSED = 7
DATATYPE_BOOKMARKS = 8
DATATYPE_CATEGORY = 9
DATATYPE_METADATA = 10
DATATYPE_STYLE_SHEET = 11
DATATYPE_FONT_PAGE = 12
DATATYPE_TABLE = 13
DATATYPE_TABLE_COMPRESSED = 14
DATATYPE_COMPOSITE_IMAGE = 15
DATATYPE_PAGELIST_METADATA = 16
DATATYPE_SORTED_URL_INDEX = 17
DATATYPE_SORTED_URL = 18
DATATYPE_SORTED_URL_COMPRESSED = 19
DATATYPE_EXT_ANCHOR_INDEX = 20
DATATYPE_EXT_ANCHOR = 21
DATATYPE_EXT_ANCHOR_COMPRESSED = 22
# IETF IANA MIBenum value for the character set.
# See the http://www.iana.org/assignments/character-sets for valid values.
# Not all character sets are handled by Python. This is a small subset that
# the MIBenum maps to Python standard encodings
# from http://docs.python.org/library/codecs.html#standard-encodings
MIBNUM_TO_NAME = {
3: 'ascii',
4: 'latin_1',
5: 'iso8859_2',
6: 'iso8859_3',
7: 'iso8859_4',
8: 'iso8859_5',
9: 'iso8859_6',
10: 'iso8859_7',
11: 'iso8859_8',
12: 'iso8859_9',
13: 'iso8859_10',
17: 'shift_jis',
18: 'euc_jp',
27: 'utf_7',
36: 'euc_kr',
37: 'iso2022_kr',
38: 'euc_kr',
39: 'iso2022_jp',
40: 'iso2022_jp_2',
106: 'utf-8',
109: 'iso8859_13',
110: 'iso8859_14',
111: 'iso8859_15',
112: 'iso8859_16',
1013: 'utf_16_be',
1014: 'utf_16_le',
1015: 'utf_16',
2009: 'cp850',
2010: 'cp852',
2011: 'cp437',
2013: 'cp862',
2025: 'gb2312',
2026: 'big5',
2028: 'cp037',
2043: 'cp424',
2044: 'cp500',
2046: 'cp855',
2047: 'cp857',
2048: 'cp860',
2049: 'cp861',
2050: 'cp863',
2051: 'cp864',
2052: 'cp865',
2054: 'cp869',
2063: 'cp1026',
2085: 'hz',
2086: 'cp866',
2087: 'cp775',
2089: 'cp858',
2091: 'cp1140',
2102: 'big5hkscs',
2250: 'cp1250',
2251: 'cp1251',
2252: 'cp1252',
2253: 'cp1253',
2254: 'cp1254',
2255: 'cp1255',
2256: 'cp1256',
2257: 'cp1257',
2258: 'cp1258',
}
def decompress_doc(data):
buffer = [ord(i) for i in data]
res = []
i = 0
while i < len(buffer):
c = buffer[i]
i += 1
if c >= 1 and c <= 8:
res.extend(buffer[i:i+c])
i += c
elif c <= 0x7f:
res.append(c)
elif c >= 0xc0:
res.extend( (ord(' '), c^0x80) )
else:
c = (c << 8) + buffer[i]
i += 1
di = (c & 0x3fff) >> 3
j = len(res)
num = (c & ((1 << 3) - 1)) + 3
for k in range( num ):
res.append(res[j - di+k])
return ''.join([chr(i) for i in res])
class HeaderRecord(object):
'''
Plucker header. PDB record 0.
'''
def __init__(self, raw):
self.uid, = struct.unpack('>H', raw[0:2])
# This field is labeled 'version' in the spec.
# 2 is ZLIB compressed,
# 1 is DOC compressed
self.compression, = struct.unpack('>H', raw[2:4])
self.records, = struct.unpack('>H', raw[4:6])
# uid of the first html file. This should link
# to other files which in turn may link to others.
self.home_html = None
self.reserved = {}
for i in xrange(self.records):
adv = 4*i
name, = struct.unpack('>H', raw[6+adv:8+adv])
id, = struct.unpack('>H', raw[8+adv:10+adv])
self.reserved[id] = name
if name == 0:
self.home_html = id
class SectionHeader(object):
'''
Every section (record) has this header. It gives
details about the section, such as its uid.
'''
def __init__(self, raw):
self.uid, = struct.unpack('>H', raw[0:2])
self.paragraphs, = struct.unpack('>H', raw[2:4])
self.size, = struct.unpack('>H', raw[4:6])
self.type, = struct.unpack('>B', raw[6])
self.flags, = struct.unpack('>B', raw[7])
class SectionHeaderText(object):
'''
Sub header for text records.
'''
def __init__(self, section_header, raw):
# The uncompressed size of each paragraph.
self.sizes = []
# uncompressed offset of each paragraph starting
# at the beginning of the PHTML.
self.paragraph_offsets = []
# Paragraph attributes.
self.attributes = []
for i in xrange(section_header.paragraphs):
adv = 4*i
self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
running_offset = 0
for size in self.sizes:
running_offset += size
self.paragraph_offsets.append(running_offset)
class SectionMetadata(object):
'''
Metadata.
This does not store metadata such as title, or author.
That metadata would be best retrieved with the PDB (plucker)
metadata reader.
This stores document specific information such as the
text encoding.
Note: There is a default encoding but each text section
can be assigned a different encoding.
'''
def __init__(self, raw):
self.default_encoding = 'latin-1'
self.exceptional_uid_encodings = {}
self.owner_id = None
record_count, = struct.unpack('>H', raw[0:2])
adv = 0
for i in xrange(record_count):
type, = struct.unpack('>H', raw[2+adv:4+adv])
length, = struct.unpack('>H', raw[4+adv:6+adv])
# CharSet
if type == 1:
val, = struct.unpack('>H', raw[6+adv:8+adv])
self.default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
# ExceptionalCharSets
elif type == 2:
ii_adv = 0
for ii in xrange(length / 2):
uid, = struct.unpack('>H', raw[6+adv+ii_adv:8+adv+ii_adv])
mib, = struct.unpack('>H', raw[8+adv+ii_adv:10+adv+ii_adv])
self.exceptional_uid_encodings[uid] = MIBNUM_TO_NAME.get(mib, 'latin-1')
ii_adv += 4
# OwnerID
elif type == 3:
self.owner_id = struct.unpack('>I', raw[6+adv:10+adv])
# Author, Title, PubDate
# Ignored here. The metadata reader plugin
# will get this info because if it's missing
# the metadata reader plugin will use fall
# back data from elsewhere in the file.
elif type in (4, 5, 6):
pass
# Linked Documents
elif type == 7:
pass
adv += 2*length
class SectionText(object):
'''
Text data. Stores a text section header and the PHTML.
'''
def __init__(self, section_header, raw):
self.header = SectionHeaderText(section_header, raw)
self.data = raw[section_header.paragraphs * 4:]
class SectionCompositeImage(object):
'''
A composite image consists of a 2D array
of rows and columns. The entries in the array
are uid's.
'''
def __init__(self, raw):
self.columns, = struct.unpack('>H', raw[0:2])
self.rows, = struct.unpack('>H', raw[2:4])
# [
# [uid, uid, uid, ...],
# [uid, uid, uid, ...],
# ...
# ]
#
# Each item in the layout is in its
# correct position in the final
# composite.
#
# Each item in the layout is a uid
# to an image record.
self.layout = []
offset = 4
for i in xrange(self.rows):
col = []
for j in xrange(self.columns):
col.append(struct.unpack('>H', raw[offset:offset+2])[0])
offset += 2
self.layout.append(col)
class Reader(FormatReader):
'''
Convert a plucker archive into HTML.
TODO:
* UTF 16 and 32 characters.
* Margins.
* Alignment.
* Font color.
* DATATYPE_MAILTO
* DATATYPE_TABLE(_COMPRESSED)
* DATATYPE_EXT_ANCHOR_INDEX
* DATATYPE_EXT_ANCHOR(_COMPRESSED)
'''
def __init__(self, header, stream, log, options):
self.stream = stream
self.log = log
self.options = options
# Mapping of section uid to our internal
# list of sections.
self.uid_section_number = OrderedDict()
self.uid_text_secion_number = OrderedDict()
self.uid_text_secion_encoding = {}
self.uid_image_section_number = {}
self.uid_composite_image_section_number = {}
self.metadata_section_number = None
self.default_encoding = 'latin-1'
self.owner_id = None
self.sections = []
# The Plucker record0 header
self.header_record = HeaderRecord(header.section_data(0))
for i in range(1, header.num_sections):
section_number = len(self.sections)
# The length of the section header.
# Where the actual data in the section starts.
start = 8
section = None
raw_data = header.section_data(i)
# Every section has a section header.
section_header = SectionHeader(raw_data)
# Store the sections we care about.
if section_header.type in (DATATYPE_PHTML, DATATYPE_PHTML_COMPRESSED):
self.uid_text_secion_number[section_header.uid] = section_number
section = SectionText(section_header, raw_data[start:])
elif section_header.type in (DATATYPE_TBMP, DATATYPE_TBMP_COMPRESSED):
self.uid_image_section_number[section_header.uid] = section_number
section = raw_data[start:]
elif section_header.type == DATATYPE_METADATA:
self.metadata_section_number = section_number
section = SectionMetadata(raw_data[start:])
elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
self.uid_composite_image_section_number[section_header.uid] = section_number
section = SectionCompositeImage(raw_data[start:])
# Store the section.
if section:
self.uid_section_number[section_header.uid] = section_number
self.sections.append((section_header, section))
# Store useful information from the metadata section locally
# to make access easier.
if self.metadata_section_number:
mdata_section = self.sections[self.metadata_section_number][1]
for k, v in mdata_section.exceptional_uid_encodings.items():
self.uid_text_secion_encoding[k] = v
self.default_encoding = mdata_section.default_encoding
self.owner_id = mdata_section.owner_id
# Get the metadata (title, author, ...) with the metadata reader.
from calibre.ebooks.metadata.pdb import get_metadata
self.mi = get_metadata(stream, False)
def extract_content(self, output_dir):
# Each text record is independent (unless the continuation
# value is set in the previous record). Put each converted
text record into a separate file. We will reference the
# home.html file as the first file and let the HTML input
# plugin assemble the order based on hyperlinks.
with CurrentDir(output_dir):
for uid, num in self.uid_text_secion_number.items():
self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
with open('%s.html' % uid, 'wb') as htmlf:
html = u'<html><body>'
section_header, section_data = self.sections[num]
if section_header.type == DATATYPE_PHTML:
html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
elif section_header.type == DATATYPE_PHTML_COMPRESSED:
d = self.decompress_phtml(section_data.data)
html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
html += '</body></html>'
htmlf.write(html.encode('utf-8'))
# Images.
# Cache the image sizes in case they are used by a composite image.
image_sizes = {}
if not os.path.exists(os.path.join(output_dir, 'images/')):
os.makedirs(os.path.join(output_dir, 'images/'))
with CurrentDir(os.path.join(output_dir, 'images/')):
# Single images.
for uid, num in self.uid_image_section_number.items():
section_header, section_data = self.sections[num]
if section_data:
idata = None
if section_header.type == DATATYPE_TBMP:
idata = section_data
elif section_header.type == DATATYPE_TBMP_COMPRESSED:
if self.header_record.compression == 1:
idata = decompress_doc(section_data)
elif self.header_record.compression == 2:
idata = zlib.decompress(section_data)
try:
with TemporaryFile(suffix='.palm') as itn:
with open(itn, 'wb') as itf:
itf.write(idata)
im = Image()
im.read(itn)
image_sizes[uid] = im.size
im.set_compression_quality(70)
im.save('%s.jpg' % uid)
self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
except Exception as e:
self.log.error('Failed to write image with uid %s: %s' % (uid, e))
else:
self.log.error('Failed to write image with uid %s: No data.' % uid)
# Composite images.
# We're going to use the already compressed .jpg images here.
for uid, num in self.uid_composite_image_section_number.items():
try:
section_header, section_data = self.sections[num]
# Get the final width and height.
width = 0
height = 0
for row in section_data.layout:
row_width = 0
col_height = 0
for col in row:
if col not in image_sizes:
raise Exception('Image with uid: %s missing.' % col)
im = Image()
im.read('%s.jpg' % col)
w, h = im.size
row_width += w
if col_height < h:
col_height = h
if width < row_width:
width = row_width
height += col_height
# Create a new image the total size of all image
# parts. Put the parts into the new image.
canvas = create_canvas(width, height)
y_off = 0
for row in section_data.layout:
x_off = 0
largest_height = 0
for col in row:
im = Image()
im.read('%s.jpg' % col)
canvas.compose(im, x_off, y_off)
w, h = im.size
x_off += w
if largest_height < h:
largest_height = h
y_off += largest_height
canvas.set_compression_quality(70)
canvas.save('%s.jpg' % uid)
self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
except Exception as e:
self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))
# Run the HTML through the html processing plugin.
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(self.options, opt.option.name, opt.recommended_value)
self.options.input_encoding = 'utf-8'
odi = self.options.debug_pipeline
self.options.debug_pipeline = None
# Determine the home.html record uid. This should be set in the
# reserved values in the metadata record. home.html is the first
# text record (should have hyper link references to other records)
# in the document.
try:
home_html = self.header_record.home_html
if not home_html:
home_html = self.uid_text_secion_number.items()[0][0]
except:
raise Exception(_('Could not determine home.html'))
# Generate oeb from html conversion.
oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
self.options.debug_pipeline = odi
return oeb
def decompress_phtml(self, data):
if self.header_record.compression == 2:
if self.owner_id:
raise NotImplementedError
return zlib.decompress(data)
elif self.header_record.compression == 1:
#from calibre.ebooks.compression.palmdoc import decompress_doc
return decompress_doc(data)
def process_phtml(self, d, paragraph_offsets=[]):
html = u'<p id="p0">'
offset = 0
paragraph_open = True
link_open = False
need_set_p_id = False
p_num = 1
font_specifier_close = ''
while offset < len(d):
if not paragraph_open:
if need_set_p_id:
html += u'<p id="p%s">' % p_num
p_num += 1
need_set_p_id = False
else:
html += u'<p>'
paragraph_open = True
c = ord(d[offset])
# PHTML "functions"
if c == 0x0:
offset += 1
c = ord(d[offset])
# Page link begins
# 2 Bytes
# record ID
if c == 0x0a:
offset += 1
id = struct.unpack('>H', d[offset:offset+2])[0]
if id in self.uid_text_secion_number:
html += '<a href="%s.html">' % id
link_open = True
offset += 1
# Targeted page link begins
# 3 Bytes
# record ID, target
elif c == 0x0b:
offset += 3
# Paragraph link begins
# 4 Bytes
# record ID, paragraph number
elif c == 0x0c:
offset += 1
id = struct.unpack('>H', d[offset:offset+2])[0]
offset += 2
pid = struct.unpack('>H', d[offset:offset+2])[0]
if id in self.uid_text_secion_number:
html += '<a href="%s.html#p%s">' % (id, pid)
link_open = True
offset += 1
# Targeted paragraph link begins
# 5 Bytes
# record ID, paragraph number, target
elif c == 0x0d:
offset += 5
# Link ends
# 0 Bytes
elif c == 0x08:
if link_open:
html += '</a>'
link_open = False
# Set font
# 1 Bytes
# font specifier
elif c == 0x11:
offset += 1
specifier = d[offset]
html += font_specifier_close
# Regular text
if specifier == 0:
font_specifier_close = ''
# h1
elif specifier == 1:
html += '<h1>'
font_specifier_close = '</h1>'
# h2
elif specifier == 2:
html += '<h2>'
font_specifier_close = '</h2>'
# h3
elif specifier == 3:
html += '<h3>'
font_specifier_close = '</h3>'
# h4
elif specifier == 4:
html += '<h4>'
font_specifier_close = '</h4>'
# h5
elif specifier == 5:
html += '<h5>'
font_specifier_close = '</h5>'
# h6
elif specifier == 6:
html += '<h6>'
font_specifier_close = '</h6>'
# Bold
elif specifier == 7:
html += '<b>'
font_specifier_close = '</b>'
# Fixed-width
elif specifier == 8:
html += '<tt>'
font_specifier_close = '</tt>'
# Small
elif specifier == 9:
html += '<small>'
font_specifier_close = '</small>'
# Subscript
elif specifier == 10:
html += '<sub>'
font_specifier_close = '</sub>'
# Superscript
elif specifier == 11:
html += '<sup>'
font_specifier_close = '</sup>'
# Embedded image
# 2 Bytes
# image record ID
elif c == 0x1a:
offset += 1
uid = struct.unpack('>H', d[offset:offset+2])[0]
html += '<img src="images/%s.jpg" />' % uid
offset += 1
# Set margin
# 2 Bytes
# left margin, right margin
elif c == 0x22:
offset += 2
# Alignment of text
# 1 Bytes
# alignment
elif c == 0x29:
offset += 1
# Horizontal rule
# 3 Bytes
# 8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)
elif c == 0x33:
offset += 3
if paragraph_open:
html += u'</p>'
paragraph_open = False
html += u'<hr />'
# New line
# 0 Bytes
elif c == 0x38:
if paragraph_open:
html += u'</p>\n'
paragraph_open = False
# Italic text begins
# 0 Bytes
elif c == 0x40:
html += u'<i>'
# Italic text ends
# 0 Bytes
elif c == 0x48:
html += u'</i>'
# Set text color
# 3 Bytes
# 8-bit red, 8-bit green, 8-bit blue
elif c == 0x53:
offset += 3
# Multiple embedded image
# 4 Bytes
# alternate image record ID, image record ID
elif c == 0x5c:
offset += 3
uid = struct.unpack('>H', d[offset:offset+2])[0]
html += '<img src="images/%s.jpg" />' % uid
offset += 1
# Underline text begins
# 0 Bytes
elif c == 0x60:
html += u'<u>'
# Underline text ends
# 0 Bytes
elif c == 0x68:
html += u'</u>'
# Strike-through text begins
# 0 Bytes
elif c == 0x70:
html += u'<s>'
# Strike-through text ends
# 0 Bytes
elif c == 0x78:
html += u'</s>'
# 16-bit Unicode character
# 3 Bytes
# alternate text length, 16-bit unicode character
elif c == 0x83:
offset += 3
# 32-bit Unicode character
# 5 Bytes
# alternate text length, 32-bit unicode character
elif c == 0x85:
offset += 5
# Begin custom font span
# 6 Bytes
# font page record ID, X page position, Y page position
elif c == 0x8e:
offset += 6
# Adjust custom font glyph position
# 4 Bytes
# X page position, Y page position
elif c == 0x8c:
offset += 4
# Change font page
# 2 Bytes
# font record ID
elif c == 0x8a:
offset += 2
# End custom font span
# 0 Bytes
elif c == 0x88:
pass
# Begin new table row
# 0 Bytes
elif c == 0x90:
pass
# Insert table (or table link)
# 2 Bytes
# table record ID
elif c == 0x92:
offset += 2
# Table cell data
# 7 Bytes
# 8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length
elif c == 0x97:
offset += 7
# Exact link modifier
# 2 Bytes
# Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or Targeted Paragraph Link function to specify an exact byte offset within the paragraph. This function must be followed immediately by the function it modifies).
elif c == 0x9a:
offset += 2
elif c == 0xa0:
html += '&nbsp;'
else:
html += unichr(c)
offset += 1
if offset in paragraph_offsets:
need_set_p_id = True
if paragraph_open:
html += u'</p>\n'
paragraph_open = False
if paragraph_open:
html += u'</p>'
return html
def get_text_uid_encoding(self, uid):
# Return the user specified input encoding,
# otherwise return the alternate encoding specified for the uid,
# otherwise return the default encoding for the document.
return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)
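
As a sanity check on the PalmDOC decompressor defined near the top of this file, here is a tiny hand-crafted example (the byte string is illustrative, not taken from a real Plucker archive):

    # 0x03 copies the next three literal bytes; 0x80 0x18 encodes a
    # back-reference of distance 3 and length 3, repeating 'abc'.
    print(decompress_doc('\x03abc\x80\x18'))   # -> 'abcabc'
    # Bytes >= 0xc0 expand to a space plus the byte with its high bit cleared.
    print(decompress_doc('\xc1'))              # -> ' A'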

View File

@@ -117,11 +117,11 @@ class EditMetadataAction(InterfaceAction):
         payload = (id_map, failed_ids, failed_covers)
         from calibre.gui2.dialogs.message_box import ProceedNotification
-        p = ProceedNotification(payload, job.html_details,
+        p = ProceedNotification(self.apply_downloaded_metadata,
+                payload, job.html_details,
                 _('Download log'), _('Download complete'), msg,
                 det_msg=det_msg, show_copy_button=show_copy_button,
                 parent=self.gui)
-        p.proceed.connect(self.apply_downloaded_metadata)
         p.show()

     def apply_downloaded_metadata(self, payload):
@@ -529,13 +529,17 @@ class EditMetadataAction(InterfaceAction):
         view.reset()

     # Apply bulk metadata changes {{{
-    def apply_metadata_changes(self, id_map, title=None, msg=''):
+    def apply_metadata_changes(self, id_map, title=None, msg='', callback=None):
         '''
         Apply the metadata changes in id_map to the database synchronously
         id_map must be a mapping of ids to Metadata objects. Set any fields you
         do not want updated in the Metadata object to null. An easy way to do
         that is to create a metadata object as Metadata(_('Unknown')) and then
         only set the fields you want changed on this object.
+
+        callback can be either None or a function accepting a single argument,
+        in which case it is called after applying is complete with the list of
+        changed ids.
         '''
         if title is None:
             title = _('Applying changed metadata')
@@ -544,6 +548,7 @@ class EditMetadataAction(InterfaceAction):
         self.apply_failures = []
         self.applied_ids = []
         self.apply_pd = None
+        self.apply_callback = callback
         if len(self.apply_id_map) > 1:
             from calibre.gui2.dialogs.progress import ProgressDialog
             self.apply_pd = ProgressDialog(title, msg, min=0,
@@ -611,6 +616,11 @@ class EditMetadataAction(InterfaceAction):
         self.apply_id_map = []
         self.apply_pd = None
+        try:
+            if callable(self.apply_callback):
+                self.apply_callback(self.applied_ids)
+        finally:
+            self.apply_callback = None

     # }}}
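
A hedged illustration of the new `callback` hook described in the docstring above (the `refresh` function and the way the caller obtains `id_map` are hypothetical, not part of this commit):

    # Hypothetical caller: apply edited metadata, then act on the ids that
    # were actually changed once the synchronous apply loop finishes.
    def refresh(applied_ids):
        print('Metadata applied for book ids:', applied_ids)

    edit_metadata_action.apply_metadata_changes(id_map, callback=refresh)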

View File

@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'

 from PyQt4.Qt import (QDialog, QIcon, QApplication, QSize, QKeySequence,
-        QAction, Qt, pyqtSignal, QTextBrowser, QDialogButtonBox, QVBoxLayout)
+        QAction, Qt, QTextBrowser, QDialogButtonBox, QVBoxLayout)

 from calibre.constants import __version__
 from calibre.gui2.dialogs.message_box_ui import Ui_Dialog
@@ -143,17 +143,20 @@ class ViewLog(QDialog): # {{{
         QApplication.clipboard().setText(txt)
 # }}}

+_proceed_memory = []

 class ProceedNotification(MessageBox): # {{{

-    proceed = pyqtSignal(object)
-
-    def __init__(self, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, parent=None):
+    def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
+            det_msg='', show_copy_button=False, parent=None):
         '''
         A non modal popup that notifies the user that a background task has
-        been completed. If they user clicks yes, the proceed signal is emitted
-        with payload as its argument.
+        been completed.

-        :param payload: Arbitrary object, emitted in the proceed signal
+        :param callback: A callable that is called with payload if the user
+                         asks to proceed. Note that this is always called in the GUI thread
+        :param payload: Arbitrary object, passed to callback
         :param html_log: An HTML or plain text log
         :param log_viewer_title: The title for the log viewer window
         :param title: The title fo rthis popup
@@ -166,25 +169,31 @@ class ProceedNotification(MessageBox): # {{{
         self.payload = payload
         self.html_log = html_log
         self.log_viewer_title = log_viewer_title
-        self.finished.connect(self.do_proceed)
+        self.finished.connect(self.do_proceed, type=Qt.QueuedConnection)

         self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
         self.vlb.setIcon(QIcon(I('debug.png')))
         self.vlb.clicked.connect(self.show_log)
         self.det_msg_toggle.setVisible(bool(det_msg))
         self.setModal(False)
+        self.callback = callback
+        _proceed_memory.append(self)

     def show_log(self):
         self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
                 parent=self)

     def do_proceed(self, result):
-        if result == self.Accepted:
-            self.proceed.emit(self.payload)
         try:
-            self.proceed.disconnect()
-        except:
-            pass
+            if result == self.Accepted:
+                self.callback(self.payload)
+        finally:
+            # Ensure this notification is garbage collected
+            self.callback = None
+            self.setParent(None)
+            self.finished.disconnect()
+            self.vlb.clicked.disconnect()
+            _proceed_memory.remove(self)
 # }}}

 if __name__ == '__main__':
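
The edit-metadata diff earlier in this commit shows the intended calling pattern for the reworked class; restated here as a short sketch (`payload`, `job`, `msg` and `gui` stand in for whatever the caller already has):

    # The callback replaces the old `proceed` signal: it is invoked with
    # `payload`, in the GUI thread, only if the user chooses to proceed.
    def on_proceed(payload):
        id_map, failed_ids, failed_covers = payload
        # ... apply the downloaded metadata here ...

    p = ProceedNotification(on_proceed, payload, job.html_details,
            _('Download log'), _('Download complete'), msg,
            det_msg='', show_copy_button=False, parent=gui)
    p.show()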

View File

@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>672</width>
+    <width>941</width>
     <height>563</height>
    </rect>
   </property>
@@ -22,7 +22,7 @@
      <property name="sizeHint" stdset="0">
       <size>
        <width>10</width>
-       <height>00</height>
+       <height>0</height>
       </size>
      </property>
     </spacer>
@@ -50,13 +50,13 @@
    </item>
    <item row="2" column="2">
     <widget class="QCheckBox" name="opt_bools_are_tristate">
+     <property name="text">
+      <string>Yes/No columns have three values (Requires restart)</string>
+     </property>
      <property name="toolTip">
       <string>If checked, Yes/No custom columns values can be Yes, No, or Unknown.
If not checked, the values can be Yes or No.</string>
      </property>
-     <property name="text">
-      <string>Yes/No columns have three values (Requires restart)</string>
-     </property>
     </widget>
    </item>
    <item row="4" column="0">
@@ -304,7 +304,7 @@ If not checked, the values can be Yes or No.</string>
     </layout>
    </widget>
   </item>
-  <item row="30" column="0" colspan="3">
+  <item row="9" column="2">
   <widget class="QPushButton" name="reset_confirmation_button">
    <property name="text">
     <string>Reset all disabled &amp;confirmation dialogs</string>

View File

@@ -158,7 +158,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
                 {
                     'isbn': '{identifiers:select(isbn)}',
                     'formats': '{formats}',
-                    'last_modified':'''{last_modified:'format_date($, "dd MMM yy")'}'''
+                    'last_modified':'''{last_modified:'format_date($, "dd MMM yyyy")'}'''
                 }[which])
         self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0)

View File

@@ -87,6 +87,8 @@ class Category(QWidget): # {{{
         self.plugins = plugins

         self.bar = QToolBar(self)
+        self.bar.setStyleSheet(
+                'QToolBar { border: none; background: none }')
         self.bar.setIconSize(QSize(48, 48))
         self.bar.setMovable(False)
         self.bar.setFloatable(False)

View File

@@ -6,7 +6,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from PyQt4.Qt import (QCoreApplication, QDialog, QTimer)
+from PyQt4.Qt import QDialog

 from calibre.gui2.store.mobileread.cache_progress_dialog_ui import Ui_Dialog

View File

@@ -534,6 +534,7 @@ class DocumentView(QWebView): # {{{
                 _('&Lookup in dictionary'), self)
         self.dictionary_action.setShortcut(Qt.CTRL+Qt.Key_L)
         self.dictionary_action.triggered.connect(self.lookup)
+        self.addAction(self.dictionary_action)
         self.goto_location_action = QAction(_('Go to...'), self)
         self.goto_location_menu = m = QMenu(self)
         self.goto_location_actions = a = {

View File

@@ -556,10 +556,14 @@ class ResultCache(SearchQueryParser): # {{{
         return matchkind, query

     def get_bool_matches(self, location, query, candidates):
-        bools_are_tristate = not self.db_prefs.get('bools_are_tristate')
+        bools_are_tristate = self.db_prefs.get('bools_are_tristate')
         loc = self.field_metadata[location]['rec_index']
         matches = set()
         query = icu_lower(query)
+        if query not in (_('no'), _('unchecked'), '_no', 'false',
+                         _('yes'), _('checked'), '_yes', 'true',
+                         _('empty'), _('blank'), '_empty'):
+            raise ParseException(_('Invalid boolean query "{0}"').format(query))
         for id_ in candidates:
             item = self._data[id_]
             if item is None:
@@ -630,8 +634,11 @@ class ResultCache(SearchQueryParser): # {{{
                     terms.add(l)
             if terms:
                 for l in terms:
-                    matches |= self.get_matches(l, query,
-                        candidates=candidates, allow_recursion=allow_recursion)
+                    try:
+                        matches |= self.get_matches(l, query,
+                            candidates=candidates, allow_recursion=allow_recursion)
+                    except:
+                        pass
                 return matches

         if location in self.field_metadata:
@@ -1005,9 +1012,9 @@ class SortKeyGenerator(object):
             if sb == 'date':
                 try:
                     val = parse_date(val)
-                    dt = 'datetime'
                 except:
-                    pass
+                    val = UNDEFINED_DATE
+                dt = 'datetime'
             elif sb == 'number':
                 try:
                     val = float(val)
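
For reference, these are the values that the stricter boolean search above accepts for a yes/no column; anything else now raises ParseException. A small illustrative sketch, ignoring localisation via _():

    VALID_BOOL_QUERIES = {'yes', 'checked', '_yes', 'true',
                          'no', 'unchecked', '_no', 'false',
                          'empty', 'blank', '_empty'}

    def is_valid_bool_query(query):
        # Mirrors the check added to get_bool_matches() above.
        return query.lower() in VALID_BOOL_QUERIES

    print(is_valid_bool_query('yes'))    # True
    print(is_valid_bool_query('maybe'))  # False -> ParseException in the real code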

View File

@@ -149,7 +149,8 @@ class PostInstall:
             if islinux or isfreebsd:
                 for f in os.listdir('.'):
                     if os.stat(f).st_uid == 0:
-                        os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
+                        import shutil
+                        shutil.rmtree(f) if os.path.isdir(f) else os.unlink(f)
                 if os.stat(config_dir).st_uid == 0:
                     os.rmdir(config_dir)

View File

@@ -20,12 +20,14 @@ What formats does |app| support conversion to/from?
 |app| supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.

-*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB***, PML, RB, RTF, SNB, TCR, TXT, TXTZ

 *Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ

 ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
+*** PDB is also a generic format. |app| supports eReader, Plucker, PML and zTxt PDB files.

 .. _best-source-formats:

 What are the best source formats to convert?