diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe
index 0e3c91d3e3..e750b6f113 100644
--- a/recipes/financial_times.recipe
+++ b/recipes/financial_times.recipe
@@ -53,6 +53,7 @@ class FinancialTimes(BasicNewsRecipe):
feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' )
+ ,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
]
diff --git a/resources/quick_start.epub b/resources/quick_start.epub
index 589fd1d0dc..2d590ebef2 100644
Binary files a/resources/quick_start.epub and b/resources/quick_start.epub differ
diff --git a/src/calibre/ebooks/metadata/pdb.py b/src/calibre/ebooks/metadata/pdb.py
index ddf2b0c818..d01bb0ecdb 100644
--- a/src/calibre/ebooks/metadata/pdb.py
+++ b/src/calibre/ebooks/metadata/pdb.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
'''
-Read meta information from eReader pdb files.
+Read meta information from pdb files.
'''
__license__ = 'GPL v3'
@@ -13,10 +13,12 @@ import re
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
+from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
MREADER = {
'PNPdPPrs' : get_eReader,
'PNRdPPrs' : get_eReader,
+ 'DataPlkr' : get_plucker,
}
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
diff --git a/src/calibre/ebooks/metadata/plucker.py b/src/calibre/ebooks/metadata/plucker.py
new file mode 100644
index 0000000000..fabaa080d2
--- /dev/null
+++ b/src/calibre/ebooks/metadata/plucker.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+'''
+Read meta information from Plucker pdb files.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+from datetime import datetime
+
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.pdb.header import PdbHeaderReader
+from calibre.ebooks.pdb.plucker.reader import SectionHeader, DATATYPE_METADATA, \
+ MIBNUM_TO_NAME
+
+def get_metadata(stream, extract_cover=True):
+ '''
+ Return metadata as a L{MetaInfo} object
+ '''
+ mi = MetaInformation(_('Unknown'), [_('Unknown')])
+ stream.seek(0)
+
+ pheader = PdbHeaderReader(stream)
+ section_data = None
+ for i in range(1, pheader.num_sections):
+ raw_data = pheader.section_data(i)
+ section_header = SectionHeader(raw_data)
+ if section_header.type == DATATYPE_METADATA:
+ section_data = raw_data[8:]
+ break
+
+ if not section_data:
+ return mi
+
+ default_encoding = 'latin-1'
+ record_count, = struct.unpack('>H', section_data[0:2])
+ adv = 0
+ title = None
+ author = None
+ pubdate = 0
+ for i in xrange(record_count):
+ type, = struct.unpack('>H', section_data[2+adv:4+adv])
+ length, = struct.unpack('>H', section_data[4+adv:6+adv])
+
+ # CharSet
+ if type == 1:
+ val, = struct.unpack('>H', section_data[6+adv:8+adv])
+ default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
+ # Author
+ elif type == 4:
+            author = section_data[6+adv:6+adv+(2*length)]
+ # Title
+ elif type == 5:
+            title = section_data[6+adv:6+adv+(2*length)]
+ # Publication Date
+ elif type == 6:
+ pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])
+
+ adv += 2*length
+
+ if title:
+ mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
+ if author:
+ author = author.replace('\0', '').decode(default_encoding, 'replace')
+        mi.authors = author.split(',')
+ mi.pubdate = datetime.fromtimestamp(pubdate)
+
+ return mi
diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index e975d41ea6..759da45610 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -206,6 +206,7 @@ class OverDrive(Source):
xref_q = '+'.join(title_tokens)
#log.error('Initial query is %s'%initial_q)
#log.error('Cross reference query is %s'%xref_q)
+
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
query = '{"szKeyword":"'+initial_q+'"}'
@@ -229,34 +230,42 @@ class OverDrive(Source):
if int(m.group('displayrecords')) >= 1:
results = True
elif int(m.group('totalrecords')) >= 1:
+ if int(m.group('totalrecords')) >= 100:
+ if xref_q.find('+') != -1:
+ xref_tokens = xref_q.split('+')
+ xref_q = xref_tokens[0]
+ #log.error('xref_q is '+xref_q)
+ else:
+ xref_q = ''
xref_q = ''
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
elif int(m.group('totalrecords')) == 0:
return ''
- return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
+ return self.sort_ovrdrv_results(raw, log, title, title_tokens, author, author_tokens)
- def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+ def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
close_matches = []
         raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
results = json.loads(raw)
- #print results
+ #log.error('raw results are:'+str(results))
# The search results are either from a keyword search or a multi-format list from a single ID,
# sort through the results for closest match/format
if results:
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
- #print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
+ #log.error("this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series)
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
- #print "overdrive id is not None, searching based on format type priority"
+ #log.error('overdrive id is not None, searching based on format type priority')
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
- creators = creators.split(', ')
+ if creators:
+ creators = creators.split(', ')
# if an exact match in a preferred format occurs
- if (author and creators[0] == author[0]) and od_title == title and int(formatid) in [1, 50, 410, 900] and thumbimage:
+ if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
@@ -282,6 +291,10 @@ class OverDrive(Source):
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
else:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+
+ elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+ close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+
if close_matches:
return close_matches[0]
else:
@@ -289,7 +302,7 @@ class OverDrive(Source):
else:
return ''
- def overdrive_get_record(self, br, q, ovrdrv_id):
+ def overdrive_get_record(self, br, log, q, ovrdrv_id):
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
@@ -311,7 +324,7 @@ class OverDrive(Source):
raw = str(list(raw))
clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj)
- return self.sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
+ return self.sort_ovrdrv_results(raw, log, None, None, None, ovrdrv_id)
def find_ovrdrv_data(self, br, log, title, author, isbn, ovrdrv_id=None):
@@ -319,7 +332,7 @@ class OverDrive(Source):
if ovrdrv_id is None:
return self.overdrive_search(br, log, q, title, author)
else:
- return self.overdrive_get_record(br, q, ovrdrv_id)
+ return self.overdrive_get_record(br, log, q, ovrdrv_id)
diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index 092c8a21bd..c8089297db 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -12,6 +12,7 @@ from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
+from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
FORMAT_READERS = {
'PNPdPPrs': ereader_reader,
@@ -19,6 +20,7 @@ FORMAT_READERS = {
'zTXTGPlm': ztxt_reader,
'TEXtREAd': palmdoc_reader,
'.pdfADBE': pdf_reader,
+ 'DataPlkr': plucker_reader,
}
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -37,6 +39,7 @@ IDENTITY_TO_NAME = {
'zTXTGPlm': 'zTXT',
'TEXtREAd': 'PalmDOC',
'.pdfADBE': 'Adobe Reader',
+ 'DataPlkr': 'Plucker',
'BVokBDIC': 'BDicty',
'DB99DBOS': 'DB (Database program)',
@@ -50,7 +53,6 @@ IDENTITY_TO_NAME = {
'DATALSdb': 'LIST',
'Mdb1Mdb1': 'MobileDB',
'BOOKMOBI': 'MobiPocket',
- 'DataPlkr': 'Plucker',
'DataSprd': 'QuickSheet',
'SM01SMem': 'SuperMemo',
'TEXtTlDc': 'TealDoc',
diff --git a/src/calibre/ebooks/pdb/plucker/__init__.py b/src/calibre/ebooks/pdb/plucker/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pdb/plucker/reader.py b/src/calibre/ebooks/pdb/plucker/reader.py
new file mode 100644
index 0000000000..28e875aceb
--- /dev/null
+++ b/src/calibre/ebooks/pdb/plucker/reader.py
@@ -0,0 +1,764 @@
+# -*- coding: utf-8 -*-
+
+#from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+import struct
+import zlib
+
+from collections import OrderedDict
+
+from calibre import CurrentDir
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+from calibre.utils.magick import Image, create_canvas
+
+DATATYPE_PHTML = 0
+DATATYPE_PHTML_COMPRESSED = 1
+DATATYPE_TBMP = 2
+DATATYPE_TBMP_COMPRESSED = 3
+DATATYPE_MAILTO = 4
+DATATYPE_LINK_INDEX = 5
+DATATYPE_LINKS = 6
+DATATYPE_LINKS_COMPRESSED = 7
+DATATYPE_BOOKMARKS = 8
+DATATYPE_CATEGORY = 9
+DATATYPE_METADATA = 10
+DATATYPE_STYLE_SHEET = 11
+DATATYPE_FONT_PAGE = 12
+DATATYPE_TABLE = 13
+DATATYPE_TABLE_COMPRESSED = 14
+DATATYPE_COMPOSITE_IMAGE = 15
+DATATYPE_PAGELIST_METADATA = 16
+DATATYPE_SORTED_URL_INDEX = 17
+DATATYPE_SORTED_URL = 18
+DATATYPE_SORTED_URL_COMPRESSED = 19
+DATATYPE_EXT_ANCHOR_INDEX = 20
+DATATYPE_EXT_ANCHOR = 21
+DATATYPE_EXT_ANCHOR_COMPRESSED = 22
+
+# IETF IANA MIBenum value for the character set.
+# See the http://www.iana.org/assignments/character-sets for valid values.
+# Not all character sets are handled by Python. This is a small subset that
+# the MIBenum maps to Python standard encodings
+# from http://docs.python.org/library/codecs.html#standard-encodings
+MIBNUM_TO_NAME = {
+ 3: 'ascii',
+ 4: 'latin_1',
+ 5: 'iso8859_2',
+ 6: 'iso8859_3',
+ 7: 'iso8859_4',
+ 8: 'iso8859_5',
+ 9: 'iso8859_6',
+ 10: 'iso8859_7',
+ 11: 'iso8859_8',
+ 12: 'iso8859_9',
+ 13: 'iso8859_10',
+ 17: 'shift_jis',
+ 18: 'euc_jp',
+ 27: 'utf_7',
+ 36: 'euc_kr',
+ 37: 'iso2022_kr',
+ 38: 'euc_kr',
+ 39: 'iso2022_jp',
+ 40: 'iso2022_jp_2',
+ 106: 'utf-8',
+ 109: 'iso8859_13',
+ 110: 'iso8859_14',
+ 111: 'iso8859_15',
+ 112: 'iso8859_16',
+ 1013: 'utf_16_be',
+ 1014: 'utf_16_le',
+ 1015: 'utf_16',
+ 2009: 'cp850',
+ 2010: 'cp852',
+ 2011: 'cp437',
+ 2013: 'cp862',
+ 2025: 'gb2312',
+ 2026: 'big5',
+ 2028: 'cp037',
+ 2043: 'cp424',
+ 2044: 'cp500',
+ 2046: 'cp855',
+ 2047: 'cp857',
+ 2048: 'cp860',
+ 2049: 'cp861',
+ 2050: 'cp863',
+ 2051: 'cp864',
+ 2052: 'cp865',
+ 2054: 'cp869',
+ 2063: 'cp1026',
+ 2085: 'hz',
+ 2086: 'cp866',
+ 2087: 'cp775',
+ 2089: 'cp858',
+ 2091: 'cp1140',
+ 2102: 'big5hkscs',
+ 2250: 'cp1250',
+ 2251: 'cp1251',
+ 2252: 'cp1252',
+ 2253: 'cp1253',
+ 2254: 'cp1254',
+ 2255: 'cp1255',
+ 2256: 'cp1256',
+ 2257: 'cp1257',
+ 2258: 'cp1258',
+}
+
+def decompress_doc(data):
+ buffer = [ord(i) for i in data]
+ res = []
+ i = 0
+ while i < len(buffer):
+ c = buffer[i]
+ i += 1
+ if c >= 1 and c <= 8:
+ res.extend(buffer[i:i+c])
+ i += c
+ elif c <= 0x7f:
+ res.append(c)
+ elif c >= 0xc0:
+ res.extend( (ord(' '), c^0x80) )
+ else:
+ c = (c << 8) + buffer[i]
+ i += 1
+ di = (c & 0x3fff) >> 3
+ j = len(res)
+ num = (c & ((1 << 3) - 1)) + 3
+
+ for k in range( num ):
+ res.append(res[j - di+k])
+
+ return ''.join([chr(i) for i in res])
+
+class HeaderRecord(object):
+ '''
+ Plucker header. PDB record 0.
+ '''
+
+ def __init__(self, raw):
+ self.uid, = struct.unpack('>H', raw[0:2])
+        # This is labeled "version" in the spec.
+ # 2 is ZLIB compressed,
+ # 1 is DOC compressed
+ self.compression, = struct.unpack('>H', raw[2:4])
+ self.records, = struct.unpack('>H', raw[4:6])
+ # uid of the first html file. This should link
+ # to other files which in turn may link to others.
+ self.home_html = None
+
+ self.reserved = {}
+ for i in xrange(self.records):
+ adv = 4*i
+ name, = struct.unpack('>H', raw[6+adv:8+adv])
+ id, = struct.unpack('>H', raw[8+adv:10+adv])
+ self.reserved[id] = name
+ if name == 0:
+ self.home_html = id
+
+
+class SectionHeader(object):
+ '''
+    Every section (record) has this header. It gives
+    details about the section such as its uid.
+ '''
+
+ def __init__(self, raw):
+ self.uid, = struct.unpack('>H', raw[0:2])
+ self.paragraphs, = struct.unpack('>H', raw[2:4])
+ self.size, = struct.unpack('>H', raw[4:6])
+ self.type, = struct.unpack('>B', raw[6])
+ self.flags, = struct.unpack('>B', raw[7])
+
+
+class SectionHeaderText(object):
+ '''
+ Sub header for text records.
+ '''
+
+ def __init__(self, section_header, raw):
+ # The uncompressed size of each paragraph.
+ self.sizes = []
+ # uncompressed offset of each paragraph starting
+ # at the beginning of the PHTML.
+ self.paragraph_offsets = []
+ # Paragraph attributes.
+ self.attributes = []
+
+ for i in xrange(section_header.paragraphs):
+ adv = 4*i
+ self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
+ self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
+
+ running_offset = 0
+ for size in self.sizes:
+ running_offset += size
+ self.paragraph_offsets.append(running_offset)
+
+
+class SectionMetadata(object):
+ '''
+ Metadata.
+
+ This does not store metadata such as title, or author.
+ That metadata would be best retrieved with the PDB (plucker)
+    metadata reader.
+
+ This stores document specific information such as the
+ text encoding.
+
+ Note: There is a default encoding but each text section
+ can be assigned a different encoding.
+ '''
+
+ def __init__(self, raw):
+ self.default_encoding = 'latin-1'
+ self.exceptional_uid_encodings = {}
+ self.owner_id = None
+
+ record_count, = struct.unpack('>H', raw[0:2])
+
+ adv = 0
+ for i in xrange(record_count):
+ type, = struct.unpack('>H', raw[2+adv:4+adv])
+ length, = struct.unpack('>H', raw[4+adv:6+adv])
+
+ # CharSet
+ if type == 1:
+ val, = struct.unpack('>H', raw[6+adv:8+adv])
+ self.default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
+ # ExceptionalCharSets
+ elif type == 2:
+ ii_adv = 0
+ for ii in xrange(length / 2):
+ uid, = struct.unpack('>H', raw[6+adv+ii_adv:8+adv+ii_adv])
+ mib, = struct.unpack('>H', raw[8+adv+ii_adv:10+adv+ii_adv])
+ self.exceptional_uid_encodings[uid] = MIBNUM_TO_NAME.get(mib, 'latin-1')
+ ii_adv += 4
+ # OwnerID
+ elif type == 3:
+ self.owner_id = struct.unpack('>I', raw[6+adv:10+adv])
+ # Author, Title, PubDate
+ # Ignored here. The metadata reader plugin
+ # will get this info because if it's missing
+ # the metadata reader plugin will use fall
+ # back data from elsewhere in the file.
+ elif type in (4, 5, 6):
+ pass
+ # Linked Documents
+ elif type == 7:
+ pass
+
+ adv += 2*length
+
+
+class SectionText(object):
+ '''
+ Text data. Stores a text section header and the PHTML.
+ '''
+
+ def __init__(self, section_header, raw):
+ self.header = SectionHeaderText(section_header, raw)
+ self.data = raw[section_header.paragraphs * 4:]
+
+
+class SectionCompositeImage(object):
+ '''
+    A composite image consists of a 2D array
+ of rows and columns. The entries in the array
+ are uid's.
+ '''
+
+ def __init__(self, raw):
+ self.columns, = struct.unpack('>H', raw[0:2])
+ self.rows, = struct.unpack('>H', raw[2:4])
+
+ # [
+ # [uid, uid, uid, ...],
+ # [uid, uid, uid, ...],
+ # ...
+ # ]
+ #
+        # Each item in the layout is in its
+ # correct position in the final
+ # composite.
+ #
+ # Each item in the layout is a uid
+ # to an image record.
+ self.layout = []
+ offset = 4
+ for i in xrange(self.rows):
+ col = []
+ for j in xrange(self.columns):
+ col.append(struct.unpack('>H', raw[offset:offset+2])[0])
+ offset += 2
+ self.layout.append(col)
+
+
+class Reader(FormatReader):
+ '''
+ Convert a plucker archive into HTML.
+
+ TODO:
+ * UTF 16 and 32 characters.
+ * Margins.
+ * Alignment.
+ * Font color.
+ * DATATYPE_MAILTO
+ * DATATYPE_TABLE(_COMPRESSED)
+ * DATATYPE_EXT_ANCHOR_INDEX
+ * DATATYPE_EXT_ANCHOR(_COMPRESSED)
+ '''
+
+ def __init__(self, header, stream, log, options):
+ self.stream = stream
+ self.log = log
+ self.options = options
+
+ # Mapping of section uid to our internal
+ # list of sections.
+ self.uid_section_number = OrderedDict()
+ self.uid_text_secion_number = OrderedDict()
+ self.uid_text_secion_encoding = {}
+ self.uid_image_section_number = {}
+ self.uid_composite_image_section_number = {}
+ self.metadata_section_number = None
+ self.default_encoding = 'latin-1'
+ self.owner_id = None
+ self.sections = []
+
+ # The Plucker record0 header
+ self.header_record = HeaderRecord(header.section_data(0))
+
+ for i in range(1, header.num_sections):
+ section_number = len(self.sections)
+ # The length of the section header.
+ # Where the actual data in the section starts.
+ start = 8
+ section = None
+
+ raw_data = header.section_data(i)
+            # Every section has a section header.
+ section_header = SectionHeader(raw_data)
+
+            # Store sections we care about.
+ if section_header.type in (DATATYPE_PHTML, DATATYPE_PHTML_COMPRESSED):
+ self.uid_text_secion_number[section_header.uid] = section_number
+ section = SectionText(section_header, raw_data[start:])
+ elif section_header.type in (DATATYPE_TBMP, DATATYPE_TBMP_COMPRESSED):
+ self.uid_image_section_number[section_header.uid] = section_number
+ section = raw_data[start:]
+ elif section_header.type == DATATYPE_METADATA:
+ self.metadata_section_number = section_number
+ section = SectionMetadata(raw_data[start:])
+ elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
+ self.uid_composite_image_section_number[section_header.uid] = section_number
+ section = SectionCompositeImage(raw_data[start:])
+
+ # Store the section.
+ if section:
+ self.uid_section_number[section_header.uid] = section_number
+ self.sections.append((section_header, section))
+
+ # Store useful information from the metadata section locally
+ # to make access easier.
+        if self.metadata_section_number is not None:
+ mdata_section = self.sections[self.metadata_section_number][1]
+ for k, v in mdata_section.exceptional_uid_encodings.items():
+ self.uid_text_secion_encoding[k] = v
+ self.default_encoding = mdata_section.default_encoding
+ self.owner_id = mdata_section.owner_id
+
+        # Get the metadata (title, author, ...) with the metadata reader.
+ from calibre.ebooks.metadata.pdb import get_metadata
+ self.mi = get_metadata(stream, False)
+
+ def extract_content(self, output_dir):
+ # Each text record is independent (unless the continuation
+ # value is set in the previous record). Put each converted
+ # text recored into a separate file. We will reference the
+ # home.html file as the first file and let the HTML input
+ # plugin assemble the order based on hyperlinks.
+ with CurrentDir(output_dir):
+ for uid, num in self.uid_text_secion_number.items():
+ self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
+ with open('%s.html' % uid, 'wb') as htmlf:
+                html = u'<html><head><title></title></head><body>'
+ section_header, section_data = self.sections[num]
+ if section_header.type == DATATYPE_PHTML:
+ html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
+ elif section_header.type == DATATYPE_PHTML_COMPRESSED:
+ d = self.decompress_phtml(section_data.data)
+ html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
+                html += '</body></html>'
+ htmlf.write(html.encode('utf-8'))
+
+ # Images.
+ # Cache the image sizes in case they are used by a composite image.
+ image_sizes = {}
+ if not os.path.exists(os.path.join(output_dir, 'images/')):
+ os.makedirs(os.path.join(output_dir, 'images/'))
+ with CurrentDir(os.path.join(output_dir, 'images/')):
+ # Single images.
+ for uid, num in self.uid_image_section_number.items():
+ section_header, section_data = self.sections[num]
+ if section_data:
+ idata = None
+ if section_header.type == DATATYPE_TBMP:
+ idata = section_data
+ elif section_header.type == DATATYPE_TBMP_COMPRESSED:
+ if self.header_record.compression == 1:
+ idata = decompress_doc(section_data)
+ elif self.header_record.compression == 2:
+ idata = zlib.decompress(section_data)
+ try:
+ with TemporaryFile(suffix='.palm') as itn:
+ with open(itn, 'wb') as itf:
+ itf.write(idata)
+ im = Image()
+ im.read(itn)
+ image_sizes[uid] = im.size
+ im.set_compression_quality(70)
+ im.save('%s.jpg' % uid)
+ self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
+ except Exception as e:
+ self.log.error('Failed to write image with uid %s: %s' % (uid, e))
+ else:
+ self.log.error('Failed to write image with uid %s: No data.' % uid)
+ # Composite images.
+ # We're going to use the already compressed .jpg images here.
+ for uid, num in self.uid_composite_image_section_number.items():
+ try:
+ section_header, section_data = self.sections[num]
+ # Get the final width and height.
+ width = 0
+ height = 0
+ for row in section_data.layout:
+ row_width = 0
+ col_height = 0
+ for col in row:
+ if col not in image_sizes:
+ raise Exception('Image with uid: %s missing.' % col)
+ im = Image()
+ im.read('%s.jpg' % col)
+ w, h = im.size
+ row_width += w
+ if col_height < h:
+ col_height = h
+ if width < row_width:
+ width = row_width
+ height += col_height
+ # Create a new image the total size of all image
+ # parts. Put the parts into the new image.
+ canvas = create_canvas(width, height)
+ y_off = 0
+ for row in section_data.layout:
+ x_off = 0
+ largest_height = 0
+ for col in row:
+ im = Image()
+ im.read('%s.jpg' % col)
+ canvas.compose(im, x_off, y_off)
+ w, h = im.size
+ x_off += w
+ if largest_height < h:
+ largest_height = h
+ y_off += largest_height
+ canvas.set_compression_quality(70)
+ canvas.save('%s.jpg' % uid)
+ self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
+ except Exception as e:
+ self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))
+
+ # Run the HTML through the html processing plugin.
+ from calibre.customize.ui import plugin_for_input_format
+ html_input = plugin_for_input_format('html')
+ for opt in html_input.options:
+ setattr(self.options, opt.option.name, opt.recommended_value)
+ self.options.input_encoding = 'utf-8'
+ odi = self.options.debug_pipeline
+ self.options.debug_pipeline = None
+ # Determine the home.html record uid. This should be set in the
+        # reserved values in the metadata record. home.html is the first
+ # text record (should have hyper link references to other records)
+ # in the document.
+ try:
+ home_html = self.header_record.home_html
+ if not home_html:
+ home_html = self.uid_text_secion_number.items()[0][0]
+ except:
+ raise Exception(_('Could not determine home.html'))
+ # Generate oeb from html conversion.
+ oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
+ self.options.debug_pipeline = odi
+
+ return oeb
+
+ def decompress_phtml(self, data):
+ if self.header_record.compression == 2:
+ if self.owner_id:
+ raise NotImplementedError
+ return zlib.decompress(data)
+ elif self.header_record.compression == 1:
+ #from calibre.ebooks.compression.palmdoc import decompress_doc
+ return decompress_doc(data)
+
+ def process_phtml(self, d, paragraph_offsets=[]):
+ html = u''
+ offset = 0
+ paragraph_open = True
+ link_open = False
+ need_set_p_id = False
+ p_num = 1
+ font_specifier_close = ''
+
+ while offset < len(d):
+ if not paragraph_open:
+ if need_set_p_id:
+                    html += u'<p id="p%s">' % p_num
+ p_num += 1
+ need_set_p_id = False
+ else:
+                    html += u'<p>'
+ paragraph_open = True
+
+ c = ord(d[offset])
+ # PHTML "functions"
+ if c == 0x0:
+ offset += 1
+ c = ord(d[offset])
+ # Page link begins
+ # 2 Bytes
+ # record ID
+ if c == 0x0a:
+ offset += 1
+ id = struct.unpack('>H', d[offset:offset+2])[0]
+ if id in self.uid_text_secion_number:
+                        html += '<a href="%s.html">' % id
+ link_open = True
+ offset += 1
+ # Targeted page link begins
+ # 3 Bytes
+ # record ID, target
+ elif c == 0x0b:
+ offset += 3
+ # Paragraph link begins
+ # 4 Bytes
+ # record ID, paragraph number
+ elif c == 0x0c:
+ offset += 1
+ id = struct.unpack('>H', d[offset:offset+2])[0]
+ offset += 2
+ pid = struct.unpack('>H', d[offset:offset+2])[0]
+ if id in self.uid_text_secion_number:
+                        html += '<a href="%s.html#p%s">' % (id, pid)
+ link_open = True
+ offset += 1
+ # Targeted paragraph link begins
+ # 5 Bytes
+ # record ID, paragraph number, target
+ elif c == 0x0d:
+ offset += 5
+ # Link ends
+ # 0 Bytes
+ elif c == 0x08:
+ if link_open:
+                        html += '</a>'
+ link_open = False
+ # Set font
+ # 1 Bytes
+ # font specifier
+ elif c == 0x11:
+ offset += 1
+ specifier = d[offset]
+ html += font_specifier_close
+ # Regular text
+ if specifier == 0:
+ font_specifier_close = ''
+                    # h1
+                    elif specifier == 1:
+                        html += '<h1>'
+                        font_specifier_close = '</h1>'
+                    # h2
+                    elif specifier == 2:
+                        html += '<h2>'
+                        font_specifier_close = '</h2>'
+                    # h3
+                    elif specifier == 3:
+                        html += '<h3>'
+                        font_specifier_close = '</h3>'
+                    # h4
+                    elif specifier == 4:
+                        html += '<h4>'
+                        font_specifier_close = '</h4>'
+                    # h5
+                    elif specifier == 5:
+                        html += '<h5>'
+                        font_specifier_close = '</h5>'
+                    # h6
+                    elif specifier == 6:
+                        html += '<h6>'
+                        font_specifier_close = '</h6>'
+                    # Bold
+                    elif specifier == 7:
+                        html += '<b>'
+                        font_specifier_close = '</b>'
+                    # Fixed-width
+                    elif specifier == 8:
+                        html += '<tt>'
+                        font_specifier_close = '</tt>'
+                    # Small
+                    elif specifier == 9:
+                        html += '<small>'
+                        font_specifier_close = '</small>'
+                    # Subscript
+                    elif specifier == 10:
+                        html += '<sub>'
+                        font_specifier_close = '</sub>'
+                    # Superscript
+                    elif specifier == 11:
+                        html += '<sup>'
+                        font_specifier_close = '</sup>'
+ # Embedded image
+ # 2 Bytes
+ # image record ID
+ elif c == 0x1a:
+ offset += 1
+ uid = struct.unpack('>H', d[offset:offset+2])[0]
+                    html += '<img src="images/%s.jpg" />' % uid
+ offset += 1
+ # Set margin
+ # 2 Bytes
+ # left margin, right margin
+ elif c == 0x22:
+ offset += 2
+ # Alignment of text
+ # 1 Bytes
+ # alignment
+ elif c == 0x29:
+ offset += 1
+ # Horizontal rule
+ # 3 Bytes
+ # 8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)
+ elif c == 0x33:
+ offset += 3
+ if paragraph_open:
+                        html += u'</p>'
+                        paragraph_open = False
+                    html += u'<hr />'
+ # New line
+ # 0 Bytes
+ elif c == 0x38:
+ if paragraph_open:
+                        html += u'</p>\n'
+ paragraph_open = False
+ # Italic text begins
+ # 0 Bytes
+ elif c == 0x40:
+                    html += u'<i>'
+ # Italic text ends
+ # 0 Bytes
+ elif c == 0x48:
+                    html += u'</i>'
+ # Set text color
+ # 3 Bytes
+ # 8-bit red, 8-bit green, 8-bit blue
+ elif c == 0x53:
+ offset += 3
+ # Multiple embedded image
+ # 4 Bytes
+ # alternate image record ID, image record ID
+ elif c == 0x5c:
+ offset += 3
+ uid = struct.unpack('>H', d[offset:offset+2])[0]
+                    html += '<img src="images/%s.jpg" />' % uid
+ offset += 1
+ # Underline text begins
+ # 0 Bytes
+ elif c == 0x60:
+                    html += u'<u>'
+ # Underline text ends
+ # 0 Bytes
+ elif c == 0x68:
+                    html += u'</u>'
+ # Strike-through text begins
+ # 0 Bytes
+ elif c == 0x70:
+                    html += u'<del>'
+ # Strike-through text ends
+ # 0 Bytes
+ elif c == 0x78:
+                    html += u'</del>'
+ # 16-bit Unicode character
+ # 3 Bytes
+ # alternate text length, 16-bit unicode character
+ elif c == 0x83:
+ offset += 3
+ # 32-bit Unicode character
+ # 5 Bytes
+ # alternate text length, 32-bit unicode character
+ elif c == 0x85:
+ offset += 5
+ # Begin custom font span
+ # 6 Bytes
+ # font page record ID, X page position, Y page position
+ elif c == 0x8e:
+ offset += 6
+ # Adjust custom font glyph position
+ # 4 Bytes
+ # X page position, Y page position
+ elif c == 0x8c:
+ offset += 4
+ # Change font page
+ # 2 Bytes
+ # font record ID
+ elif c == 0x8a:
+ offset += 2
+ # End custom font span
+ # 0 Bytes
+ elif c == 0x88:
+ pass
+ # Begin new table row
+ # 0 Bytes
+ elif c == 0x90:
+ pass
+ # Insert table (or table link)
+ # 2 Bytes
+ # table record ID
+ elif c == 0x92:
+ offset += 2
+ # Table cell data
+ # 7 Bytes
+ # 8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length
+ elif c == 0x97:
+ offset += 7
+ # Exact link modifier
+ # 2 Bytes
+ # Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or Targeted Paragraph Link function to specify an exact byte offset within the paragraph. This function must be followed immediately by the function it modifies).
+ elif c == 0x9a:
+ offset += 2
+ elif c == 0xa0:
+ html += ' '
+ else:
+ html += unichr(c)
+ offset += 1
+ if offset in paragraph_offsets:
+ need_set_p_id = True
+ if paragraph_open:
+                    html += u'</p>\n'
+ paragraph_open = False
+
+ if paragraph_open:
+            html += u'</p>'
+
+ return html
+
+ def get_text_uid_encoding(self, uid):
+        # Return the user specified input encoding,
+        # otherwise return the alternate encoding specified for the uid,
+        # otherwise return the default encoding for the document.
+ return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 1718123435..44212e92a7 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -529,13 +529,17 @@ class EditMetadataAction(InterfaceAction):
view.reset()
# Apply bulk metadata changes {{{
- def apply_metadata_changes(self, id_map, title=None, msg=''):
+ def apply_metadata_changes(self, id_map, title=None, msg='', callback=None):
'''
Apply the metadata changes in id_map to the database synchronously
id_map must be a mapping of ids to Metadata objects. Set any fields you
do not want updated in the Metadata object to null. An easy way to do
that is to create a metadata object as Metadata(_('Unknown')) and then
only set the fields you want changed on this object.
+
+ callback can be either None or a function accepting a single argument,
+ in which case it is called after applying is complete with the list of
+ changed ids.
'''
if title is None:
title = _('Applying changed metadata')
@@ -544,6 +548,7 @@ class EditMetadataAction(InterfaceAction):
self.apply_failures = []
self.applied_ids = []
self.apply_pd = None
+ self.apply_callback = callback
if len(self.apply_id_map) > 1:
from calibre.gui2.dialogs.progress import ProgressDialog
self.apply_pd = ProgressDialog(title, msg, min=0,
@@ -611,6 +616,11 @@ class EditMetadataAction(InterfaceAction):
self.apply_id_map = []
self.apply_pd = None
+ try:
+ if callable(self.apply_callback):
+ self.apply_callback(self.applied_ids)
+ finally:
+ self.apply_callback = None
# }}}
diff --git a/src/calibre/gui2/store/__init__.py b/src/calibre/gui2/store/__init__.py
index c95d794975..fd2fb965a9 100644
--- a/src/calibre/gui2/store/__init__.py
+++ b/src/calibre/gui2/store/__init__.py
@@ -161,18 +161,6 @@ class StorePlugin(object): # {{{
'''
return False
- def get_settings(self):
- '''
- This is only useful for plugins that implement
- :attr:`config_widget` that is the only way to save
- settings. This is used by plugins to get the saved
- settings and apply when necessary.
-
- :return: A dictionary filled with the settings used
- by this plugin.
- '''
- raise NotImplementedError()
-
def do_genesis(self):
self.genesis()
diff --git a/src/calibre/gui2/store/baen_webscription_plugin.py b/src/calibre/gui2/store/baen_webscription_plugin.py
index d4f7924851..5be7e9c161 100644
--- a/src/calibre/gui2/store/baen_webscription_plugin.py
+++ b/src/calibre/gui2/store/baen_webscription_plugin.py
@@ -24,10 +24,9 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
- settings = self.get_settings()
url = 'http://www.webscription.net/'
- if external or settings.get(self.name + '_open_external', False):
+ if external or self.config.get('open_external', False):
if detail_item:
url = url + detail_item
open_url(QUrl(url_slash_cleaner(url)))
@@ -37,7 +36,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
detail_url = url + detail_item
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
- d.set_tags(settings.get(self.name + '_tags', ''))
+ d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
diff --git a/src/calibre/gui2/store/mobileread/mobileread_plugin.py b/src/calibre/gui2/store/mobileread/mobileread_plugin.py
index 54242ce0b2..271e34a619 100644
--- a/src/calibre/gui2/store/mobileread/mobileread_plugin.py
+++ b/src/calibre/gui2/store/mobileread/mobileread_plugin.py
@@ -37,10 +37,10 @@ class MobileReadStore(BasicStoreConfig, StorePlugin):
d.set_tags(self.config.get('tags', ''))
d.exec_()
else:
- if self.update_cache(parent, 30):
- d = MobeReadStoreDialog(self, parent)
- d.setWindowTitle(self.name)
- d.exec_()
+ self.update_cache(parent, 30)
+ d = MobeReadStoreDialog(self, parent)
+ d.setWindowTitle(self.name)
+ d.exec_()
def search(self, query, max_results=10, timeout=60):
books = self.get_book_list()
diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py
index 70e92d1756..5654df8ffc 100644
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@@ -206,7 +206,7 @@ class SearchDialog(QDialog, Ui_Dialog):
if res:
self.results_view.model().add_result(res, store_plugin)
- if not self.results_view.model().has_results():
+ if not self.search_pool.threads_running() and not self.results_view.model().has_results():
info_dialog(self, _('No matches'), _('Couldn\'t find any books matching your query.'), show=True, show_copy_button=False)
@@ -247,5 +247,6 @@ class SearchDialog(QDialog, Ui_Dialog):
def dialog_closed(self, result):
self.results_view.model().closing()
self.search_pool.abort()
+ self.cache_pool.abort()
self.save_state()
diff --git a/src/calibre/gui2/viewer/main.ui b/src/calibre/gui2/viewer/main.ui
index d470a386c6..04166fe2cf 100644
--- a/src/calibre/gui2/viewer/main.ui
+++ b/src/calibre/gui2/viewer/main.ui
@@ -121,7 +121,7 @@
-
+
@@ -130,7 +130,7 @@
-
+
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index e4342988b8..ca256e0350 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -560,6 +560,10 @@ class ResultCache(SearchQueryParser): # {{{
loc = self.field_metadata[location]['rec_index']
matches = set()
query = icu_lower(query)
+ if query not in (_('no'), _('unchecked'), '_no', 'false',
+ _('yes'), _('checked'), '_yes', 'true',
+ _('empty'), _('blank'), '_empty'):
+ raise ParseException(_('Invalid boolean query "{0}"').format(query))
for id_ in candidates:
item = self._data[id_]
if item is None:
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 5c80df20df..d83bba061f 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -149,7 +149,8 @@ class PostInstall:
if islinux or isfreebsd:
for f in os.listdir('.'):
if os.stat(f).st_uid == 0:
- os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
+ import shutil
+ shutil.rmtree(f) if os.path.isdir(f) else os.unlink(f)
if os.stat(config_dir).st_uid == 0:
os.rmdir(config_dir)
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 3dce13f144..c281773660 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -20,12 +20,14 @@ What formats does |app| support conversion to/from?
|app| supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format.
-*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB***, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
+*** PDB is also a generic format. |app| supports eReader, Plucker, PML and zTxt PDB files.
+
.. _best-source-formats:
What are the best source formats to convert?