Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit a224be9eef (Merge)
@@ -1,4 +1,4 @@
 __license__ = 'GPL v3'
 __copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
 '''
 irishtimes.com
@@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class IrishTimes(BasicNewsRecipe):
     title = u'The Irish Times'
     encoding = 'ISO-8859-15'
     __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
     language = 'en_IE'
     timefmt = ' (%A, %B %d, %Y)'

@@ -18,6 +18,7 @@ class IrishTimes(BasicNewsRecipe):
     oldest_article = 1.0
     max_articles_per_feed = 100
     no_stylesheets = True
     simultaneous_downloads= 5

+    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
     remove_tags = [dict(name='div', attrs={'class':'footer'})]
@@ -25,17 +26,17 @@ class IrishTimes(BasicNewsRecipe):

     feeds = [
       ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
-      ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
-      ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
-      ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
-      ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
-      ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
-      ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
-      ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
+      ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
+      ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
+      ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
+      ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
+      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
+      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
+      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
       ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
-      ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
-      ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
-      ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
+      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
+      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
+      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
       ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
       ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
       ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@@ -49,10 +50,16 @@ class IrishTimes(BasicNewsRecipe):

     def print_version(self, url):
         if url.count('rss.feedsportal.com'):
-            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            u = url.find('irishtimes')
+            u = 'http://www.irishtimes.com' + url[u + 12:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0Bhtml/story01.htm', '_pf.html')
         else:
             u = url.replace('.html','_pf.html')
         return u

     def get_article_url(self, article):
         return article.link
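The new branch decodes feedsportal's URL munging instead of the old _pf substitution. A hedged illustration with a made-up feed URL (the path segments are hypothetical; only the substitutions mirror the code above)::

    url = ('http://rss.feedsportal.com/c/851/f/10845/s/abcdef/l/'
           '0Lwww0Birishtimes0N0Cnewspaper0Cireland0C20A110C050A90Carticle0Bhtml/story01.htm')
    i = url.find('irishtimes')                     # start of the encoded host
    u = 'http://www.irishtimes.com' + url[i + 12:] # skip 'irishtimes' plus '0N'
    u = u.replace('0C', '/')                       # '0C' encodes '/'
    u = u.replace('A', '')                         # drop the 'A' escape inside digit runs
    u = u.replace('0Bhtml/story01.htm', '_pf.html')
    # -> http://www.irishtimes.com/newspaper/ireland/2011/0509/article_pf.html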

@@ -11,7 +11,7 @@ defaults.
 '''

 #: Auto increment series index
-# The algorithm used to assign a new book in an existing series a series number.
+# The algorithm used to assign a book added to an existing series a series number.
 # New series numbers assigned using this tweak are always integer values, except
 # if a constant non-integer is specified.
 # Possible values are:
@@ -27,7 +27,19 @@ defaults.
 # series_index_auto_increment = 'next'
 # series_index_auto_increment = 'next_free'
 # series_index_auto_increment = 16.5
+#
+# Set the use_series_auto_increment_tweak_when_importing tweak to True to
+# use the above values when importing/adding books. If this tweak is set to
+# False (the default) then the series number will be set to 1 if it is not
+# explicitly set during the import. If set to True, then the
+# series index will be set according to the series_index_auto_increment setting.
+# Note that the use_series_auto_increment_tweak_when_importing tweak is used
+# only when a value is not provided during import. If the importing regular
+# expression produces a value for series_index, or if you are reading metadata
+# from books and the import plugin produces a value, then that value will
+# be used irrespective of the setting of the tweak.
 series_index_auto_increment = 'next'
+use_series_auto_increment_tweak_when_importing = False

 #: Add separator after completing an author name
 # Should the completion separator be append
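A sketch of what the two auto-increment modes mean in practice (illustrative numbers, not from the source)::

    existing = [1, 3]   # series_index values already present in the series
    # series_index_auto_increment = 'next'      -> max(existing) + 1 == 4
    # series_index_auto_increment = 'next_free' -> smallest unused integer == 2
    # A constant such as 16.5 assigns that exact value to every new book.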

@@ -570,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
         GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
-        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
+        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY)
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
@@ -705,7 +705,7 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ADAM,
-    MOOVYBOOK,
+    MOOVYBOOK, COBY,
     ITUNES,
     BOEYE_BEX,
     BOEYE_BDX,

@@ -351,3 +351,29 @@ class MOOVYBOOK(USBMS):
     def get_main_ebook_dir(self, for_upload=False):
         return 'Books' if for_upload else self.EBOOK_DIR_MAIN

+class COBY(USBMS):
+
+    name = 'COBY MP977 device interface'
+    gui_name = 'COBY'
+    description = _('Communicate with the COBY')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'pdf']
+
+    VENDOR_ID = [0x1e74]
+    PRODUCT_ID = [0x7121]
+    BCD = [0x02]
+    VENDOR_NAME = 'USB_2.0'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MP977_DRIVER'
+
+    EBOOK_DIR_MAIN = ''
+
+    SUPPORTS_SUB_DIRS = False
+
+    def get_carda_ebook_dir(self, for_upload=False):
+        if for_upload:
+            return 'eBooks'
+        return self.EBOOK_DIR_CARD_A
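The new driver follows the standard USBMS pattern: the class attributes describe how the device announces itself over USB and where books live on it. A hypothetical sketch of the matching idea (not calibre's actual detection code)::

    def looks_like_coby(vendor_id, product_id, bcd_device):
        # A device is a candidate when its USB descriptor matches the
        # driver's advertised ids (illustrative helper, not a calibre API).
        return (vendor_id in COBY.VENDOR_ID and
                product_id in COBY.PRODUCT_ID and
                bcd_device in COBY.BCD)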

@@ -22,6 +22,7 @@ from calibre.utils.date import parse_date, isoformat
 from calibre.utils.localization import get_lang
 from calibre import prints, guess_type
 from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.config import tweaks

 class Resource(object): # {{{
     '''
@@ -527,7 +528,12 @@ class OPF(object): # {{{
     category = MetadataField('type')
     rights = MetadataField('rights')
     series = MetadataField('series', is_dc=False)
-    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
+    if tweaks['use_series_auto_increment_tweak_when_importing']:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=None)
+    else:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=1)
     title_sort = TitleSortField('title_sort', is_dc=False)
     rating = MetadataField('rating', is_dc=False, formatter=int)
     pubdate = MetadataField('date', formatter=parse_date,
@@ -1024,8 +1030,10 @@ class OPF(object): # {{{
         attrib = attrib or {}
         attrib['name'] = 'calibre:' + name
         name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta')
+        nsmap = dict(self.NAMESPACES)
+        del nsmap['opf']
         elem = etree.SubElement(self.metadata, name, attrib=attrib,
-                nsmap=self.NAMESPACES)
+                nsmap=nsmap)
         elem.tail = '\n'
         return elem
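My reading of the nsmap change: the child's prefix map is built without the opf prefix so the generated <meta> element does not carry its own opf namespace declaration (the parent is expected to declare it). A standalone sketch of the pattern, assuming lxml and using the OPF 2.0 namespace URI as an assumption::

    from lxml import etree

    OPF2_NS = 'http://www.idpf.org/2007/opf'   # assumed namespace constant
    NAMESPACES = {'opf': OPF2_NS,
                  'dc': 'http://purl.org/dc/elements/1.1/'}

    metadata = etree.Element('{%s}metadata' % OPF2_NS, nsmap=NAMESPACES)
    nsmap = dict(NAMESPACES)
    del nsmap['opf']                           # same dance as the diff above
    meta = etree.SubElement(metadata, '{%s}meta' % OPF2_NS,
            attrib={'name': 'calibre:series', 'content': 'Demo'}, nsmap=nsmap)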

@@ -22,6 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz, as_utc
 from calibre.utils.html2text import html2text
 from calibre.utils.icu import lower
+from calibre.utils.date import UNDEFINED_DATE

 # Download worker {{{
 class Worker(Thread):
@@ -490,6 +491,8 @@ def identify(log, abort, # {{{
     max_tags = msprefs['max_tags']
     for r in results:
         r.tags = r.tags[:max_tags]
+        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
+            r.pubdate = None

     if msprefs['swap_author_names']:
         for r in results:

@@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
-        get_trailing_data, decode_fvwi)
+        get_trailing_data, decode_tbs)
 from calibre.utils.magick.draw import identify_data

 # PalmDB {{{
@@ -73,7 +73,7 @@ class PalmDB(object):
         self.ident = self.type + self.creator
         if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
             raise ValueError('Unknown book ident: %r'%self.ident)
-        self.uid_seed = self.raw[68:72]
+        self.uid_seed, = struct.unpack(b'>I', self.raw[68:72])
         self.next_rec_list_id = self.raw[72:76]

         self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])
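The uid_seed fix swaps a raw four-byte slice for the decoded big-endian integer; note the trailing comma, which unpacks the one-element tuple that struct.unpack always returns. A minimal worked example::

    import struct

    raw = b'\x00\x00\x30\x39'
    uid_seed, = struct.unpack(b'>I', raw)   # 0x3039 == 12345
    assert uid_seed == 12345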
@@ -182,6 +182,7 @@ class EXTHHeader(object):
         self.records = []
         for i in xrange(self.count):
             pos = self.read_record(pos)
+        self.records.sort(key=lambda x:x.type)

     def read_record(self, pos):
         type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
@@ -290,7 +291,12 @@ class MOBIHeader(object): # {{{
         (self.fcis_number, self.fcis_count, self.flis_number,
                 self.flis_count) = struct.unpack(b'>IIII',
                         self.raw[200:216])
-        self.unknown6 = self.raw[216:240]
+        self.unknown6 = self.raw[216:224]
+        self.srcs_record_index = struct.unpack(b'>I',
+                self.raw[224:228])[0]
+        self.num_srcs_records = struct.unpack(b'>I',
+                self.raw[228:232])[0]
+        self.unknown7 = self.raw[232:240]
         self.extra_data_flags = struct.unpack(b'>I',
                 self.raw[240:244])[0]
         self.has_multibytes = bool(self.extra_data_flags & 0b1)
@@ -339,7 +345,7 @@ class MOBIHeader(object): # {{{
         ans.append('Huffman record offset: %d'%self.huffman_record_offset)
         ans.append('Huffman record count: %d'%self.huffman_record_count)
         ans.append('Unknown2: %r'%self.unknown2)
-        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
+        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
         if self.has_drm_data:
             ans.append('Unknown3: %r'%self.unknown3)
             ans.append('DRM Offset: %s'%self.drm_offset)
@@ -356,6 +362,9 @@ class MOBIHeader(object): # {{{
         ans.append('FLIS number: %d'% self.flis_number)
         ans.append('FLIS count: %d'% self.flis_count)
         ans.append('Unknown6: %r'% self.unknown6)
+        ans.append('SRCS record index: %d'%self.srcs_record_index)
+        ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
+        ans.append('Unknown7: %r'%self.unknown7)
         ans.append(('Extra data flags: %s (has multibyte: %s) '
                 '(has indexing: %s) (has uncrossable breaks: %s)')%(
                     bin(self.extra_data_flags), self.has_multibytes,
@@ -416,12 +425,7 @@ class IndexHeader(object): # {{{
         if self.index_encoding == 'unknown':
             raise ValueError(
                 'Unknown index encoding: %d'%self.index_encoding_num)
-        self.locale_raw, = struct.unpack(b'>I', raw[32:36])
-        langcode = self.locale_raw
-        langid = langcode & 0xFF
-        sublangid = (langcode >> 10) & 0xFF
-        self.language = main_language.get(langid, 'ENGLISH')
-        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+        self.possibly_language = raw[32:36]
         self.num_index_entries, = struct.unpack('>I', raw[36:40])
         self.ordt_start, = struct.unpack('>I', raw[40:44])
         self.ligt_start, = struct.unpack('>I', raw[44:48])
@@ -481,8 +485,7 @@ class IndexHeader(object): # {{{
         a('Number of index records: %d'%self.index_count)
         a('Index encoding: %s (%d)'%(self.index_encoding,
             self.index_encoding_num))
-        a('Index language: %s - %s (%s)'%(self.language, self.sublanguage,
-            hex(self.locale_raw)))
+        a('Unknown (possibly language?): %r'%(self.possibly_language))
         a('Number of index entries: %d'% self.num_index_entries)
         a('ORDT start: %d'%self.ordt_start)
         a('LIGT start: %d'%self.ligt_start)
@@ -602,6 +605,9 @@ class IndexEntry(object): # {{{
         self.raw = raw
         self.tags = []
         self.entry_type_raw = entry_type
+        self.byte_size = len(raw)
+
+        orig_raw = raw

         try:
             self.entry_type = self.TYPES[entry_type]
@@ -639,8 +645,8 @@ class IndexEntry(object): # {{{
                 self.tags.append(Tag(aut_tag[0], [val], self.entry_type,
                     cncx))

-        if raw.replace(b'\x00', b''): # There can be padding null bytes
-            raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw))
+        self.consumed = len(orig_raw) - len(raw)
+        self.trailing_bytes = raw

     @property
     def label(self):
@@ -692,13 +698,16 @@ class IndexEntry(object): # {{{
         return -1

     def __str__(self):
-        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%(
-            self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))]
+        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d, byte_size=%d)'%(
+            self.index, self.entry_type, bin(self.entry_type_raw)[2:],
+            len(self.tags), self.byte_size)]
         for tag in self.tags:
             ans.append('\t'+str(tag))
         if self.first_child_index != -1:
             ans.append('\tNumber of children: %d'%(self.last_child_index -
                 self.first_child_index + 1))
+        if self.trailing_bytes:
+            ans.append('\tTrailing bytes: %r'%self.trailing_bytes)
         return '\n'.join(ans)

 # }}}
@@ -742,6 +751,7 @@ class IndexRecord(object): # {{{
             raise ValueError('Extra bytes after IDXT table: %r'%rest)

         indxt = raw[192:self.idxt_offset]
+        self.size_of_indxt_block = len(indxt)
         self.indices = []
         for i, off in enumerate(self.index_offsets):
             try:
@@ -754,10 +764,14 @@ class IndexRecord(object): # {{{
                 if index_header.index_type == 6:
                     flags = ord(indxt[off+consumed+d])
                     d += 1
+                pos = off+consumed+d
                 self.indices.append(IndexEntry(index, entry_type,
-                    indxt[off+consumed+d:next_off], cncx,
+                    indxt[pos:next_off], cncx,
                     index_header.tagx_entries, flags=flags))
                 index = self.indices[-1]

+        rest = indxt[pos+self.indices[-1].consumed:]
+        if rest.replace(b'\0', ''): # There can be padding null bytes
+            raise ValueError('Extra bytes after IDXT table: %r'%rest)

     def get_parent(self, index):
         if index.depth < 1:
@@ -778,12 +792,13 @@ class IndexRecord(object): # {{{
         u(self.unknown1)
         a('Unknown (header type? index record number? always 1?): %d'%self.header_type)
         u(self.unknown2)
-        a('IDXT Offset: %d'%self.idxt_offset)
+        a('IDXT Offset (%d block size): %d'%(self.size_of_indxt_block,
+            self.idxt_offset))
         a('IDXT Count: %d'%self.idxt_count)
         u(self.unknown3)
         u(self.unknown4)
         a('Index offsets: %r'%self.index_offsets)
-        a('\nIndex Entries:')
+        a('\nIndex Entries (%d entries):'%len(self.indices))
         for entry in self.indices:
             a(str(entry)+'\n')
@@ -829,6 +844,7 @@ class TextRecord(object): # {{{

     def __init__(self, idx, record, extra_data_flags, decompress):
         self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
+        raw_trailing_bytes = record.raw[len(self.raw):]
         self.raw = decompress(self.raw)
         if 0 in self.trailing_data:
             self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0)
@@ -836,6 +852,7 @@ class TextRecord(object): # {{{
             self.trailing_data['indexing'] = self.trailing_data.pop(1)
         if 2 in self.trailing_data:
             self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2)
+        self.trailing_data['raw_bytes'] = raw_trailing_bytes

         self.idx = idx
@@ -949,21 +966,25 @@ class TBSIndexing(object): # {{{
             ans.append(('\t\tIndex Entry: %d (Parent index: %d, '
                 'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                 x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
-        def bin3(num):
+        def bin4(num):
             ans = bin(num)[2:]
-            return '0'*(3-len(ans)) + ans
+            return bytes('0'*(4-len(ans)) + ans)
+
+        def repr_extra(x):
+            return str({bin4(k):v for k, v in extra.iteritems()})

         tbs_type = 0
+        is_periodical = self.doc_type in (257, 258, 259)
         if len(byts):
-            outer, consumed = decint(byts)
+            outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
+                    is_periodical else 3)
             byts = byts[consumed:]
-            tbs_type = outer & 0b111
-            ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type))
-            ans.append('Outer Index entry: %d'%(outer >> 3))
-            arg1, consumed = decint(byts)
-            byts = byts[consumed:]
-            ans.append('Unknown (vwi: always 0?): %d'%arg1)
-        if self.doc_type in (257, 259): # Hierarchical periodical
+            for k in extra:
+                tbs_type |= k
+            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
+            ans.append('Outermost index: %d'%outermost_index)
+            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
+        if is_periodical: # Hierarchical periodical
             byts, a = self.interpret_periodical(tbs_type, byts,
                     dat['geom'][0])
             ans += a
@@ -977,53 +998,21 @@ class TBSIndexing(object): # {{{
     def interpret_periodical(self, tbs_type, byts, record_offset):
         ans = []

-        def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
-            if psi is None:
-                # Assume parent section is 1
-                psi = self.get_index(1)
-            if msg is None:
-                msg = ('Article index at start of record or first article'
-                        ' index, relative to parent section')
-            if byts:
-                # byts could be empty
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                flags = (arg & 0b1111)
-                ai = (arg >> 4)
-                ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai,
-                    ai+psi.index))
-                if flags == 1:
-                    arg, consumed = decint(byts)
-                    if arg == 0:
-                        # EOF of record, otherwise ignore and hope someone else
-                        # will deal with these bytes
-                        byts = byts[consumed:]
-                    ans.append('EOF (vwi: should be 0): %d'%arg)
-                elif flags in (4, 5):
-                    num = byts[0]
-                    byts = byts[1:]
-                    ans.append('Number of article nodes in the record (byte): %d'%num)
-                    if flags == 5:
-                        arg, consumed = decint(byts)
-                        byts = byts[consumed:]
-                        ans.append('%s (vwi)): %d'%(fmsg, arg))
-                elif flags == 0:
-                    pass
-                else:
-                    raise ValueError('Unknown flags: %d'%flags)
-            return byts
-
-        # }}}
-
         def read_section_transitions(byts, psi=None): # {{{
             if psi is None:
-                # Assume parent section is 1
+                # Assume previous section is 1
                 psi = self.get_index(1)

             while byts:
-                ai, flags, consumed = decode_fvwi(byts)
+                ai, extra, consumed = decode_tbs(byts)
                 byts = byts[consumed:]
-                if flags & 0b1000:
+                if extra.get(0b0010, None) is not None:
+                    raise ValueError('Dont know how to interpret flag 0b0010'
+                            ' while reading section transitions')
+                if extra.get(0b1000, None) is not None:
+                    if len(extra) > 1:
+                        raise ValueError('Dont know how to interpret flags'
+                                ' %r while reading section transitions'%extra)
                     nsi = self.get_index(psi.index+1)
                     ans.append('Last article in this record of section %d'
                             ' (relative to next section index [%d]): '
@@ -1036,113 +1025,58 @@ class TBSIndexing(object): # {{{
                         ' (relative to its parent section): '
                         '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))

-                if flags == 0:
-                    ans.append('The section %d has only one article'
-                            ' in this record'%psi.index)
-                    continue
-
-                if flags & 0b0100:
-                    num = byts[0]
-                    byts = byts[1:]
-                    ans.append('Number of articles in this record of '
-                            'section %d: %d'%(psi.index, num))
-
-                if flags & 0b0010:
-                    raise ValueError(
-                            'Dont know how to interpret the 0b0010 flag')
-
-                if flags & 0b0001:
-                    arg, consumed = decint(byts)
-                    byts = byts[consumed:]
-                    ans.append('->Offset to start of next section (%d) from start'
-                            ' of record: %d [%d absolute offset]'%(psi.index+1,
-                                arg, arg+record_offset))
+                num = extra.get(0b0100, None)
+                if num is None:
+                    msg = ('The section %d has at most one article'
+                            ' in this record')%psi.index
+                else:
+                    msg = ('Number of articles in this record of '
+                            'section %d: %d')%(psi.index, num)
+                ans.append(msg)
+
+                offset = extra.get(0b0001, None)
+                if offset is not None:
+                    if offset == 0:
+                        ans.append('This record is spanned by the article:'
+                                '%d'%(ai+psi.index))
+                    else:
+                        ans.append('->Offset to start of next section (%d) from start'
+                                ' of record: %d [%d absolute offset]'%(psi.index+1,
+                                    offset, offset+record_offset))
             return byts
         # }}}

-        if tbs_type == 3: # {{{
-            arg2, consumed = decint(byts)
-            byts = byts[consumed:]
-            ans.append('Unknown (vwi: always 0?): %d'%arg2)
-
-            arg3, consumed = decint(byts)
-            byts = byts[consumed:]
-            fsi = arg3 >> 4
-            flags = arg3 & 0b1111
-            ans.append('First section index (fvwi): %d'%fsi)
-            psi = self.get_index(fsi)
-            ans.append('Flags: %d'%flags)
-            if flags == 4:
-                ans.append('Number of articles in this section: %d'%byts[0])
-                byts = byts[1:]
-            elif flags == 0:
-                pass
-            else:
-                raise ValueError('Unknown flags value: %d'%flags)
-            byts = read_section_transitions(byts, psi)
-
-        # }}}
-
-        elif tbs_type == 7: # {{{
-            # This occurs for records that have no section nodes and
-            # whose parent section's index == 1
-            ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2]))
-            byts = byts[2:]
-            arg, consumed = decint(byts)
-            byts = byts[consumed:]
-            ai = arg >> 4
-            flags = arg & 0b1111
-            ans.append('Article at start of record (fvwi): %d'%ai)
-            if flags == 4:
-                num = byts[0]
-                byts = byts[1:]
-                ans.append('Number of articles in record (byte): %d'%num)
-            elif flags == 0:
-                pass
-            elif flags == 1:
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                ans.append('EOF (vwi: should be 0): %d'%arg)
-            else:
-                raise ValueError('Unknown flags value: %d'%flags)
-        # }}}
-
-        elif tbs_type == 6: # {{{
-            # This is used for records spanned by an article whose parent
-            # section's index == 1 or for the opening record if it contains the
-            # periodical start, section 1 start and at least one article. The
-            # two cases are distinguished by the flags on the article index
-            # vwi.
-            unk = byts[0]
-            byts = byts[1:]
-            ans.append('Unknown (byte: always 2?): %d'%unk)
-            byts = tbs_type_6(byts)
-        # }}}
-
-        elif tbs_type == 2: # {{{
-            # This occurs for records with no section nodes and whose parent
-            # section's index != 1 (undefined (records before the first
-            # section) or > 1)
-            # This is also used for records that are spanned by an article
-            # whose parent section index > 1. In this case the flags of the
-            # vwi referring to the article at the start
-            # of the record are set to 1 instead of 4.
-            arg, consumed = decint(byts)
-            byts = byts[consumed:]
-            flags = (arg & 0b1111)
-            psi = (arg >> 4)
-            ans.append('Parent section index (fvwi): %d'%psi)
-            psi = self.get_index(psi)
-            ans.append('Flags: %d'%flags)
-            if flags == 1:
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                ans.append('Unknown (vwi?: always 0?): %d'%arg)
-                byts = tbs_type_6(byts, psi=psi)
-            elif flags == 0:
-                byts = tbs_type_6(byts, psi=psi)
-            else:
-                raise ValueError('Unknown flags: %d'%flags)
-        # }}}
+        def read_starting_section(byts): # {{{
+            orig = byts
+            si, extra, consumed = decode_tbs(byts)
+            byts = byts[consumed:]
+            if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra:
+                raise ValueError('Dont know how to interpret flags %r'
+                        ' when reading starting section'%extra)
+            si = self.get_index(si)
+            ans.append('The section at the start of this record is:'
+                    ' %d'%si.index)
+            if 0b0100 in extra:
+                num = extra[0b0100]
+                ans.append('The number of articles from the section %d'
+                        ' in this record: %d'%(si.index, num))
+            elif 0b0001 in extra:
+                eof = extra[0b0001]
+                if eof != 0:
+                    raise ValueError('Unknown eof value %s when reading'
+                            ' starting section. All bytes: %r'%(eof, orig))
+                ans.append('This record is spanned by an article from'
+                        ' the section: %d'%si.index)
+            return si, byts
+        # }}}
+
+        if tbs_type & 0b0100:
+            # Starting section is the first section
+            ssi = self.get_index(1)
+        else:
+            ssi, byts = read_starting_section(byts)
+
+        byts = read_section_transitions(byts, ssi)

         return byts, ans

@@ -3,6 +3,20 @@ Reverse engineering the trailing byte sequences for hierarchical periodicals

 In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing.
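As a reading aid, a minimal sketch of how a forward vwi and an fvwi decode, mirroring the decint/decode_fvwi helpers elsewhere in this commit (illustrative only, not the shipped implementation)::

    def decode_vwi(raw):
        # Forward variable width integer: accumulate 7 bits per byte;
        # the byte with its high bit (0x80) set terminates the number.
        val = 0
        for i, b in enumerate(bytearray(raw)):
            val = (val << 7) | (b & 0x7f)
            if b & 0x80:
                return val, i + 1
        return val, len(raw)

    def decode_fvwi(raw):
        # An fvwi is a vwi whose lowest four bits are flag bits.
        arg, consumed = decode_vwi(raw)
        return arg >> 4, arg & 0b1111, consumed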

+Sequence encoding:
+
+0b1000 : Continuation bit
+
+First sequences:
+0b0010 : 80
+0b0011 : 80 80
+0b0110 : 80 2
+0b0111 : 80 2 80
+
+Other sequences:
+0b0101 : 4 1a
+0b0001 : c b1
+
 Opening record
 ----------------

@@ -52,10 +66,60 @@ The text record that contains the opening node for the periodical (depth=0 node

 If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record.

+Starting record with two section transitions::
+
+    Record #1: Starts at: 0 Ends at: 4095
+    Contains: 7 index entries (0 ends, 4 complete, 3 starts)
+    TBS bytes: 86 80 2 c0 b8 c4 3
+    Complete:
+        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
+        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
+        Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review]
+        Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
+    Starts:
+        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader]
+        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net]
+        Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
+    Remaining bytes: b8 c4 3
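As a cross-check, the first three TBS bytes above decode mechanically (my reading of the vwi rules in this document; the remaining b8 c4 3 belong to the section transitions)::

    86 = 1000 0110 -> high bit set, so the vwi ends here; payload 000 0110 (6)
         low three bits 110 give the TBS type (6), the rest give outer index 0
    80 -> vwi with value 0 (the "always 0?" slot)
    02 -> plain byte 2 (the "always 2?" slot)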

+Starting record with three section transitions::
+
+    Record #1: Starts at: 0 Ends at: 4095
+    Contains: 10 index entries (0 ends, 7 complete, 3 starts)
+    TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4
+    Complete:
+        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
+        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net]
+        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
+        Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review]
+        Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results]
+        Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
+        Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
+    Starts:
+        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader]
+        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews]
+        Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
+    Remaining bytes: b8 c0 b8 c4 4

 Records with no nodes
 ------------------------

 subtype = 010

 These records are spanned by a single article. They are of two types:

 1. If the parent section index is 1, TBS type of 6, like this::
@@ -247,7 +311,7 @@ In such a record there is a transition from one section to the next. As such the
     Last article of ending section w.r.t. starting section offset (fvwi): 12 [15 absolute]
     Flags (always 8?): 8
     Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute]
-    Number of article nodes in the record (byte): 4
+    Number of article nodes in the record belonging to the last section (byte): 4


 Ending record
@@ -274,3 +338,26 @@ Logically, ending records must have at least one article ending, one section end

 If the record had only a single article end, the last two bytes would be replaced with: f0

+If the last record has multiple section transitions, it is of type 6 and looks like::
+
+    Record #9: Starts at: 32768 Ends at: 34953
+    Contains: 9 index entries (3 ends, 6 complete, 0 starts)
+    TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
+    Ends:
+        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader]
+        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
+        Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
+    Complete:
+        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
+        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews]
+        Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot]
+        Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
+        Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results]
+        Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute]
+    Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0

@@ -11,6 +11,7 @@ import struct
 from collections import OrderedDict

 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
+from calibre.ebooks import normalize

 IMAGE_MAX_SIZE = 10 * 1024 * 1024

@@ -40,6 +41,9 @@ def encode_number_as_hex(num):
     number.
     '''
     num = bytes(hex(num)[2:].upper())
+    nlen = len(num)
+    if nlen % 2 != 0:
+        num = b'0'+num
     ans = bytearray(num)
     ans.insert(0, len(num))
     return bytes(ans)
@@ -65,11 +69,14 @@ def encint(value, forward=True):
     If forward is True the bytes returned are suitable for prepending to the
     output buffer, otherwise they must be appended to the output buffer.
     '''
+    if value < 0:
+        raise ValueError('Cannot encode negative numbers as vwi')
+
     # Encode vwi
     byts = bytearray()
     while True:
         b = value & 0b01111111
         value >>= 7 # shift value to the right by 7 bits

         byts.append(b)
         if value == 0:
             break
@@ -184,7 +191,7 @@ def encode_trailing_data(raw):
     <data><size>

     where size is a backwards encoded vwi whose value is the length of the
-    entire return bytestring.
+    entire returned bytestring. data is the bytestring passed in as raw.

     This is the encoding used for trailing data entries at the end of text
     records. See get_trailing_data() for details.
@@ -197,3 +204,131 @@ def encode_trailing_data(raw):
         lsize += 1
     return raw + encoded
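A worked check of the framing (my own example): the size vwi counts itself, so a two-byte payload yields a total length of three, which fits in a single vwi byte::

    # encode_trailing_data(b'ab') frames the payload as <data><size>:
    # total length = 2 (payload) + 1 (size vwi) = 3, and a backwards vwi
    # for 3 is the single byte 0x83 (high bit marks its first/only byte).
    assert encode_trailing_data(b'ab') == b'ab\x83'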
+
+def encode_fvwi(val, flags, flag_size=4):
+    '''
+    Encode the value val and the flag_size bits from flags as a fvwi. This
+    encoding is used in the trailing byte sequences for indexing. Returns the
+    encoded bytestring.
+    '''
+    ans = val << flag_size
+    for i in xrange(flag_size):
+        ans |= (flags & (1 << i))
+    return encint(ans)
+
+
+def decode_fvwi(byts, flag_size=4):
+    '''
+    Decode an encoded fvwi. Returns number, flags, consumed.
+    '''
+    arg, consumed = decint(bytes(byts))
+    val = arg >> flag_size
+    flags = 0
+    for i in xrange(flag_size):
+        flags |= (arg & (1 << i))
+    return val, flags, consumed
+
+
+def decode_tbs(byts, flag_size=4):
+    '''
+    Trailing byte sequences for indexing consist of a series of fvwi numbers.
+    This function reads the fvwi number and its associated flags. It then uses
+    the flags to read any more numbers that belong to the series. The flags are
+    the lowest 4 bits of the vwi (see the encode_fvwi function above).
+
+    Returns the fvwi number, a dictionary mapping flag bits to the associated
+    data and the number of bytes consumed.
+    '''
+    byts = bytes(byts)
+    val, flags, consumed = decode_fvwi(byts, flag_size=flag_size)
+    extra = {}
+    byts = byts[consumed:]
+    if flags & 0b1000 and flag_size > 3:
+        extra[0b1000] = True
+    if flags & 0b0010:
+        x, consumed2 = decint(byts)
+        byts = byts[consumed2:]
+        extra[0b0010] = x
+        consumed += consumed2
+    if flags & 0b0100:
+        extra[0b0100] = ord(byts[0])
+        byts = byts[1:]
+        consumed += 1
+    if flags & 0b0001:
+        x, consumed2 = decint(byts)
+        byts = byts[consumed2:]
+        extra[0b0001] = x
+        consumed += consumed2
+    return val, extra, consumed
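A round-trip example, worked out by hand from the helpers above (byte values follow the forward vwi convention used by encint/decint)::

    # val=5 with the 0b0001 flag set encodes as (5 << 4) | 1 = 0x51;
    # encint marks the terminating byte with 0x80, giving 0xd1. The
    # 0b0001 flag then pulls one more vwi (0 -> 0x80) into extra.
    assert encode_tbs(5, {0b0001: 0}) == b'\xd1\x80'
    assert decode_tbs(b'\xd1\x80') == (5, {0b0001: 0}, 2)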
+
+def encode_tbs(val, extra, flag_size=4):
+    '''
+    Encode the number val and the extra data in the extra dict as an fvwi. See
+    decode_tbs above.
+    '''
+    flags = 0
+    for flag in extra:
+        flags |= flag
+    ans = encode_fvwi(val, flags, flag_size=flag_size)
+
+    if 0b0010 in extra:
+        ans += encint(extra[0b0010])
+    if 0b0100 in extra:
+        ans += bytes(bytearray([extra[0b0100]]))
+    if 0b0001 in extra:
+        ans += encint(extra[0b0001])
+    return ans
+
+def utf8_text(text):
+    '''
+    Convert a possibly null string to utf-8 bytes, guaranteeing to return a
+    non-empty, normalized bytestring.
+    '''
+    if text and text.strip():
+        text = text.strip()
+        if not isinstance(text, unicode):
+            text = text.decode('utf-8', 'replace')
+        text = normalize(text).encode('utf-8')
+    else:
+        text = _('Unknown').encode('utf-8')
+    return text
+
+def align_block(raw, multiple=4, pad=b'\0'):
+    '''
+    Return raw with enough pad bytes appended to ensure its length is a
+    multiple of `multiple` (4 by default).
+    '''
+    extra = len(raw) % multiple
+    if extra == 0: return raw
+    return raw + pad*(multiple - extra)
+
+
+def detect_periodical(toc, log=None):
+    '''
+    Detect if the TOC object toc contains a periodical that conforms to the
+    structure required by kindlegen to generate a periodical.
+    '''
+    for node in toc.iterdescendants():
+        if node.depth() == 1 and node.klass != 'article':
+            if log is not None:
+                log.debug(
+                        'Not a periodical: Deepest node does not have '
+                        'class="article"')
+            return False
+        if node.depth() == 2 and node.klass != 'section':
+            if log is not None:
+                log.debug(
+                        'Not a periodical: Second deepest node does not have'
+                        ' class="section"')
+            return False
+        if node.depth() == 3 and node.klass != 'periodical':
+            if log is not None:
+                log.debug('Not a periodical: Third deepest node'
+                        ' does not have class="periodical"')
+            return False
+        if node.depth() > 3:
+            if log is not None:
+                log.debug('Not a periodical: Has nodes of depth > 3')
+            return False
+    return True
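My reading of the checks above (node.depth() appears to be the height of the node's subtree, so leaf articles report 1): a conforming TOC is shaped like::

    # periodical    depth() == 3, klass == 'periodical'
    #   section     depth() == 2, klass == 'section'
    #     article   depth() == 1, klass == 'article'
    #     article
    #   section
    #     article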

@@ -10,35 +10,12 @@ __docformat__ = 'restructuredtext en'

 from struct import pack
 from cStringIO import StringIO
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict

-from calibre.ebooks import normalize
-from calibre.ebooks.mobi.writer2 import RECORD_SIZE
-from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex)
 from calibre.ebooks.mobi.langcodes import iana2mobi
+from calibre.ebooks.mobi.writer2 import RECORD_SIZE
+from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
+    encode_tbs, align_block, utf8_text, detect_periodical)

-def utf8_text(text):
-    '''
-    Convert a possibly null string to utf-8 bytes, guaranteeing to return a non
-    empty, normalized bytestring.
-    '''
-    if text and text.strip():
-        text = text.strip()
-        if not isinstance(text, unicode):
-            text = text.decode('utf-8', 'replace')
-        text = normalize(text).encode('utf-8')
-    else:
-        text = _('Unknown').encode('utf-8')
-    return text
-
-def align_block(raw, multiple=4, pad=b'\0'):
-    '''
-    Return raw with enough pad bytes appended to ensure its length is a
-    multiple of 4.
-    '''
-    extra = len(raw) % multiple
-    if extra == 0: return raw
-    return raw + pad*(multiple - extra)
-
 class CNCX(object): # {{{

@@ -50,13 +27,12 @@ class CNCX(object): # {{{

     MAX_STRING_LENGTH = 500

-    def __init__(self, toc, opts):
+    def __init__(self, toc, is_periodical):
         self.strings = OrderedDict()

-        for item in toc:
-            if item is self.toc: continue
+        for item in toc.iterdescendants(breadth_first=True):
             self.strings[item.title] = 0
-            if opts.mobi_periodical:
+            if is_periodical:
                 self.strings[item.klass] = 0

         self.records = []
@@ -75,11 +51,10 @@ class CNCX(object): # {{{
                 self.records.append(buf.getvalue())
                 buf.truncate(0)
                 offset = len(self.records) * 0x10000

             buf.write(raw)
             self.strings[key] = offset
             offset += len(raw)

-        buf.write(b'\0') # CNCX must end with zero byte
         self.records.append(align_block(buf.getvalue()))

     def __getitem__(self, string):
@@ -98,7 +73,7 @@ class IndexEntry(object): # {{{
         'first_child_index': 22,
         'last_child_index': 23,
         }
-    RTAG_MAP = dict(TAG_VALUES.itervalues(), TAG_VALUES.iterkeys())
+    RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()}

     BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,]

@@ -113,6 +88,17 @@ class IndexEntry(object): # {{{
         self.first_child_index = None
         self.last_child_index = None

+    def __repr__(self):
+        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
+                ' parent_index=%r)')%(self.offset, self.depth, self.length,
+                        self.index, self.parent_index)
+
+    @dynamic_property
+    def size(self):
+        def fget(self): return self.length
+        def fset(self, val): self.length = val
+        return property(fget=fget, fset=fset, doc='Alias for length')
+
     @classmethod
     def tagx_block(cls, for_periodical=True):
         buf = bytearray()
@@ -137,7 +123,7 @@ class IndexEntry(object): # {{{
             buf.append(1)

         header = b'TAGX'
-        header += pack(b'>I', len(buf)) # table length
+        header += pack(b'>I', 12+len(buf)) # table length
         header += pack(b'>I', 1) # control byte count

         return header + bytes(buf)
@@ -159,7 +145,7 @@ class IndexEntry(object): # {{{
     def entry_type(self):
         ans = 0
         for tag in self.tag_nums:
-            ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x
+            ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x
         return ans

     @property
@@ -174,7 +160,7 @@ class IndexEntry(object): # {{{
             val = getattr(self, attr)
             buf.write(encint(val))

-        ans = buf.get_value()
+        ans = buf.getvalue()
         return ans

 # }}}
@@ -186,16 +172,151 @@ class TBS(object): # {{{
     trailing byte sequence for the record.
     '''

-    def __init__(self, data, is_periodical):
+    def __init__(self, data, is_periodical, first=False, section_map={},
+            after_first=False):
+        self.section_map = section_map
+        #import pprint
+        #pprint.pprint(data)
+        #print()
         if is_periodical:
-            self.periodical_tbs(data)
+            # The starting bytes.
+            # The value is zero which I think indicates the periodical
+            # index entry. The values for the various flags seem to be
+            # unused. If the 0b100 is present, it means that the record
+            # deals with section 1 (or is the final record with section
+            # transitions).
+            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
+            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
+                    flag_size=3)
+            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
+                    flag_size=3)
+            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001:
+                0}, flag_size=3)
+
+            if not data:
+                byts = b''
+                if after_first:
+                    # This can happen if a record contains only text between
+                    # the periodical start and the first section
+                    byts = self.type_011
+                self.bytestring = byts
+            else:
+                depth_map = defaultdict(list)
+                for x in ('starts', 'ends', 'completes'):
+                    for idx in data[x]:
+                        depth_map[idx.depth].append(idx)
+                for l in depth_map.itervalues():
+                    l.sort(key=lambda x:x.offset)
+                self.periodical_tbs(data, first, depth_map)
         else:
-            self.book_tbs(data)
+            if not data:
+                self.bytestring = b''
+            else:
+                self.book_tbs(data, first)

-    def periodical_tbs(self, data):
-        self.bytestring = b''
+    def periodical_tbs(self, data, first, depth_map):
+        buf = StringIO()

-    def book_tbs(self, data):
+        has_section_start = (depth_map[1] and
+                set(depth_map[1]).intersection(set(data['starts'])))
+        spanner = data['spans']
+        parent_section_index = -1
+
+        if depth_map[0]:
+            # We have a terminal record
+
+            # Find the first non periodical node
+            first_node = None
+            for nodes in (depth_map[1], depth_map[2]):
+                for node in nodes:
+                    if (first_node is None or (node.offset, node.depth) <
+                            (first_node.offset, first_node.depth)):
+                        first_node = node
+
+            typ = (self.type_110 if has_section_start else self.type_010)
+
+            # parent_section_index is needed for the last record
+            if first_node is not None and first_node.depth > 0:
+                parent_section_index = (first_node.index if first_node.depth
+                        == 1 else first_node.parent_index)
+            else:
+                parent_section_index = max(self.section_map.iterkeys())
+
+        else:
+            # Non terminal record
+
+            if spanner is not None:
+                # record is spanned by a single article
+                parent_section_index = spanner.parent_index
+                typ = (self.type_110 if parent_section_index == 1 else
+                        self.type_010)
+            elif not depth_map[1]:
+                # has only article nodes, i.e. spanned by a section
+                parent_section_index = depth_map[2][0].parent_index
+                typ = (self.type_111 if parent_section_index == 1 else
+                        self.type_010)
+            else:
+                # has section transitions
+                if depth_map[2]:
+                    parent_section_index = depth_map[2][0].parent_index
+                else:
+                    parent_section_index = depth_map[1][0].index
+                typ = self.type_011
+
+        buf.write(typ)
+
+        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
+            # Write starting section information
+            if spanner is None:
+                num_articles = len([a for a in depth_map[1] if a.parent_index
+                    == parent_section_index])
+                extra = {}
+                if num_articles > 1:
+                    extra = {0b0100: num_articles}
+            else:
+                extra = {0b0001: 0}
+            buf.write(encode_tbs(parent_section_index, extra))
+
+        if spanner is None:
+            articles = depth_map[2]
+            sections = set([self.section_map[a.parent_index] for a in
+                articles])
+            sections = sorted(sections, key=lambda x:x.offset)
+            section_map = {s:[a for a in articles if a.parent_index ==
+                s.index] for s in sections}
+            for i, section in enumerate(sections):
+                # All the articles in this record that belong to section
+                articles = section_map[section]
+                first_article = articles[0]
+                last_article = articles[-1]
+                num = len(articles)
+
+                try:
+                    next_sec = sections[i+1]
+                except:
+                    next_sec = None
+
+                extra = {}
+                if num > 1:
+                    extra[0b0100] = num
+                if i == 0 and next_sec is not None:
+                    # Write offset to next section from start of record
+                    # For some reason kindlegen only writes this offset
+                    # for the first section transition. Imitate it.
+                    extra[0b0001] = next_sec.offset - data['offset']
+
+                buf.write(encode_tbs(first_article.index-section.index, extra))
+
+                if next_sec is not None:
+                    buf.write(encode_tbs(last_article.index-next_sec.index,
+                        {0b1000: 0}))
+        else:
+            buf.write(encode_tbs(spanner.index - parent_section_index,
+                {0b0001: 0}))
+
+        self.bytestring = buf.getvalue()
+
+    def book_tbs(self, data, first):
         self.bytestring = b''
 # }}}
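Reading the branches above, the type selection for a periodical record can be summarized like this (my paraphrase of the code, not an authoritative spec)::

    # type_010: record lies inside one section whose index is not 1
    #           (terminal records without a section start also use this)
    # type_011: record contains section transitions, or holds only text
    #           between the periodical start and the first section
    # type_110: terminal record containing a section start, or a record
    #           spanned by an article whose parent section index is 1
    # type_111: record containing only article nodes whose parent
    #           section index is 1 (i.e. spanned by section 1 itself)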

@@ -211,16 +332,18 @@ class Indexer(object): # {{{
         self.log = oeb.log
         self.opts = opts

-        self.is_periodical = opts.mobi_periodical
+        self.is_periodical = detect_periodical(self.oeb.toc, self.log)
+        self.log('Generating MOBI index for a %s'%('periodical' if
+            self.is_periodical else 'book'))
         self.is_flat_periodical = False
-        if opts.mobi_periodical:
+        if self.is_periodical:
             periodical_node = iter(oeb.toc).next()
             sections = tuple(periodical_node)
             self.is_flat_periodical = len(sections) == 1

         self.records = []

-        self.cncx = CNCX(oeb.toc, opts)
+        self.cncx = CNCX(oeb.toc, self.is_periodical)

         if self.is_periodical:
             self.indices = self.create_periodical_index()
@@ -297,14 +420,13 @@ class Indexer(object): # {{{
         buf.write(pack(b'>I', 0)) # Filled in later

         # Number of index records 24-28
-        buf.write(pack('b>I', len(self.records)))
+        buf.write(pack(b'>I', len(self.records)))

         # Index Encoding 28-32
         buf.write(pack(b'>I', 65001)) # utf-8

-        # Index language 32-36
-        buf.write(iana2mobi(
-            str(self.oeb.metadata.language[0])))
+        # Unknown 32-36
+        buf.write(b'\xff'*4)

         # Number of index entries 36-40
         buf.write(pack(b'>I', len(self.indices)))
@@ -349,7 +471,7 @@ class Indexer(object): # {{{
         idxt_offset = buf.tell()

         buf.write(b'IDXT')
-        buf.write(header_length + len(tagx_block))
+        buf.write(pack(b'>H', header_length + len(tagx_block)))
         buf.write(b'\0')
         buf.seek(20)
         buf.write(pack(b'>I', idxt_offset))
@@ -373,12 +495,12 @@ class Indexer(object): # {{{
                 continue
             seen.add(offset)
             index = IndexEntry(offset, label)
-            self.indices.append(index)
+            indices.append(index)

         indices.sort(key=lambda x:x.offset)

         # Set lengths
-        for i, index in indices:
+        for i, index in enumerate(indices):
             try:
                 next_offset = indices[i+1].offset
             except:
@@ -389,11 +511,11 @@ class Indexer(object): # {{{
         indices = [i for i in indices if i.length > 0]

         # Set index values
-        for i, index in indices:
+        for i, index in enumerate(indices):
             index.index = i

         # Set lengths again to close up any gaps left by filtering
-        for i, index in indices:
+        for i, index in enumerate(indices):
             try:
                 next_offset = indices[i+1].offset
             except:
@@ -459,7 +581,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, normalized_articles = x
             try:
-                sec.length = normalized_sections[s+1].offset - sec.offset
+                sec.length = normalized_sections[s+1][0].offset - sec.offset
             except:
                 sec.length = self.serializer.body_end_offset - sec.offset
             for i, art in enumerate(normalized_articles):
@@ -475,17 +597,18 @@ class Indexer(object): # {{{
                 normalized_articles))
             normalized_sections[i] = (sec, normalized_articles)

-        normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1],
+        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
             normalized_sections))

         # Set indices
         i = 0
-        for sec, normalized_articles in normalized_sections:
+        for sec, articles in normalized_sections:
             i += 1
             sec.index = i
             sec.parent_index = 0

-        for sec, normalized_articles in normalized_sections:
-            for art in normalized_articles:
+        for sec, articles in normalized_sections:
+            for art in articles:
                 i += 1
                 art.index = i
                 art.parent_index = sec.index
@@ -498,7 +621,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_offset = normalized_sections[s+1].offset
+                next_offset = normalized_sections[s+1][0].offset
             except:
                 next_offset = self.serializer.body_end_offset
             sec.length = next_offset - sec.offset
@@ -514,7 +637,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_sec = normalized_sections[s+1]
+                next_sec = normalized_sections[s+1][0]
             except:
                 if (sec.length == 0 or sec.next_offset !=
                         self.serializer.body_end_offset):
@@ -546,17 +669,29 @@ class Indexer(object): # {{{
         return indices
     # }}}

+    # TBS {{{
     def calculate_trailing_byte_sequences(self):
         self.tbs_map = {}
+        found_node = False
+        sections = [i for i in self.indices if i.depth == 1]
+        section_map = OrderedDict((i.index, i) for i in
+                sorted(sections, key=lambda x:x.offset))
+
+        deepest = max(i.depth for i in self.indices)
+
         for i in xrange(self.number_of_text_records):
             offset = i * RECORD_SIZE
             next_offset = offset + RECORD_SIZE
-            data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
-                ('spans', None)])
+            data = {'ends':[], 'completes':[], 'starts':[],
+                    'spans':None, 'offset':offset, 'record_number':i+1}

             for index in self.indices:
                 if index.offset >= next_offset:
                     # Node starts after current record
-                    break
+                    if index.depth == deepest:
+                        break
+                    else:
+                        continue
                 if index.next_offset <= offset:
                     # Node ends before current record
                     continue
@@ -572,12 +707,21 @@ class Indexer(object): # {{{
                 if index.next_offset <= next_offset:
                     # Node ends in current record
                     data['ends'].append(index)
-                else:
+                elif index.depth == deepest:
                     data['spans'] = index
-            self.tbs_map[i+1] = TBS(data, self.is_periodical)
+
+            if (data['ends'] or data['completes'] or data['starts'] or
+                    data['spans'] is not None):
+                self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not
+                        found_node, section_map=section_map)
+                found_node = True
+            else:
+                self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False,
+                        after_first=found_node, section_map=section_map)

     def get_trailing_byte_sequence(self, num):
         return self.tbs_map[num].bytestring
     # }}}

 # }}}

@@ -29,7 +29,6 @@ EXTH_CODES = {
     'identifier': 104,
     'subject': 105,
     'pubdate': 106,
-    'date': 106,
     'review': 107,
     'contributor': 108,
     'rights': 109,
@@ -55,6 +54,7 @@ class MobiWriter(object):
         self.last_text_record_idx = 1

     def __call__(self, oeb, path_or_stream):
+        self.log = oeb.log
         if hasattr(path_or_stream, 'write'):
             return self.dump_stream(oeb, path_or_stream)
         with open(path_or_stream, 'w+b') as stream:
@@ -90,6 +90,7 @@ class MobiWriter(object):
         self.primary_index_record_idx = None
         try:
             self.indexer = Indexer(self.serializer, self.last_text_record_idx,
+                    len(self.records[self.last_text_record_idx]),
                     self.opts, self.oeb)
         except:
             self.log.exception('Failed to generate MOBI index:')
@@ -98,9 +99,13 @@ class MobiWriter(object):
             for i in xrange(len(self.records)):
                 if i == 0: continue
                 tbs = self.indexer.get_trailing_byte_sequence(i)
-                self.records[i] += tbs
+                self.records[i] += encode_trailing_data(tbs)
             self.records.extend(self.indexer.records)

+    @property
+    def is_periodical(self):
+        return (self.primary_index_record_idx is None or not
+                self.indexer.is_periodical)
+
     # }}}

@@ -193,7 +198,6 @@ class MobiWriter(object):
         self.serializer = Serializer(self.oeb, self.images,
                 write_page_breaks_after_item=self.write_page_breaks_after_item)
         text = self.serializer()
-        self.content_length = len(text)
         self.text_length = len(text)
         text = StringIO(text)
         nrecords = 0
@@ -201,21 +205,16 @@ class MobiWriter(object):
         if self.compression != UNCOMPRESSED:
             self.oeb.logger.info('  Compressing markup content...')

-        data, overlap = self.read_text_record(text)
-
-        while len(data) > 0:
+        while text.tell() < self.text_length:
+            data, overlap = self.read_text_record(text)
             if self.compression == PALMDOC:
                 data = compress_doc(data)
-            record = StringIO()
-            record.write(data)
-
-            self.records.append(record.getvalue())
-            nrecords += 1
-            data, overlap = self.read_text_record(text)
-
-            # Write information about the mutibyte character overlap, if any
-            record.write(overlap)
-            record.write(pack(b'>B', len(overlap)))
+
+            data += overlap
+            data += pack(b'>B', len(overlap))
+
+            self.records.append(data)
+            nrecords += 1

         self.last_text_record_idx = nrecords
|
||||
|
||||
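Note: each text record now ends with its multibyte overlap bytes followed by one byte holding their count. A reader strips them like this (a sketch; masking the low two bits follows the MOBI convention for the overlap count):

def strip_multibyte_overlap(record):
    # The final byte stores how many overlap bytes (0-3) precede it;
    # drop them together with the count byte itself.
    n = ord(record[-1:]) & 0b11
    return record[:-(n + 1)]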
@@ -276,8 +275,19 @@ class MobiWriter(object):
        exth = self.build_exth()
        last_content_record = len(self.records) - 1

        # FCIS/FLIS (Seem to server no purpose)
        flis_number = len(self.records)
        self.records.append(
            b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
            b'\xff'*4)
        fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
        fcis += pack(b'>I', self.text_length)
        fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
        fcis_number = len(self.records)
        self.records.append(fcis)

        # EOF record
        self.records.append('\xE9\x8E\x0D\x0A')
        self.records.append(b'\xE9\x8E\x0D\x0A')

        record0 = StringIO()
        # The MOBI Header
@@ -307,8 +317,15 @@ class MobiWriter(object):
        # 0x10 - 0x13 : UID
        # 0x14 - 0x17 : Generator version

        bt = 0x002
        if self.primary_index_record_idx is not None:
            if self.indexer.is_flat_periodical:
                bt = 0x102
            elif self.indexer.is_periodical:
                bt = 0x103

        record0.write(pack(b'>IIIII',
            0xe8, 0x002, 65001, uid, 6))
            0xe8, bt, 65001, uid, 6))

        # 0x18 - 0x1f : Unknown
        record0.write(b'\xff' * 8)
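Note: the book type field written above reduces to a small mapping (the function name and labels are descriptive, not identifiers from the source):

def book_type(is_periodical, is_flat_periodical):
    # Values taken from the hunk above.
    if not is_periodical:
        return 0x002    # ordinary book
    return 0x102 if is_flat_periodical else 0x103    # flat vs. sectioned periodical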
@@ -337,7 +354,8 @@ class MobiWriter(object):
        # 0x58 - 0x5b : Format version
        # 0x5c - 0x5f : First image record number
        record0.write(pack(b'>II',
            6, self.first_image_record if self.first_image_record else 0))
            6, self.first_image_record if self.first_image_record else
            len(self.records)-1))

        # 0x60 - 0x63 : First HUFF/CDIC record number
        # 0x64 - 0x67 : Number of HUFF/CDIC records
@@ -346,7 +364,12 @@ class MobiWriter(object):
        record0.write(b'\0' * 16)

        # 0x70 - 0x73 : EXTH flags
        record0.write(pack(b'>I', 0x50))
        # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
        # The purpose of the other bits is unknown
        exth_flags = 0b1011000
        if self.is_periodical:
            exth_flags |= 0b1000
        record0.write(pack(b'>I', exth_flags))

        # 0x74 - 0x93 : Unknown
        record0.write(b'\0' * 32)
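Note: a quick check of the flag word written above; the binary literal 0b1011000 (0x58) already contains the 0b1000 bit, so the periodical branch does not change the value as written:

exth_flags = 0b1011000      # 0x58; bit 6 (0b1000000) marks the EXTH header
is_periodical = True        # illustrative
if is_periodical:
    exth_flags |= 0b1000    # no-op here: the base value already has this bit
assert exth_flags == 0x58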
@@ -371,13 +394,13 @@ class MobiWriter(object):
        record0.write(b'\0\0\0\x01')

        # 0xb8 - 0xbb : FCIS record number
        record0.write(pack(b'>I', 0xffffffff))
        record0.write(pack(b'>I', fcis_number))

        # 0xbc - 0xbf : Unknown (FCIS record count?)
        record0.write(pack(b'>I', 0xffffffff))
        record0.write(pack(b'>I', 1))

        # 0xc0 - 0xc3 : FLIS record number
        record0.write(pack(b'>I', 0xffffffff))
        record0.write(pack(b'>I', flis_number))

        # 0xc4 - 0xc7 : Unknown (FLIS record count?)
        record0.write(pack(b'>I', 1))
@@ -469,25 +492,33 @@ class MobiWriter(object):
            nrecs += 1

        # Write cdetype
        if not self.opts.mobi_periodical:
        if self.is_periodical:
            data = b'EBOK'
            exth.write(pack(b'>II', 501, len(data)+8))
            exth.write(data)
            nrecs += 1

        # Add a publication date entry
        if oeb.metadata['date'] != [] :
        if oeb.metadata['date']:
            datestr = str(oeb.metadata['date'][0])
        elif oeb.metadata['timestamp'] != [] :
        elif oeb.metadata['timestamp']:
            datestr = str(oeb.metadata['timestamp'][0])

        if datestr is not None:
            datestr = bytes(datestr)
            datestr = datestr.replace(b'+00:00', b'Z')
            exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
            exth.write(datestr)
            nrecs += 1
        else:
            raise NotImplementedError("missing date or timestamp needed for mobi_periodical")

        # Write the same creator info as kindlegen 1.2
        for code, val in [(204, 202), (205, 1), (206, 2), (207, 33307)]:
            exth.write(pack(b'>II', code, 12))
            exth.write(pack(b'>I', val))
            nrecs += 1

        if (oeb.metadata.cover and
                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
            id = unicode(oeb.metadata.cover[0])
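Note: every EXTH entry written above follows the same frame: a 4-byte code, a 4-byte total length (payload plus the 8 header bytes), then the payload. As a standalone sketch:

from struct import pack

def exth_record(code, payload):
    # 8 = 4 bytes for the code + 4 bytes for the length field, big-endian.
    return pack(b'>II', code, len(payload) + 8) + payload

# e.g. the cdetype entry from the hunk above:
# exth_record(501, b'EBOK')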
@@ -1680,11 +1680,18 @@ class TOC(object):
                return True
        return False

    def iterdescendants(self):
    def iterdescendants(self, breadth_first=False):
        """Iterate over all descendant nodes in depth-first order."""
        for child in self.nodes:
            for node in child.iter():
                yield node
        if breadth_first:
            for child in self.nodes:
                yield child
            for child in self.nodes:
                for node in child.iterdescendants(breadth_first=True):
                    yield node
        else:
            for child in self.nodes:
                for node in child.iter():
                    yield node

    def __iter__(self):
        """Iterate over all immediate child nodes."""
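Note: a usage sketch of the two traversal orders (Node is a minimal stand-in with the same .nodes shape as TOC; iterdescendants mirrors the method above):

class Node(object):
    def __init__(self, name, nodes=()):
        self.name = name
        self.nodes = list(nodes)

def iterdescendants(node, breadth_first=False):
    if breadth_first:
        for child in node.nodes:
            yield child
        for child in node.nodes:
            for desc in iterdescendants(child, breadth_first=True):
                yield desc
    else:
        for child in node.nodes:
            yield child
            for desc in iterdescendants(child):
                yield desc

root = Node('root', [Node('A', [Node('A1'), Node('A2')]), Node('B', [Node('B1')])])
print([n.name for n in iterdescendants(root)])
# ['A', 'A1', 'A2', 'B', 'B1']
print([n.name for n in iterdescendants(root, breadth_first=True)])
# ['A', 'B', 'A1', 'A2', 'B1']  -- immediate children first, then each subtree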
@@ -165,6 +165,7 @@ class PDFWriter(QObject): # {{{
            printer = get_pdf_printer(self.opts)
            printer.setOutputFileName(item_path)
            self.view.print_(printer)
            printer.abort()
            self._render_book()

    def _delete_tmpdir(self):
@@ -186,6 +187,7 @@ class PDFWriter(QObject): # {{{
                draw_image_page(printer, painter, p,
                    preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                painter.end()
                printer.abort()


    def _write(self):
@@ -24,7 +24,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
        url_details = ('http://ad.zanox.com/ppc/?18845780C1371495675T&ULP=[['
        url_details = ('http://ad.zanox.com/ppc/?18848208C1197627693T&ULP=[['
            'http://www.libri.de/shop/action/productDetails?artiId={0}]]')

        if external or self.config.get('open_external', False):
@@ -1892,7 +1892,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            yield r[iindex]

    def get_next_series_num_for(self, series):
        series_id = self.conn.get('SELECT id from series WHERE name=?',
        series_id = None
        if series:
            series_id = self.conn.get('SELECT id from series WHERE name=?',
                (series,), all=False)
        if series_id is None:
            if isinstance(tweaks['series_index_auto_increment'], (int, float)):
@@ -3023,8 +3025,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            stream.seek(0)
            mi = get_metadata(stream, format, use_libprs_metadata=False)
            stream.seek(0)
            if not mi.series_index:
                mi.series_index = 1.0
            if mi.series_index is None:
                mi.series_index = self.get_next_series_num_for(mi.series)
            mi.tags = [_('News')]
            if arg['add_title_tag']:
                mi.tags += [arg['title']]
@@ -3076,7 +3078,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self._add_newbook_tag(mi)
        if not add_duplicates and self.has_book(mi):
            return None
        series_index = 1.0 if mi.series_index is None else mi.series_index
        series_index = self.get_next_series_num_for(mi.series) \
            if mi.series_index is None else mi.series_index
        aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
        title = mi.title
        if isbytestring(aus):
@@ -3123,7 +3126,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            if not add_duplicates and self.has_book(mi):
                duplicates.append((path, format, mi))
                continue
            series_index = 1.0 if mi.series_index is None else mi.series_index
            series_index = self.get_next_series_num_for(mi.series) \
                if mi.series_index is None else mi.series_index
            aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
            title = mi.title
            if isinstance(aus, str):
@@ -3157,7 +3161,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

    def import_book(self, mi, formats, notify=True, import_hooks=True,
            apply_import_tags=True, preserve_uuid=False):
        series_index = 1.0 if mi.series_index is None else mi.series_index
        series_index = self.get_next_series_num_for(mi.series) \
            if mi.series_index is None else mi.series_index
        if apply_import_tags:
            self._add_newbook_tag(mi)
        if not mi.title:
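Note: the recurring change in these hunks swaps a hard-coded default of 1.0 for a database lookup. The pattern as a standalone sketch (next_num_for stands in for get_next_series_num_for bound to a database):

def effective_series_index(series, series_index, next_num_for):
    # Only consult the database when the incoming metadata has no index.
    if series_index is None:
        return next_num_for(series)
    return series_index

print(effective_series_index('Foo', None, lambda s: 3.0))   # 3.0
print(effective_series_index('Foo', 2.0, lambda s: 3.0))    # 2.0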