commit a224be9eef
Author: Alex Stanev
Date: 2011-07-27 17:09:50 +03:00
15 changed files with 693 additions and 292 deletions

View File

@@ -1,4 +1,4 @@
 __license__ = 'GPL v3'
 __copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
 '''
 irishtimes.com
@@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class IrishTimes(BasicNewsRecipe):
     title = u'The Irish Times'
     encoding = 'ISO-8859-15'
     __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
     language = 'en_IE'
     timefmt = ' (%A, %B %d, %Y)'
@@ -18,6 +18,7 @@ class IrishTimes(BasicNewsRecipe):
     oldest_article = 1.0
     max_articles_per_feed = 100
     no_stylesheets = True
+    simultaneous_downloads= 5
     r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
     remove_tags = [dict(name='div', attrs={'class':'footer'})]
@@ -25,17 +26,17 @@ class IrishTimes(BasicNewsRecipe):
     feeds = [
         ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
-        ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
-        ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
-        ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
-        ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
-        ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
-        ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
-        ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
+        ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
+        ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
+        ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
+        ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
+        ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
+        ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
+        ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
         ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
-        ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
-        ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
-        ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
+        ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
+        ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
+        ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
         ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
         ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
         ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@@ -49,10 +50,16 @@ class IrishTimes(BasicNewsRecipe):
     def print_version(self, url):
         if url.count('rss.feedsportal.com'):
-            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            u = url.find('irishtimes')
+            u = 'http://www.irishtimes.com' + url[u + 12:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0Bhtml/story01.htm', '_pf.html')
         else:
             u = url.replace('.html','_pf.html')
         return u

     def get_article_url(self, article):
         return article.link
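
The rewritten print_version decodes the feedsportal redirect instead of patching it. A minimal Python 3 sketch of the same steps, under the assumption (suggested by the replacements above) that '0C' encodes '/' and '0B' encodes '.', with the helper name and sample comments invented for illustration::

    def feedsportal_to_print(url):
        # Skip 'irishtimes' plus the two characters after it (apparently the
        # encoded '.com') and rebuild the real host.
        i = url.find('irishtimes')
        u = 'http://www.irishtimes.com' + url[i + 12:]
        u = u.replace('0C', '/')   # '0C' seems to encode '/'
        u = u.replace('A', '')     # drop feedsportal's 'A' escape characters
        # '0Bhtml/story01.htm' marks the article page; swap in the
        # print-friendly suffix instead.
        return u.replace('0Bhtml/story01.htm', '_pf.html')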

View File

@@ -11,7 +11,7 @@ defaults.
 '''
 #: Auto increment series index
-# The algorithm used to assign a new book in an existing series a series number.
+# The algorithm used to assign a book added to an existing series a series number.
 # New series numbers assigned using this tweak are always integer values, except
 # if a constant non-integer is specified.
 # Possible values are:
@@ -27,7 +27,19 @@ defaults.
 # series_index_auto_increment = 'next'
 # series_index_auto_increment = 'next_free'
 # series_index_auto_increment = 16.5
+#
+# Set the use_series_auto_increment_tweak_when_importing tweak to True to
+# use the above values when importing/adding books. If this tweak is set to
+# False (the default) then the series number will be set to 1 if it is not
+# explicitly set during the import. If set to True, then the
+# series index will be set according to the series_index_auto_increment setting.
+# Note that the use_series_auto_increment_tweak_when_importing tweak is used
+# only when a value is not provided during import. If the importing regular
+# expression produces a value for series_index, or if you are reading metadata
+# from books and the import plugin produces a value, then that value will
+# be used irrespective of the setting of the tweak.
 series_index_auto_increment = 'next'
+use_series_auto_increment_tweak_when_importing = False

 #: Add separator after completing an author name
 # Should the completion separator be append
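
A small sketch of the decision the two tweaks above control at import time; the function and its arguments are hypothetical, for illustration only, not calibre's actual import code::

    def series_index_on_import(explicit_index, tweaks, next_in_series):
        # A value from the import regexp or a metadata plugin always wins.
        if explicit_index is not None:
            return explicit_index
        # Tweak off (the default): imported books get series index 1.
        if not tweaks['use_series_auto_increment_tweak_when_importing']:
            return 1.0
        # Tweak on: honour series_index_auto_increment instead.
        mode = tweaks['series_index_auto_increment']
        if isinstance(mode, (int, float)):
            return float(mode)        # constant value, e.g. 16.5
        return next_in_series(mode)   # 'next', 'next_free', etc.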

View File

@@ -570,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
         GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
-        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
+        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY)
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
@@ -705,7 +705,7 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ADAM,
-    MOOVYBOOK,
+    MOOVYBOOK, COBY,
     ITUNES,
     BOEYE_BEX,
     BOEYE_BDX,

View File

@@ -351,3 +351,29 @@ class MOOVYBOOK(USBMS):
     def get_main_ebook_dir(self, for_upload=False):
         return 'Books' if for_upload else self.EBOOK_DIR_MAIN

+class COBY(USBMS):
+
+    name = 'COBY MP977 device interface'
+    gui_name = 'COBY'
+    description = _('Communicate with the COBY')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'pdf']
+
+    VENDOR_ID = [0x1e74]
+    PRODUCT_ID = [0x7121]
+    BCD = [0x02]
+
+    VENDOR_NAME = 'USB_2.0'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MP977_DRIVER'
+
+    EBOOK_DIR_MAIN = ''
+    SUPPORTS_SUB_DIRS = False
+
+    def get_carda_ebook_dir(self, for_upload=False):
+        if for_upload:
+            return 'eBooks'
+        return self.EBOOK_DIR_CARD_A

View File

@@ -22,6 +22,7 @@ from calibre.utils.date import parse_date, isoformat
 from calibre.utils.localization import get_lang
 from calibre import prints, guess_type
 from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.config import tweaks

 class Resource(object): # {{{
     '''
@@ -527,7 +528,12 @@ class OPF(object): # {{{
     category = MetadataField('type')
     rights = MetadataField('rights')
     series = MetadataField('series', is_dc=False)
-    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
+    if tweaks['use_series_auto_increment_tweak_when_importing']:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=None)
+    else:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=1)
     title_sort = TitleSortField('title_sort', is_dc=False)
     rating = MetadataField('rating', is_dc=False, formatter=int)
     pubdate = MetadataField('date', formatter=parse_date,
@@ -1024,8 +1030,10 @@ class OPF(object): # {{{
         attrib = attrib or {}
         attrib['name'] = 'calibre:' + name
         name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta')
+        nsmap = dict(self.NAMESPACES)
+        del nsmap['opf']
         elem = etree.SubElement(self.metadata, name, attrib=attrib,
-                nsmap=self.NAMESPACES)
+                nsmap=nsmap)
         elem.tail = '\n'
         return elem
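
The none_is switch above only changes what an absent series_index in the OPF reads back as. A hypothetical illustration (not calibre's API) of the two behaviours::

    def read_series_index(opf_value, tweak_on):
        # none_is is the value returned when the OPF has no series_index.
        none_is = None if tweak_on else 1.0
        if opf_value is None:
            return none_is
        return float(opf_value)

    read_series_index(None, tweak_on=False)  # -> 1.0, the old fixed default
    read_series_index(None, tweak_on=True)   # -> None, so auto-increment applies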

View File

@@ -22,6 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz, as_utc
 from calibre.utils.html2text import html2text
 from calibre.utils.icu import lower
+from calibre.utils.date import UNDEFINED_DATE

 # Download worker {{{
 class Worker(Thread):
@@ -490,6 +491,8 @@ def identify(log, abort, # {{{
     max_tags = msprefs['max_tags']
     for r in results:
         r.tags = r.tags[:max_tags]
+        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
+            r.pubdate = None

     if msprefs['swap_author_names']:
         for r in results:
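
The added guard discards placeholder publication dates returned by metadata sources. A standalone Python 3 illustration, assuming UNDEFINED_DATE is calibre's 'no date' sentinel with year 101::

    from datetime import datetime

    UNDEFINED_DATE = datetime(101, 1, 1)  # assumed sentinel value

    def clamp_pubdate(pubdate):
        # Non-date values fall back to 2000 via getattr, so only real
        # sentinel dates (year <= 101) are discarded.
        if getattr(pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            return None
        return pubdate

    clamp_pubdate(datetime(101, 1, 1))    # -> None
    clamp_pubdate(datetime(2011, 7, 27))  # -> unchanged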

View File

@@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
-        get_trailing_data, decode_fvwi)
+        get_trailing_data, decode_tbs)
 from calibre.utils.magick.draw import identify_data

 # PalmDB {{{
@@ -73,7 +73,7 @@ class PalmDB(object):
         self.ident = self.type + self.creator
         if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
             raise ValueError('Unknown book ident: %r'%self.ident)
-        self.uid_seed = self.raw[68:72]
+        self.uid_seed, = struct.unpack(b'>I', self.raw[68:72])
         self.next_rec_list_id = self.raw[72:76]

         self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])
@@ -182,6 +182,7 @@ class EXTHHeader(object):
         self.records = []
         for i in xrange(self.count):
             pos = self.read_record(pos)
+        self.records.sort(key=lambda x:x.type)

     def read_record(self, pos):
         type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
@@ -290,7 +291,12 @@ class MOBIHeader(object): # {{{
         (self.fcis_number, self.fcis_count, self.flis_number,
                 self.flis_count) = struct.unpack(b'>IIII',
                         self.raw[200:216])
-        self.unknown6 = self.raw[216:240]
+        self.unknown6 = self.raw[216:224]
+        self.srcs_record_index = struct.unpack(b'>I',
+            self.raw[224:228])[0]
+        self.num_srcs_records = struct.unpack(b'>I',
+            self.raw[228:232])[0]
+        self.unknown7 = self.raw[232:240]
         self.extra_data_flags = struct.unpack(b'>I',
             self.raw[240:244])[0]
         self.has_multibytes = bool(self.extra_data_flags & 0b1)
@@ -339,7 +345,7 @@ class MOBIHeader(object): # {{{
         ans.append('Huffman record offset: %d'%self.huffman_record_offset)
         ans.append('Huffman record count: %d'%self.huffman_record_count)
         ans.append('Unknown2: %r'%self.unknown2)
-        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
+        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
         if self.has_drm_data:
             ans.append('Unknown3: %r'%self.unknown3)
             ans.append('DRM Offset: %s'%self.drm_offset)
@@ -356,6 +362,9 @@ class MOBIHeader(object): # {{{
         ans.append('FLIS number: %d'% self.flis_number)
         ans.append('FLIS count: %d'% self.flis_count)
         ans.append('Unknown6: %r'% self.unknown6)
+        ans.append('SRCS record index: %d'%self.srcs_record_index)
+        ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
+        ans.append('Unknown7: %r'%self.unknown7)
         ans.append(('Extra data flags: %s (has multibyte: %s) '
             '(has indexing: %s) (has uncrossable breaks: %s)')%(
                 bin(self.extra_data_flags), self.has_multibytes,
@@ -416,12 +425,7 @@ class IndexHeader(object): # {{{
         if self.index_encoding == 'unknown':
             raise ValueError(
                 'Unknown index encoding: %d'%self.index_encoding_num)
-        self.locale_raw, = struct.unpack(b'>I', raw[32:36])
-        langcode = self.locale_raw
-        langid = langcode & 0xFF
-        sublangid = (langcode >> 10) & 0xFF
-        self.language = main_language.get(langid, 'ENGLISH')
-        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+        self.possibly_language = raw[32:36]
         self.num_index_entries, = struct.unpack('>I', raw[36:40])
         self.ordt_start, = struct.unpack('>I', raw[40:44])
         self.ligt_start, = struct.unpack('>I', raw[44:48])
@@ -481,8 +485,7 @@ class IndexHeader(object): # {{{
         a('Number of index records: %d'%self.index_count)
         a('Index encoding: %s (%d)'%(self.index_encoding,
             self.index_encoding_num))
-        a('Index language: %s - %s (%s)'%(self.language, self.sublanguage,
-            hex(self.locale_raw)))
+        a('Unknown (possibly language?): %r'%(self.possibly_language))
         a('Number of index entries: %d'% self.num_index_entries)
         a('ORDT start: %d'%self.ordt_start)
         a('LIGT start: %d'%self.ligt_start)
@@ -602,6 +605,9 @@ class IndexEntry(object): # {{{
         self.raw = raw
         self.tags = []
         self.entry_type_raw = entry_type
+        self.byte_size = len(raw)
+
+        orig_raw = raw

         try:
             self.entry_type = self.TYPES[entry_type]
@@ -639,8 +645,8 @@ class IndexEntry(object): # {{{
                 self.tags.append(Tag(aut_tag[0], [val], self.entry_type,
                     cncx))

-        if raw.replace(b'\x00', b''): # There can be padding null bytes
-            raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw))
+        self.consumed = len(orig_raw) - len(raw)
+        self.trailing_bytes = raw

     @property
     def label(self):
@@ -692,13 +698,16 @@ class IndexEntry(object): # {{{
         return -1

     def __str__(self):
-        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%(
-            self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))]
+        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d, byte_size=%d)'%(
+            self.index, self.entry_type, bin(self.entry_type_raw)[2:],
+            len(self.tags), self.byte_size)]
         for tag in self.tags:
             ans.append('\t'+str(tag))
         if self.first_child_index != -1:
             ans.append('\tNumber of children: %d'%(self.last_child_index -
                 self.first_child_index + 1))
+        if self.trailing_bytes:
+            ans.append('\tTrailing bytes: %r'%self.trailing_bytes)
         return '\n'.join(ans)

 # }}}
@@ -742,6 +751,7 @@ class IndexRecord(object): # {{{
             raise ValueError('Extra bytes after IDXT table: %r'%rest)

         indxt = raw[192:self.idxt_offset]
+        self.size_of_indxt_block = len(indxt)
         self.indices = []
         for i, off in enumerate(self.index_offsets):
             try:
@@ -754,10 +764,14 @@ class IndexRecord(object): # {{{
             if index_header.index_type == 6:
                 flags = ord(indxt[off+consumed+d])
                 d += 1
+            pos = off+consumed+d
             self.indices.append(IndexEntry(index, entry_type,
-                indxt[off+consumed+d:next_off], cncx,
+                indxt[pos:next_off], cncx,
                 index_header.tagx_entries, flags=flags))
+            index = self.indices[-1]
+
+        rest = indxt[pos+self.indices[-1].consumed:]
+        if rest.replace(b'\0', ''): # There can be padding null bytes
+            raise ValueError('Extra bytes after IDXT table: %r'%rest)

     def get_parent(self, index):
         if index.depth < 1:
@@ -778,12 +792,13 @@ class IndexRecord(object): # {{{
         u(self.unknown1)
         a('Unknown (header type? index record number? always 1?): %d'%self.header_type)
         u(self.unknown2)
-        a('IDXT Offset: %d'%self.idxt_offset)
+        a('IDXT Offset (%d block size): %d'%(self.size_of_indxt_block,
+            self.idxt_offset))
         a('IDXT Count: %d'%self.idxt_count)
         u(self.unknown3)
         u(self.unknown4)
         a('Index offsets: %r'%self.index_offsets)
-        a('\nIndex Entries:')
+        a('\nIndex Entries (%d entries):'%len(self.indices))
         for entry in self.indices:
             a(str(entry)+'\n')
@@ -829,6 +844,7 @@ class TextRecord(object): # {{{

     def __init__(self, idx, record, extra_data_flags, decompress):
         self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
+        raw_trailing_bytes = record.raw[len(self.raw):]
         self.raw = decompress(self.raw)
         if 0 in self.trailing_data:
             self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0)
@@ -836,6 +852,7 @@ class TextRecord(object): # {{{
             self.trailing_data['indexing'] = self.trailing_data.pop(1)
         if 2 in self.trailing_data:
             self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2)
+        self.trailing_data['raw_bytes'] = raw_trailing_bytes

         self.idx = idx
@@ -949,21 +966,25 @@ class TBSIndexing(object): # {{{
             ans.append(('\t\tIndex Entry: %d (Parent index: %d, '
                 'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                 x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
-        def bin3(num):
+        def bin4(num):
             ans = bin(num)[2:]
-            return '0'*(3-len(ans)) + ans
+            return bytes('0'*(4-len(ans)) + ans)
+        def repr_extra(x):
+            return str({bin4(k):v for k, v in extra.iteritems()})
         tbs_type = 0
+        is_periodical = self.doc_type in (257, 258, 259)
         if len(byts):
-            outer, consumed = decint(byts)
+            outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
+                    is_periodical else 3)
             byts = byts[consumed:]
-            tbs_type = outer & 0b111
-            ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type))
-            ans.append('Outer Index entry: %d'%(outer >> 3))
-            arg1, consumed = decint(byts)
-            byts = byts[consumed:]
-            ans.append('Unknown (vwi: always 0?): %d'%arg1)
-            if self.doc_type in (257, 259): # Hierarchical periodical
+            for k in extra:
+                tbs_type |= k
+            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
+            ans.append('Outermost index: %d'%outermost_index)
+            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
+            if is_periodical: # Hierarchical periodical
                 byts, a = self.interpret_periodical(tbs_type, byts,
                         dat['geom'][0])
                 ans += a
@@ -977,53 +998,21 @@ class TBSIndexing(object): # {{{

     def interpret_periodical(self, tbs_type, byts, record_offset):
         ans = []

-        def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
-            if psi is None:
-                # Assume parent section is 1
-                psi = self.get_index(1)
-            if msg is None:
-                msg = ('Article index at start of record or first article'
-                        ' index, relative to parent section')
-            if byts:
-                # byts could be empty
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                flags = (arg & 0b1111)
-                ai = (arg >> 4)
-                ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai,
-                    ai+psi.index))
-                if flags == 1:
-                    arg, consumed = decint(byts)
-                    if arg == 0:
-                        # EOF of record, otherwise ignore and hope someone else
-                        # will deal with these bytes
-                        byts = byts[consumed:]
-                        ans.append('EOF (vwi: should be 0): %d'%arg)
-                elif flags in (4, 5):
-                    num = byts[0]
-                    byts = byts[1:]
-                    ans.append('Number of article nodes in the record (byte): %d'%num)
-                    if flags == 5:
-                        arg, consumed = decint(byts)
-                        byts = byts[consumed:]
-                        ans.append('%s (vwi)): %d'%(fmsg, arg))
-                elif flags == 0:
-                    pass
-                else:
-                    raise ValueError('Unknown flags: %d'%flags)
-            return byts
-        # }}}
-
         def read_section_transitions(byts, psi=None): # {{{
             if psi is None:
-                # Assume parent section is 1
+                # Assume previous section is 1
                 psi = self.get_index(1)

             while byts:
-                ai, flags, consumed = decode_fvwi(byts)
+                ai, extra, consumed = decode_tbs(byts)
                 byts = byts[consumed:]
-                if flags & 0b1000:
+                if extra.get(0b0010, None) is not None:
+                    raise ValueError('Dont know how to interpret flag 0b0010'
+                            ' while reading section transitions')
+                if extra.get(0b1000, None) is not None:
+                    if len(extra) > 1:
+                        raise ValueError('Dont know how to interpret flags'
+                                ' %r while reading section transitions'%extra)
                     nsi = self.get_index(psi.index+1)
                     ans.append('Last article in this record of section %d'
                         ' (relative to next section index [%d]): '
@@ -1036,113 +1025,58 @@ class TBSIndexing(object): # {{{
                     ' (relative to its parent section): '
                     '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))

-                if flags == 0:
-                    ans.append('The section %d has only one article'
-                            ' in this record'%psi.index)
-                    continue
-
-                if flags & 0b0100:
-                    num = byts[0]
-                    byts = byts[1:]
-                    ans.append('Number of articles in this record of '
-                            'section %d: %d'%(psi.index, num))
-
-                if flags & 0b0010:
-                    raise ValueError(
-                            'Dont know how to interpret the 0b0010 flag')
-
-                if flags & 0b0001:
-                    arg, consumed = decint(byts)
-                    byts = byts[consumed:]
-                    ans.append('->Offset to start of next section (%d) from start'
-                            ' of record: %d [%d absolute offset]'%(psi.index+1,
-                                arg, arg+record_offset))
-
-            return byts
+                num = extra.get(0b0100, None)
+                if num is None:
+                    msg = ('The section %d has at most one article'
+                            ' in this record')%psi.index
+                else:
+                    msg = ('Number of articles in this record of '
+                            'section %d: %d')%(psi.index, num)
+                ans.append(msg)
+
+                offset = extra.get(0b0001, None)
+                if offset is not None:
+                    if offset == 0:
+                        ans.append('This record is spanned by the article:'
+                                '%d'%(ai+psi.index))
+                    else:
+                        ans.append('->Offset to start of next section (%d) from start'
+                                ' of record: %d [%d absolute offset]'%(psi.index+1,
+                                    offset, offset+record_offset))
         # }}}

-        if tbs_type == 3: # {{{
-            arg2, consumed = decint(byts)
-            byts = byts[consumed:]
-            ans.append('Unknown (vwi: always 0?): %d'%arg2)
-
-            arg3, consumed = decint(byts)
-            byts = byts[consumed:]
-            fsi = arg3 >> 4
-            flags = arg3 & 0b1111
-            ans.append('First section index (fvwi): %d'%fsi)
-            psi = self.get_index(fsi)
-            ans.append('Flags: %d'%flags)
-            if flags == 4:
-                ans.append('Number of articles in this section: %d'%byts[0])
-                byts = byts[1:]
-            elif flags == 0:
-                pass
-            else:
-                raise ValueError('Unknown flags value: %d'%flags)
-            byts = read_section_transitions(byts, psi)
-        # }}}
-
-        elif tbs_type == 7: # {{{
-            # This occurs for records that have no section nodes and
-            # whose parent section's index == 1
-            ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2]))
-            byts = byts[2:]
-            arg, consumed = decint(byts)
-            byts = byts[consumed:]
-            ai = arg >> 4
-            flags = arg & 0b1111
-            ans.append('Article at start of record (fvwi): %d'%ai)
-            if flags == 4:
-                num = byts[0]
-                byts = byts[1:]
-                ans.append('Number of articles in record (byte): %d'%num)
-            elif flags == 0:
-                pass
-            elif flags == 1:
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                ans.append('EOF (vwi: should be 0): %d'%arg)
-            else:
-                raise ValueError('Unknown flags value: %d'%flags)
-        # }}}
-
-        elif tbs_type == 6: # {{{
-            # This is used for records spanned by an article whose parent
-            # section's index == 1 or for the opening record if it contains the
-            # periodical start, section 1 start and at least one article. The
-            # two cases are distinguished by the flags on the article index
-            # vwi.
-            unk = byts[0]
-            byts = byts[1:]
-            ans.append('Unknown (byte: always 2?): %d'%unk)
-            byts = tbs_type_6(byts)
-        # }}}
-
-        elif tbs_type == 2: # {{{
-            # This occurs for records with no section nodes and whose parent
-            # section's index != 1 (undefined (records before the first
-            # section) or > 1)
-            # This is also used for records that are spanned by an article
-            # whose parent section index > 1. In this case the flags of the
-            # vwi referring to the article at the start
-            # of the record are set to 1 instead of 4.
-            arg, consumed = decint(byts)
-            byts = byts[consumed:]
-            flags = (arg & 0b1111)
-            psi = (arg >> 4)
-            ans.append('Parent section index (fvwi): %d'%psi)
-            psi = self.get_index(psi)
-            ans.append('Flags: %d'%flags)
-            if flags == 1:
-                arg, consumed = decint(byts)
-                byts = byts[consumed:]
-                ans.append('Unknown (vwi?: always 0?): %d'%arg)
-                byts = tbs_type_6(byts, psi=psi)
-            elif flags == 0:
-                byts = tbs_type_6(byts, psi=psi)
-            else:
-                raise ValueError('Unknown flags: %d'%flags)
-        # }}}
+        def read_starting_section(byts): # {{{
+            orig = byts
+            si, extra, consumed = decode_tbs(byts)
+            byts = byts[consumed:]
+            if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra:
+                raise ValueError('Dont know how to interpret flags %r'
+                        ' when reading starting section'%extra)
+            si = self.get_index(si)
+            ans.append('The section at the start of this record is:'
+                    ' %d'%si.index)
+            if 0b0100 in extra:
+                num = extra[0b0100]
+                ans.append('The number of articles from the section %d'
+                        ' in this record: %d'%(si.index, num))
+            elif 0b0001 in extra:
+                eof = extra[0b0001]
+                if eof != 0:
+                    raise ValueError('Unknown eof value %s when reading'
+                            ' starting section. All bytes: %r'%(eof, orig))
+                ans.append('This record is spanned by an article from'
+                        ' the section: %d'%si.index)
+            return si, byts
+        # }}}
+
+        if tbs_type & 0b0100:
+            # Starting section is the first section
+            ssi = self.get_index(1)
+        else:
+            ssi, byts = read_starting_section(byts)
+
+        byts = read_section_transitions(byts, ssi)

         return byts, ans

View File

@@ -3,6 +3,20 @@ Reverse engineering the trailing byte sequences for hierarchical periodicals

 In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing.

+Sequence encoding:
+
+0b1000 : Continuation bit
+
+First sequences:
+0b0010 : 80
+0b0011 : 80 80
+0b0110 : 80 2
+0b0111 : 80 2 80
+
+Other sequences:
+0b0101 : 4 1a
+0b0001 : c b1
+
 Opening record
 ----------------
@@ -52,10 +66,60 @@ The text record that contains the opening node for the periodical (depth=0 node

 If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record.

+Starting record with two section transitions::
+
+    Record #1: Starts at: 0 Ends at: 4095
+    Contains: 7 index entries (0 ends, 4 complete, 3 starts)
+    TBS bytes: 86 80 2 c0 b8 c4 3
+        Complete:
+            Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
+            Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
+            Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review]
+            Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
+        Starts:
+            Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader]
+            Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net]
+            Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
+    Remaining bytes: b8 c4 3
+
+Starting record with three section transitions::
+
+    Record #1: Starts at: 0 Ends at: 4095
+    Contains: 10 index entries (0 ends, 7 complete, 3 starts)
+    TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4
+        Complete:
+            Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
+            Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net]
+            Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
+            Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review]
+            Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results]
+            Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
+            Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
+        Starts:
+            Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader]
+            Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews]
+            Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
+    Remaining bytes: b8 c0 b8 c4 4
+
 Records with no nodes
 ------------------------

+subtype = 010
+
 These records are spanned by a single article. They are of two types:

 1. If the parent section index is 1, TBS type of 6, like this::
@@ -247,7 +311,7 @@ In such a record there is a transition from one section to the next. As such the
     Last article of ending section w.r.t. starting section offset (fvwi): 12 [15 absolute]
     Flags (always 8?): 8
     Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute]
-    Number of article nodes in the record (byte): 4
+    Number of article nodes in the record belonging to the last section (byte): 4

 Ending record
@@ -274,3 +338,26 @@ Logically, ending records must have at least one article ending, one section end

 If the record had only a single article end, the last two bytes would be replaced with: f0

+If the last record has multiple section transitions, it is of type 6 and looks like::
+
+    Record #9: Starts at: 32768 Ends at: 34953
+    Contains: 9 index entries (3 ends, 6 complete, 0 starts)
+    TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
+        Ends:
+            Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader]
+            Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
+            Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
+        Complete:
+            Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
+            Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews]
+            Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot]
+            Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
+            Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results]
+            Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80]
+    TBS Type: 110 (6)
+    Outer Index entry: 0
+    Unknown (vwi: always 0?): 0
+    Unknown (byte: always 2?): 2
+    Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute]
+    Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
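
The leading bytes of the dumps above can be checked against the decode_tbs scheme this commit introduces (see utils.py below). A simplified Python 3 re-implementation, ignoring the 0b1000 continuation flag, decoding the documented opening sequence 86 80 2::

    def decint(byts):
        # Forward vwi: 7 bits per byte, high bit set on the final byte.
        val = 0
        for i, b in enumerate(byts):
            val = (val << 7) | (b & 0x7f)
            if b & 0x80:
                return val, i + 1
        raise ValueError('Incomplete vwi')

    def decode_tbs(byts, flag_size=4):
        arg, consumed = decint(byts)
        val, flags = arg >> flag_size, arg & ((1 << flag_size) - 1)
        extra = {}
        if flags & 0b0010:            # flag 0b0010: a vwi follows
            x, c = decint(byts[consumed:])
            extra[0b0010], consumed = x, consumed + c
        if flags & 0b0100:            # flag 0b0100: a single raw byte follows
            extra[0b0100], consumed = byts[consumed], consumed + 1
        if flags & 0b0001:            # flag 0b0001: a vwi follows
            x, c = decint(byts[consumed:])
            extra[0b0001], consumed = x, consumed + c
        return val, extra, consumed

    decode_tbs(bytes([0x86, 0x80, 0x02]), flag_size=3)
    # -> (0, {0b010: 0, 0b100: 2}, 3): outermost index 0, flags 0b110,
    #    matching 'TBS bytes: 86 80 2 ...' and 'TBS Type: 110 (6)' above.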

View File

@@ -11,6 +11,7 @@ import struct
 from collections import OrderedDict

 from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
+from calibre.ebooks import normalize

 IMAGE_MAX_SIZE = 10 * 1024 * 1024

@@ -40,6 +41,9 @@ def encode_number_as_hex(num):
     number.
     '''
     num = bytes(hex(num)[2:].upper())
+    nlen = len(num)
+    if nlen % 2 != 0:
+        num = b'0'+num
     ans = bytearray(num)
     ans.insert(0, len(num))
     return bytes(ans)
@@ -65,11 +69,14 @@ def encint(value, forward=True):
     If forward is True the bytes returned are suitable for prepending to the
     output buffer, otherwise they must be append to the output buffer.
     '''
+    if value < 0:
+        raise ValueError('Cannot encode negative numbers as vwi')
     # Encode vwi
     byts = bytearray()
     while True:
         b = value & 0b01111111
         value >>= 7 # shift value to the right by 7 bits
         byts.append(b)
         if value == 0:
             break
@@ -184,7 +191,7 @@ def encode_trailing_data(raw):
         <data><size>

     where size is a backwards encoded vwi whose value is the length of the
-    entire return bytestring.
+    entire returned bytestring. data is the bytestring passed in as raw.

     This is the encoding used for trailing data entries at the end of text
     records. See get_trailing_data() for details.
@@ -197,3 +204,131 @@ def encode_trailing_data(raw):
         lsize += 1
     return raw + encoded
+
+def encode_fvwi(val, flags, flag_size=4):
+    '''
+    Encode the value val and the flag_size bits from flags as a fvwi. This
+    encoding is used in the trailing byte sequences for indexing. Returns the
+    encoded bytestring.
+    '''
+    ans = val << flag_size
+    for i in xrange(flag_size):
+        ans |= (flags & (1 << i))
+    return encint(ans)
+
+
+def decode_fvwi(byts, flag_size=4):
+    '''
+    Decode an encoded fvwi. Returns number, flags, consumed.
+    '''
+    arg, consumed = decint(bytes(byts))
+    val = arg >> flag_size
+    flags = 0
+    for i in xrange(flag_size):
+        flags |= (arg & (1 << i))
+    return val, flags, consumed
+
+
+def decode_tbs(byts, flag_size=4):
+    '''
+    Trailing byte sequences for indexing consist of a series of fvwi numbers.
+    This function reads the fvwi number and its associated flags. It then uses
+    the flags to read any more numbers that belong to the series. The flags are
+    the lowest 4 bits of the vwi (see the encode_fvwi function above).
+
+    Returns the fvwi number, a dictionary mapping flag bits to the associated
+    data and the number of bytes consumed.
+    '''
+    byts = bytes(byts)
+    val, flags, consumed = decode_fvwi(byts, flag_size=flag_size)
+    extra = {}
+    byts = byts[consumed:]
+    if flags & 0b1000 and flag_size > 3:
+        extra[0b1000] = True
+    if flags & 0b0010:
+        x, consumed2 = decint(byts)
+        byts = byts[consumed2:]
+        extra[0b0010] = x
+        consumed += consumed2
+    if flags & 0b0100:
+        extra[0b0100] = ord(byts[0])
+        byts = byts[1:]
+        consumed += 1
+    if flags & 0b0001:
+        x, consumed2 = decint(byts)
+        byts = byts[consumed2:]
+        extra[0b0001] = x
+        consumed += consumed2
+    return val, extra, consumed
+
+
+def encode_tbs(val, extra, flag_size=4):
+    '''
+    Encode the number val and the extra data in the extra dict as an fvwi. See
+    decode_tbs above.
+    '''
+    flags = 0
+    for flag in extra:
+        flags |= flag
+    ans = encode_fvwi(val, flags, flag_size=flag_size)
+
+    if 0b0010 in extra:
+        ans += encint(extra[0b0010])
+    if 0b0100 in extra:
+        ans += bytes(bytearray([extra[0b0100]]))
+    if 0b0001 in extra:
+        ans += encint(extra[0b0001])
+    return ans
+
+
+def utf8_text(text):
+    '''
+    Convert a possibly null string to utf-8 bytes, guaranteeing to return a
+    non-empty, normalized bytestring.
+    '''
+    if text and text.strip():
+        text = text.strip()
+        if not isinstance(text, unicode):
+            text = text.decode('utf-8', 'replace')
+        text = normalize(text).encode('utf-8')
+    else:
+        text = _('Unknown').encode('utf-8')
+    return text
+
+
+def align_block(raw, multiple=4, pad=b'\0'):
+    '''
+    Return raw with enough pad bytes appended to ensure its length is a
+    multiple of 4.
+    '''
+    extra = len(raw) % multiple
+    if extra == 0: return raw
+    return raw + pad*(multiple - extra)
+
+
+def detect_periodical(toc, log=None):
+    '''
+    Detect if the TOC object toc contains a periodical that conforms to the
+    structure required by kindlegen to generate a periodical.
+    '''
+    for node in toc.iterdescendants():
+        if node.depth() == 1 and node.klass != 'article':
+            if log is not None:
+                log.debug(
+                    'Not a periodical: Deepest node does not have '
+                    'class="article"')
+            return False
+        if node.depth() == 2 and node.klass != 'section':
+            if log is not None:
+                log.debug(
+                    'Not a periodical: Second deepest node does not have'
+                    ' class="section"')
+            return False
+        if node.depth() == 3 and node.klass != 'periodical':
+            if log is not None:
+                log.debug('Not a periodical: Third deepest node'
+                        ' does not have class="periodical"')
+            return False
+        if node.depth() > 3:
+            if log is not None:
+                log.debug('Not a periodical: Has nodes of depth > 3')
+            return False
+    return True
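
As a cross-check of the new helpers, encoding the 'type 110' starting sequence that the periodical indexer uses reproduces the 86 80 2 bytes documented in the TBS notes above. A minimal Python 3 sketch condensed from encint/encode_fvwi/encode_tbs::

    def encint(value):
        # Forward vwi: most significant 7-bit group first; the byte holding
        # the least significant group carries the high (terminator) bit.
        if value < 0:
            raise ValueError('Cannot encode negative numbers as vwi')
        byts = bytearray()
        while True:
            byts.append(value & 0x7f)
            value >>= 7
            if value == 0:
                break
        byts[0] |= 0x80
        return bytes(reversed(byts))

    def encode_tbs(val, extra, flag_size=4):
        flags = 0
        for flag in extra:
            flags |= flag
        ans = encint((val << flag_size) | flags)   # the fvwi
        if 0b0010 in extra:
            ans += encint(extra[0b0010])
        if 0b0100 in extra:
            ans += bytes(bytearray([extra[0b0100]]))
        if 0b0001 in extra:
            ans += encint(extra[0b0001])
        return ans

    encode_tbs(0, {0b010: 0, 0b100: 2}, flag_size=3)  # -> b'\x86\x80\x02'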

View File

@@ -10,35 +10,12 @@ __docformat__ = 'restructuredtext en'

 from struct import pack
 from cStringIO import StringIO
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict

-from calibre.ebooks import normalize
-from calibre.ebooks.mobi.writer2 import RECORD_SIZE
-from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex)
-from calibre.ebooks.mobi.langcodes import iana2mobi
-
-def utf8_text(text):
-    '''
-    Convert a possibly null string to utf-8 bytes, guaranteeing to return a non
-    empty, normalized bytestring.
-    '''
-    if text and text.strip():
-        text = text.strip()
-        if not isinstance(text, unicode):
-            text = text.decode('utf-8', 'replace')
-        text = normalize(text).encode('utf-8')
-    else:
-        text = _('Unknown').encode('utf-8')
-    return text
-
-def align_block(raw, multiple=4, pad=b'\0'):
-    '''
-    Return raw with enough pad bytes append to ensure its length is a multiple
-    of 4.
-    '''
-    extra = len(raw) % multiple
-    if extra == 0: return raw
-    return raw + pad*(multiple - extra)
+from calibre.ebooks.mobi.writer2 import RECORD_SIZE
+from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
+        encode_tbs, align_block, utf8_text, detect_periodical)
 class CNCX(object): # {{{

@@ -50,13 +27,12 @@ class CNCX(object): # {{{

     MAX_STRING_LENGTH = 500

-    def __init__(self, toc, opts):
+    def __init__(self, toc, is_periodical):
         self.strings = OrderedDict()

-        for item in toc:
-            if item is self.toc: continue
+        for item in toc.iterdescendants(breadth_first=True):
             self.strings[item.title] = 0
-            if opts.mobi_periodical:
+            if is_periodical:
                 self.strings[item.klass] = 0

         self.records = []
@@ -75,11 +51,10 @@ class CNCX(object): # {{{
                 self.records.append(buf.getvalue())
                 buf.truncate(0)
                 offset = len(self.records) * 0x10000
+            buf.write(raw)
             self.strings[key] = offset
             offset += len(raw)
+        buf.write(b'\0') # CNCX must end with zero byte
         self.records.append(align_block(buf.getvalue()))

     def __getitem__(self, string):
@@ -98,7 +73,7 @@ class IndexEntry(object): # {{{
             'first_child_index': 22,
             'last_child_index': 23,
             }
-    RTAG_MAP = dict(TAG_VALUES.itervalues(), TAG_VALUES.iterkeys())
+    RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()}

     BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,]

@@ -113,6 +88,17 @@ class IndexEntry(object): # {{{
         self.first_child_index = None
         self.last_child_index = None

+    def __repr__(self):
+        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
+                ' parent_index=%r)')%(self.offset, self.depth, self.length,
+                        self.index, self.parent_index)
+
+    @dynamic_property
+    def size(self):
+        def fget(self): return self.length
+        def fset(self, val): self.length = val
+        return property(fget=fget, fset=fset, doc='Alias for length')
+
     @classmethod
     def tagx_block(cls, for_periodical=True):
         buf = bytearray()
@@ -137,7 +123,7 @@ class IndexEntry(object): # {{{
             buf.append(1)

         header = b'TAGX'
-        header += pack(b'>I', len(buf)) # table length
+        header += pack(b'>I', 12+len(buf)) # table length
         header += pack(b'>I', 1) # control byte count

         return header + bytes(buf)
@@ -159,7 +145,7 @@ class IndexEntry(object): # {{{
     def entry_type(self):
         ans = 0
         for tag in self.tag_nums:
-            ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x
+            ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x
         return ans

     @property
@@ -174,7 +160,7 @@ class IndexEntry(object): # {{{
             val = getattr(self, attr)
             buf.write(encint(val))

-        ans = buf.get_value()
+        ans = buf.getvalue()
         return ans

 # }}}
@@ -186,16 +172,151 @@ class TBS(object): # {{{
     trailing byte sequence for the record.
     '''

-    def __init__(self, data, is_periodical):
+    def __init__(self, data, is_periodical, first=False, section_map={},
+            after_first=False):
+        self.section_map = section_map
+        #import pprint
+        #pprint.pprint(data)
+        #print()
         if is_periodical:
-            self.periodical_tbs(data)
+            # The starting bytes.
+            # The value is zero which I think indicates the periodical
+            # index entry. The values for the various flags seem to be
+            # unused. If the 0b100 is present, it means that the record
+            # deals with section 1 (or is the final record with section
+            # transitions).
+            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
+            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
+                    flag_size=3)
+            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
+                    flag_size=3)
+            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001:
+                0}, flag_size=3)
+
+            if not data:
+                byts = b''
+                if after_first:
+                    # This can happen if a record contains only text between
+                    # the periodical start and the first section
+                    byts = self.type_011
+                self.bytestring = byts
+            else:
+                depth_map = defaultdict(list)
+                for x in ('starts', 'ends', 'completes'):
+                    for idx in data[x]:
+                        depth_map[idx.depth].append(idx)
+                for l in depth_map.itervalues():
+                    l.sort(key=lambda x:x.offset)
+                self.periodical_tbs(data, first, depth_map)
         else:
-            self.book_tbs(data)
+            if not data:
+                self.bytestring = b''
+            else:
+                self.book_tbs(data, first)

-    def periodical_tbs(self, data):
-        self.bytestring = b''
+    def periodical_tbs(self, data, first, depth_map):
+        buf = StringIO()

-    def book_tbs(self, data):
+        has_section_start = (depth_map[1] and
+                set(depth_map[1]).intersection(set(data['starts'])))
+        spanner = data['spans']
+        parent_section_index = -1
+
+        if depth_map[0]:
+            # We have a terminal record
+
+            # Find the first non periodical node
+            first_node = None
+            for nodes in (depth_map[1], depth_map[2]):
+                for node in nodes:
+                    if (first_node is None or (node.offset, node.depth) <
+                            (first_node.offset, first_node.depth)):
+                        first_node = node
+
+            typ = (self.type_110 if has_section_start else self.type_010)
+
+            # parent_section_index is needed for the last record
+            if first_node is not None and first_node.depth > 0:
+                parent_section_index = (first_node.index if first_node.depth
+                        == 1 else first_node.parent_index)
+            else:
+                parent_section_index = max(self.section_map.iterkeys())
+        else:
+            # Non terminal record
+
+            if spanner is not None:
+                # record is spanned by a single article
+                parent_section_index = spanner.parent_index
+                typ = (self.type_110 if parent_section_index == 1 else
+                        self.type_010)
+            elif not depth_map[1]:
+                # has only article nodes, i.e. spanned by a section
+                parent_section_index = depth_map[2][0].parent_index
+                typ = (self.type_111 if parent_section_index == 1 else
+                        self.type_010)
+            else:
+                # has section transitions
+                if depth_map[2]:
+                    parent_section_index = depth_map[2][0].parent_index
+                else:
+                    parent_section_index = depth_map[1][0].index
+                typ = self.type_011
+
+        buf.write(typ)
+
+        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
+            # Write starting section information
+            if spanner is None:
+                num_articles = len([a for a in depth_map[1] if a.parent_index
+                    == parent_section_index])
+                extra = {}
+                if num_articles > 1:
+                    extra = {0b0100: num_articles}
+            else:
+                extra = {0b0001: 0}
+            buf.write(encode_tbs(parent_section_index, extra))
+
+        if spanner is None:
+            articles = depth_map[2]
+            sections = set([self.section_map[a.parent_index] for a in
+                articles])
+            sections = sorted(sections, key=lambda x:x.offset)
+            section_map = {s:[a for a in articles if a.parent_index ==
+                s.index] for s in sections}
+            for i, section in enumerate(sections):
+                # All the articles in this record that belong to section
+                articles = section_map[section]
+                first_article = articles[0]
+                last_article = articles[-1]
+                num = len(articles)
+
+                try:
+                    next_sec = sections[i+1]
+                except:
+                    next_sec = None
+
+                extra = {}
+                if num > 1:
+                    extra[0b0100] = num
+                if i == 0 and next_sec is not None:
+                    # Write offset to next section from start of record
+                    # For some reason kindlegen only writes this offset
+                    # for the first section transition. Imitate it.
+                    extra[0b0001] = next_sec.offset - data['offset']
+
+                buf.write(encode_tbs(first_article.index-section.index, extra))
+
+                if next_sec is not None:
+                    buf.write(encode_tbs(last_article.index-next_sec.index,
+                        {0b1000: 0}))
+        else:
+            buf.write(encode_tbs(spanner.index - parent_section_index,
+                {0b0001: 0}))
+
+        self.bytestring = buf.getvalue()
+
+    def book_tbs(self, data, first):
         self.bytestring = b''
 # }}}
@@ -211,16 +332,18 @@ class Indexer(object): # {{{
         self.log = oeb.log
         self.opts = opts

-        self.is_periodical = opts.mobi_periodical
+        self.is_periodical = detect_periodical(self.oeb.toc, self.log)
+        self.log('Generating MOBI index for a %s'%('periodical' if
+            self.is_periodical else 'book'))
         self.is_flat_periodical = False
-        if opts.mobi_periodical:
+        if self.is_periodical:
             periodical_node = iter(oeb.toc).next()
             sections = tuple(periodical_node)
             self.is_flat_periodical = len(sections) == 1

         self.records = []

-        self.cncx = CNCX(oeb.toc, opts)
+        self.cncx = CNCX(oeb.toc, self.is_periodical)

         if self.is_periodical:
             self.indices = self.create_periodical_index()
@@ -297,14 +420,13 @@ class Indexer(object): # {{{
         buf.write(pack(b'>I', 0)) # Filled in later

         # Number of index records 24-28
-        buf.write(pack('b>I', len(self.records)))
+        buf.write(pack(b'>I', len(self.records)))

         # Index Encoding 28-32
         buf.write(pack(b'>I', 65001)) # utf-8

-        # Index language 32-36
-        buf.write(iana2mobi(
-            str(self.oeb.metadata.language[0])))
+        # Unknown 32-36
+        buf.write(b'\xff'*4)

         # Number of index entries 36-40
         buf.write(pack(b'>I', len(self.indices)))
@@ -349,7 +471,7 @@ class Indexer(object): # {{{

         idxt_offset = buf.tell()
         buf.write(b'IDXT')
-        buf.write(header_length + len(tagx_block))
+        buf.write(pack(b'>H', header_length + len(tagx_block)))
         buf.write(b'\0')
         buf.seek(20)
         buf.write(pack(b'>I', idxt_offset))
@@ -373,12 +495,12 @@ class Indexer(object): # {{{
                 continue
             seen.add(offset)
             index = IndexEntry(offset, label)
-            self.indices.append(index)
+            indices.append(index)

         indices.sort(key=lambda x:x.offset)

         # Set lengths
-        for i, index in indices:
+        for i, index in enumerate(indices):
             try:
                 next_offset = indices[i+1].offset
             except:
@@ -389,11 +511,11 @@ class Indexer(object): # {{{
         indices = [i for i in indices if i.length > 0]

         # Set index values
-        for i, index in indices:
+        for i, index in enumerate(indices):
             index.index = i

         # Set lengths again to close up any gaps left by filtering
-        for i, index in indices:
+        for i, index in enumerate(indices):
             try:
                 next_offset = indices[i+1].offset
             except:
@@ -459,7 +581,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, normalized_articles = x
             try:
-                sec.length = normalized_sections[s+1].offset - sec.offset
+                sec.length = normalized_sections[s+1][0].offset - sec.offset
             except:
                 sec.length = self.serializer.body_end_offset - sec.offset
             for i, art in enumerate(normalized_articles):
@@ -475,17 +597,18 @@ class Indexer(object): # {{{
                     normalized_articles))
             normalized_sections[i] = (sec, normalized_articles)

-        normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1],
+        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
             normalized_sections))

         # Set indices
         i = 0
-        for sec, normalized_articles in normalized_sections:
+        for sec, articles in normalized_sections:
             i += 1
             sec.index = i
+            sec.parent_index = 0

-        for sec, normalized_articles in normalized_sections:
-            for art in normalized_articles:
+        for sec, articles in normalized_sections:
+            for art in articles:
                 i += 1
                 art.index = i
                 art.parent_index = sec.index
@@ -498,7 +621,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_offset = normalized_sections[s+1].offset
+                next_offset = normalized_sections[s+1][0].offset
             except:
                 next_offset = self.serializer.body_end_offset
             sec.length = next_offset - sec.offset
@@ -514,7 +637,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_sec = normalized_sections[s+1]
+                next_sec = normalized_sections[s+1][0]
             except:
                 if (sec.length == 0 or sec.next_offset !=
                         self.serializer.body_end_offset):
@@ -546,17 +669,29 @@ class Indexer(object): # {{{
         return indices
     # }}}

+    # TBS {{{
     def calculate_trailing_byte_sequences(self):
         self.tbs_map = {}
+        found_node = False
+        sections = [i for i in self.indices if i.depth == 1]
+        section_map = OrderedDict((i.index, i) for i in
+                sorted(sections, key=lambda x:x.offset))
+        deepest = max(i.depth for i in self.indices)
         for i in xrange(self.number_of_text_records):
             offset = i * RECORD_SIZE
             next_offset = offset + RECORD_SIZE
-            data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
-                ('spans', None)])
+            data = {'ends':[], 'completes':[], 'starts':[],
+                    'spans':None, 'offset':offset, 'record_number':i+1}
             for index in self.indices:
                 if index.offset >= next_offset:
                     # Node starts after current record
-                    break
+                    if index.depth == deepest:
+                        break
+                    else:
+                        continue
                 if index.next_offset <= offset:
                     # Node ends before current record
                     continue
@@ -572,12 +707,21 @@ class Indexer(object): # {{{
                 if index.next_offset <= next_offset:
                     # Node ends in current record
                     data['ends'].append(index)
-                else:
+                elif index.depth == deepest:
                     data['spans'] = index
-            self.tbs_map[i+1] = TBS(data, self.is_periodical)
+            if (data['ends'] or data['completes'] or data['starts'] or
+                    data['spans'] is not None):
+                self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not
+                        found_node, section_map=section_map)
+                found_node = True
+            else:
+                self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False,
+                        after_first=found_node, section_map=section_map)

     def get_trailing_byte_sequence(self, num):
         return self.tbs_map[num].bytestring
+    # }}}
 # }}}
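
The classification loop above is the heart of TBS generation: every index node is binned by how it overlaps the current 4096-byte text record. Below is a minimal, self-contained sketch of that overlap test; Node and classify are hypothetical stand-ins for calibre's index entries, with RECORD_SIZE matching the constant used in the diff.

    from collections import namedtuple

    RECORD_SIZE = 0x1000  # 4096-byte text records

    # Hypothetical stand-in for an index entry: offset/next_offset delimit
    # the node's byte range in the uncompressed text.
    Node = namedtuple('Node', 'index offset next_offset depth')

    def classify(node, record_number):
        """Return how `node` overlaps text record `record_number` (1-based)."""
        start = (record_number - 1) * RECORD_SIZE
        end = start + RECORD_SIZE
        if node.offset >= end or node.next_offset <= start:
            return None          # no overlap with this record
        if node.offset >= start and node.next_offset <= end:
            return 'completes'   # wholly inside the record
        if node.offset >= start:
            return 'starts'      # begins here, ends in a later record
        if node.next_offset <= end:
            return 'ends'        # began earlier, ends here
        return 'spans'           # covers the entire record

    # An article occupying bytes 3000-9000 touches three records:
    n = Node(index=1, offset=3000, next_offset=9000, depth=2)
    print([classify(n, r) for r in (1, 2, 3)])  # ['starts', 'spans', 'ends']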

View File

@@ -29,7 +29,6 @@ EXTH_CODES = {
     'identifier': 104,
     'subject': 105,
     'pubdate': 106,
-    'date': 106,
     'review': 107,
     'contributor': 108,
     'rights': 109,
@@ -55,6 +54,7 @@ class MobiWriter(object):
         self.last_text_record_idx = 1

     def __call__(self, oeb, path_or_stream):
+        self.log = oeb.log
         if hasattr(path_or_stream, 'write'):
             return self.dump_stream(oeb, path_or_stream)
         with open(path_or_stream, 'w+b') as stream:
@@ -90,6 +90,7 @@ class MobiWriter(object):
         self.primary_index_record_idx = None
         try:
             self.indexer = Indexer(self.serializer, self.last_text_record_idx,
+                    len(self.records[self.last_text_record_idx]),
                     self.opts, self.oeb)
         except:
             self.log.exception('Failed to generate MOBI index:')
@@ -98,9 +99,13 @@ class MobiWriter(object):
             for i in xrange(len(self.records)):
                 if i == 0: continue
                 tbs = self.indexer.get_trailing_byte_sequence(i)
-                self.records[i] += tbs
+                self.records[i] += encode_trailing_data(tbs)
             self.records.extend(self.indexer.records)

+    @property
+    def is_periodical(self):
+        return (self.primary_index_record_idx is None or not
+                self.indexer.is_periodical)
+
     # }}}
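
Appending encode_trailing_data(tbs) instead of the raw bytes matters because MOBI trailing entries are self-delimiting: the payload is followed by its own total size as a variable-width integer that can be read backwards from the end of the record. A sketch of that scheme as I read it; the helper names here are illustrative, not calibre's actual API.

    def encint_backward(value):
        # 7-bit groups, most significant first; the high bit marks the
        # first byte, so a scanner reading backwards from the record end
        # knows where the integer begins.
        groups = []
        while True:
            groups.insert(0, value & 0x7f)
            value >>= 7
            if not value:
                break
        groups[0] |= 0x80
        return bytes(bytearray(groups))

    def encode_trailing_data(raw):
        # The encoded size counts its own bytes, so grow the size field
        # until it is self-consistent (1 byte covers payloads up to 126).
        lsize = 1
        while True:
            encoded = encint_backward(len(raw) + lsize)
            if len(encoded) == lsize:
                break
            lsize += 1
        return raw + encoded

    assert encode_trailing_data(b'x' * 9) == b'x' * 9 + b'\x8a'  # 0x8a = 0x80 | 10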
@@ -193,7 +198,6 @@ class MobiWriter(object):
         self.serializer = Serializer(self.oeb, self.images,
                 write_page_breaks_after_item=self.write_page_breaks_after_item)
         text = self.serializer()
-        self.content_length = len(text)
         self.text_length = len(text)
         text = StringIO(text)
         nrecords = 0
@@ -201,21 +205,16 @@ class MobiWriter(object):
         if self.compression != UNCOMPRESSED:
             self.oeb.logger.info('  Compressing markup content...')

-        data, overlap = self.read_text_record(text)
-        while len(data) > 0:
+        while text.tell() < self.text_length:
+            data, overlap = self.read_text_record(text)
             if self.compression == PALMDOC:
                 data = compress_doc(data)
-            record = StringIO()
-            record.write(data)
-            self.records.append(record.getvalue())
+
+            data += overlap
+            data += pack(b'>B', len(overlap))
+
+            self.records.append(data)
             nrecords += 1
-            data, overlap = self.read_text_record(text)
-            # Write information about the mutibyte character overlap, if any
-            record.write(overlap)
-            record.write(pack(b'>B', len(overlap)))

         self.last_text_record_idx = nrecords
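
The rewritten loop relies on read_text_record returning a (data, overlap) pair, where overlap holds the few bytes that complete a multibyte UTF-8 character cut by the 4096-byte record boundary; they are duplicated at the end of the record, followed by a single count byte. A self-contained sketch of that boundary handling, assuming this reading of the format (split_records and utf8_overlap are illustrative helpers, not calibre functions):

    from struct import pack

    RECORD_SIZE = 4096

    def utf8_overlap(next_chunk):
        """Leading continuation bytes (0b10xxxxxx) of the next record:
        they finish a character that started in the current record."""
        n = 0
        for byte in bytearray(next_chunk[:3]):  # UTF-8 needs at most 3 extras
            if (byte & 0xc0) == 0x80:
                n += 1
            else:
                break
        return next_chunk[:n]

    def split_records(text):
        records = []
        pos = 0
        while pos < len(text):
            data = text[pos:pos + RECORD_SIZE]
            pos += len(data)
            overlap = utf8_overlap(text[pos:])
            # Duplicate the spill-over bytes, then say how many there are.
            records.append(data + overlap + pack(b'>B', len(overlap)))
        return records

On the reading side the count byte tells the decoder how many duplicated bytes to strip from each record before concatenating, which is what lets every record decode cleanly on its own.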
@@ -276,8 +275,19 @@ class MobiWriter(object):
         exth = self.build_exth()
         last_content_record = len(self.records) - 1

+        # FCIS/FLIS (Seem to serve no purpose)
+        flis_number = len(self.records)
+        self.records.append(
+            b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
+            b'\xff'*4)
+        fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
+        fcis += pack(b'>I', self.text_length)
+        fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
+        fcis_number = len(self.records)
+        self.records.append(fcis)
+
         # EOF record
-        self.records.append('\xE9\x8E\x0D\x0A')
+        self.records.append(b'\xE9\x8E\x0D\x0A')

         record0 = StringIO()
         # The MOBI Header
@@ -307,8 +317,15 @@ class MobiWriter(object):
         # 0x10 - 0x13 : UID
         # 0x14 - 0x17 : Generator version

+        bt = 0x002
+        if self.primary_index_record_idx is not None:
+            if self.indexer.is_flat_periodical:
+                bt = 0x102
+            elif self.indexer.is_periodical:
+                bt = 0x103
+
         record0.write(pack(b'>IIIII',
-            0xe8, 0x002, 65001, uid, 6))
+            0xe8, bt, 65001, uid, 6))

         # 0x18 - 0x1f : Unknown
         record0.write(b'\xff' * 8)
@@ -337,7 +354,8 @@ class MobiWriter(object):
         # 0x58 - 0x5b : Format version
         # 0x5c - 0x5f : First image record number
         record0.write(pack(b'>II',
-            6, self.first_image_record if self.first_image_record else 0))
+            6, self.first_image_record if self.first_image_record else
+            len(self.records)-1))

         # 0x60 - 0x63 : First HUFF/CDIC record number
         # 0x64 - 0x67 : Number of HUFF/CDIC records
@@ -346,7 +364,12 @@ class MobiWriter(object):
         record0.write(b'\0' * 16)

         # 0x70 - 0x73 : EXTH flags
-        record0.write(pack(b'>I', 0x50))
+        # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
+        # The purpose of the other bits is unknown
+        exth_flags = 0b1011000
+        if self.is_periodical:
+            exth_flags |= 0b1000
+        record0.write(pack(b'>I', exth_flags))

         # 0x74 - 0x93 : Unknown
         record0.write(b'\0' * 32)
@@ -371,13 +394,13 @@ class MobiWriter(object):
         record0.write(b'\0\0\0\x01')

         # 0xb8 - 0xbb : FCIS record number
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', fcis_number))

         # 0xbc - 0xbf : Unknown (FCIS record count?)
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', 1))

         # 0xc0 - 0xc3 : FLIS record number
-        record0.write(pack(b'>I', 0xffffffff))
+        record0.write(pack(b'>I', flis_number))

         # 0xc4 - 0xc7 : Unknown (FLIS record count?)
         record0.write(pack(b'>I', 1))
@@ -469,25 +492,33 @@ class MobiWriter(object):
             nrecs += 1

         # Write cdetype
-        if not self.opts.mobi_periodical:
+        if self.is_periodical:
             data = b'EBOK'
             exth.write(pack(b'>II', 501, len(data)+8))
             exth.write(data)
             nrecs += 1

         # Add a publication date entry
-        if oeb.metadata['date'] != [] :
+        if oeb.metadata['date']:
             datestr = str(oeb.metadata['date'][0])
-        elif oeb.metadata['timestamp'] != [] :
+        elif oeb.metadata['timestamp']:
             datestr = str(oeb.metadata['timestamp'][0])

         if datestr is not None:
+            datestr = bytes(datestr)
+            datestr = datestr.replace(b'+00:00', b'Z')
             exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
             exth.write(datestr)
             nrecs += 1
         else:
             raise NotImplementedError("missing date or timestamp needed for mobi_periodical")

+        # Write the same creator info as kindlegen 1.2
+        for code, val in [(204, 202), (205, 1), (206, 2), (207, 33307)]:
+            exth.write(pack(b'>II', code, 12))
+            exth.write(pack(b'>I', val))
+            nrecs += 1
+
         if (oeb.metadata.cover and
                 unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
             id = unicode(oeb.metadata.cover[0])
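
Every EXTH entry, including the kindlegen-identity records (204-207) added above, shares one wire layout: a 4-byte tag, a 4-byte total length that counts the 8-byte header itself, then the payload. That is why the code writes len(data)+8 for the string-valued cdetype and can hard-code 12 for the 4-byte integer entries. A quick check with an illustrative helper:

    from struct import pack

    def exth_record(tag, data):
        # The length field covers the tag and length fields plus the payload.
        return pack(b'>II', tag, len(data) + 8) + data

    # String entry: cdetype (501) with a 4-byte payload -> length 12.
    assert exth_record(501, b'EBOK') == pack(b'>II', 501, 12) + b'EBOK'

    # Integer entry: creator code 204 carrying a 4-byte value -> also 12.
    assert exth_record(204, pack(b'>I', 202)) == \
            pack(b'>II', 204, 12) + pack(b'>I', 202)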

View File

@@ -1680,11 +1680,18 @@ class TOC(object):
             return True
         return False

-    def iterdescendants(self):
+    def iterdescendants(self, breadth_first=False):
         """Iterate over all descendant nodes in depth-first order."""
-        for child in self.nodes:
-            for node in child.iter():
-                yield node
+        if breadth_first:
+            for child in self.nodes:
+                yield child
+            for child in self.nodes:
+                for node in child.iterdescendants(breadth_first=True):
+                    yield node
+        else:
+            for child in self.nodes:
+                for node in child.iter():
+                    yield node

     def __iter__(self):
         """Iterate over all immediate child nodes."""

View File

@@ -165,6 +165,7 @@ class PDFWriter(QObject): # {{{
             printer = get_pdf_printer(self.opts)
             printer.setOutputFileName(item_path)
             self.view.print_(printer)
+            printer.abort()
         self._render_book()

     def _delete_tmpdir(self):
@@ -186,6 +187,7 @@ class PDFWriter(QObject): # {{{
             draw_image_page(printer, painter, p,
                     preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
             painter.end()
+            printer.abort()

     def _write(self):

View File

@@ -24,7 +24,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
     def open(self, parent=None, detail_item=None, external=False):
         url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
-        url_details = ('http://ad.zanox.com/ppc/?18845780C1371495675T&ULP=[['
+        url_details = ('http://ad.zanox.com/ppc/?18848208C1197627693T&ULP=[['
                 'http://www.libri.de/shop/action/productDetails?artiId={0}]]')

         if external or self.config.get('open_external', False):

View File

@@ -1892,7 +1892,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             yield r[iindex]

     def get_next_series_num_for(self, series):
-        series_id = self.conn.get('SELECT id from series WHERE name=?',
+        series_id = None
+        if series:
+            series_id = self.conn.get('SELECT id from series WHERE name=?',
                 (series,), all=False)
         if series_id is None:
             if isinstance(tweaks['series_index_auto_increment'], (int, float)):
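
The new guard matters because the callers changed later in this commit pass whatever mi.series holds, which is None (or empty) for books outside any series; the old code would have run the lookup with a NULL name. A condensed model of the resulting behaviour, where the dict stands in for the series table and the "highest existing index plus one" rule is my reading of the default 'next' tweak:

    tweaks = {'series_index_auto_increment': 'next'}

    def get_next_series_num_for(series, series_table):
        # series_table maps series name -> series_index values already
        # in the library; a toy stand-in for the SQL lookups.
        indices = series_table.get(series) if series else None
        if indices is None:
            # No series given, or a brand-new series: use the tweak if
            # it is numeric, otherwise start at 1.0.
            tweak = tweaks['series_index_auto_increment']
            return float(tweak) if isinstance(tweak, (int, float)) else 1.0
        return max(indices) + 1.0  # 'next': one past the highest index

    print(get_next_series_num_for(None, {'Foo': [1.0, 2.0]}))   # 1.0
    print(get_next_series_num_for('Foo', {'Foo': [1.0, 2.0]}))  # 3.0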
@@ -3023,8 +3025,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             stream.seek(0)
             mi = get_metadata(stream, format, use_libprs_metadata=False)
             stream.seek(0)
-            if not mi.series_index:
-                mi.series_index = 1.0
+            if mi.series_index is None:
+                mi.series_index = self.get_next_series_num_for(mi.series)
             mi.tags = [_('News')]
             if arg['add_title_tag']:
                 mi.tags += [arg['title']]
@@ -3076,7 +3078,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         self._add_newbook_tag(mi)
         if not add_duplicates and self.has_book(mi):
             return None
-        series_index = 1.0 if mi.series_index is None else mi.series_index
+        series_index = self.get_next_series_num_for(mi.series) \
+            if mi.series_index is None else mi.series_index
         aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
         title = mi.title
         if isbytestring(aus):
@@ -3123,7 +3126,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             if not add_duplicates and self.has_book(mi):
                 duplicates.append((path, format, mi))
                 continue
-            series_index = 1.0 if mi.series_index is None else mi.series_index
+            series_index = self.get_next_series_num_for(mi.series) \
+                if mi.series_index is None else mi.series_index
             aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors)
             title = mi.title
             if isinstance(aus, str):
@@ -3157,7 +3161,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
     def import_book(self, mi, formats, notify=True, import_hooks=True,
             apply_import_tags=True, preserve_uuid=False):
-        series_index = 1.0 if mi.series_index is None else mi.series_index
+        series_index = self.get_next_series_num_for(mi.series) \
+            if mi.series_index is None else mi.series_index
         if apply_import_tags:
             self._add_newbook_tag(mi)
         if not mi.title: