Sync to trunk.

This commit is contained in:
John Schember 2012-04-24 19:58:01 -04:00
commit bc2aa8d9b5
23 changed files with 548 additions and 151 deletions

44
recipes/berria.recipe Normal file
View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
'''
www.berria.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Berria(BasicNewsRecipe):
    '''
    Recipe for Berria (www.berria.info).

    Articles are collected from the per-section RSS feeds listed in
    ``feeds``; the remaining attributes configure how the articles are
    selected and cleaned up.
    '''

    # Descriptive metadata
    title = 'Berria'
    __author__ = 'Alayn Gortazar'
    description = 'Euskal Herriko euskarazko egunkaria'
    publisher = 'Berria'
    category = 'news, politics, sports, Basque Country'
    language = 'eu'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'

    # Download behaviour
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    # Parts of each article page that are kept
    keep_only_tags = [
        {'id': 'goiburua'},
        {'name': 'div', 'attrs': {'class': ['ber_ikus']}},
        {'name': 'section', 'attrs': {'class': 'ber_ikus'}},
    ]

    # Parts of each article page that are dropped
    remove_tags = [
        {'name': 'a', 'attrs': {'class': 'iruzkinak'}},
        {'name': 'div', 'attrs': {'class': 'laguntzaileak'}},
    ]

    # One rule per fragment; the fragments concatenate to a single string
    extra_css = (
        '#goiburua{font-weight: bold} '
        '.zintiloa{font-size: small} '
        '.sarrera{color:#666} '
        '.titularra{font-size: x-large} '
        '.sarrera{font-weight: bold} '
        '.argazoin{color:#666; font-size: small}'
    )

    # (section title, RSS feed URL)
    feeds = [
        (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
        (u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
        (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
        (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
        (u'Mundua', u'http://berria.info/rss/mundua.xml'),
        (u'Kirola', u'http://berria.info/rss/kirola.xml'),
        (u'Plaza', u'http://berria.info/rss/plaza.xml'),
    ]

View File

@ -34,7 +34,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
no_javascript = True
remove_empty_feeds = True
encoding = 'utf-8'
remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]
remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-colon'}, {'class':'hcf-date hcf-separate'}]
def print_version(self, url):
url = url.split('/')
@ -51,6 +51,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
return ''.join(div.findAll(text=True, recursive=False)).strip() if div is not None else None
articles = {}
links = set()
key = None
ans = []
maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
@ -59,7 +60,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
if div['class'] == 'hcf-header':
try:
key = string.capwords(feed_title(div.em.a))
key = string.capwords(feed_title(div.em))
articles[key] = []
ans.append(key)
except:
@ -70,6 +71,12 @@ class TagesspiegelRSS(BasicNewsRecipe):
if not a:
continue
url = 'http://www.tagesspiegel.de' + a['href']
# check for duplicates
if url in links:
continue
links.add(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')

View File

@ -29,7 +29,7 @@ class ANDROID(USBMS):
0xc86 : [0x100, 0x0227, 0x0226, 0x222],
0xc87 : [0x0100, 0x0227, 0x0226],
0xc8d : [0x100, 0x0227, 0x0226, 0x222],
0xc91 : [0x0100, 0x0227, 0x0226],
0xc91 : [0x0100, 0x0227, 0x0226, 0x222],
0xc92 : [0x100, 0x0227, 0x0226, 0x222],
0xc97 : [0x100, 0x0227, 0x0226, 0x222],
0xc99 : [0x100, 0x0227, 0x0226, 0x222],

View File

@ -165,6 +165,8 @@ class MOBIOutput(OutputFormatPlugin):
self.log, self.opts, self.oeb = log, opts, oeb
mobi_type = tweaks.get('test_mobi_output_type', 'old')
if self.is_periodical:
mobi_type = 'old' # Amazon does not support KF8 periodicals
create_kf8 = mobi_type in ('new', 'both')
self.remove_html_cover()
@ -172,7 +174,8 @@ class MOBIOutput(OutputFormatPlugin):
add_fonts=create_kf8)
self.check_for_periodical()
kf8 = self.create_kf8(resources) if create_kf8 else None
kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
) if create_kf8 else None
if mobi_type == 'new':
kf8.write(output_path)
self.extract_mobi(output_path, opts)
@ -181,9 +184,10 @@ class MOBIOutput(OutputFormatPlugin):
self.log('Creating MOBI 6 output')
self.write_mobi(input_plugin, output_path, kf8, resources)
def create_kf8(self, resources):
def create_kf8(self, resources, for_joint=False):
from calibre.ebooks.mobi.writer8.main import create_kf8_book
return create_kf8_book(self.oeb, self.opts, resources)
return create_kf8_book(self.oeb, self.opts, resources,
for_joint=for_joint)
def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer

View File

@ -148,6 +148,7 @@ class HeuristicProcessor(object):
return wordcount.words
def markup_italicis(self, html):
self.log.debug("\n\n\nitalicize debugging \n\n\n")
ITALICIZE_WORDS = [
'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
@ -156,27 +157,29 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_',
ur'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_',
ur'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/',
ur'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/',
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
]
for word in ITALICIZE_WORDS:
html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)
def sub(mo):
return '<i>%s</i>'%mo.group('words')
search_text = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
search_text = re.sub(r'<[^>]*>', '', search_text)
for pat in ITALICIZE_STYLE_PATS:
html = re.sub(pat, sub, html)
for match in re.finditer(pat, search_text):
ital_string = str(match.group('words'))
#self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
return html

View File

@ -327,7 +327,7 @@ class MOBIHeader(object): # {{{
self.primary_index_record, = struct.unpack(b'>I',
self.raw[244:248])
if self.file_version >= 8:
if self.length >= 248:
(self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
) = struct.unpack_from(b'>4L', self.raw, 248)
self.unknown9 = self.raw[264:self.length]
@ -337,11 +337,12 @@ class MOBIHeader(object): # {{{
# The following are all relative to the position of the header record
# make them absolute for ease of debugging
for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
self.relative_records = {'sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
'meta_orth_indx', 'huffman_record_offset',
'first_non_book_record', 'datp_record_offset', 'fcis_number',
'flis_number', 'primary_index_record', 'fdst_idx',
'first_image_index'):
'first_image_index'}
for x in self.relative_records:
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
setattr(self, x, self.header_offset+getattr(self, x))
@ -355,70 +356,79 @@ class MOBIHeader(object): # {{{
def __str__(self):
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
a = ans.append
i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
ans.append('Compression: %s'%self.compression)
ans.append('Unused: %r'%self.unused)
ans.append('Number of text records: %d'%self.number_of_text_records)
ans.append('Text record size: %d'%self.text_record_size)
ans.append('Encryption: %s'%self.encryption_type)
ans.append('Unknown: %r'%self.unknown)
ans.append('Identifier: %r'%self.identifier)
ans.append('Header length: %d'% self.length)
ans.append('Type: %s'%self.type)
ans.append('Encoding: %s'%self.encoding)
ans.append('UID: %r'%self.uid)
ans.append('File version: %d'%self.file_version)
i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
i('Meta Infl Index', self.meta_infl_indx)
ans.append('Secondary index record: %d (null val: %d)'%(
self.secondary_index_record, NULL_INDEX))
ans.append('Reserved: %r'%self.reserved)
ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
self.first_non_book_record))
ans.append('Full name offset: %d'%self.fullname_offset)
ans.append('Full name length: %d bytes'%self.fullname_length)
ans.append('Langcode: %r'%self.locale_raw)
ans.append('Language: %s'%self.language)
ans.append('Sub language: %s'%self.sublanguage)
ans.append('Input language: %r'%self.input_language)
ans.append('Output language: %r'%self.output_langauage)
ans.append('Min version: %d'%self.min_version)
ans.append('First Image index: %d'%self.first_image_index)
ans.append('Huffman record offset: %d'%self.huffman_record_offset)
ans.append('Huffman record count: %d'%self.huffman_record_count)
ans.append('DATP record offset: %r'%self.datp_record_offset)
ans.append('DATP record count: %r'%self.datp_record_count)
ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
def i(d, x):
x = 'NULL' if x == NULL_INDEX else x
a('%s: %s'%(d, x))
def r(d, attr):
x = getattr(self, attr)
if attr in self.relative_records and x != NULL_INDEX:
a('%s: Absolute: %d Relative: %d'%(d, x, x-self.header_offset))
else:
i(d, x)
a('Compression: %s'%self.compression)
a('Unused: %r'%self.unused)
a('Number of text records: %d'%self.number_of_text_records)
a('Text record size: %d'%self.text_record_size)
a('Encryption: %s'%self.encryption_type)
a('Unknown: %r'%self.unknown)
a('Identifier: %r'%self.identifier)
a('Header length: %d'% self.length)
a('Type: %s'%self.type)
a('Encoding: %s'%self.encoding)
a('UID: %r'%self.uid)
a('File version: %d'%self.file_version)
r('Meta Orth Index', 'meta_orth_indx')
r('Meta Infl Index', 'meta_infl_indx')
r('Secondary index record', 'secondary_index_record')
a('Reserved: %r'%self.reserved)
r('First non-book record', 'first_non_book_record')
a('Full name offset: %d'%self.fullname_offset)
a('Full name length: %d bytes'%self.fullname_length)
a('Langcode: %r'%self.locale_raw)
a('Language: %s'%self.language)
a('Sub language: %s'%self.sublanguage)
a('Input language: %r'%self.input_language)
a('Output language: %r'%self.output_langauage)
a('Min version: %d'%self.min_version)
r('First Image index', 'first_image_index')
r('Huffman record offset', 'huffman_record_offset')
a('Huffman record count: %d'%self.huffman_record_count)
r('DATP record offset', 'datp_record_offset')
a('DATP record count: %r'%self.datp_record_count)
a('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
if self.has_drm_data:
ans.append('Unknown3: %r'%self.unknown3)
ans.append('DRM Offset: %s'%self.drm_offset)
ans.append('DRM Count: %s'%self.drm_count)
ans.append('DRM Size: %s'%self.drm_size)
ans.append('DRM Flags: %r'%self.drm_flags)
a('Unknown3: %r'%self.unknown3)
r('DRM Offset', 'drm_offset')
a('DRM Count: %s'%self.drm_count)
a('DRM Size: %s'%self.drm_size)
a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
ans.append('Unknown4: %r'%self.unknown4)
ans.append('FDST Index: %d'% self.fdst_idx)
ans.append('FDST Count: %d'% self.fdst_count)
ans.append('FCIS number: %d'% self.fcis_number)
ans.append('FCIS count: %d'% self.fcis_count)
ans.append('FLIS number: %d'% self.flis_number)
ans.append('FLIS count: %d'% self.flis_count)
ans.append('Unknown6: %r'% self.unknown6)
ans.append('SRCS record index: %d'%self.srcs_record_index)
ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
ans.append('Unknown7: %r'%self.unknown7)
ans.append(('Extra data flags: %s (has multibyte: %s) '
a('Unknown4: %r'%self.unknown4)
r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number')
a('FCIS count: %d'% self.fcis_count)
r('FLIS number', 'flis_number')
a('FLIS count: %d'% self.flis_count)
a('Unknown6: %r'% self.unknown6)
r('SRCS record index', 'srcs_record_index')
a('Number of SRCS records?: %d'%self.num_srcs_records)
a('Unknown7: %r'%self.unknown7)
a(('Extra data flags: %s (has multibyte: %s) '
'(has indexing: %s) (has uncrossable breaks: %s)')%(
bin(self.extra_data_flags), self.has_multibytes,
self.has_indexing_bytes, self.has_uncrossable_breaks ))
ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
self.primary_index_record))
if self.file_version >= 8:
i('Sections Index', self.sect_idx)
i('SKEL Index', self.skel_idx)
i('DATP Index', self.datp_idx)
i('Other Index', self.oth_idx)
r('NCX index', 'primary_index_record')
if self.length >= 248:
r('Sections Index', 'sect_idx')
r('SKEL Index', 'skel_idx')
r('DATP Index', 'datp_idx')
r('Other Index', 'oth_idx')
if self.unknown9:
a('Unknown9: %r'%self.unknown9)

View File

@ -21,6 +21,8 @@ Elem = namedtuple('Chunk',
'insert_pos toc_text file_number sequence_number start_pos '
'length')
GuideRef = namedtuple('GuideRef', 'type title pos_fid')
def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec)
@ -124,6 +126,28 @@ class SECTIndex(Index):
)
)
class GuideIndex(Index):
    '''
    Debug view of the guide index: every entry of the parsed index table
    is turned into a GuideRef(type, title, pos_fid) stored in
    ``self.records``.
    '''

    def __init__(self, guideidx, records, codec):
        super(GuideIndex, self).__init__(guideidx, records, codec)
        self.records = []
        if self.table is None:
            # Nothing was parsed, leave the record list empty
            return

        # The only tag combinations this class knows how to interpret
        known_tag_sets = ({1, 6}, {1, 2, 3})

        for ref_type in self.table.iterkeys():
            tag_map = self.table[ref_type]
            if set(tag_map.iterkeys()) not in known_tag_sets:
                raise ValueError('Guide Index has unknown tags: %s'%
                        tag_map)

            # Tag 1 holds the key of the title in the CNCX
            title = self.cncx[tag_map[1][0]]

            # Tag 6 is used as the position when present, otherwise the
            # position is the pair of values from tags 2 and 3
            if 6 in tag_map:
                pos_fid = tag_map[6]
            else:
                pos_fid = (tag_map[2], tag_map[3])

            self.records.append(GuideRef(ref_type, title, pos_fid))
class NCXIndex(Index):
def __init__(self, ncxidx, records, codec):

View File

@ -12,7 +12,8 @@ from itertools import izip
from calibre import CurrentDir
from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
GuideIndex)
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
@ -114,6 +115,8 @@ class MOBIFile(object):
self.header.encoding)
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)
self.guide_index = GuideIndex(self.header.oth_idx, self.mf.records,
self.header.encoding)
def build_files(self):
text = self.raw_text
@ -136,6 +139,8 @@ class MOBIFile(object):
self.files.append(File(skel, skeleton, ftext, first_aid, sections))
def dump_flows(self, ddir):
if self.fdst is None:
raise ValueError('This MOBI file has no FDST record')
for i, x in enumerate(self.fdst.sections):
start, end = x
raw = self.raw_text[start:end]
@ -211,6 +216,10 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))
with open(os.path.join(ddir, 'guide.record'), 'wb') as fo:
fo.write(str(f.guide_index).encode('utf-8'))
for part in f.files:
part.dump(os.path.join(ddir, 'files'))

View File

@ -109,7 +109,7 @@ class Mobi8Reader(object):
table, cncx = read_index(self.kf8_sections, self.header.othidx,
self.header.codec)
Item = namedtuple('Item',
'type title div_frag_num')
'type title pos_fid')
for i, ref_type in enumerate(table.iterkeys()):
tag_map = table[ref_type]
@ -119,7 +119,7 @@ class Mobi8Reader(object):
if 3 in tag_map.keys():
fileno = tag_map[3][0]
if 6 in tag_map.keys():
fileno = tag_map[6][0]
fileno = tag_map[6]
self.guide.append(Item(ref_type.decode(self.header.codec),
title, fileno))
@ -287,23 +287,24 @@ class Mobi8Reader(object):
def create_guide(self):
guide = Guide()
for ref_type, ref_title, fileno in self.guide:
has_start = False
for ref_type, ref_title, pos_fid in self.guide:
try:
elem = self.elems[fileno]
except IndexError:
# Happens for thumbnailstandard in Amazon book samples
continue
fi = self.get_file_info(elem.insert_pos)
idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
linktgt = fi.filename
if len(pos_fid) != 2:
continue
except TypeError:
continue # thumbnailstandard record, ignore it
linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
if idtext:
linktgt += b'#' + idtext
g = Guide.Reference('%s/%s'%(fi.type, linktgt), os.getcwdu())
g = Guide.Reference(linktgt, os.getcwdu())
g.title, g.type = ref_title, ref_type
if g.title == 'start' or g.type == 'text':
has_start = True
guide.append(g)
so = self.header.exth.start_offset
if so not in {None, NULL_INDEX}:
if so not in {None, NULL_INDEX} and not has_start:
fi = self.get_file_info(so)
if fi.filename is not None:
idtext = self.get_id_tag(so).decode(self.header.codec)

View File

@ -23,6 +23,7 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer
# Disabled as I don't care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False
NULL_INDEX = 0xffffffff
class MobiWriter(object):
@ -30,6 +31,7 @@ class MobiWriter(object):
self.opts = opts
self.resources = resources
self.kf8 = kf8
self.for_joint = kf8 is not None
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort
@ -61,7 +63,7 @@ class MobiWriter(object):
self.stream = stream
self.records = [None]
self.generate_content()
self.generate_record0()
self.generate_joint_record0() if self.for_joint else self.generate_record0()
self.write_header()
self.write_content()
@ -200,8 +202,6 @@ class MobiWriter(object):
first_image_record = None
if self.resources:
used_images = self.serializer.used_images
if self.kf8 is not None:
used_images |= self.kf8.used_images
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
last_content_record = len(self.records) - 1
@ -365,6 +365,68 @@ class MobiWriter(object):
self.records[0] = align_block(record0)
# }}}
def generate_joint_record0(self): # {{{
    '''
    Build record 0 (the MOBI 6 header) for a joint MOBI 6 + KF8 file.

    Appends, in order, to ``self.records``: the resource records (when
    there are resources), a ``BOUNDARY`` record and the KF8 records. It
    then creates the MOBI 6 header from the KF8 header fields, overriding
    the ones that must differ, and stores it in ``self.records[0]``.

    Side effect: ``self.kf8.start_offset`` is replaced by the tuple
    ``(MOBI 6 start offset, KF8 start offset)`` before the KF8 record0 is
    read.
    '''
    from calibre.ebooks.mobi.writer8.mobi import (MOBIHeader,
            HEADER_FIELDS)
    from calibre.ebooks.mobi.writer8.exth import build_exth

    # Insert resource records
    first_image_record = None
    old = len(self.records)
    if self.resources:
        used_images = self.serializer.used_images | self.kf8.used_images
        first_image_record = len(self.records)
        self.resources.serialize(self.records, used_images)
    resource_record_count = len(self.records) - old

    # Insert KF8 records
    self.records.append(b'BOUNDARY')
    kf8_header_index = len(self.records)
    self.kf8.start_offset = (self.serializer.start_offset,
            self.kf8.start_offset)
    self.records.append(self.kf8.record0)
    self.records.extend(self.kf8.records[1:])

    # When there are no resource records, fall back to the index just past
    # the last record. This used to be a bare expression whose value was
    # thrown away, which left first_image_record as None in the header.
    if not first_image_record:
        first_image_record = len(self.records)

    header_fields = {k:getattr(self.kf8, k) for k in HEADER_FIELDS}

    # Now change the header fields that need to be different in the MOBI 6
    # header
    header_fields['first_resource_record'] = first_image_record
    header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
    header_fields['fdst_record'] = NULL_INDEX
    header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
    header_fields['extra_data_flags'] = 0b11
    for k, v in {'last_text_record':'last_text_record_idx',
            'first_non_text_record':'first_non_text_record_idx',
            'ncx_index':'primary_index_record_idx',
            }.iteritems():
        header_fields[k] = getattr(self, v)
    for x in ('skel', 'chunk', 'guide'):
        header_fields[x+'_index'] = NULL_INDEX

    # Create the MOBI 6 EXTH
    opts = self.opts
    kuc = 0 if resource_record_count > 0 else None

    header_fields['exth'] = build_exth(self.oeb.metadata,
            prefer_author_sort=opts.prefer_author_sort,
            is_periodical=opts.mobi_periodical,
            share_not_sync=opts.share_not_sync,
            cover_offset=self.cover_offset,
            thumbnail_offset=self.thumbnail_offset,
            num_of_resources=resource_record_count,
            kf8_unknown_count=kuc, be_kindlegen2=True,
            kf8_header_index=kf8_header_index,
            start_offset=self.serializer.start_offset,
            mobi_doctype=2)

    self.records[0] = MOBIHeader(file_version=6)(**header_fields)
# }}}
def write_header(self): # PalmDB header {{{
'''
Write the PalmDB header

View File

@ -27,6 +27,7 @@ EXTH_CODES = {
'source': 112,
'versionnumber': 114,
'startreading': 116,
'kf8_header_index': 121,
'num_of_resources': 125,
'kf8_unknown_count': 131,
'coveroffset': 201,
@ -41,7 +42,7 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
share_not_sync=True, cover_offset=None, thumbnail_offset=None,
start_offset=None, mobi_doctype=2, num_of_resources=None,
kf8_unknown_count=0, be_kindlegen2=False):
kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None):
exth = BytesIO()
nrecs = 0
@ -153,8 +154,19 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
nrecs += 1
if start_offset is not None:
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
start_offset))
try:
len(start_offset)
except TypeError:
start_offset = [start_offset]
for so in start_offset:
if so is not None:
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
so))
nrecs += 1
if kf8_header_index is not None:
exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
kf8_header_index))
nrecs += 1
if num_of_resources is not None:

View File

@ -284,7 +284,10 @@ class GuideIndex(Index):
class NCXIndex(Index):
control_byte_count = 2
''' The commented out parts have been seen in NCX indexes from MOBI 6
periodicals. Since we have no MOBI 8 periodicals to reverse engineer, leave
it for now. '''
# control_byte_count = 2
tag_types = tuple(map(TagMeta, (
('offset', 1, 1, 1, 0),
('length', 2, 1, 2, 0),
@ -295,12 +298,12 @@ class NCXIndex(Index):
('last_child', 23, 1, 64, 0),
('pos_fid', 6, 2, 128, 0),
EndTagTable,
('image', 69, 1, 1, 0),
('description', 70, 1, 2, 0),
('author', 71, 1, 4, 0),
('caption', 72, 1, 8, 0),
('attribution', 73, 1, 16, 0),
EndTagTable
# ('image', 69, 1, 1, 0),
# ('description', 70, 1, 2, 0),
# ('author', 71, 1, 4, 0),
# ('caption', 72, 1, 8, 0),
# ('attribution', 73, 1, 16, 0),
# EndTagTable
)))
def __init__(self, toc_table):

View File

@ -297,7 +297,6 @@ class KF8Writer(object):
self.chunk_records = ChunkIndex(self.chunk_table)()
self.ncx_records = []
toc = self.oeb.toc
max_depth = toc.depth()
entries = []
is_periodical = self.opts.mobi_periodical
if toc.count() < 2:
@ -307,26 +306,37 @@ class KF8Writer(object):
# Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'index':i, 'depth': max_depth - item.depth() - (0 if
is_periodical else 1), 'href':item.href, 'label':(item.title or
_('Unknown'))}
entries.append(entry)
for child in item:
child.ncx_parent = entry
entry = {'id': id(item), 'index': i, 'href':item.href,
'label':(item.title or _('Unknown')),
'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)
p = getattr(item, 'ncx_parent', None)
if p is not None:
entry['parent'] = p['index']
entry['parent_id'] = p
for child in item:
child.ncx_parent = entry['id']
child.ncx_hlvl = entry['depth'] + 1
entry['children'].append(id(child))
if is_periodical:
if item.author:
entry['author'] = item.author
if item.description:
entry['description'] = item.description
entries.append(entry)
# The Kindle requires entries to be sorted by (depth, playorder)
entries.sort(key=lambda entry: (entry['depth'], entry['index']))
for i, entry in enumerate(entries):
entry['index'] = i
id_to_index = {entry['id']:entry['index'] for entry in entries}
for entry in entries:
children = [e for e in entries if e.get('parent', -1) == entry['index']]
children = entry.pop('children')
if children:
entry['first_child'] = children[0]['index']
entry['last_child'] = children[-1]['index']
entry['first_child'] = id_to_index[children[0]]
entry['last_child'] = id_to_index[children[-1]]
if 'parent_id' in entry:
entry['parent'] = id_to_index[entry.pop('parent_id')]
href = entry.pop('href')
href, frag = href.partition('#')[0::2]
aid = self.id_map.get((href, frag), None)
@ -358,18 +368,19 @@ class KF8Writer(object):
if aid is None:
continue
pos, fid = self.aid_offset_map[aid]
if is_guide_ref_start(ref) and fid == 0:
# If fid != 0 then we cannot represent the start position as a
# single number in the EXTH header, so we do not write it to
# EXTH
self.start_offset = pos
if is_guide_ref_start(ref):
chunk = self.chunk_table[pos]
skel = [s for s in self.skel_table if s.file_number ==
chunk.file_number][0]
self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
self.guide_table.append(GuideRef(ref.title or
_('Unknown'), ref.type, (pos, fid)))
if self.guide_table:
self.guide_table.sort(key=lambda x:x.type) # Needed by the Kindle
self.guide_records = GuideIndex(self.guide_table)()
def create_kf8_book(oeb, opts, resources):
def create_kf8_book(oeb, opts, resources, for_joint=False):
writer = KF8Writer(oeb, opts, resources)
return KF8Book(writer)
return KF8Book(writer, for_joint=for_joint)

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
import time, random
from struct import pack
from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
@ -25,8 +25,6 @@ class MOBIHeader(Header): # {{{
the file.
'''
FILE_VERSION = 8
DEFINITION = '''
# 0: Compression
compression = DYN
@ -63,7 +61,7 @@ class MOBIHeader(Header): # {{{
encoding = 65001
# 32: UID
uid = random.randint(0, 0xffffffff)
uid = DYN
# 36: File version
file_version = {file_version}
@ -154,7 +152,7 @@ class MOBIHeader(Header): # {{{
# 0b1 - extra multibyte bytes after text records
# 0b10 - TBS indexing data (only used in MOBI 6)
# 0b100 - uncrossable breaks only used in MOBI 6
extra_data_flags = 1
extra_data_flags = DYN
# 244: KF8 Indices
ncx_index = DYN
@ -171,13 +169,18 @@ class MOBIHeader(Header): # {{{
# Padding to allow amazon's DTP service to add data
padding = zeroes(8192)
'''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
'''
SHORT_FIELDS = {'compression', 'last_text_record', 'record_size',
'encryption_type', 'unused2'}
ALIGN = True
POSITIONS = {'title_offset':'full_title'}
def __init__(self, file_version=8):
    ''' Fill the placeholders of the header DEFINITION template for the
    requested file_version (and the record size) before the base class is
    initialised. The result is stored on the instance, so the class-level
    template stays unformatted. '''
    substitutions = dict(file_version=file_version,
            record_size=RECORD_SIZE)
    self.DEFINITION = self.DEFINITION.format(**substitutions)
    super(MOBIHeader, self).__init__()
def format_value(self, name, val):
if name == 'compression':
val = PALMDOC if val else UNCOMPRESSED
@ -185,14 +188,20 @@ class MOBIHeader(Header): # {{{
# }}}
# Fields that need to be set in the MOBI Header are
HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
'first_non_text_record', 'title_length', 'language_code',
'first_resource_record', 'exth_flags', 'fdst_record',
'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
'guide_index', 'exth', 'full_title', 'extra_data_flags',
'uid'}
class KF8Book(object):
def __init__(self, writer):
self.build_records(writer)
def __init__(self, writer, for_joint=False):
self.build_records(writer, for_joint)
self.used_images = writer.used_images
def build_records(self, writer):
def build_records(self, writer, for_joint):
metadata = writer.oeb.metadata
# The text records
for x in ('last_text_record_idx', 'first_non_text_record_idx'):
@ -222,8 +231,10 @@ class KF8Book(object):
self.first_resource_record = NULL_INDEX
if resources.records:
self.first_resource_record = len(self.records)
self.records.extend(resources.records)
self.num_of_resources = len(resources.records)
before = len(self.records)
if not for_joint:
resources.serialize(self.records, writer.used_images)
self.num_of_resources = len(self.records) - before
# FDST
self.fdst_count = writer.fdst_count
@ -233,12 +244,13 @@ class KF8Book(object):
# EOF
self.records.append(b'\xe9\x8e\r\n') # EOF record
# Miscellaneous header fields
self.compression = writer.compress
self.book_type = 0x101 if writer.opts.mobi_periodical else 2
self.full_title = utf8_text(unicode(metadata.title[0]))
self.title_length = len(self.full_title)
self.extra_data_flags = 0b1
self.uid = random.randint(0, 0xffffffff)
self.language_code = iana2mobi(str(metadata.language[0]))
self.exth_flags = 0b1010000
@ -248,14 +260,14 @@ class KF8Book(object):
self.opts = writer.opts
self.start_offset = writer.start_offset
self.metadata = metadata
self.kuc = 0 if len(resources.records) > 0 else None
@property
def record0(self):
''' We generate the EXTH header and record0 dynamically, to allow other
code to customize various values after build_record() has been
code to customize various values after build_records() has been
called'''
opts = self.opts
kuc = 0 if self.num_of_resources > 0 else None
self.exth = build_exth(self.metadata,
prefer_author_sort=opts.prefer_author_sort,
is_periodical=opts.mobi_periodical,
@ -263,15 +275,10 @@ class KF8Book(object):
cover_offset=self.cover_offset,
thumbnail_offset=self.thumbnail_offset,
num_of_resources=self.num_of_resources,
kf8_unknown_count=kuc, be_kindlegen2=True,
kf8_unknown_count=self.kuc, be_kindlegen2=True,
start_offset=self.start_offset, mobi_doctype=self.book_type)
kwargs = {field:getattr(self, field) for field in
('compression', 'text_length', 'last_text_record', 'book_type',
'first_non_text_record', 'title_length', 'language_code',
'first_resource_record', 'exth_flags', 'fdst_record',
'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
'guide_index', 'exth', 'full_title')}
kwargs = {field:getattr(self, field) for field in HEADER_FIELDS}
return MOBIHeader()(**kwargs)
def write(self, outpath):

View File

@ -115,14 +115,12 @@ class EditMetadataAction(InterfaceAction):
' "Show details" to see which books.')%num
payload = (id_map, tdir, log_file, lm_map)
from calibre.gui2.dialogs.message_box import ProceedNotification
p = ProceedNotification(self.apply_downloaded_metadata,
self.gui.proceed_question(self.apply_downloaded_metadata,
payload, log_file,
_('Download log'), _('Download complete'), msg,
det_msg=det_msg, show_copy_button=show_copy_button,
cancel_callback=lambda x:self.cleanup_bulk_download(tdir),
parent=self.gui, log_is_file=True)
p.show()
log_is_file=True)
def apply_downloaded_metadata(self, payload):
good_ids, tdir, log_file, lm_map = payload

View File

@ -334,6 +334,7 @@ class CoverView(QWidget): # {{{
self.pixmap = pmap
self.do_layout()
self.update()
self.update_tooltip()
if not config['disable_animations']:
self.animation.start()
id_ = self.data.get('id', None)

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QLineEdit, QAbstractListModel, Qt,
QApplication, QCompleter, QMetaObject)
QApplication, QCompleter, pyqtSignal)
from calibre.utils.icu import sort_key, lower
from calibre.gui2 import NONE
@ -158,6 +158,8 @@ class MultiCompleteLineEdit(QLineEdit, LineEditECM):
class MultiCompleteComboBox(EnComboBox):
clear_edit_text = pyqtSignal()
def __init__(self, *args):
EnComboBox.__init__(self, *args)
self.setLineEdit(MultiCompleteLineEdit(self))
@ -169,6 +171,8 @@ class MultiCompleteComboBox(EnComboBox):
self.dummy_model = CompleteModel(self)
c.setModel(self.dummy_model)
self.lineEdit()._completer.setWidget(self)
self.clear_edit_text.connect(self.clearEditText,
type=Qt.QueuedConnection)
def update_items_cache(self, complete_items):
self.lineEdit().update_items_cache(complete_items)
@ -191,8 +195,7 @@ class MultiCompleteComboBox(EnComboBox):
what = unicode(what)
le = self.lineEdit()
if not what.strip():
QMetaObject.invokeMethod(self, 'clearEditText',
Qt.QueuedConnection)
self.clear_edit_text.emit()
else:
self.setEditText(what)
le.selectAll()

View File

@ -95,9 +95,15 @@ class MetadataWidget(Widget, Ui_Form):
if not pm.isNull():
self.cover.setPixmap(pm)
self.cover_data = cover
self.set_cover_tooltip(pm)
else:
self.cover.setPixmap(QPixmap(I('default_cover.png')))
self.cover.setToolTip(_('This book has no cover'))
def set_cover_tooltip(self, pm):
    ''' Set the tooltip of the cover widget to the width and height, in
    pixels, of the pixmap pm. '''
    template = _('Cover size: %(width)d x %(height)d pixels')
    dimensions = {'width': pm.width(), 'height': pm.height()}
    self.cover.setToolTip(template % dimensions)
def initialize_combos(self):
self.initalize_authors()
@ -205,6 +211,7 @@ class MetadataWidget(Widget, Ui_Form):
d.exec_()
else:
self.cover_path.setText(_file)
self.set_cover_tooltip(pix)
self.cover.setPixmap(pix)
self.cover_changed = True
self.cpixmap = pix

View File

@ -158,6 +158,13 @@ _proceed_memory = []
class ProceedNotification(MessageBox): # {{{
'''
WARNING: This class is deprecated. Do not use it, as some users have
reported crashes when closing the dialog box generated by this class.
Instead use: gui.proceed_question(...). The arguments are the same as for
this class, except that there is no parent argument.
'''
def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
det_msg='', show_copy_button=False, parent=None,
cancel_callback=None, log_is_file=False):

View File

@ -99,7 +99,7 @@ typedef unsigned short QRgb565;
#define PFREAL_ONE (1 << PFREAL_SHIFT)
#define PFREAL_HALF (PFREAL_ONE >> 1)
#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextWrapAnywhere|Qt::TextHideMnemonic|Qt::AlignCenter)
#define TEXT_FLAGS (Qt::TextWordWrap|Qt::TextHideMnemonic|Qt::AlignCenter)
inline PFreal fmul(PFreal a, PFreal b)
{

170
src/calibre/gui2/proceed.py Normal file
View File

@ -0,0 +1,170 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import namedtuple
from PyQt4.Qt import (QDialog, Qt, QLabel, QGridLayout, QPixmap,
QDialogButtonBox, QApplication, QSize, pyqtSignal, QIcon,
QPlainTextEdit)
from calibre.constants import __version__
from calibre.gui2.dialogs.message_box import ViewLog
# A single queued request for ProceedQuestion. Field order is part of the
# contract: instances are built positionally in ProceedQuestion.__call__.
Question = namedtuple('Question', (
    'payload',
    'callback',
    'cancel_callback',
    'title',
    'msg',
    'html_log',
    'log_viewer_title',
    'log_is_file',
    'det_msg',
    'show_copy_button',
))
class ProceedQuestion(QDialog):
    '''
    A reusable Yes/No popup that asks the user whether to proceed.

    Requests are queued in ``self.questions`` and shown one at a time. The
    dialog is only hidden when a question is answered, so a single instance
    serves all requests. Queue a question by calling the instance, see
    :meth:`__call__`.
    '''

    # Emitted as (callback, payload) once the user has answered a question.
    # It is connected to do_ask_question() with a queued connection in
    # __init__.
    ask_question = pyqtSignal(object, object)

    def __init__(self, parent):
        QDialog.__init__(self, parent)
        # The same dialog object is shown again for every queued question
        self.setAttribute(Qt.WA_DeleteOnClose, False)
        self.setWindowIcon(QIcon(I('dialog_question.png')))

        # Pending Question tuples, index 0 is the next one to be displayed
        self.questions = []

        self._l = l = QGridLayout(self)
        self.setLayout(l)

        self.icon_label = ic = QLabel(self)
        ic.setPixmap(QPixmap(I('dialog_question.png')))
        # Placeholder text, replaced by show_question()
        self.msg_label = msg = QLabel('some random filler text')
        msg.setWordWrap(True)
        ic.setMaximumWidth(110)
        ic.setMaximumHeight(100)
        ic.setScaledContents(True)
        ic.setStyleSheet('QLabel { margin-right: 10px }')

        self.bb = QDialogButtonBox(QDialogButtonBox.Yes|QDialogButtonBox.No)
        self.bb.accepted.connect(self.accept)
        self.bb.rejected.connect(self.reject)
        self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
        self.log_button.setIcon(QIcon(I('debug.png')))
        self.log_button.clicked.connect(self.show_log)
        self.copy_button = self.bb.addButton(_('&Copy to clipboard'),
                self.bb.ActionRole)
        self.copy_button.clicked.connect(self.copy_to_clipboard)

        # The toggle button's current text doubles as its state, see
        # toggle_det_msg()
        self.show_det_msg = _('Show &details')
        self.hide_det_msg = _('Hide &details')
        self.det_msg_toggle = self.bb.addButton(self.show_det_msg,
                self.bb.ActionRole)
        self.det_msg_toggle.clicked.connect(self.toggle_det_msg)
        self.det_msg_toggle.setToolTip(
                _('Show detailed information about this error'))

        self.det_msg = QPlainTextEdit(self)
        self.det_msg.setReadOnly(True)
        self.bb.button(self.bb.Yes).setDefault(True)

        l.addWidget(ic, 0, 0, 1, 1)
        l.addWidget(msg, 0, 1, 1, 1)
        l.addWidget(self.det_msg, 1, 0, 1, 2)
        l.addWidget(self.bb, 2, 0, 1, 2)

        # Queued, so the user's callback is invoked from the event loop
        # instead of from inside accept()/reject()
        self.ask_question.connect(self.do_ask_question,
                type=Qt.QueuedConnection)

    def copy_to_clipboard(self, *args):
        ''' Copy the calibre version, title, message and details to the
        clipboard. '''
        QApplication.clipboard().setText(
                'calibre, version %s\n%s: %s\n\n%s' %
                (__version__, unicode(self.windowTitle()),
                    unicode(self.msg_label.text()),
                    unicode(self.det_msg.toPlainText())))
        self.copy_button.setText(_('Copied'))

    def _answer_current(self, proceed):
        ''' Dequeue the current question, schedule its callback (or its
        cancel_callback if ``proceed`` is False) and hide the dialog. '''
        if self.questions:
            question = self.questions.pop(0)
            callback = (question.callback if proceed else
                    question.cancel_callback)
            self.ask_question.emit(callback, question.payload)
        self.hide()

    def accept(self):
        ''' The user chose to proceed. '''
        self._answer_current(True)

    def reject(self):
        ''' The user chose not to proceed. '''
        self._answer_current(False)

    def do_ask_question(self, callback, payload):
        '''
        Run ``callback(payload)`` if callback is callable, then display the
        next queued question, if any.
        '''
        try:
            if callable(callback):
                callback(payload)
        finally:
            # Advance the queue even if the callback raised, otherwise the
            # remaining questions would stay hidden until the next request
            self.show_question()

    def toggle_det_msg(self, *args):
        ''' Show or hide the detailed message and resize the dialog. '''
        vis = unicode(self.det_msg_toggle.text()) == self.hide_det_msg
        self.det_msg_toggle.setText(self.show_det_msg if vis else
                self.hide_det_msg)
        self.det_msg.setVisible(not vis)
        self.do_resize()

    def do_resize(self):
        ''' Resize to the size hint plus 100 pixels of width, capped at
        500x500. '''
        sz = self.sizeHint() + QSize(100, 0)
        sz.setWidth(min(500, sz.width()))
        sz.setHeight(min(500, sz.height()))
        self.resize(sz)

    def show_question(self):
        '''
        Display the question at the head of the queue. Does nothing if the
        dialog is already visible or the queue is empty.
        '''
        if self.isVisible() or not self.questions:
            return
        question = self.questions[0]
        self.msg_label.setText(question.msg)
        self.setWindowTitle(question.title)
        self.log_button.setVisible(bool(question.html_log))
        self.copy_button.setVisible(bool(question.show_copy_button))
        self.det_msg.setPlainText(question.det_msg or '')
        # Details always start collapsed
        self.det_msg.setVisible(False)
        self.det_msg_toggle.setVisible(bool(question.det_msg))
        self.det_msg_toggle.setText(self.show_det_msg)
        self.bb.button(self.bb.Yes).setDefault(True)
        self.do_resize()
        self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
        self.show()

    def __call__(self, callback, payload, html_log, log_viewer_title, title,
            msg, det_msg='', show_copy_button=False, cancel_callback=None,
            log_is_file=False):
        '''
        A non modal popup that notifies the user that a background task has
        been completed. This class guarantees that only a single popup is
        visible at any one time. Other requests are queued and displayed after
        the user dismisses the current popup.

        :param callback: A callable that is called with payload if the user
        asks to proceed. Note that this is always called in the GUI thread.
        :param cancel_callback: A callable that is called with the payload if
        the user asks not to proceed.
        :param payload: Arbitrary object, passed to callback
        :param html_log: An HTML or plain text log. If empty, the View log
        button is hidden.
        :param log_viewer_title: The title for the log viewer window
        :param title: The title for this popup
        :param msg: The msg to display
        :param det_msg: Detailed message
        :param show_copy_button: If True, the Copy to clipboard button is
        shown
        :param log_is_file: If True the html_log parameter is interpreted as
        the path to a file on disk containing the log encoded with utf-8
        '''
        question = Question(payload, callback, cancel_callback, title, msg,
                html_log, log_viewer_title, log_is_file, det_msg,
                show_copy_button)
        self.questions.append(question)
        self.show_question()

    def show_log(self):
        ''' Open the log of the current question in a log viewer. '''
        if self.questions:
            q = self.questions[0]
            log = q.html_log
            if q.log_is_file:
                with open(log, 'rb') as f:
                    log = f.read().decode('utf-8')
            self.log_viewer = ViewLog(q.log_viewer_title, log,
                        parent=self)
if __name__ == '__main__':
    # Manual smoke test: run the dialog on its own, with no question queued
    app = QApplication([])
    dialog = ProceedQuestion(None)
    dialog.exec_()

View File

@ -43,6 +43,7 @@ from calibre.gui2.tag_browser.ui import TagBrowserMixin
from calibre.gui2.keyboard import Manager
from calibre.gui2.auto_add import AutoAdder
from calibre.library.sqlite import sqlite, DatabaseException
from calibre.gui2.proceed import ProceedQuestion
class Listener(Thread): # {{{
@ -109,6 +110,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
MainWindow.__init__(self, opts, parent=parent, disable_automatic_gc=True)
self.proceed_requested.connect(self.do_proceed,
type=Qt.QueuedConnection)
self.proceed_question = ProceedQuestion(self)
self.keyboard = Manager(self)
_gui = self
self.opts = opts

View File

@ -573,6 +573,18 @@ There is a Word macro package that can automate the conversion of Word documents
generating the Table of Contents much simpler. It is called BookCreator and is available for free
at `mobileread <http://www.mobileread.com/forums/showthread.php?t=28313>`_.
An easy way to generate a Table of Contents when converting a Word document is:
1. Mark your Chapters and sub-Chapters in the doc file with one of the MS built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6'. 'Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2> etc
2. Save the doc as Webpage-filtered (rather than Webpage) and import the html file into |app|
3. When you convert in |app| you use what you did in step 1 to set the box called 'Detect chapters at' on the Convert - Structure Detection page. For example:
* If you mark Chapters with style 'Heading 2' then set the 'Detect chapters at' box to ``//h:h2``. This will give you a proper external metadata TOC in the converted EPUB.
* A slightly more complex example: if your book has Sections and Chapters and you want a 2-level nested metadata TOC, mark the doc Sections with style 'Heading 2' and the Chapters with style 'Heading 3'. When you convert, set the 'Detect chapters at' box to ``//h:h2|//h:h3``. On the Convert - TOC page, set the 'Level 1 TOC' box to ``//h:h2`` and the 'Level 2 TOC' box to ``//h:h3``.
Convert TXT documents
~~~~~~~~~~~~~~~~~~~~~~