mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Sync to trunk.
This commit is contained in:
commit
3d24e1b87d
@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
||||
lfeeds = self.get_feeds()
|
||||
for feedobj in lfeeds:
|
||||
feedtitle, feedurl = feedobj
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
soup = self.index_to_soup(feedurl)
|
||||
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
|
||||
@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
||||
def populate_article_metadata(self, article, soup, first):
    '''
    Replace the article's title with the text of the downloaded page's
    <title> element.

    :param article: Article object whose ``title`` attribute is updated.
    :param soup: Parsed page; only ``soup.find('title')`` is consulted.
    :param first: Unused here.

    If the page has no <title> element, or the element is empty, the
    article's existing title is left untouched instead of raising.
    '''
    title_tag = soup.find('title')
    if title_tag is None or not title_tag.contents:
        # Nothing usable on the page: keep whatever title was already set
        return
    article.title = title_tag.contents[0].strip()
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
    '''
    Prepend an <h1> heading carrying the page title to every element whose
    id is "story".

    :param soup: Parsed page; modified in place.
    :param first_fetch: Unused here.
    :return: The same ``soup`` object that was passed in.

    Pages without a <title> element (or with an empty one) are returned
    unchanged rather than raising.
    '''
    title_tag = soup.find('title')
    if title_tag is not None and title_tag.contents:
        # The heading is the same for every match, so build it only once
        heading = '<h1>' + title_tag.contents[0].strip() + '</h1>'
        for story in soup.findAll(attrs={"id" : "story"}):
            story.insert(0, heading)

    return soup
|
||||
|
@ -151,7 +151,7 @@ class ISBNDB(Source):
|
||||
|
||||
bl = feed.find('BookList')
|
||||
if bl is None:
|
||||
err = tostring(etree.find('errormessage'))
|
||||
err = tostring(feed.find('errormessage'))
|
||||
raise ValueError('ISBNDb query failed:' + err)
|
||||
total_results = int(bl.get('total_results'))
|
||||
shown_results = int(bl.get('shown_results'))
|
||||
|
@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
|
||||
from calibre.utils.date import utc_tz
|
||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||
get_trailing_data)
|
||||
get_trailing_data, decode_fvwi)
|
||||
from calibre.utils.magick.draw import identify_data
|
||||
|
||||
# PalmDB {{{
|
||||
@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{
|
||||
byts = byts[consumed:]
|
||||
ans.append('Unknown (vwi: always 0?): %d'%arg1)
|
||||
if self.doc_type in (257, 259): # Hierarchical periodical
|
||||
byts, a = self.interpret_periodical(tbs_type, byts)
|
||||
byts, a = self.interpret_periodical(tbs_type, byts,
|
||||
dat['geom'][0])
|
||||
ans += a
|
||||
if byts:
|
||||
sbyts = tuple(hex(b)[2:] for b in byts)
|
||||
@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{
|
||||
ans.append('')
|
||||
return tbs_type, ans
|
||||
|
||||
def interpret_periodical(self, tbs_type, byts):
|
||||
def interpret_periodical(self, tbs_type, byts, record_offset):
|
||||
ans = []
|
||||
|
||||
def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
|
||||
@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
def read_section_transitions(byts, psi=None): # {{{
    '''
    Decode the section transition entries at the start of ``byts``,
    appending one human readable line per decoded field to ``ans``.

    ``self``, ``ans`` and ``record_offset`` are taken from the enclosing
    interpret_periodical() scope.

    :param byts: The not yet interpreted trailing bytes of the record.
    :param psi: Index entry of the section currently being described. When
        None, the entry returned by ``self.get_index(1)`` is used.
    :return: The bytes remaining once decoding is finished. The loop only
        ends when ``byts`` is exhausted, so this is an empty sequence.
    :raises ValueError: If an entry has the 0b0010 flag set.
    '''
    if psi is None:
        # Assume parent section is 1
        psi = self.get_index(1)

    while byts:
        ai, flags, consumed = decode_fvwi(byts)
        byts = byts[consumed:]
        if flags & 0b1000:
            # This entry closes the current section: report it relative to
            # the next section and carry on with that section
            nsi = self.get_index(psi.index+1)
            ans.append('Last article in this record of section %d'
                    ' (relative to next section index [%d]): '
                    '%d [%d absolute index]'%(psi.index, nsi.index, ai,
                        ai+nsi.index))
            psi = nsi
            continue

        ans.append('First article in this record of section %d'
                ' (relative to its parent section): '
                '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))

        if flags == 0:
            ans.append('The section %d has only one article'
                    ' in this record'%psi.index)
            continue

        if flags & 0b0100:
            # The article count is stored as a single byte
            num = byts[0]
            byts = byts[1:]
            ans.append('Number of articles in this record of '
                    'section %d: %d'%(psi.index, num))

        if flags & 0b0010:
            raise ValueError(
                    'Dont know how to interpret the 0b0010 flag')

        if flags & 0b0001:
            arg, consumed = decint(byts)
            byts = byts[consumed:]
            ans.append('->Offset to start of next section (%d) from start'
                    ' of record: %d [%d absolute offset]'%(psi.index+1,
                        arg, arg+record_offset))

    # The caller rebinds byts to the result of this call, so the remaining
    # bytes have to be handed back explicitly
    return byts
# }}}
|
||||
|
||||
if tbs_type == 3: # {{{
|
||||
arg2, consumed = decint(byts)
|
||||
byts = byts[consumed:]
|
||||
@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{
|
||||
flags = arg3 & 0b1111
|
||||
ans.append('First section index (fvwi): %d'%fsi)
|
||||
psi = self.get_index(fsi)
|
||||
ans.append('Flags (flag: always 0?): %d'%flags)
|
||||
ans.append('Flags: %d'%flags)
|
||||
if flags == 4:
|
||||
ans.append('Number of articles in this section: %d'%byts[0])
|
||||
byts = byts[1:]
|
||||
@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{
|
||||
pass
|
||||
else:
|
||||
raise ValueError('Unknown flags value: %d'%flags)
|
||||
|
||||
|
||||
if byts:
|
||||
byts = tbs_type_6(byts, psi=psi,
|
||||
msg=('First article of ending section, relative to its'
|
||||
' parent\'s index'),
|
||||
fmsg=('->Offset from start of record to beginning of'
|
||||
' last starting section'))
|
||||
while byts:
|
||||
# We have a transition not just an opening first section
|
||||
psi = self.get_index(psi.index+1)
|
||||
arg, consumed = decint(byts)
|
||||
off = arg >> 4
|
||||
byts = byts[consumed:]
|
||||
flags = arg & 0b1111
|
||||
ans.append('Last article of ending section w.r.t. starting'
|
||||
' section offset (fvwi): %d [%d absolute]'%(off,
|
||||
psi.index+off))
|
||||
ans.append('Flags (always 8?): %d'%flags)
|
||||
byts = tbs_type_6(byts, psi=psi)
|
||||
if byts:
|
||||
# Ended with flag 1,and not EOF, which means there's
|
||||
# another section transition in this record
|
||||
arg, consumed = decint(byts)
|
||||
byts = byts[consumed:]
|
||||
ans.append('->Offset from start of record to beginning of '
|
||||
'last starting section: %d'%(arg))
|
||||
else:
|
||||
break
|
||||
byts = read_section_transitions(byts, psi)
|
||||
|
||||
# }}}
|
||||
|
||||
@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{
|
||||
elif flags == 0:
|
||||
byts = tbs_type_6(byts, psi=psi)
|
||||
else:
|
||||
raise ValueError('Unkown flags: %d'%flags)
|
||||
raise ValueError('Unknown flags: %d'%flags)
|
||||
# }}}
|
||||
|
||||
return byts, ans
|
||||
|
@ -39,7 +39,7 @@ def encode_number_as_hex(num):
|
||||
The bytes that follow are simply the hexadecimal representation of the
|
||||
number.
|
||||
'''
|
||||
num = bytes(hex(num)[2:])
|
||||
num = bytes(hex(num)[2:].upper())
|
||||
ans = bytearray(num)
|
||||
ans.insert(0, len(num))
|
||||
return bytes(ans)
|
||||
|
@ -179,7 +179,27 @@ class IndexEntry(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Indexer(object):
|
||||
class TBS(object): # {{{

    '''
    Holds the trailing byte sequence of a single text record, worked out
    from the index nodes that start and/or end on that record. The result
    is available as the ``bytestring`` attribute.
    '''

    def __init__(self, data, is_periodical):
        # Choose the builder that matches the kind of document
        build = self.periodical_tbs if is_periodical else self.book_tbs
        build(data)

    def periodical_tbs(self, data):
        # Currently always produces an empty sequence; data is ignored
        self.bytestring = b''

    def book_tbs(self, data):
        # Currently always produces an empty sequence; data is ignored
        self.bytestring = b''
# }}}
|
||||
|
||||
class Indexer(object): # {{{
|
||||
|
||||
def __init__(self, serializer, number_of_text_records,
|
||||
size_of_last_text_record, opts, oeb):
|
||||
@ -211,6 +231,8 @@ class Indexer(object):
|
||||
self.records.insert(0, self.create_header())
|
||||
self.records.extend(self.cncx.records)
|
||||
|
||||
self.calculate_trailing_byte_sequences()
|
||||
|
||||
def create_index_record(self): # {{{
|
||||
header_length = 192
|
||||
buf = StringIO()
|
||||
@ -524,3 +546,38 @@ class Indexer(object):
|
||||
return indices
|
||||
# }}}
|
||||
|
||||
def calculate_trailing_byte_sequences(self):
    '''
    Build ``self.tbs_map``, which maps each text record number (counted
    from 1) to a TBS object describing the index nodes touching that
    record.

    Every node overlapping a record is filed under exactly one key:

        ends      -- began in an earlier record, finishes in this one
        completes -- begins and finishes inside this record
        starts    -- begins in this record, finishes in a later one
        spans     -- begins earlier and finishes later (a single node;
                     a later match replaces an earlier one)
    '''
    self.tbs_map = {}
    for record_num in xrange(self.number_of_text_records):
        record_start = record_num * RECORD_SIZE
        record_end = record_start + RECORD_SIZE
        data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
            ('spans', None)])
        for node in self.indices:
            if node.offset >= record_end:
                # Node starts after current record; scanning stops at the
                # first such node
                break
            if node.next_offset <= record_start:
                # Node ends before current record
                continue
            begins_here = node.offset >= record_start
            finishes_here = node.next_offset <= record_end
            if begins_here and finishes_here:
                data['completes'].append(node)
            elif begins_here:
                data['starts'].append(node)
            elif finishes_here:
                data['ends'].append(node)
            else:
                data['spans'] = node
        self.tbs_map[record_num+1] = TBS(data, self.is_periodical)
|
||||
|
||||
def get_trailing_byte_sequence(self, num):
    '''
    Return the ``bytestring`` of the entry stored in ``self.tbs_map`` under
    the key ``num``. Raises KeyError if there is no such entry.
    '''
    tbs = self.tbs_map[num]
    return tbs.bytestring
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -95,7 +95,13 @@ class MobiWriter(object):
|
||||
self.log.exception('Failed to generate MOBI index:')
|
||||
else:
|
||||
self.primary_index_record_idx = len(self.records)
|
||||
for i in xrange(len(self.records)):
|
||||
if i == 0: continue
|
||||
tbs = self.indexer.get_trailing_byte_sequence(i)
|
||||
self.records[i] += tbs
|
||||
self.records.extend(self.indexer.records)
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
def write_uncrossable_breaks(self): # {{{
|
||||
|
@ -141,7 +141,8 @@ class BaseJob(object):
|
||||
def log_file(self):
|
||||
if self.log_path:
|
||||
return open(self.log_path, 'rb')
|
||||
return cStringIO.StringIO(_('No details available.'))
|
||||
return cStringIO.StringIO(_('No details available.').encode('utf-8',
|
||||
'replace'))
|
||||
|
||||
@property
|
||||
def details(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user