Sync to trunk.

This commit is contained in:
John Schember 2011-07-25 18:20:28 -04:00
commit 3d24e1b87d
7 changed files with 125 additions and 39 deletions

View File

@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
def populate_article_metadata(self, article, soup, first):
    '''
    Set ``article.title`` from the ``title`` element of ``soup``.

    The first child of the first ``title`` tag found in ``soup`` is
    stripped of surrounding whitespace and stored on ``article``.
    ``first`` is accepted but not used.
    '''
    title_tag = soup.find('title')
    raw_title = title_tag.contents[0]
    article.title = raw_title.strip()
def postprocess_html(self, soup, first_fetch):
    '''
    Prepend the page title, wrapped in ``<h1>`` markup, to every element
    of ``soup`` whose ``id`` attribute is ``story``.

    The title text is the stripped first child of the first ``title`` tag.
    ``first_fetch`` is accepted but not used. Returns the same ``soup``
    object that was passed in.
    '''
    story_nodes = soup.findAll(attrs={"id" : "story"})
    for story in story_nodes:
        # Looked up per element, so a page without any "story" element
        # never needs a title tag.
        title_text = soup.find('title').contents[0].strip()
        heading = '<h1>' + title_text + '</h1>'
        story.insert(0, heading)
    return soup

View File

@ -151,7 +151,7 @@ class ISBNDB(Source):
bl = feed.find('BookList')
if bl is None:
err = tostring(etree.find('errormessage'))
err = tostring(feed.find('errormessage'))
raise ValueError('ISBNDb query failed:' + err)
total_results = int(bl.get('total_results'))
shown_results = int(bl.get('shown_results'))

View File

@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
get_trailing_data)
get_trailing_data, decode_fvwi)
from calibre.utils.magick.draw import identify_data
# PalmDB {{{
@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{
byts = byts[consumed:]
ans.append('Unknown (vwi: always 0?): %d'%arg1)
if self.doc_type in (257, 259): # Hierarchical periodical
byts, a = self.interpret_periodical(tbs_type, byts)
byts, a = self.interpret_periodical(tbs_type, byts,
dat['geom'][0])
ans += a
if byts:
sbyts = tuple(hex(b)[2:] for b in byts)
@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{
ans.append('')
return tbs_type, ans
def interpret_periodical(self, tbs_type, byts):
def interpret_periodical(self, tbs_type, byts, record_offset):
ans = []
def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{
# }}}
def read_section_transitions(byts, psi=None): # {{{
    '''
    Decode a run of section transition entries from ``byts`` and append
    one human readable line per decoded field to ``ans``.

    ``self``, ``ans`` and ``record_offset`` are taken from the enclosing
    ``interpret_periodical`` scope.

    :param byts: The remaining trailing bytes to interpret.
    :param psi: Index entry of the section currently being described. If
        None, ``self.get_index(1)`` is used.
    :return: The bytes left over after decoding. The loop only stops once
        everything has been consumed, so this is an empty sequence.
    :raises ValueError: If an entry has the 0b0010 flag set; its meaning
        is not known.
    '''
    if psi is None:
        # Assume parent section is 1
        psi = self.get_index(1)

    while byts:
        ai, flags, consumed = decode_fvwi(byts)
        byts = byts[consumed:]

        if flags & 0b1000:
            # This entry closes the current section: the value is reported
            # relative to the next section, which becomes the current one.
            nsi = self.get_index(psi.index+1)
            ans.append('Last article in this record of section %d'
                    ' (relative to next section index [%d]): '
                    '%d [%d absolute index]'%(psi.index, nsi.index, ai,
                        ai+nsi.index))
            psi = nsi
            continue

        ans.append('First article in this record of section %d'
                ' (relative to its parent section): '
                '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))

        if flags == 0:
            ans.append('The section %d has only one article'
                    ' in this record'%psi.index)
            continue

        if flags & 0b0100:
            # The article count is stored in the single byte that follows
            num = byts[0]
            byts = byts[1:]
            ans.append('Number of articles in this record of '
                    'section %d: %d'%(psi.index, num))

        if flags & 0b0010:
            raise ValueError(
                    'Dont know how to interpret the 0b0010 flag')

        if flags & 0b0001:
            arg, consumed = decint(byts)
            byts = byts[consumed:]
            ans.append('->Offset to start of next section (%d) from start'
                    ' of record: %d [%d absolute offset]'%(psi.index+1,
                        arg, arg+record_offset))

    # The caller rebinds its buffer to the result of this function, so
    # hand back the (now empty) remainder instead of an implicit None.
    return byts
# }}}
if tbs_type == 3: # {{{
arg2, consumed = decint(byts)
byts = byts[consumed:]
@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{
flags = arg3 & 0b1111
ans.append('First section index (fvwi): %d'%fsi)
psi = self.get_index(fsi)
ans.append('Flags (flag: always 0?): %d'%flags)
ans.append('Flags: %d'%flags)
if flags == 4:
ans.append('Number of articles in this section: %d'%byts[0])
byts = byts[1:]
@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{
pass
else:
raise ValueError('Unknown flags value: %d'%flags)
if byts:
byts = tbs_type_6(byts, psi=psi,
msg=('First article of ending section, relative to its'
' parent\'s index'),
fmsg=('->Offset from start of record to beginning of'
' last starting section'))
while byts:
# We have a transition not just an opening first section
psi = self.get_index(psi.index+1)
arg, consumed = decint(byts)
off = arg >> 4
byts = byts[consumed:]
flags = arg & 0b1111
ans.append('Last article of ending section w.r.t. starting'
' section offset (fvwi): %d [%d absolute]'%(off,
psi.index+off))
ans.append('Flags (always 8?): %d'%flags)
byts = tbs_type_6(byts, psi=psi)
if byts:
# Ended with flag 1,and not EOF, which means there's
# another section transition in this record
arg, consumed = decint(byts)
byts = byts[consumed:]
ans.append('->Offset from start of record to beginning of '
'last starting section: %d'%(arg))
else:
break
byts = read_section_transitions(byts, psi)
# }}}
@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{
elif flags == 0:
byts = tbs_type_6(byts, psi=psi)
else:
raise ValueError('Unkown flags: %d'%flags)
raise ValueError('Unknown flags: %d'%flags)
# }}}
return byts, ans

View File

@ -39,7 +39,7 @@ def encode_number_as_hex(num):
The bytes that follow are simply the hexadecimal representation of the
number.
'''
num = bytes(hex(num)[2:])
num = bytes(hex(num)[2:].upper())
ans = bytearray(num)
ans.insert(0, len(num))
return bytes(ans)

View File

@ -179,7 +179,27 @@ class IndexEntry(object): # {{{
# }}}
class Indexer(object):
class TBS(object): # {{{

    '''
    Compute the trailing byte sequence for a single record from the index
    nodes that start and/or end on that record. The result is stored in
    ``self.bytestring``.
    '''

    def __init__(self, data, is_periodical):
        # Choose the generator matching the document type, then run it
        build = self.periodical_tbs if is_periodical else self.book_tbs
        build(data)

    def periodical_tbs(self, data):
        # Currently always produces an empty sequence
        self.bytestring = b''

    def book_tbs(self, data):
        # Currently always produces an empty sequence
        self.bytestring = b''

# }}}
class Indexer(object): # {{{
def __init__(self, serializer, number_of_text_records,
size_of_last_text_record, opts, oeb):
@ -211,6 +231,8 @@ class Indexer(object):
self.records.insert(0, self.create_header())
self.records.extend(self.cncx.records)
self.calculate_trailing_byte_sequences()
def create_index_record(self): # {{{
header_length = 192
buf = StringIO()
@ -524,3 +546,38 @@ class Indexer(object):
return indices
# }}}
def calculate_trailing_byte_sequences(self):
    '''
    Populate ``self.tbs_map``, mapping each text record number (counted
    from 1) to a TBS object built from the index nodes that overlap it.

    Every node in ``self.indices`` that overlaps a record is classified
    as one of:

        * ``completes``: starts and ends inside the record
        * ``starts``: starts inside the record, ends after it
        * ``ends``: starts before the record, ends inside it
        * ``spans``: starts before and ends after the record (only the
          last such node is kept)
    '''
    self.tbs_map = {}
    for rec in xrange(self.number_of_text_records):
        rec_start = rec * RECORD_SIZE
        rec_end = rec_start + RECORD_SIZE
        data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
            ('spans', None)])
        for node in self.indices:
            if node.offset >= rec_end:
                # Node starts after current record; scanning stops here
                break
            if node.next_offset <= rec_start:
                # Node ends before current record
                continue
            begins_here = node.offset >= rec_start
            finishes_here = node.next_offset <= rec_end
            if begins_here and finishes_here:
                data['completes'].append(node)
            elif begins_here:
                data['starts'].append(node)
            elif finishes_here:
                data['ends'].append(node)
            else:
                data['spans'] = node
        self.tbs_map[rec+1] = TBS(data, self.is_periodical)
def get_trailing_byte_sequence(self, num):
    '''
    Return the ``bytestring`` of the entry stored under ``num`` in
    ``self.tbs_map``. Raises KeyError if there is no entry for ``num``.
    '''
    tbs = self.tbs_map[num]
    return tbs.bytestring
# }}}

View File

@ -95,7 +95,13 @@ class MobiWriter(object):
self.log.exception('Failed to generate MOBI index:')
else:
self.primary_index_record_idx = len(self.records)
for i in xrange(len(self.records)):
if i == 0: continue
tbs = self.indexer.get_trailing_byte_sequence(i)
self.records[i] += tbs
self.records.extend(self.indexer.records)
# }}}
def write_uncrossable_breaks(self): # {{{

View File

@ -141,7 +141,8 @@ class BaseJob(object):
def log_file(self):
if self.log_path:
return open(self.log_path, 'rb')
return cStringIO.StringIO(_('No details available.'))
return cStringIO.StringIO(_('No details available.').encode('utf-8',
'replace'))
@property
def details(self):