mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Sync to trunk.
This commit is contained in:
commit
3d24e1b87d
@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
|||||||
lfeeds = self.get_feeds()
|
lfeeds = self.get_feeds()
|
||||||
for feedobj in lfeeds:
|
for feedobj in lfeeds:
|
||||||
feedtitle, feedurl = feedobj
|
feedtitle, feedurl = feedobj
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup(feedurl)
|
soup = self.index_to_soup(feedurl)
|
||||||
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
|
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
|
||||||
@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
|||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
article.title = soup.find('title').contents[0].strip()
|
article.title = soup.find('title').contents[0].strip()
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
for link_tag in soup.findAll(attrs={"id" : "story"}):
|
||||||
|
link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
|
||||||
|
|
||||||
|
return soup
|
||||||
|
@ -151,7 +151,7 @@ class ISBNDB(Source):
|
|||||||
|
|
||||||
bl = feed.find('BookList')
|
bl = feed.find('BookList')
|
||||||
if bl is None:
|
if bl is None:
|
||||||
err = tostring(etree.find('errormessage'))
|
err = tostring(feed.find('errormessage'))
|
||||||
raise ValueError('ISBNDb query failed:' + err)
|
raise ValueError('ISBNDb query failed:' + err)
|
||||||
total_results = int(bl.get('total_results'))
|
total_results = int(bl.get('total_results'))
|
||||||
shown_results = int(bl.get('shown_results'))
|
shown_results = int(bl.get('shown_results'))
|
||||||
|
@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
|
|||||||
from calibre.utils.date import utc_tz
|
from calibre.utils.date import utc_tz
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||||
get_trailing_data)
|
get_trailing_data, decode_fvwi)
|
||||||
from calibre.utils.magick.draw import identify_data
|
from calibre.utils.magick.draw import identify_data
|
||||||
|
|
||||||
# PalmDB {{{
|
# PalmDB {{{
|
||||||
@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{
|
|||||||
byts = byts[consumed:]
|
byts = byts[consumed:]
|
||||||
ans.append('Unknown (vwi: always 0?): %d'%arg1)
|
ans.append('Unknown (vwi: always 0?): %d'%arg1)
|
||||||
if self.doc_type in (257, 259): # Hierarchical periodical
|
if self.doc_type in (257, 259): # Hierarchical periodical
|
||||||
byts, a = self.interpret_periodical(tbs_type, byts)
|
byts, a = self.interpret_periodical(tbs_type, byts,
|
||||||
|
dat['geom'][0])
|
||||||
ans += a
|
ans += a
|
||||||
if byts:
|
if byts:
|
||||||
sbyts = tuple(hex(b)[2:] for b in byts)
|
sbyts = tuple(hex(b)[2:] for b in byts)
|
||||||
@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{
|
|||||||
ans.append('')
|
ans.append('')
|
||||||
return tbs_type, ans
|
return tbs_type, ans
|
||||||
|
|
||||||
def interpret_periodical(self, tbs_type, byts):
|
def interpret_periodical(self, tbs_type, byts, record_offset):
|
||||||
ans = []
|
ans = []
|
||||||
|
|
||||||
def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
|
def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
|
||||||
@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def read_section_transitions(byts, psi=None): # {{{
|
||||||
|
if psi is None:
|
||||||
|
# Assume parent section is 1
|
||||||
|
psi = self.get_index(1)
|
||||||
|
|
||||||
|
while byts:
|
||||||
|
ai, flags, consumed = decode_fvwi(byts)
|
||||||
|
byts = byts[consumed:]
|
||||||
|
if flags & 0b1000:
|
||||||
|
nsi = self.get_index(psi.index+1)
|
||||||
|
ans.append('Last article in this record of section %d'
|
||||||
|
' (relative to next section index [%d]): '
|
||||||
|
'%d [%d absolute index]'%(psi.index, nsi.index, ai,
|
||||||
|
ai+nsi.index))
|
||||||
|
psi = nsi
|
||||||
|
continue
|
||||||
|
|
||||||
|
ans.append('First article in this record of section %d'
|
||||||
|
' (relative to its parent section): '
|
||||||
|
'%d [%d absolute index]'%(psi.index, ai, ai+psi.index))
|
||||||
|
|
||||||
|
if flags == 0:
|
||||||
|
ans.append('The section %d has only one article'
|
||||||
|
' in this record'%psi.index)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if flags & 0b0100:
|
||||||
|
num = byts[0]
|
||||||
|
byts = byts[1:]
|
||||||
|
ans.append('Number of articles in this record of '
|
||||||
|
'section %d: %d'%(psi.index, num))
|
||||||
|
|
||||||
|
if flags & 0b0010:
|
||||||
|
raise ValueError(
|
||||||
|
'Dont know how to interpret the 0b0010 flag')
|
||||||
|
|
||||||
|
if flags & 0b0001:
|
||||||
|
arg, consumed = decint(byts)
|
||||||
|
byts = byts[consumed:]
|
||||||
|
ans.append('->Offset to start of next section (%d) from start'
|
||||||
|
' of record: %d [%d absolute offset]'%(psi.index+1,
|
||||||
|
arg, arg+record_offset))
|
||||||
|
# }}}
|
||||||
|
|
||||||
if tbs_type == 3: # {{{
|
if tbs_type == 3: # {{{
|
||||||
arg2, consumed = decint(byts)
|
arg2, consumed = decint(byts)
|
||||||
byts = byts[consumed:]
|
byts = byts[consumed:]
|
||||||
@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{
|
|||||||
flags = arg3 & 0b1111
|
flags = arg3 & 0b1111
|
||||||
ans.append('First section index (fvwi): %d'%fsi)
|
ans.append('First section index (fvwi): %d'%fsi)
|
||||||
psi = self.get_index(fsi)
|
psi = self.get_index(fsi)
|
||||||
ans.append('Flags (flag: always 0?): %d'%flags)
|
ans.append('Flags: %d'%flags)
|
||||||
if flags == 4:
|
if flags == 4:
|
||||||
ans.append('Number of articles in this section: %d'%byts[0])
|
ans.append('Number of articles in this section: %d'%byts[0])
|
||||||
byts = byts[1:]
|
byts = byts[1:]
|
||||||
@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unknown flags value: %d'%flags)
|
raise ValueError('Unknown flags value: %d'%flags)
|
||||||
|
byts = read_section_transitions(byts, psi)
|
||||||
|
|
||||||
if byts:
|
|
||||||
byts = tbs_type_6(byts, psi=psi,
|
|
||||||
msg=('First article of ending section, relative to its'
|
|
||||||
' parent\'s index'),
|
|
||||||
fmsg=('->Offset from start of record to beginning of'
|
|
||||||
' last starting section'))
|
|
||||||
while byts:
|
|
||||||
# We have a transition not just an opening first section
|
|
||||||
psi = self.get_index(psi.index+1)
|
|
||||||
arg, consumed = decint(byts)
|
|
||||||
off = arg >> 4
|
|
||||||
byts = byts[consumed:]
|
|
||||||
flags = arg & 0b1111
|
|
||||||
ans.append('Last article of ending section w.r.t. starting'
|
|
||||||
' section offset (fvwi): %d [%d absolute]'%(off,
|
|
||||||
psi.index+off))
|
|
||||||
ans.append('Flags (always 8?): %d'%flags)
|
|
||||||
byts = tbs_type_6(byts, psi=psi)
|
|
||||||
if byts:
|
|
||||||
# Ended with flag 1,and not EOF, which means there's
|
|
||||||
# another section transition in this record
|
|
||||||
arg, consumed = decint(byts)
|
|
||||||
byts = byts[consumed:]
|
|
||||||
ans.append('->Offset from start of record to beginning of '
|
|
||||||
'last starting section: %d'%(arg))
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{
|
|||||||
elif flags == 0:
|
elif flags == 0:
|
||||||
byts = tbs_type_6(byts, psi=psi)
|
byts = tbs_type_6(byts, psi=psi)
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unkown flags: %d'%flags)
|
raise ValueError('Unknown flags: %d'%flags)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
return byts, ans
|
return byts, ans
|
||||||
|
@ -39,7 +39,7 @@ def encode_number_as_hex(num):
|
|||||||
The bytes that follow are simply the hexadecimal representation of the
|
The bytes that follow are simply the hexadecimal representation of the
|
||||||
number.
|
number.
|
||||||
'''
|
'''
|
||||||
num = bytes(hex(num)[2:])
|
num = bytes(hex(num)[2:].upper())
|
||||||
ans = bytearray(num)
|
ans = bytearray(num)
|
||||||
ans.insert(0, len(num))
|
ans.insert(0, len(num))
|
||||||
return bytes(ans)
|
return bytes(ans)
|
||||||
|
@ -179,7 +179,27 @@ class IndexEntry(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class Indexer(object):
|
class TBS(object): # {{{
|
||||||
|
|
||||||
|
'''
|
||||||
|
Take the list of index nodes starting/ending on a record and calculate the
|
||||||
|
trailing byte sequence for the record.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, data, is_periodical):
|
||||||
|
if is_periodical:
|
||||||
|
self.periodical_tbs(data)
|
||||||
|
else:
|
||||||
|
self.book_tbs(data)
|
||||||
|
|
||||||
|
def periodical_tbs(self, data):
|
||||||
|
self.bytestring = b''
|
||||||
|
|
||||||
|
def book_tbs(self, data):
|
||||||
|
self.bytestring = b''
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class Indexer(object): # {{{
|
||||||
|
|
||||||
def __init__(self, serializer, number_of_text_records,
|
def __init__(self, serializer, number_of_text_records,
|
||||||
size_of_last_text_record, opts, oeb):
|
size_of_last_text_record, opts, oeb):
|
||||||
@ -211,6 +231,8 @@ class Indexer(object):
|
|||||||
self.records.insert(0, self.create_header())
|
self.records.insert(0, self.create_header())
|
||||||
self.records.extend(self.cncx.records)
|
self.records.extend(self.cncx.records)
|
||||||
|
|
||||||
|
self.calculate_trailing_byte_sequences()
|
||||||
|
|
||||||
def create_index_record(self): # {{{
|
def create_index_record(self): # {{{
|
||||||
header_length = 192
|
header_length = 192
|
||||||
buf = StringIO()
|
buf = StringIO()
|
||||||
@ -524,3 +546,38 @@ class Indexer(object):
|
|||||||
return indices
|
return indices
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def calculate_trailing_byte_sequences(self):
|
||||||
|
self.tbs_map = {}
|
||||||
|
for i in xrange(self.number_of_text_records):
|
||||||
|
offset = i * RECORD_SIZE
|
||||||
|
next_offset = offset + RECORD_SIZE
|
||||||
|
data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
|
||||||
|
('spans', None)])
|
||||||
|
for index in self.indices:
|
||||||
|
if index.offset >= next_offset:
|
||||||
|
# Node starts after current record
|
||||||
|
break
|
||||||
|
if index.next_offset <= offset:
|
||||||
|
# Node ends before current record
|
||||||
|
continue
|
||||||
|
if index.offset >= offset:
|
||||||
|
# Node starts in current record
|
||||||
|
if index.next_offset <= next_offset:
|
||||||
|
# Node ends in current record
|
||||||
|
data['completes'].append(index)
|
||||||
|
else:
|
||||||
|
data['starts'].append(index)
|
||||||
|
else:
|
||||||
|
# Node starts before current records
|
||||||
|
if index.next_offset <= next_offset:
|
||||||
|
# Node ends in current record
|
||||||
|
data['ends'].append(index)
|
||||||
|
else:
|
||||||
|
data['spans'] = index
|
||||||
|
self.tbs_map[i+1] = TBS(data, self.is_periodical)
|
||||||
|
|
||||||
|
def get_trailing_byte_sequence(self, num):
|
||||||
|
return self.tbs_map[num].bytestring
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
@ -95,7 +95,13 @@ class MobiWriter(object):
|
|||||||
self.log.exception('Failed to generate MOBI index:')
|
self.log.exception('Failed to generate MOBI index:')
|
||||||
else:
|
else:
|
||||||
self.primary_index_record_idx = len(self.records)
|
self.primary_index_record_idx = len(self.records)
|
||||||
|
for i in xrange(len(self.records)):
|
||||||
|
if i == 0: continue
|
||||||
|
tbs = self.indexer.get_trailing_byte_sequence(i)
|
||||||
|
self.records[i] += tbs
|
||||||
self.records.extend(self.indexer.records)
|
self.records.extend(self.indexer.records)
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def write_uncrossable_breaks(self): # {{{
|
def write_uncrossable_breaks(self): # {{{
|
||||||
|
@ -141,7 +141,8 @@ class BaseJob(object):
|
|||||||
def log_file(self):
|
def log_file(self):
|
||||||
if self.log_path:
|
if self.log_path:
|
||||||
return open(self.log_path, 'rb')
|
return open(self.log_path, 'rb')
|
||||||
return cStringIO.StringIO(_('No details available.'))
|
return cStringIO.StringIO(_('No details available.').encode('utf-8',
|
||||||
|
'replace'))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def details(self):
|
def details(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user