diff --git a/recipes/instapaper.recipe b/recipes/instapaper.recipe index c6175a783f..d182e556a2 100644 --- a/recipes/instapaper.recipe +++ b/recipes/instapaper.recipe @@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe): lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) for item in soup.findAll('div', attrs={'class':'cornerControls'}): @@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe): def populate_article_metadata(self, article, soup, first): article.title = soup.find('title').contents[0].strip() + def postprocess_html(self, soup, first_fetch): + for link_tag in soup.findAll(attrs={"id" : "story"}): + link_tag.insert(0,'

'+soup.find('title').contents[0].strip()+'

') + + return soup diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index b33a625ca7..31c5e69d65 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -151,7 +151,7 @@ class ISBNDB(Source): bl = feed.find('BookList') if bl is None: - err = tostring(etree.find('errormessage')) + err = tostring(feed.find('errormessage')) raise ValueError('ISBNDb query failed:' + err) total_results = int(bl.get('total_results')) shown_results = int(bl.get('shown_results')) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index a848f11355..b85d73f55c 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict from calibre.utils.date import utc_tz from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.mobi.utils import (decode_hex_number, decint, - get_trailing_data) + get_trailing_data, decode_fvwi) from calibre.utils.magick.draw import identify_data # PalmDB {{{ @@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{ byts = byts[consumed:] ans.append('Unknown (vwi: always 0?): %d'%arg1) if self.doc_type in (257, 259): # Hierarchical periodical - byts, a = self.interpret_periodical(tbs_type, byts) + byts, a = self.interpret_periodical(tbs_type, byts, + dat['geom'][0]) ans += a if byts: sbyts = tuple(hex(b)[2:] for b in byts) @@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{ ans.append('') return tbs_type, ans - def interpret_periodical(self, tbs_type, byts): + def interpret_periodical(self, tbs_type, byts, record_offset): ans = [] def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{ @@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{ # }}} + def read_section_transitions(byts, psi=None): # {{{ + if psi is None: + # Assume parent section is 1 + psi = self.get_index(1) + + while byts: + ai, flags, consumed = decode_fvwi(byts) + byts = byts[consumed:] + if flags & 0b1000: + nsi = self.get_index(psi.index+1) + ans.append('Last article in this record of section %d' + ' (relative to next section index [%d]): ' + '%d [%d absolute index]'%(psi.index, nsi.index, ai, + ai+nsi.index)) + psi = nsi + continue + + ans.append('First article in this record of section %d' + ' (relative to its parent section): ' + '%d [%d absolute index]'%(psi.index, ai, ai+psi.index)) + + if flags == 0: + ans.append('The section %d has only one article' + ' in this record'%psi.index) + continue + + if flags & 0b0100: + num = byts[0] + byts = byts[1:] + ans.append('Number of articles in this record of ' + 'section %d: %d'%(psi.index, num)) + + if flags & 0b0010: + raise ValueError( + 'Dont know how to interpret the 0b0010 flag') + + if flags & 0b0001: + arg, consumed = decint(byts) + byts = byts[consumed:] + ans.append('->Offset to start of next section (%d) from start' + ' of record: %d [%d absolute offset]'%(psi.index+1, + arg, arg+record_offset)) + # }}} + if tbs_type == 3: # {{{ arg2, consumed = decint(byts) byts = byts[consumed:] @@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{ flags = arg3 & 0b1111 ans.append('First section index (fvwi): %d'%fsi) psi = self.get_index(fsi) - ans.append('Flags (flag: always 0?): %d'%flags) + ans.append('Flags: %d'%flags) if flags == 4: ans.append('Number of articles in this section: %d'%byts[0]) byts = byts[1:] @@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{ pass else: raise ValueError('Unknown flags value: %d'%flags) - - - if byts: - byts = tbs_type_6(byts, psi=psi, - msg=('First article of ending section, relative to its' - ' parent\'s index'), - fmsg=('->Offset from start of record to beginning of' - ' last starting section')) - while byts: - # We have a transition not just an opening first section - psi = self.get_index(psi.index+1) - arg, consumed = decint(byts) - off = arg >> 4 - byts = byts[consumed:] - flags = arg & 0b1111 - ans.append('Last article of ending section w.r.t. starting' - ' section offset (fvwi): %d [%d absolute]'%(off, - psi.index+off)) - ans.append('Flags (always 8?): %d'%flags) - byts = tbs_type_6(byts, psi=psi) - if byts: - # Ended with flag 1,and not EOF, which means there's - # another section transition in this record - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('->Offset from start of record to beginning of ' - 'last starting section: %d'%(arg)) - else: - break + byts = read_section_transitions(byts, psi) # }}} @@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{ elif flags == 0: byts = tbs_type_6(byts, psi=psi) else: - raise ValueError('Unkown flags: %d'%flags) + raise ValueError('Unknown flags: %d'%flags) # }}} return byts, ans diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index cf03c613f4..ae1241e2f1 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -39,7 +39,7 @@ def encode_number_as_hex(num): The bytes that follow are simply the hexadecimal representation of the number. ''' - num = bytes(hex(num)[2:]) + num = bytes(hex(num)[2:].upper()) ans = bytearray(num) ans.insert(0, len(num)) return bytes(ans) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 501b23113f..41c5d2ec91 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -179,7 +179,27 @@ class IndexEntry(object): # {{{ # }}} -class Indexer(object): +class TBS(object): # {{{ + + ''' + Take the list of index nodes starting/ending on a record and calculate the + trailing byte sequence for the record. + ''' + + def __init__(self, data, is_periodical): + if is_periodical: + self.periodical_tbs(data) + else: + self.book_tbs(data) + + def periodical_tbs(self, data): + self.bytestring = b'' + + def book_tbs(self, data): + self.bytestring = b'' +# }}} + +class Indexer(object): # {{{ def __init__(self, serializer, number_of_text_records, size_of_last_text_record, opts, oeb): @@ -211,6 +231,8 @@ class Indexer(object): self.records.insert(0, self.create_header()) self.records.extend(self.cncx.records) + self.calculate_trailing_byte_sequences() + def create_index_record(self): # {{{ header_length = 192 buf = StringIO() @@ -524,3 +546,38 @@ class Indexer(object): return indices # }}} + def calculate_trailing_byte_sequences(self): + self.tbs_map = {} + for i in xrange(self.number_of_text_records): + offset = i * RECORD_SIZE + next_offset = offset + RECORD_SIZE + data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]), + ('spans', None)]) + for index in self.indices: + if index.offset >= next_offset: + # Node starts after current record + break + if index.next_offset <= offset: + # Node ends before current record + continue + if index.offset >= offset: + # Node starts in current record + if index.next_offset <= next_offset: + # Node ends in current record + data['completes'].append(index) + else: + data['starts'].append(index) + else: + # Node starts before current records + if index.next_offset <= next_offset: + # Node ends in current record + data['ends'].append(index) + else: + data['spans'] = index + self.tbs_map[i+1] = TBS(data, self.is_periodical) + + def get_trailing_byte_sequence(self, num): + return self.tbs_map[num].bytestring + +# }}} + diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index a031e2e957..06572f48c4 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -95,7 +95,13 @@ class MobiWriter(object): self.log.exception('Failed to generate MOBI index:') else: self.primary_index_record_idx = len(self.records) + for i in xrange(len(self.records)): + if i == 0: continue + tbs = self.indexer.get_trailing_byte_sequence(i) + self.records[i] += tbs self.records.extend(self.indexer.records) + + # }}} def write_uncrossable_breaks(self): # {{{ diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index f4b54aee95..e75884d387 100644 --- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -141,7 +141,8 @@ class BaseJob(object): def log_file(self): if self.log_path: return open(self.log_path, 'rb') - return cStringIO.StringIO(_('No details available.')) + return cStringIO.StringIO(_('No details available.').encode('utf-8', + 'replace')) @property def details(self):