Sync to trunk.

2025-11-21 14:03:03 -05:00 · 2011-07-25 18:20:28 -04:00 · 2011-07-25 18:20:28 -04:00 · 3d24e1b87d
commit 3d24e1b87d
parent 1e1562495d 0c5a37fbc0
7 changed files with 125 additions and 39 deletions
--- a/recipes/instapaper.recipe
+++ b/recipes/instapaper.recipe
@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
-            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class':'cornerControls'}):
@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    def populate_article_metadata(self, article, soup, first):
        article.title  = soup.find('title').contents[0].strip()

+    def postprocess_html(self, soup, first_fetch):
+        '''
+        Insert the page title, wrapped in <h1> markup, as the first child of
+        every element whose id is "story", then return the soup.
+        '''
+        story_nodes = soup.findAll(attrs={"id" : "story"})
+        for story in story_nodes:
+            # The title is looked up per node, so a page without any
+            # "story" element never touches the <title> tag
+            title_text = soup.find('title').contents[0].strip()
+            story.insert(0, '<h1>' + title_text + '</h1>')
+
+        return soup
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -151,7 +151,7 @@ class ISBNDB(Source):

        bl = feed.find('BookList')
        if bl is None:
-            err = tostring(etree.find('errormessage'))
+            err = tostring(feed.find('errormessage'))
            raise ValueError('ISBNDb query failed:' + err)
        total_results = int(bl.get('total_results'))
        shown_results = int(bl.get('shown_results'))
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
-        get_trailing_data)
+        get_trailing_data, decode_fvwi)
 from calibre.utils.magick.draw import identify_data

 # PalmDB {{{
@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{
            byts = byts[consumed:]
            ans.append('Unknown (vwi: always 0?): %d'%arg1)
            if self.doc_type in (257, 259): # Hierarchical periodical
-                byts, a = self.interpret_periodical(tbs_type, byts)
+                byts, a = self.interpret_periodical(tbs_type, byts,
+                        dat['geom'][0])
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{
        ans.append('')
        return tbs_type, ans

-    def interpret_periodical(self, tbs_type, byts):
+    def interpret_periodical(self, tbs_type, byts, record_offset):
        ans = []

        def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{
@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{

        # }}}

+        def read_section_transitions(byts, psi=None): # {{{
+            '''
+            Decode the run of section transition entries in byts, appending
+            a human readable description of each entry to ans (the list
+            owned by the enclosing interpret_periodical call).
+
+            :param byts: The bytes that remain to be interpreted
+            :param psi: The section entry (as returned by self.get_index)
+                        that the first decoded entry belongs to. Defaults
+                        to the entry with index 1.
+            :return: The bytes left over once decoding stops. The loop only
+                     stops when everything has been consumed, so this is
+                     always empty. It is returned explicitly because the
+                     caller assigns the result back to its own byts.
+            :raises ValueError: If an entry has the 0b0010 flag set
+            '''
+            if psi is None:
+                # Assume parent section is 1
+                psi = self.get_index(1)
+
+            while byts:
+                ai, flags, consumed = decode_fvwi(byts)
+                byts = byts[consumed:]
+                if flags & 0b1000:
+                    # This entry closes the current section: the article
+                    # index is relative to the next section, which becomes
+                    # the current one for the entries that follow
+                    nsi = self.get_index(psi.index+1)
+                    ans.append('Last article in this record of section %d'
+                            ' (relative to next section index [%d]): '
+                            '%d [%d absolute index]'%(psi.index, nsi.index, ai,
+                                ai+nsi.index))
+                    psi = nsi
+                    continue
+
+                ans.append('First article in this record of section %d'
+                        ' (relative to its parent section): '
+                        '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))
+
+                if flags == 0:
+                    ans.append('The section %d has only one article'
+                            ' in this record'%psi.index)
+                    continue
+
+                if flags & 0b0100:
+                    # A single extra byte carries the article count
+                    num = byts[0]
+                    byts = byts[1:]
+                    ans.append('Number of articles in this record of '
+                        'section %d: %d'%(psi.index, num))
+
+                if flags & 0b0010:
+                    raise ValueError(
+                            'Dont know how to interpret the 0b0010 flag')
+
+                if flags & 0b0001:
+                    # record_offset is the argument of the enclosing
+                    # interpret_periodical call
+                    arg, consumed = decint(byts)
+                    byts = byts[consumed:]
+                    ans.append('->Offset to start of next section (%d) from start'
+                            ' of record: %d [%d absolute offset]'%(psi.index+1,
+                                arg, arg+record_offset))
+
+            # Without this the caller's
+            # "byts = read_section_transitions(...)" would silently rebind
+            # byts to None
+            return byts
+        # }}}
+
        if tbs_type == 3: # {{{
            arg2, consumed = decint(byts)
            byts = byts[consumed:]
@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{
            flags = arg3 & 0b1111
            ans.append('First section index (fvwi): %d'%fsi)
            psi = self.get_index(fsi)
-            ans.append('Flags (flag: always 0?): %d'%flags)
+            ans.append('Flags: %d'%flags)
            if flags == 4:
                ans.append('Number of articles in this section: %d'%byts[0])
                byts = byts[1:]
@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{
                pass
            else:
                raise ValueError('Unknown flags value: %d'%flags)
-
-
-            if byts:
-                byts = tbs_type_6(byts, psi=psi,
-                    msg=('First article of ending section, relative to its'
-                    ' parent\'s index'),
-                    fmsg=('->Offset from start of record to beginning of'
-                        ' last starting section'))
-            while byts:
-                # We have a transition not just an opening first section
-                psi = self.get_index(psi.index+1)
-                arg, consumed = decint(byts)
-                off = arg >> 4
-                byts = byts[consumed:]
-                flags = arg & 0b1111
-                ans.append('Last article of ending section w.r.t. starting'
-                        ' section offset (fvwi): %d [%d absolute]'%(off,
-                            psi.index+off))
-                ans.append('Flags (always 8?): %d'%flags)
-                byts = tbs_type_6(byts, psi=psi)
-                if byts:
-                    # Ended with flag 1,and not EOF, which means there's
-                    # another section transition in this record
-                    arg, consumed = decint(byts)
-                    byts = byts[consumed:]
-                    ans.append('->Offset from start of record to beginning of '
-                            'last starting section: %d'%(arg))
-                else:
-                    break
+            byts = read_section_transitions(byts, psi)

            # }}}

@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{
            elif flags == 0:
                byts = tbs_type_6(byts, psi=psi)
            else:
-                raise ValueError('Unkown flags: %d'%flags)
+                raise ValueError('Unknown flags: %d'%flags)
        # }}}

        return byts, ans
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@ -39,7 +39,7 @@ def encode_number_as_hex(num):
    The bytes that follow are simply the hexadecimal representation of the
    number.
    '''
-    num = bytes(hex(num)[2:])
+    num = bytes(hex(num)[2:].upper())
    ans = bytearray(num)
    ans.insert(0, len(num))
    return bytes(ans)
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@ -179,7 +179,27 @@ class IndexEntry(object): # {{{

 # }}}

-class Indexer(object):
+class TBS(object): # {{{
+
+    '''
+    Compute the trailing byte sequence of a single text record from the
+    index nodes that end on, complete within, start on or span it. The
+    result is stored in the bytestring attribute.
+    '''
+
+    def __init__(self, data, is_periodical):
+        # Periodicals and plain books each have their own generator
+        generate = self.periodical_tbs if is_periodical else self.book_tbs
+        generate(data)
+
+    def periodical_tbs(self, data):
+        # Currently produces an empty sequence for every record
+        self.bytestring = b''
+
+    def book_tbs(self, data):
+        # Currently produces an empty sequence for every record
+        self.bytestring = b''
+# }}}
+
+class Indexer(object): # {{{

    def __init__(self, serializer, number_of_text_records,
            size_of_last_text_record, opts, oeb):
@ -211,6 +231,8 @@ class Indexer(object):
        self.records.insert(0, self.create_header())
        self.records.extend(self.cncx.records)

+        self.calculate_trailing_byte_sequences()
+
    def create_index_record(self): # {{{
        header_length = 192
        buf = StringIO()
@ -524,3 +546,38 @@ class Indexer(object):
        return indices
    # }}}

+    def calculate_trailing_byte_sequences(self):
+        '''
+        Build self.tbs_map, which maps each text record number (counted
+        from 1) to the TBS object created from the index nodes that
+        overlap that record.
+        '''
+        self.tbs_map = {}
+        for rec in xrange(self.number_of_text_records):
+            rec_start = rec * RECORD_SIZE
+            rec_end = rec_start + RECORD_SIZE
+            data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]),
+                ('spans', None)])
+            for node in self.indices:
+                if node.offset >= rec_end:
+                    # Node starts after current record. Stopping here
+                    # presumably relies on self.indices being ordered by
+                    # offset -- TODO confirm
+                    break
+                if node.next_offset <= rec_start:
+                    # Node ends before current record
+                    continue
+                starts_here = node.offset >= rec_start
+                ends_here = node.next_offset <= rec_end
+                if starts_here and ends_here:
+                    # Node lies entirely inside the current record
+                    data['completes'].append(node)
+                elif starts_here:
+                    # Node starts in the current record, ends in a later one
+                    data['starts'].append(node)
+                elif ends_here:
+                    # Node starts before the current record and ends in it
+                    data['ends'].append(node)
+                else:
+                    # Node starts before and ends after the current record
+                    data['spans'] = node
+            self.tbs_map[rec+1] = TBS(data, self.is_periodical)
+
+    def get_trailing_byte_sequence(self, num):
+        '''
+        Return the bytestring of the TBS object stored in self.tbs_map
+        under the key num. Raises KeyError if there is no such key.
+        '''
+        tbs = self.tbs_map[num]
+        return tbs.bytestring
+
+# }}}
+
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -95,7 +95,13 @@ class MobiWriter(object):
            self.log.exception('Failed to generate MOBI index:')
        else:
            self.primary_index_record_idx = len(self.records)
+            for i in xrange(len(self.records)):
+                if i == 0: continue
+                tbs = self.indexer.get_trailing_byte_sequence(i)
+                self.records[i] += tbs
            self.records.extend(self.indexer.records)
+
+
    # }}}

    def write_uncrossable_breaks(self): # {{{
--- a/src/calibre/utils/ipc/job.py
+++ b/src/calibre/utils/ipc/job.py
@ -141,7 +141,8 @@ class BaseJob(object):
    def log_file(self):
        if self.log_path:
            return open(self.log_path, 'rb')
-        return cStringIO.StringIO(_('No details available.'))
+        return cStringIO.StringIO(_('No details available.').encode('utf-8',
+            'replace'))

    @property
    def details(self):