sync to trunk

Alex Stanev 2011-07-22 21:01:20 +03:00
commit 1d55467a48
12 changed files with 690 additions and 103 deletions

recipes/idg_se.recipe (new file, 33 lines):

__license__ = 'GPLv3'

from calibre.web.feeds.news import BasicNewsRecipe

class IDGse(BasicNewsRecipe):
    title = 'IDG'
    description = 'IDG.se'
    language = 'se'
    __author__ = 'zapt0'
    oldest_article = 1
    max_articles_per_feed = 40
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    remove_javascript = True

    feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]

    def print_version(self,url):
        return url + '?articleRenderMode=print&m=print'

    def get_cover_url(this):
        return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'

    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class':['divColumn1Article']}),
    ]
    #remove ads
    remove_tags = [
        dict(name='div', attrs={'id':['preamble_ad']}),
        dict(name='ul', attrs={'class':['share']})
    ]

@@ -64,7 +64,7 @@ class UnitedDaily(BasicNewsRecipe):
     __author__ = 'Eddie Lau'
     __version__ = '1.1'
-    language = 'zh-TW'
+    language = 'zh_TW'
     publisher = 'United Daily News Group'
     description = 'United Daily (Taiwan)'
     category = 'News, Chinese, Taiwan'

recipes/utrinski.recipe (new file, 71 lines):

#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
utrinski.com.mk
'''

import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe

class UtrinskiVesnik(BasicNewsRecipe):

    __author__ = 'Darko Spasovski'
    INDEX = 'http://www.utrinski.com.mk/'
    title = 'Utrinski Vesnik'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
    language = 'mk'
    remove_javascript = True
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            ## Remove anything before the start of the article.
            (r'<body.*?Article start-->', lambda match: '<body>'),
            ## Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match : '</body>'),
        ]
    ]

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment'          : description,
        'tags'             : category,
        'language'         : language,
        'linearize_tables' : True
    }

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_TOCTitleBig'}):
            sectionTitle = section.contents[0].string
            tocItemTable = section.findAllPrevious('table')[1]
            if tocItemTable is None: continue
            articles = []
            while True:
                tocItemTable = tocItemTable.nextSibling
                if tocItemTable is None: break
                article = tocItemTable.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_TocItem'})
                if len(article)==0: break
                title = self.tag_to_string(article[0], use_alt=True).strip()
                articles.append({'title': title, 'url':'http://www.utrinski.com.mk/' + article[0]['href'], 'description':'', 'date':''})
            if articles:
                feeds.append((sectionTitle, articles))
        return feeds

    def get_cover_url(self):
        datum = datetime.datetime.today().strftime('%d_%m_%Y')
        return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'

@@ -47,10 +47,12 @@ class ANDROID(USBMS):
             # Google
             0x18d1 : {
-                0x4e11 : [0x0100, 0x226, 0x227],
-                0x4e12: [0x0100, 0x226, 0x227],
-                0x4e21: [0x0100, 0x226, 0x227],
-                0xb058: [0x0222, 0x226, 0x227]},
+                0x0001 : [0x0223],
+                0x4e11 : [0x0100, 0x226, 0x227],
+                0x4e12 : [0x0100, 0x226, 0x227],
+                0x4e21 : [0x0100, 0x226, 0x227],
+                0xb058 : [0x0222, 0x226, 0x227]
+                },

             # Samsung
             0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],

@@ -35,9 +35,9 @@ class EB600(USBMS):
     PRODUCT_ID = [0x1688]
     BCD        = [0x110]

-    VENDOR_NAME      = ['NETRONIX', 'WOLDER']
-    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2']
-    WINDOWS_CARD_A_MEM = 'EBOOK'
+    VENDOR_NAME      = ['NETRONIX', 'WOLDER', 'MD86371']
+    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2', 'MD86371']
+    WINDOWS_CARD_A_MEM = ['EBOOK', 'MD86371']

     OSX_MAIN_MEM = 'EB600 Internal Storage Media'
     OSX_CARD_A_MEM = 'EB600 Card Storage Media'

@@ -8,10 +8,10 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import struct, datetime, sys, os, shutil

-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from calibre.utils.date import utc_tz
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
-from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint,
+from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
     get_trailing_data)
 from calibre.utils.magick.draw import identify_data
@@ -530,21 +530,21 @@ class Tag(object): # {{{
         },

         'chapter_with_subchapters' : {
-                22 : ('First subchapter index', 'first_subchapter_index'),
-                23 : ('Last subchapter index', 'last_subchapter_index'),
+                22 : ('First subchapter index', 'first_child_index'),
+                23 : ('Last subchapter index', 'last_child_index'),
         },

         'periodical' : {
                 5 : ('Class offset in cncx', 'class_offset'),
-                22 : ('First section index', 'first_section_index'),
-                23 : ('Last section index', 'last_section_index'),
+                22 : ('First section index', 'first_child_index'),
+                23 : ('Last section index', 'last_child_index'),
         },

         'section' : {
                 5 : ('Class offset in cncx', 'class_offset'),
-                21 : ('Periodical index', 'periodical_index'),
-                22 : ('First article index', 'first_article_index'),
-                23 : ('Last article index', 'last_article_index'),
+                21 : ('Periodical index', 'parent_index'),
+                22 : ('First article index', 'first_child_index'),
+                23 : ('Last article index', 'last_child_index'),
         },

     }
@@ -625,11 +625,56 @@ class IndexEntry(object): # {{{
                 return tag.cncx_value
         return ''

+    @property
+    def offset(self):
+        for tag in self.tags:
+            if tag.attr == 'offset':
+                return tag.value
+        return 0
+
+    @property
+    def size(self):
+        for tag in self.tags:
+            if tag.attr == 'size':
+                return tag.value
+        return 0
+
+    @property
+    def depth(self):
+        for tag in self.tags:
+            if tag.attr == 'depth':
+                return tag.value
+        return 0
+
+    @property
+    def parent_index(self):
+        for tag in self.tags:
+            if tag.attr == 'parent_index':
+                return tag.value
+        return -1
+
+    @property
+    def first_child_index(self):
+        for tag in self.tags:
+            if tag.attr == 'first_child_index':
+                return tag.value
+        return -1
+
+    @property
+    def last_child_index(self):
+        for tag in self.tags:
+            if tag.attr == 'last_child_index':
+                return tag.value
+        return -1
+
     def __str__(self):
         ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%(
             self.index, self.entry_type, len(self.tags))]
         for tag in self.tags:
             ans.append('\t'+str(tag))
+        if self.first_child_index != -1:
+            ans.append('\tNumber of children: %d'%(self.last_child_index -
+                self.first_child_index + 1))
         return '\n'.join(ans)

     # }}}
@@ -679,6 +724,15 @@ class IndexRecord(object): # {{{
             entry_type = ord(indxt[off+consumed])
             self.indices.append(IndexEntry(index, entry_type,
                 indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries))
+            index = self.indices[-1]
+
+    def get_parent(self, index):
+        if index.depth < 1:
+            return None
+        parent_depth = index.depth - 1
+        for p in self.indices:
+            if p.depth != parent_depth:
+                continue

     def __str__(self):
@@ -738,8 +792,7 @@ class CNCX(object) : # {{{

 class TextRecord(object): # {{{

-    def __init__(self, idx, record, extra_data_flags, decompress, index_record,
-            doc_type):
+    def __init__(self, idx, record, extra_data_flags, decompress):
         self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
         self.raw = decompress(self.raw)
         if 0 in self.trailing_data:
@@ -751,60 +804,6 @@ class TextRecord(object): # {{{
         self.idx = idx

-        if 'indexing' in self.trailing_data and index_record is not None:
-            self.interpret_indexing(doc_type, index_record.indices)
-
-    def interpret_indexing(self, doc_type, indices):
-        raw = self.trailing_data['indexing']
-        ident, consumed = decint(raw)
-        raw = raw[consumed:]
-        entry_type = ident & 0b111
-        index_entry_idx = ident >> 3
-        index_entry = None
-        for i in indices:
-            if i.index == index_entry_idx:
-                index_entry = i.label
-                break
-        self.trailing_data['interpreted_indexing'] = (
-            'Type: %s, Index Entry: %s'%(entry_type, index_entry))
-        if doc_type == 2: # Book
-            self.interpret_book_indexing(raw, entry_type)
-
-    def interpret_book_indexing(self, raw, entry_type):
-        arg1, consumed = decint(raw)
-        raw = raw[consumed:]
-        if arg1 != 0:
-            raise ValueError('TBS index entry has unknown arg1: %d'%
-                    arg1)
-        if entry_type == 2:
-            desc = ('This record has only a single starting or a single'
-                    ' ending point')
-            if raw:
-                raise ValueError('TBS index entry has unknown extra bytes:'
-                        ' %r'%raw)
-        elif entry_type == 3:
-            desc = ('This record is spanned by a single node (i.e. it'
-                    ' has no start or end points)')
-            arg2, consumed = decint(raw)
-            if arg2 != 0:
-                raise ValueError('TBS index entry has unknown arg2: %d'%
-                        arg2)
-        elif entry_type == 6:
-            if len(raw) != 1:
-                raise ValueError('TBS index entry has unknown extra bytes:'
-                        ' %r'%raw)
-            num = ord(raw[0])
-            # An unmatched starting or ending point each contributes 1 to
-            # this count. A matched pair of starting and ending points
-            # together contribute 1 to this count. Note that you can only
-            # ever have either 1 unmatched start point or 1 unmatched end
-            # point, never both (logically impossible).
-            desc = ('This record has %d starting/ending points and/or complete'
-                    ' nodes.')%num
-        else:
-            raise ValueError('Unknown TBS index entry type: %d for book'%entry_type)
-        self.trailing_data['interpreted_indexing'] += ' :: ' + desc
-
     def dump(self, folder):
         name = '%06d'%self.idx
         with open(os.path.join(folder, name+'.txt'), 'wb') as f:
@@ -848,6 +847,231 @@ class BinaryRecord(object): # {{{

 # }}}

+class TBSIndexing(object): # {{{
+
+    def __init__(self, text_records, indices, doc_type):
+        self.record_indices = OrderedDict()
+        self.doc_type = doc_type
+        self.indices = indices
+        pos = 0
+        for r in text_records:
+            start = pos
+            pos += len(r.raw)
+            end = pos - 1
+            self.record_indices[r] = x = {'starts':[], 'ends':[],
+                    'complete':[], 'geom': (start, end)}
+            for entry in indices:
+                istart, sz = entry.offset, entry.size
+                iend = istart + sz - 1
+                has_start = istart >= start and istart <= end
+                has_end = iend >= start and iend <= end
+                rec = None
+                if has_start and has_end:
+                    rec = 'complete'
+                elif has_start and not has_end:
+                    rec = 'starts'
+                elif not has_start and has_end:
+                    rec = 'ends'
+                if rec:
+                    x[rec].append(entry)
+
+    def get_index(self, idx):
+        for i in self.indices:
+            if i.index == idx: return i
+        raise IndexError('Index %d not found'%idx)
+
+    def __str__(self):
+        ans = ['*'*20 + ' TBS Indexing (%d records) '%len(self.record_indices)+ '*'*20]
+        for r, dat in self.record_indices.iteritems():
+            ans += self.dump_record(r, dat)[-1]
+        return '\n'.join(ans)
+
+    def dump(self, bdir):
+        types = defaultdict(list)
+        for r, dat in self.record_indices.iteritems():
+            tbs_type, strings = self.dump_record(r, dat)
+            if tbs_type == 0: continue
+            types[tbs_type] += strings
+        for typ, strings in types.iteritems():
+            with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
+                f.write('\n'.join(strings))
+
+    def dump_record(self, r, dat):
+        ans = []
+        ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
+            dat['geom'][0], dat['geom'][1]))
+        s, e, c = dat['starts'], dat['ends'], dat['complete']
+        ans.append(('\tContains: %d index entries '
+            '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
+                c, s))))
+        byts = bytearray(r.trailing_data.get('indexing', b''))
+        sbyts = tuple(hex(b)[2:] for b in byts)
+        ans.append('TBS bytes: %s'%(' '.join(sbyts)))
+        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
+            if entries:
+                ans.append('\t%s:'%typ)
+                for x in entries:
+                    ans.append(('\t\tIndex Entry: %d (Parent index: %d, '
+                        'Depth: %d, Offset: %d, Size: %d) [%s]')%(
+                        x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
+        def bin3(num):
+            ans = bin(num)[2:]
+            return '0'*(3-len(ans)) + ans
+        tbs_type = 0
+        if len(byts):
+            outer, consumed = decint(byts)
+            byts = byts[consumed:]
+            tbs_type = outer & 0b111
+            ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type))
+            ans.append('Outer Index entry: %d'%(outer >> 3))
+            arg1, consumed = decint(byts)
+            byts = byts[consumed:]
+            ans.append('Unknown (vwi: always 0?): %d'%arg1)
+            if self.doc_type in (257, 259): # Hierarchical periodical
+                byts, a = self.interpret_periodical(tbs_type, byts)
+                ans += a
+            if byts:
+                sbyts = tuple(hex(b)[2:] for b in byts)
+                ans.append('Remaining bytes: %s'%' '.join(sbyts))
+
+        ans.append('')
+        return tbs_type, ans
+
+    def interpret_periodical(self, tbs_type, byts):
+        ans = []
+
+        def tbs_type_6(byts, psi=None, msg=None): # {{{
+            if psi is None:
+                # Assume parent section is 1
+                psi = self.get_index(1)
+            if msg is None:
+                msg = ('Article index at start of record or first article'
+                        ' index, relative to parent section')
+            if byts:
+                # byts could be empty
+                arg, consumed = decint(byts)
+                byts = byts[consumed:]
+                flags = (arg & 0b1111)
+                ai = (arg >> 4)
+                ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai,
+                    ai+psi.index))
+                if flags == 1:
+                    arg, consumed = decint(byts)
+                    byts = byts[consumed:]
+                    ans.append('EOF (vwi: should be 0): %d'%arg)
+                elif flags in (4, 5):
+                    num = byts[0]
+                    byts = byts[1:]
+                    ans.append('Number of article nodes in the record (byte): %d'%num)
+                    if flags == 5:
+                        arg, consumed = decint(byts)
+                        byts = byts[consumed:]
+                        ans.append('Unknown ??? (vwi)): %d'%(arg))
+                elif flags == 0:
+                    pass
+                else:
+                    raise ValueError('Unknown flags: %d'%flags)
+            return byts
+        # }}}
+
+        if tbs_type == 3: # {{{
+            arg2, consumed = decint(byts)
+            byts = byts[consumed:]
+            ans.append('Unknown (vwi: always 0?): %d'%arg2)
+
+            arg3, consumed = decint(byts)
+            byts = byts[consumed:]
+            fsi = arg3 >> 4
+            extra = arg3 & 0b1111
+            ans.append('First section index (fvwi): %d'%fsi)
+            psi = self.get_index(fsi)
+            ans.append('Extra bits (flag: always 0?): %d'%extra)
+
+            byts = tbs_type_6(byts, psi=psi,
+                    msg=('First article of ending section, relative to its'
+                        ' parent\'s index'))
+
+            if byts:
+                # We have a transition not just an opening first section
+                psi = self.get_index(psi.index+1)
+                arg, consumed = decint(byts)
+                off = arg >> 4
+                byts = byts[consumed:]
+                flags = arg & 0b1111
+                ans.append('Last article of ending section w.r.t. starting'
+                        ' section offset (fvwi): %d [%d absolute]'%(off,
+                            psi.index+off))
+                ans.append('Flags (always 8?): %d'%flags)
+                byts = tbs_type_6(byts, psi=psi)
+        # }}}
+
+        elif tbs_type == 7: # {{{
+            # This occurs for records that have no section nodes and
+            # whose parent section's index == 1
+            ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2]))
+            byts = byts[2:]
+            arg, consumed = decint(byts)
+            byts = byts[consumed:]
+            ai = arg >> 4
+            flags = arg & 0b1111
+            ans.append('Article at start of record (fvwi): %d'%ai)
+            if flags == 4:
+                num = byts[0]
+                byts = byts[1:]
+                ans.append('Number of articles in record (byte): %d'%num)
+            elif flags == 0:
+                pass
+            elif flags == 1:
+                arg, consumed = decint(byts)
+                byts = byts[consumed:]
+                ans.append('EOF (vwi: should be 0): %d'%arg)
+            else:
+                raise ValueError('Unknown flags value: %d'%flags)
+        # }}}
+
+        elif tbs_type == 6: # {{{
+            # This is used for records spanned by an article whose parent
+            # section's index == 1 or for the opening record if it contains the
+            # periodical start, section 1 start and at least one article. The
+            # two cases are distinguished by the flags on the article index
+            # vwi.
+            unk = byts[0]
+            byts = byts[1:]
+            ans.append('Unknown (byte: always 2?): %d'%unk)
+            byts = tbs_type_6(byts)
+        # }}}
+
+        elif tbs_type == 2: # {{{
+            # This occurs for records with no section nodes and whose parent
+            # section's index != 1 (undefined (records before the first
+            # section) or > 1)
+            # This is also used for records that are spanned by an article
+            # whose parent section index > 1. In this case the flags of the
+            # vwi referring to the article at the start
+            # of the record are set to 1 instead of 4.
+            arg, consumed = decint(byts)
+            byts = byts[consumed:]
+            flags = (arg & 0b1111)
+            psi = (arg >> 4)
+            ans.append('Parent section index (fvwi): %d'%psi)
+            psi = self.get_index(psi)
+            ans.append('Flags: %d'%flags)
+            if flags == 1:
+                arg, consumed = decint(byts)
+                byts = byts[consumed:]
+                ans.append('Unknown (vwi?: always 0?): %d'%arg)
+                byts = tbs_type_6(byts, psi=psi)
+            elif flags == 0:
+                byts = tbs_type_6(byts, psi=psi)
+            else:
+                raise ValueError('Unkown flags: %d'%flags)
+        # }}}
+
+        return byts, ans
+
+# }}}
+
 class MOBIFile(object): # {{{

     def __init__(self, stream):
@@ -910,8 +1134,7 @@ class MOBIFile(object): # {{{
         if fntbr == 0xffffffff:
             fntbr = len(self.records)
         self.text_records = [TextRecord(r, self.records[r],
-            self.mobi_header.extra_data_flags, decompress, self.index_record,
-            self.mobi_header.type_raw) for r in xrange(1,
+            self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
             min(len(self.records), ntr+1))]
         self.image_records, self.binary_records = [], []
         for i in xrange(fntbr, len(self.records)):
@@ -930,6 +1153,9 @@ class MOBIFile(object): # {{{
             else:
                 self.binary_records.append(BinaryRecord(i, r))

+        if self.index_record is not None:
+            self.tbs_indexing = TBSIndexing(self.text_records,
+                    self.index_record.indices, self.mobi_header.type_raw)

     def print_header(self, f=sys.stdout):
         print (str(self.palmdb).encode('utf-8'), file=f)
@@ -961,6 +1187,9 @@ def inspect_mobi(path_or_stream, prefix='decompiled'):
         print(str(f.cncx).encode('utf-8'), file=out)
         print('\n\n', file=out)
         print(str(f.index_record), file=out)
+    with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
+        print(str(f.tbs_indexing), file=out)
+    f.tbs_indexing.dump(ddir)
     for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
             ('binary', 'binary_records')]:

New file (189 lines added):
Reverse engineering the trailing byte sequences for hierarchical periodicals
===============================================================================
In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag.
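
For orientation, here is a minimal sketch (not the calibre implementation, though it mirrors the decint helper in calibre.ebooks.mobi.utils) of how a forward vwi and the fvwi flag split are read::

    def read_vwi(byts):
        # 7 data bits per byte; the byte whose high bit is set ends the value
        val, consumed = 0, 0
        for b in bytearray(byts):
            consumed += 1
            val = (val << 7) | (b & 0x7f)
            if b & 0x80:
                break
        return val, consumed

    def read_fvwi(byts):
        # an fvwi is a vwi whose lowest four bits are treated as flags
        val, consumed = read_vwi(byts)
        return val >> 4, val & 0b1111, consumed

    # 0xc4 decodes to 0x44: index 4 with flag bits 0b0100
    print(read_fvwi(b'\xc4'))   # -> (4, 4, 1)
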
Opening record
----------------
The text record that contains the opening node for the periodical (depth=0 node in the NCX) can have TBS of 3 different forms:
1. If it has only the periodical node and no section/article nodes, TBS of type 2, like this::
Record #1: Starts at: 0 Ends at: 4095
Contains: 1 index entries (0 ends, 0 complete, 1 starts)
TBS bytes: 82 80
Starts:
Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
TBS Type: 010 (2)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
2. A periodical and a section node, but no article nodes, TBS type of 6, like this::
Record #1: Starts at: 0 Ends at: 4095
Contains: 2 index entries (0 ends, 0 complete, 2 starts)
TBS bytes: 86 80 2
Starts:
Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 93254) [j_x's Google reader]
Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 49280) [Ars Technica]
TBS Type: 110 (6)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Unknown (byte: always 2?): 2
3. If it has both the section 1 node and at least one article node, TBS of type 6, like this::
Record #1: Starts at: 0 Ends at: 4095
Contains: 4 index entries (0 ends, 1 complete, 3 starts)
TBS bytes: 86 80 2 c4 2
Complete:
Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 549, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
Starts:
Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 79253) [j_x's Google reader]
Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 35279) [Ars Technica]
Index Entry: 6 (Parent index: 1, Depth: 2, Offset: 2415, Size: 2764) [Week in Apple: ZFS on Mac OS X, rogue tethering, DUI apps, and more]
TBS Type: 110 (6)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Unknown (byte: always 2?): 2
Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
Number of article nodes in the record (byte): 2
If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record.
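
Decoding the case 3 bytes by hand makes the structure concrete (a sketch, not part of the committed file; it reuses the simple vwi reader from the introduction)::

    def read_vwi(byts):
        val, consumed = 0, 0
        for b in bytearray(byts):
            consumed += 1
            val = (val << 7) | (b & 0x7f)
            if b & 0x80:
                break
        return val, consumed

    tbs = bytearray(b'\x86\x80\x02\xc4\x02')
    outer, n = read_vwi(tbs); tbs = tbs[n:]
    print('TBS type: %d, outer index entry: %d' % (outer & 0b111, outer >> 3))  # 6, 0
    zero, n = read_vwi(tbs); tbs = tbs[n:]       # the "always 0?" vwi
    unknown_byte, tbs = tbs[0], tbs[1:]          # the "always 2?" byte
    art, n = read_vwi(tbs); tbs = tbs[n:]
    print('first article (fvwi): %d, flags: %d' % (art >> 4, art & 0b1111))     # 4, flags 4
    print('article nodes in record: %d' % tbs[0])                               # 2
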
Records with no nodes
------------------------
These records are spanned by a single article. They are of two types:
1. If the parent section index is 1, TBS type of 6, like this::
Record #4: Starts at: 12288 Ends at: 16383
Contains: 0 index entries (0 ends, 0 complete, 0 starts)
TBS bytes: 86 80 2 c1 80
TBS Type: 110 (6)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Unknown (byte: always 2?): 2
Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
EOF (vwi: should be 0): 0
If the record is before the first article, the TBS bytes would be: 86 80 2
2. If the parent section index is > 1, TBS type of 2, like this::
Record #14: Starts at: 53248 Ends at: 57343
Contains: 0 index entries (0 ends, 0 complete, 0 starts)
TBS bytes: 82 80 a0 1 e1 80
TBS Type: 010 (2)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Parent section index (fvwi): 2
Flags: 0
Article index at start of record or first article index, relative to parent section (fvwi): 14 [16 absolute]
EOF (vwi: should be 0): 0
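
The same hand decoding applied to the type 2 sequence above, 82 80 a0 1 e1 80, also shows an fvwi that spans two bytes (again a sketch, not part of the committed file)::

    def read_vwi(byts):
        val, consumed = 0, 0
        for b in bytearray(byts):
            consumed += 1
            val = (val << 7) | (b & 0x7f)
            if b & 0x80:
                break
        return val, consumed

    tbs = bytearray(b'\x82\x80\xa0\x01\xe1\x80')
    outer, n = read_vwi(tbs); tbs = tbs[n:]      # type 2, outer index entry 0
    zero, n = read_vwi(tbs); tbs = tbs[n:]       # the "always 0?" vwi
    psi, n = read_vwi(tbs); tbs = tbs[n:]
    print('parent section (fvwi): %d, flags: %d' % (psi >> 4, psi & 0b1111))    # 2, 0
    art, n = read_vwi(tbs); tbs = tbs[n:]        # 0x01 0xe1 -> 225, a two byte vwi
    print('first article (fvwi): %d, flags: %d' % (art >> 4, art & 0b1111))     # 14, 1
    eof, n = read_vwi(tbs)
    print('EOF marker (should be 0): %d' % eof)                                 # 0
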
Records with only article nodes
-----------------------------------
Such records have no section transitions (i.e. a section end/section start pair). They have only one or more article nodes. They are of two types:
1. If the parent section index is 1, TBS type of 7, like this::
Record #6: Starts at: 20480 Ends at: 24575
Contains: 2 index entries (1 ends, 0 complete, 1 starts)
TBS bytes: 87 80 2 80 1 84 2
Ends:
Index Entry: 9 (Parent index: 1, Depth: 2, Offset: 16453, Size: 4199) [Vaccine's success spurs whooping cough comeback]
Starts:
Index Entry: 10 (Parent index: 1, Depth: 2, Offset: 20652, Size: 4246) [Apple's mobile products do not violate Nokia patents, says ITC]
TBS Type: 111 (7)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Unknown: '\x02\x80' (vwi?: Always 256)
Article at start of record (fvwi): 8
Number of articles in record (byte): 2
If there was only one article in the record, the last two bytes would be replaced by a single byte: 80
If this record is the first record with an article, then the article at the start of the record should be the last section index. At least, that's what kindlegen does, though if you ask me, it should be the first section index.
2. If the parent section index is > 1, TBS type of 2, like this::
Record #16: Starts at: 61440 Ends at: 65535
Contains: 5 index entries (1 ends, 3 complete, 1 starts)
TBS bytes: 82 80 a1 80 1 f4 5
Ends:
Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 60920, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
Complete:
Index Entry: 18 (Parent index: 2, Depth: 2, Offset: 62002, Size: 1016) [Rumour: OS X Lion nearing Golden Master stage]
Index Entry: 19 (Parent index: 2, Depth: 2, Offset: 63018, Size: 1045) [iOS 4.3.1 released]
Index Entry: 20 (Parent index: 2, Depth: 2, Offset: 64063, Size: 972) [Windows 8 'system reset' image leaks]
Starts:
Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 65035, Size: 1057) [Windows Phone 7: Why it's failing]
TBS Type: 010 (2)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Parent section index (fvwi) : 2
Flags: 1
Unknown (vwi: always 0?): 0
Article index at start of record or first article index, relative to parent section (fvwi): 15 [17 absolute]
Number of article nodes in the record (byte): 5
If there was only one article in the record, the last two bytes would be replaced by a single byte: f0
Records with a section transition
-----------------------------------
In such a record there is a transition from one section to the next. As such the record must have at least one article ending and one article starting, except in the case of the first section.
TODO: Note you have to test the cases of first section, a single transition and multiple transitions.
1. The first section::
Record #2: Starts at: 4096 Ends at: 8191
Contains: 2 index entries (0 ends, 0 complete, 2 starts)
TBS bytes: 83 80 80 90 c0
Starts:
Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 7766, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
TBS Type: 011 (3)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Unknown (vwi: always 0?): 0
First section index (fvwi) : 1
Extra bits: 0
First section starts
Article at start of block as offset from parent index (fvwi): 4 [5 absolute]
Flags: 0
If there was more than one article at the start then the last byte would be replaced by: c4 n where n is the number of articles
Ending record
----------------
Logically, ending records must have at least one article ending, one section ending and the periodical ending. They are of TBS type 2, like this::
Record #17: Starts at: 65536 Ends at: 68684
Contains: 4 index entries (3 ends, 1 complete, 0 starts)
TBS bytes: 82 80 c0 4 f4 2
Ends:
Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 51234, Size: 17451) [Slashdot]
Index Entry: 43 (Parent index: 4, Depth: 2, Offset: 65422, Size: 1717) [US ITC May Reverse Judge&#39;s Ruling In Kodak vs. Apple]
Complete:
Index Entry: 44 (Parent index: 4, Depth: 2, Offset: 67139, Size: 1546) [Google Starts Testing Google Music Internally]
TBS Type: 010 (2)
Outer Index entry: 0
Unknown (vwi: always 0?): 0
Parent section index (fvwi): 4
Flags: 0
Article at start of block as offset from parent index (fvwi): 39 [43 absolute]
Number of nodes (byte): 2
If the record had only a single article end, the last two bytes would be replaced with: f0

@@ -79,7 +79,7 @@ def encint(value, forward=True):

 def decint(raw, forward=True):
     '''
-    Read a variable width integer from the bytestring raw and return the
+    Read a variable width integer from the bytestring or bytearray raw and return the
     integer and the number of bytes read. If forward is True bytes are read
     from the start of raw, otherwise from the end of raw.

@@ -88,8 +88,10 @@ def decint(raw, forward=True):
     '''
     val = 0
     byts = bytearray()
-    for byte in raw if forward else reversed(raw):
-        bnum = ord(byte)
+    src = bytearray(raw)
+    if not forward:
+        src.reverse()
+    for bnum in src:
         byts.append(bnum & 0b01111111)
         if bnum & 0b10000000:
             break
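
For context, a standalone sketch of the round trip this forward encoding implies (my own reimplementation, not an import of calibre.ebooks.mobi.utils; the real encint/decint additionally support reading backwards from the end of raw):

    def encode_fwd_vwi(value):
        # 7 bits per byte, most significant group first; the final byte
        # carries the high bit as a terminator.
        byts = bytearray()
        while True:
            byts.append(value & 0x7f)
            value >>= 7
            if value == 0:
                break
        byts.reverse()
        byts[-1] |= 0x80
        return bytes(byts)

    def decode_fwd_vwi(raw):
        # same loop shape as the decint hunk above
        val, consumed = 0, 0
        for bnum in bytearray(raw):
            consumed += 1
            val = (val << 7) | (bnum & 0x7f)
            if bnum & 0x80:
                break
        return val, consumed

    for num in (0, 5, 127, 128, 225, 100000):
        enc = encode_fwd_vwi(num)
        assert decode_fwd_vwi(enc) == (num, len(enc))
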
@@ -161,7 +163,7 @@ def get_trailing_data(record, extra_data_flags):
     '''
     data = OrderedDict()
     for i in xrange(16, -1, -1):
-        flag = 2**i
+        flag = 1 << i # 2**i
         if flag & extra_data_flags:
             if i == 0:
                 # Only the first two bits are used for the size since there can

@@ -18,7 +18,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
-from calibre.ebooks.mobi.writer2.utils import (rescale_image, encint)
+from calibre.ebooks.mobi.utils import (rescale_image, encint)

 EXTH_CODES = {
     'creator': 100,

@@ -1,20 +1,39 @@
 #! /usr/bin/env python
 # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
-# Modified by Kovid Goyal <kovid@kovidgoyal.net>

 """Generate binary message catalog from textual translation description.

 This program converts a textual Uniforum-style message catalog (.po file) into
 a binary GNU catalog (.mo file). This is essentially the same function as the
 GNU msgfmt program, however, it is a simpler implementation.

+Usage: msgfmt.py [OPTIONS] filename.po
+
+Options:
+    -o file
+    --output-file=file
+        Specify the output file to write to. If omitted, output will go to a
+        file named filename.mo (based off the input file name).
+
+    -h
+    --help
+        Print this message and exit.
+
+    -V
+    --version
+        Display version information and exit.
+
 """

 import sys
 import os
+import getopt
 import struct
 import array

-__version__ = "1.2"
+__version__ = "1.1"
+
+MESSAGES = {}

 def usage(code, msg=''):
     print >> sys.stderr, __doc__
@@ -23,16 +42,16 @@ def usage(code, msg=''):
     sys.exit(code)


-def add(id, str, fuzzy, MESSAGES):
+def add(id, str, fuzzy):
     "Add a non-fuzzy translation to the dictionary."
+    global MESSAGES
     if not fuzzy and str:
         MESSAGES[id] = str


-def generate(MESSAGES):
+def generate():
     "Return the generated output."
+    global MESSAGES
     keys = MESSAGES.keys()
     # the keys are sorted in the .mo file
     keys.sort()
@@ -44,6 +63,7 @@ def generate(MESSAGES):
         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
         ids += id + '\0'
         strs += MESSAGES[id] + '\0'
+    output = ''
     # The header is 7 32-bit unsigned integers. We don't use hash tables, so
     # the keys start right after the index tables.
     # translated string.
@@ -71,9 +91,7 @@ def generate(MESSAGES):
     return output


 def make(filename, outfile):
-    MESSAGES = {}
-
     ID = 1
     STR = 2
@@ -101,7 +119,7 @@ def make(filename, outfile):
         lno += 1
         # If we get a comment line after a msgstr, this is a new entry
         if l[0] == '#' and section == STR:
-            add(msgid, msgstr, fuzzy, MESSAGES)
+            add(msgid, msgstr, fuzzy)
             section = None
             fuzzy = 0
         # Record a fuzzy mark
@@ -111,16 +129,39 @@ def make(filename, outfile):
         if l[0] == '#':
             continue
         # Now we are in a msgid section, output previous section
-        if l.startswith('msgid'):
+        if l.startswith('msgid') and not l.startswith('msgid_plural'):
             if section == STR:
-                add(msgid, msgstr, fuzzy, MESSAGES)
+                add(msgid, msgstr, fuzzy)
             section = ID
             l = l[5:]
             msgid = msgstr = ''
+            is_plural = False
+        # This is a message with plural forms
+        elif l.startswith('msgid_plural'):
+            if section != ID:
+                print >> sys.stderr, 'msgid_plural not preceeded by msgid on %s:%d' %\
+                    (infile, lno)
+                sys.exit(1)
+            l = l[12:]
+            msgid += '\0' # separator of singular and plural
+            is_plural = True
         # Now we are in a msgstr section
         elif l.startswith('msgstr'):
             section = STR
-            l = l[6:]
+            if l.startswith('msgstr['):
+                if not is_plural:
+                    print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
+                        (infile, lno)
+                    sys.exit(1)
+                l = l.split(']', 1)[1]
+                if msgstr:
+                    msgstr += '\0' # Separator of the various plural forms
+            else:
+                if is_plural:
+                    print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
+                        (infile, lno)
+                    sys.exit(1)
+                l = l[6:]
         # Skip empty lines
         l = l.strip()
         if not l:
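
To illustrate what the new plural branches accumulate (a sketch with invented strings, not taken from any real .po file): an entry with msgid, msgid_plural and indexed msgstr lines is stored as NUL-joined strings, which is the layout GNU gettext expects in the .mo catalog:

    # Hypothetical .po fragment:
    #   msgid "%d book"
    #   msgid_plural "%d books"
    #   msgstr[0] "%d kniga"
    #   msgstr[1] "%d knigi"
    #
    # msgid_plural appends '\0' plus the plural form to msgid; every
    # msgstr[n] after the first appends '\0' plus that form to msgstr.
    msgid = '%d book' + '\0' + '%d books'
    msgstr = '%d kniga' + '\0' + '%d knigi'
    MESSAGES = {msgid: msgstr}
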
@@ -138,22 +179,40 @@ def make(filename, outfile):
             sys.exit(1)
     # Add last entry
     if section == STR:
-        add(msgid, msgstr, fuzzy, MESSAGES)
+        add(msgid, msgstr, fuzzy)

     # Compute output
-    output = generate(MESSAGES)
+    output = generate()
+    outfile.write(output)
+
+
+def main():
     try:
-        outfile.write(output)
-    except IOError,msg:
-        print >> sys.stderr, msg
+        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
+                                   ['help', 'version', 'output-file='])
+    except getopt.error, msg:
+        usage(1, msg)
+
+    outfile = None
+    # parse options
+    for opt, arg in opts:
+        if opt in ('-h', '--help'):
+            usage(0)
+        elif opt in ('-V', '--version'):
+            print >> sys.stderr, "msgfmt.py", __version__
+            sys.exit(0)
+        elif opt in ('-o', '--output-file'):
+            outfile = arg
+    # do it
+    if not args:
+        print >> sys.stderr, 'No input file given'
+        print >> sys.stderr, "Try `msgfmt --help' for more information."
+        return

-def main(outfile, args=sys.argv[1:]):
     for filename in args:
         make(filename, outfile)
-    return 0


 if __name__ == '__main__':
-    sys.exit(main(sys.stdout))
+    main()

@@ -71,13 +71,13 @@ def set_translators():
     lang = get_lang()
     if lang:
         buf = iso639 = None
-        if os.access(lang+'.po', os.R_OK):
+        mpath = get_lc_messages_path(lang)
+        if mpath and os.access(mpath+'.po', os.R_OK):
             from calibre.translations.msgfmt import make
             buf = cStringIO.StringIO()
-            make(lang+'.po', buf)
+            make(mpath+'.po', buf)
             buf = cStringIO.StringIO(buf.getvalue())

-        mpath = get_lc_messages_path(lang)
         if mpath is not None:
             with ZipFile(P('localization/locales.zip',
                 allow_user_override=False), 'r') as zf:

@@ -217,6 +217,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
             self.all_urns.add(urn)
             if ok(urn):
                 lang = x.get('language', 'und')
+                if lang:
+                    lang = lang.replace('-', '_')
                 if lang not in lang_map:
                     lang_map[lang] = factory(NewsCategory, new_root, lang)
                 factory(NewsItem, lang_map[lang], urn, x.get('title'))