Sync to trunk.

2025-08-11 09:13:57 -04:00 · 2011-07-24 21:57:20 -04:00 · 2011-07-24 21:57:20 -04:00 · 1e1562495d
commit 1e1562495d
parent 85f859150b 8f40166b9d
15 changed files with 559 additions and 74 deletions
--- a/recipes/corren2.recipe
+++ b/recipes/corren2.recipe
@ -1,39 +1,34 @@
 # -*- coding: utf-8 -*-
 __license__	= 'GPLv3'
 from calibre.web.feeds.news import BasicNewsRecipe
-class AdvancedUserRecipe1255797795(BasicNewsRecipe):
+class AdvancedUserRecipe1311446032(BasicNewsRecipe):
-    title          = u'Corren'
+    title                 = 'Corren'
-    language = 'sv'
+    __author__            = 'Jonas Svensson'
-    __author__ = 'Jonas Svensson'
+    description           = 'News from Sweden'
-    simultaneous_downloads = 1
+    publisher             = 'Corren'
-    no_stylesheets = True
+    category              = 'news, politics, Sweden'
-    oldest_article = 7
+    oldest_article        = 2
    delay                 = 1
    max_articles_per_feed = 100
-    remove_attributes = ['onload']
+    no_stylesheets        = True
-    timefmt = ''
+    use_embedded_content  = False
    encoding              = 'iso-8859-1'
    language              = 'sv'
-    feeds          = [
+    feeds = [
-                   (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'),
+              (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/')
-                   (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'),
+              ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/')
-                   (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'),
+              ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234')
-                   (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'),
+              ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230')
-                   (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'),
+            ]
                   (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
                   (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
                   (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
                   (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
                   (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/')
                     ]
    def print_version(self, url):
        url = url.replace("ekonomi/artikel.aspx", "Print.aspx")
        url = url.replace("bostad/artikel.aspx", "Print.aspx")
        url = url.replace("kultur/artikel.aspx", "Print.aspx")
        url = url.replace("motor/artikel.aspx", "Print.aspx")
        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
        url = url.replace("sport/artikel.aspx", "Print.aspx")
        url = url.replace("asikter/artikel.aspx", "Print.aspx")
        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx")
        return url.replace("nyheter/artikel.aspx", "Print.aspx")
    keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})]
    remove_tags = [
                     dict(name='ul',attrs={'class':'functions'})
                     ,dict(name='a',attrs={'href':'javascript*'})
                     ,dict(name='div',attrs={'class':'box'})
                     ,dict(name='div',attrs={'class':'functionsbottom'})
                  ]
--- a/recipes/dagens_industri.recipe
+++ b/recipes/dagens_industri.recipe
@ -0,0 +1,32 @@
 # -*- coding: utf-8 -*-
 __license__	= 'GPLv3'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1311450855(BasicNewsRecipe):
    # Recipe for the Swedish newspaper Dagens Industri, fetched from the
    # single RSS feed at http://di.se/rss
    # -- Metadata --
    title = u'Dagens Industri'
    __author__ = 'Jonas Svensson'
    description = 'Economy news from Sweden'
    publisher = 'DI'
    category = 'news, politics, Sweden'
    language = 'sv'
    encoding = 'utf-8'
    # -- Download settings --
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    feeds = [
        (u'DI', u'http://di.se/rss'),
    ]
    # Only the headline and the article body are kept from each page
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'id': 'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}},
        {'name': 'div', 'attrs': {'id': 'articleBody'}},
    ]
    # Elements stripped from the kept content
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'article-actions clear'}},
        {'name': 'div', 'attrs': {'class': 'article-action-popup'}},
        {'name': 'div', 'attrs': {'class': 'header'}},
        {'name': 'div', 'attrs': {'class': 'content clear'}},
        {'name': 'div', 'attrs': {'id': 'articleAdvertisementDiv'}},
        {'name': 'ul', 'attrs': {'class': 'action-list'}},
    ]
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -12,7 +12,7 @@ from datetime import date
 class Guardian(BasicNewsRecipe):
-    title = u'The Guardian / The Observer'
+    title = u'The Guardian and The Observer'
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
    else:
@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe):
    # List of section titles to ignore
    # For example: ['Sport']
    ignore_sections = []
-    
+
    timefmt = ' [%a, %d %b %Y]'
    keep_only_tags = [
                      dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
@ -94,7 +94,7 @@ class Guardian(BasicNewsRecipe):
                    prefix = section_title + ': '
                for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
                    yield (prefix + self.tag_to_string(subsection), subsection['href'])
-    
+
    def find_articles(self, url):
        soup = self.index_to_soup(url)
        div = soup.find('div', attrs={'class':'book-index'})
@ -115,7 +115,7 @@ class Guardian(BasicNewsRecipe):
                        'title': title, 'url':url, 'description':desc,
                        'date' : strftime('%a, %d %b'),
                        }
-    
+
    def parse_index(self):
        try:
            feeds = []
--- a/src/calibre/db/tables.py
+++ b/src/calibre/db/tables.py
@ -12,7 +12,7 @@ from datetime import datetime
 from dateutil.tz import tzoffset
 from calibre.constants import plugins
-from calibre.utils.date import parse_date, local_tz
+from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE
 from calibre.ebooks.metadata import author_to_author_sort
 _c_speedup = plugins['speedup'][0]
@ -29,8 +29,11 @@ def _c_convert_timestamp(val):
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
-    return datetime(year, month, day, hour, minutes, seconds,
+    try:
        return datetime(year, month, day, hour, minutes, seconds,
                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
    except OverflowError:
        return UNDEFINED_DATE.astimezone(local_tz)
 class Table(object):
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -128,7 +128,7 @@ class ANDROID(USBMS):
            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
            'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
-            'GT-S5830_CARD']
+            'GT-S5830_CARD', 'GT-S5570_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -399,6 +399,7 @@ class IndexHeader(object): # {{{
    def __init__(self, record):
        self.record = record
        raw = self.record.raw
        #open('/t/index_header.bin', 'wb').write(raw)
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Primary Index Record')
@ -406,7 +407,7 @@ class IndexHeader(object): # {{{
        self.unknown1 = raw[8:16]
        self.index_type, = struct.unpack('>I', raw[16:20])
        self.index_type_desc = {0: 'normal', 2:
-                'inflection'}.get(self.index_type, 'unknown')
+                'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
        self.idxt_start, = struct.unpack('>I', raw[20:24])
        self.index_count, = struct.unpack('>I', raw[24:28])
        self.index_encoding_num, = struct.unpack('>I', raw[28:32])
@ -596,10 +597,11 @@ class IndexEntry(object): # {{{
            0x3f : 'article',
    }
-    def __init__(self, ident, entry_type, raw, cncx, tagx_entries):
+    def __init__(self, ident, entry_type, raw, cncx, tagx_entries, flags=0):
        self.index = ident
        self.raw = raw
        self.tags = []
        self.entry_type_raw = entry_type
        try:
            self.entry_type = self.TYPES[entry_type]
@ -619,6 +621,27 @@ class IndexEntry(object): # {{{
                vals.append(val)
            self.tags.append(Tag(tag, vals, self.entry_type, cncx))
        if flags & 0b10:
            # Look for optional description and author
            desc_tag = [t for t in tagx_entries if t.tag == 22]
            if desc_tag and raw:
                val, consumed = decint(raw)
                raw = raw[consumed:]
                if val:
                    self.tags.append(Tag(desc_tag[0], [val], self.entry_type,
                        cncx))
        if flags & 0b100:
            aut_tag = [t for t in tagx_entries if t.tag == 23]
            if aut_tag and raw:
                val, consumed = decint(raw)
                raw = raw[consumed:]
                if val:
                    self.tags.append(Tag(aut_tag[0], [val], self.entry_type,
                        cncx))
        if raw.replace(b'\x00', b''): # There can be padding null bytes
            raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw))
    @property
    def label(self):
        for tag in self.tags:
@ -669,8 +692,8 @@ class IndexEntry(object): # {{{
        return -1
    def __str__(self):
-        ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%(
+        ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%(
-            self.index, self.entry_type, len(self.tags))]
+            self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))]
        for tag in self.tags:
            ans.append('\t'+str(tag))
        if self.first_child_index != -1:
@ -690,6 +713,7 @@ class IndexRecord(object): # {{{
    def __init__(self, record, index_header, cncx):
        self.record = record
        raw = self.record.raw
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Primary Index Record')
@ -713,6 +737,9 @@ class IndexRecord(object): # {{{
        for i in range(self.idxt_count):
            off, = u(b'>H', indices[i*2:(i+1)*2])
            self.index_offsets.append(off-192)
        rest = indices[(i+1)*2:]
        if rest.replace(b'\0', ''): # There can be padding null bytes
            raise ValueError('Extra bytes after IDXT table: %r'%rest)
        indxt = raw[192:self.idxt_offset]
        self.indices = []
@ -723,8 +750,13 @@ class IndexRecord(object): # {{{
                next_off = len(indxt)
            index, consumed = decode_hex_number(indxt[off:])
            entry_type = ord(indxt[off+consumed])
            d, flags = 1, 0
            if index_header.index_type == 6:
                flags = ord(indxt[off+consumed+d])
                d += 1
            self.indices.append(IndexEntry(index, entry_type,
-                indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries))
+                indxt[off+consumed+d:next_off], cncx,
                index_header.tagx_entries, flags=flags))
            index = self.indices[-1]
    def get_parent(self, index):
@ -744,7 +776,7 @@ class IndexRecord(object): # {{{
                len(w), not bool(w.replace(b'\0', b'')) ))
        a('Header length: %d'%self.header_length)
        u(self.unknown1)
-        a('Header Type: %d'%self.header_type)
+        a('Unknown (header type? index record number? always 1?): %d'%self.header_type)
        u(self.unknown2)
        a('IDXT Offset: %d'%self.idxt_offset)
        a('IDXT Count: %d'%self.idxt_count)
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@ -2,6 +2,7 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 from future_builtins import filter
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -12,7 +13,9 @@ from cStringIO import StringIO
 from collections import OrderedDict
 from calibre.ebooks import normalize
-from calibre.ebooks.mobi.utils import encint
+from calibre.ebook.mobi.writer2 import RECORD_SIZE
 from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex)
 from calibre.ebooks.mobi.langcodes import iana2mobi
 def utf8_text(text):
    '''
@ -37,7 +40,6 @@ def align_block(raw, multiple=4, pad=b'\0'):
    if extra == 0: return raw
    return raw + pad*(multiple - extra)
 class CNCX(object): # {{{
    '''
@ -53,17 +55,11 @@ class CNCX(object): # {{{
        for item in toc:
            if item is self.toc: continue
-            label = item.title
+            self.strings[item.title] = 0
            klass = item.klass
            if opts.mobi_periodical:
-                if item.description:
+                self.strings[item.klass] = 0
                    self.strings[item.description] = 0
                if item.author:
                    self.string[item.author] = 0
            self.strings[label] = self.strings[klass] = 0
        self.records = []
        offset = 0
        buf = StringIO()
        for key in tuple(self.strings.iterkeys()):
@ -90,27 +86,441 @@ class CNCX(object): # {{{
        return self.strings[string]
 # }}}
 class IndexEntry(object): # {{{
    '''
    A single entry of the index, serializable via :attr:`bytestring`.

    ``TAG_VALUES`` maps attribute names to tag numbers, ``RTAG_MAP`` is the
    reverse mapping and the position of a tag number in ``BITMASKS`` is the
    bit that tag occupies both in the TAGX table written by
    :meth:`tagx_block` and in the entry type byte.
    '''
    TAG_VALUES = {
            'offset': 1,
            'size': 2,
            'label_offset': 3,
            'depth': 4,
            'class_offset': 5,
            'parent_index': 21,
            'first_child_index': 22,
            'last_child_index': 23,
    }
    # tag number -> attribute name. dict() accepts a single iterable of
    # pairs, not two separate iterables
    RTAG_MAP = dict((num, name) for name, num in TAG_VALUES.items())
    BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,]
    def __init__(self, offset, label_offset, depth=0, class_offset=None):
        '''
        :param offset: Offset of this entry in the text
        :param label_offset: Offset of the label of this entry
        :param depth: Nesting depth of this entry
        :param class_offset: Offset of the class string of this entry or
                             None if there is no class
        '''
        self.offset, self.label_offset = offset, label_offset
        self.depth, self.class_offset = depth, class_offset
        self.length = 0
        self.index = 0
        # Optional tags, only serialized when they are not None
        self.parent_index = None
        self.first_child_index = None
        self.last_child_index = None
    @property
    def size(self):
        # Alias for length: TAG_VALUES names tag 2 'size' and bytestring
        # looks values up by that name
        return self.length
    @size.setter
    def size(self, val):
        self.length = val
    @classmethod
    def tagx_block(cls, for_periodical=True):
        '''
        Return the TAGX block as a bytestring. Tags 1-4 are always
        declared, tags 5, 21, 22 and 23 only if for_periodical is True.
        '''
        buf = bytearray()
        def add_tag(tag, num_values=1):
            buf.append(tag)
            buf.append(num_values)
            # bitmask
            buf.append(1 << (cls.BITMASKS.index(tag)))
            # eof
            buf.append(0)
        for tag in range(1, 5):
            add_tag(tag)
        if for_periodical:
            for tag in (5, 21, 22, 23):
                add_tag(tag)
        # End of TAGX record
        for i in range(3): buf.append(0)
        buf.append(1)
        header = b'TAGX'
        header += pack(b'>I', len(buf)) # table length
        header += pack(b'>I', 1) # control byte count
        return header + bytes(buf)
    @property
    def next_offset(self):
        'Offset of the first byte after this entry'
        return self.offset + self.length
    @property
    def tag_nums(self):
        'The tag numbers present in this entry, in serialization order'
        for i in range(1, 5):
            yield i
        for attr in ('class_offset', 'parent_index', 'first_child_index',
                'last_child_index'):
            if getattr(self, attr) is not None:
                yield self.TAG_VALUES[attr]
    @property
    def entry_type(self):
        'Bit field with one bit set for every tag present in this entry'
        ans = 0
        for tag in self.tag_nums:
            # The bit is the position of the tag in BITMASKS (the same
            # convention as tagx_block), not the tag number itself
            ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x
        return ans
    @property
    def bytestring(self):
        '''
        The serialized entry: the index as a hex number, the entry type
        byte and then the value of every present tag encoded with encint.
        '''
        buf = StringIO()
        buf.write(encode_number_as_hex(self.index))
        et = self.entry_type
        buf.write(bytes(bytearray([et])))
        for tag in self.tag_nums:
            attr = self.RTAG_MAP[tag]
            val = getattr(self, attr)
            buf.write(encint(val))
        return buf.getvalue()
 # }}}
 class Indexer(object):
-    def __init__(self, serializer, number_of_text_records, opts, oeb):
+    def __init__(self, serializer, number_of_text_records,
            size_of_last_text_record, opts, oeb):
        self.serializer = serializer
        self.number_of_text_records = number_of_text_records
        self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) +
                            size_of_last_text_record)
        self.oeb = oeb
        self.log = oeb.log
        self.opts = opts
-        self.cncx = CNCX(oeb.toc, opts)
+        self.is_periodical = opts.mobi_periodical
        self.is_flat_periodical = False
        if opts.mobi_periodical:
            periodical_node = iter(oeb.toc).next()
            sections = tuple(periodical_node)
            self.is_flat_periodical = len(sections) == 1
        self.records = []
-    def create_header(self):
+        self.cncx = CNCX(oeb.toc, opts)
        buf = StringIO()
-        # Ident
+        if self.is_periodical:
            self.indices = self.create_periodical_index()
        else:
            self.indices = self.create_book_index()
        self.records.append(self.create_index_record())
        self.records.insert(0, self.create_header())
        self.records.extend(self.cncx.records)
    def create_index_record(self): # {{{
        '''
        Serialize self.indices into a single INDX record and return it as a
        bytestring. The record consists of a 192 byte header, the index
        entries (padded by align_block) and an IDXT block holding the
        offset of every entry as a big-endian unsigned short.

        Raises ValueError if the record is larger than 0x10000 bytes.
        '''
        header_length = 192
        buf = StringIO()
        indices = self.indices
        # Write index entries
        offsets = []
        for i in indices:
            # Remember where each entry starts, relative to the end of the
            # header
            offsets.append(buf.tell())
            buf.write(i.bytestring)
        index_block = align_block(buf.getvalue())
        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        # Reuse buf for the offset table
        buf.truncate(0)
        for offset in offsets:
            # Stored offsets are relative to the start of the record, hence
            # the header length is added
            buf.write(pack(b'>H', header_length+offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block
        header = b'INDX'
        # Reuse buf once more, now for the rest of the header
        buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0'*4) # Unknown
        buf.write(pack(b'>I', 1)) # Header type? Or index record number?
        buf.write(b'\0'*4) # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff'*8)
        # Unknown
        buf.write(b'\0'*156)
        # 4 bytes of ident plus the 188 bytes written above make up the
        # 192 byte header
        header += buf.getvalue()
        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC'%len(offsets))
        return ans
    # }}}
    def create_header(self): # {{{
        buf = StringIO()
        tagx_block = IndexEntry.tagx_block(self.is_periodical)
        header_length = 192
        # Ident 0 - 4
        buf.write(b'INDX')
-        # Header length
+        # Header length 4 - 8
-        buf.write(pack(b'>I', 192))
+        buf.write(pack(b'>I', header_length))
-        # Index type: 0 - normal, 2 - inflection
+        # Unknown 8-16
        buf.write(b'\0'*8)
        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))
        # IDXT offset 20-24
        buf.write(pack(b'>I', 0)) # Filled in later
        # Number of index records 24-28
        buf.write(pack('b>I', len(self.records)))
        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001)) # utf-8
        # Index language 32-36
        buf.write(iana2mobi(
            str(self.oeb.metadata.language[0])))
        # Number of index entries 36-40
        buf.write(pack(b'>I', len(self.indices)))
        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))
        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))
        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))
        # Number of CNCX records 52-56
        buf.write(pack(b'>I', len(self.cncx.records)))
        # Unknown 56-180
        buf.write(b'\0'*124)
        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))
        # Unknown 184-192
        buf.write(b'\0'*8)
        # TAGX block
        buf.write(tagx_block)
        num = len(self.indices)
        # The index of the last entry in the NCX
        buf.write(encode_number_as_hex(num-1))
        # The number of entries in the NCX
        buf.write(pack(b'>H', num))
        # Padding
        pad = (4 - (buf.tell()%4))%4
        if pad:
            buf.write(b'\0'*pad)
        idxt_offset = buf.tell()
        buf.write(b'IDXT')
        buf.write(header_length + len(tagx_block))
        buf.write(b'\0')
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))
        return align_block(buf.getvalue())
    # }}}
    def create_book_index(self): # {{{
        '''
        Build the index entries for a normal (non periodical) book from the
        table of contents.

        TOC items whose href or title cannot be resolved are skipped with a
        warning, as are items that point to an offset already used by
        another item. Entries are sorted by offset, every entry extends up
        to the start of the next one (the last one up to the end of the
        body) and entries with no content are dropped.

        :return: List of IndexEntry objects with offset, length and index
                 set
        '''
        indices = []
        seen = set()
        id_offsets = self.serializer.id_offsets
        for node in self.oeb.toc.iterdescendants():
            try:
                offset = id_offsets[node.href]
                label = self.cncx[node.title]
            except Exception:
                self.log.warn('TOC item %s not found in document'%node.href)
                continue
            if offset in seen:
                continue
            seen.add(offset)
            # Collect into the local list, self.indices is only assigned
            # from the return value of this method
            indices.append(IndexEntry(offset, label))
        indices.sort(key=lambda x:x.offset)
        def set_lengths(entries):
            for i, index in enumerate(entries):
                if i + 1 < len(entries):
                    next_offset = entries[i+1].offset
                else:
                    next_offset = self.serializer.body_end_offset
                index.length = next_offset - index.offset
        # Set lengths
        set_lengths(indices)
        # Remove empty nodes
        indices = [i for i in indices if i.length > 0]
        # Set index values
        for i, index in enumerate(indices):
            index.index = i
        # Set lengths again to close up any gaps left by filtering
        set_lengths(indices)
        return indices
    # }}}
    def create_periodical_index(self): # {{{
        '''
        Build the index entries for a periodical.

        The first child of the TOC is taken as the periodical, its children
        as sections and their children as articles. Sections and articles
        that cannot be resolved, that duplicate an already seen offset or
        that end up with no content are dropped, as are sections without
        articles.

        :return: List of IndexEntry objects: the periodical, then all
                 sections, then all articles
        :raises ValueError: If the sections do not exactly cover the text up
                 to the end of the body or the articles do not exactly
                 cover the text up to the end of their section
        '''
        periodical_node = next(iter(self.oeb.toc))
        body_end_offset = self.serializer.body_end_offset
        periodical_node_offset = self.serializer.body_start_offset
        id_offsets = self.serializer.id_offsets
        periodical = IndexEntry(periodical_node_offset,
                self.cncx[periodical_node.title],
                class_offset=self.cncx[periodical_node.klass])
        periodical.length = body_end_offset - periodical_node_offset
        periodical.first_child_index = 1
        normalized_sections = []
        seen_sec_offsets = set()
        seen_art_offsets = set()
        for sec in periodical_node:
            try:
                offset = id_offsets[sec.href]
                label = self.cncx[sec.title]
                klass = self.cncx[sec.klass]
            except Exception:
                continue
            if offset in seen_sec_offsets:
                continue
            seen_sec_offsets.add(offset)
            section = IndexEntry(offset, label, class_offset=klass, depth=1)
            section.parent_index = 0
            normalized_articles = []
            for art in sec:
                try:
                    offset = id_offsets[art.href]
                    label = self.cncx[art.title]
                    klass = self.cncx[art.klass]
                except Exception:
                    continue
                if offset in seen_art_offsets:
                    continue
                seen_art_offsets.add(offset)
                normalized_articles.append(IndexEntry(offset, label,
                    class_offset=klass, depth=2))
            if normalized_articles:
                normalized_articles.sort(key=lambda x:x.offset)
                normalized_sections.append((section, normalized_articles))
        normalized_sections.sort(key=lambda x:x[0].offset)
        def set_lengths(sections):
            # A section extends to the start of the next section (the last
            # one to the end of the body), an article to the start of the
            # next article (the last one to the end of its section).
            # sections holds (section, articles) tuples, so the next
            # section entry is sections[s+1][0]
            for s, (sec, articles) in enumerate(sections):
                if s + 1 < len(sections):
                    next_offset = sections[s+1][0].offset
                else:
                    next_offset = body_end_offset
                sec.length = next_offset - sec.offset
                for a, art in enumerate(articles):
                    if a + 1 < len(articles):
                        next_offset = articles[a+1].offset
                    else:
                        next_offset = sec.next_offset
                    art.length = next_offset - art.offset
        # Set lengths
        set_lengths(normalized_sections)
        # Filter
        normalized_sections = [
                (sec, [art for art in articles if art.length > 0])
                for sec, articles in normalized_sections]
        normalized_sections = [x for x in normalized_sections
                if x[0].length > 0 and x[1]]
        # Set indices
        i = 0
        for sec, articles in normalized_sections:
            i += 1
            sec.index = i
        for sec, articles in normalized_sections:
            for art in articles:
                i += 1
                art.index = i
                art.parent_index = sec.index
        for sec, articles in normalized_sections:
            sec.first_child_index = articles[0].index
            sec.last_child_index = articles[-1].index
        # Set lengths again to close up any gaps left by filtering
        set_lengths(normalized_sections)
        # Sanity check
        for s, (sec, articles) in enumerate(normalized_sections):
            if s + 1 < len(normalized_sections):
                expected_end = normalized_sections[s+1][0].offset
            else:
                expected_end = body_end_offset
            if sec.length == 0 or sec.next_offset != expected_end:
                raise ValueError('Invalid section layout')
            for a, art in enumerate(articles):
                if a + 1 < len(articles):
                    expected_end = articles[a+1].offset
                else:
                    expected_end = sec.next_offset
                if art.length == 0 or art.next_offset != expected_end:
                    raise ValueError('Invalid article layout')
        # Flatten
        indices = [periodical]
        for sec, articles in normalized_sections:
            indices.append(sec)
            periodical.last_child_index = sec.index
        for sec, articles in normalized_sections:
            indices.extend(articles)
        return indices
    # }}}
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -20,6 +20,7 @@ from calibre.utils.filenames import ascii_filename
 from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
 from calibre.ebooks.mobi.utils import (rescale_image, encint,
        encode_trailing_data)
 from calibre.ebooks.mobi.writer2.indexer import Indexer
 EXTH_CODES = {
    'creator': 100,
@ -87,6 +88,14 @@ class MobiWriter(object):
    # Indexing {{{
    def generate_index(self):
        '''
        Try to generate the index records and append them to self.records.
        On success primary_index_record_idx is the position of the first
        index record in self.records. On any failure the error is logged
        and primary_index_record_idx is left as None.
        '''
        self.primary_index_record_idx = None
        try:
            # NOTE(review): Indexer.__init__ in writer2/indexer.py, as shown
            # in this changeset, takes (serializer, number_of_text_records,
            # size_of_last_text_record, opts, oeb). Only four arguments are
            # passed here, which would raise a TypeError that the bare
            # except below logs and swallows -- confirm and pass the size
            # of the last text record.
            self.indexer = Indexer(self.serializer, self.last_text_record_idx,
                    self.opts, self.oeb)
        except:
            self.log.exception('Failed to generate MOBI index:')
        else:
            # The index records are placed directly after the records
            # already present
            self.primary_index_record_idx = len(self.records)
            self.records.extend(self.indexer.records)
    # }}}
    def write_uncrossable_breaks(self): # {{{
@ -202,7 +211,6 @@ class MobiWriter(object):
            record.write(overlap)
            record.write(pack(b'>B', len(overlap)))
        self.last_text_record_idx = nrecords
    def read_text_record(self, text):
@ -265,8 +273,6 @@ class MobiWriter(object):
        # EOF record
        self.records.append('\xE9\x8E\x0D\x0A')
        self.generate_end_records()
        record0 = StringIO()
        # The MOBI Header
        record0.write(pack(b'>HHIHHHH',
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -143,6 +143,7 @@ class Serializer(object):
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        self.body_end_offset = buf.tell()
        buf.write(b'</body>')
    def serialize_item(self, item):
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -133,6 +133,7 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                if mi.author_link_map[aut]:
                    link = mi.author_link_map[aut]
                elif gprefs.get('default_author_link'):
--- a/src/calibre/gui2/dialogs/quickview.py
+++ b/src/calibre/gui2/dialogs/quickview.py
@ -183,7 +183,6 @@ class Quickview(QDialog, Ui_Quickview):
        self.items.blockSignals(False)
    def indicate_no_items(self):
        print 'no items'
        self.no_valid_items = True
        self.items.clear()
        self.items.addItem(QListWidgetItem(_('**No items found**')))
--- a/src/calibre/gui2/store/init.py
+++ b/src/calibre/gui2/store/init.py
@ -6,6 +6,8 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.utils.filenames import ascii_filename
 class StorePlugin(object): # {{{
    '''
    A plugin representing an online ebook repository (store). The store can
@ -43,7 +45,7 @@ class StorePlugin(object): # {{{
    The easiest way to handle affiliate money payouts is to randomly select
    between the author's affiliate id and calibre's affiliate id so that
    70% of the time the author's id is used.
-    
+
    See declined.txt for a list of stores that do not want to be included.
    '''
@ -53,7 +55,7 @@ class StorePlugin(object): # {{{
        self.gui = gui
        self.name = name
        self.base_plugin = None
-        self.config = JSONConfig('store/stores/' + self.name)
+        self.config = JSONConfig('store/stores/' + ascii_filename(self.name))
    def open(self, gui, parent=None, detail_item=None, external=False):
        '''
--- a/src/calibre/gui2/update.py
+++ b/src/calibre/gui2/update.py
@ -15,6 +15,7 @@ from calibre.gui2 import config, dynamic, open_url
 from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available
 URL = 'http://status.calibre-ebook.com/latest'
 #URL = 'http://localhost:8000/latest'
 NO_CALIBRE_UPDATE = '-0.0.0'
 VSEP = '|'
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@ -17,7 +17,7 @@ from datetime import datetime
 from functools import partial
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
-from calibre.utils.date import parse_date, isoformat, local_tz
+from calibre.utils.date import parse_date, isoformat, local_tz, UNDEFINED_DATE
 from calibre import isbytestring, force_unicode
 from calibre.constants import iswindows, DEBUG, plugins
 from calibre.utils.icu import strcmp
@ -39,8 +39,11 @@ def _c_convert_timestamp(val):
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
-    return datetime(year, month, day, hour, minutes, seconds,
+    try:
        return datetime(year, month, day, hour, minutes, seconds,
                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
    except OverflowError:
        return UNDEFINED_DATE.astimezone(local_tz)
 def _py_convert_timestamp(val):
    if val:
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@ -401,7 +401,7 @@ with undefined values in the column. Searching for ``true`` will find all books
 values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column.
 Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column.
-Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.C`, but not the tag `A`.
+Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`.
 Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. Examples: