diff --git a/recipes/corren2.recipe b/recipes/corren2.recipe index 494be88f10..f53da20fd1 100644 --- a/recipes/corren2.recipe +++ b/recipes/corren2.recipe @@ -1,39 +1,34 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1255797795(BasicNewsRecipe): - title = u'Corren' - language = 'sv' - __author__ = 'Jonas Svensson' - simultaneous_downloads = 1 - no_stylesheets = True - oldest_article = 7 +class AdvancedUserRecipe1311446032(BasicNewsRecipe): + title = 'Corren' + __author__ = 'Jonas Svensson' + description = 'News from Sweden' + publisher = 'Corren' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 max_articles_per_feed = 100 - remove_attributes = ['onload'] - timefmt = '' + no_stylesheets = True + use_embedded_content = False + encoding = 'iso-8859-1' + language = 'sv' - feeds = [ - (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'), - (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'), - (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'), - (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'), - (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'), - (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'), - (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'), - (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'), - (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'), - (u'Motala', 
u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/') - ] - - def print_version(self, url): - url = url.replace("ekonomi/artikel.aspx", "Print.aspx") - url = url.replace("bostad/artikel.aspx", "Print.aspx") - url = url.replace("kultur/artikel.aspx", "Print.aspx") - url = url.replace("motor/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("sport/artikel.aspx", "Print.aspx") - url = url.replace("asikter/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx") - return url.replace("nyheter/artikel.aspx", "Print.aspx") + feeds = [ + (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/') + ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/') + ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234') + ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230') + ] + keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})] + remove_tags = [ + dict(name='ul',attrs={'class':'functions'}) + ,dict(name='a',attrs={'href':'javascript*'}) + ,dict(name='div',attrs={'class':'box'}) + ,dict(name='div',attrs={'class':'functionsbottom'}) + ] diff --git a/recipes/dagens_industri.recipe b/recipes/dagens_industri.recipe new file mode 100644 index 0000000000..c9b60c72b1 --- /dev/null +++ b/recipes/dagens_industri.recipe @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1311450855(BasicNewsRecipe): + title = u'Dagens Industri' + __author__ = 'Jonas Svensson' + description = 'Economy news 
from Sweden' + publisher = 'DI' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'sv' + + feeds = [(u'DI', u'http://di.se/rss')] + + keep_only_tags = [dict(name='h1', attrs={'id':'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}),dict(name='div', attrs={'id':'articleBody'})] + + remove_tags = [ + dict(name='div',attrs={'class':'article-actions clear'}) + ,dict(name='div',attrs={'class':'article-action-popup'}) + ,dict(name='div',attrs={'class':'header'}) + ,dict(name='div',attrs={'class':'content clear'}) + ,dict(name='div',attrs={'id':'articleAdvertisementDiv'}) + ,dict(name='ul',attrs={'class':'action-list'}) + ] diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index c5021cb91d..124820d0a1 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -12,7 +12,7 @@ from datetime import date class Guardian(BasicNewsRecipe): - title = u'The Guardian / The Observer' + title = u'The Guardian and The Observer' if date.today().weekday() == 6: base_url = "http://www.guardian.co.uk/theobserver" else: @@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe): # List of section titles to ignore # For example: ['Sport'] ignore_sections = [] - + timefmt = ' [%a, %d %b %Y]' keep_only_tags = [ dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), @@ -94,7 +94,7 @@ class Guardian(BasicNewsRecipe): prefix = section_title + ': ' for subsection in s.parent.findAll('a', attrs={'class':'book-section'}): yield (prefix + self.tag_to_string(subsection), subsection['href']) - + def find_articles(self, url): soup = self.index_to_soup(url) div = soup.find('div', attrs={'class':'book-index'}) @@ -115,7 +115,7 @@ class Guardian(BasicNewsRecipe): 'title': title, 'url':url, 'description':desc, 'date' : strftime('%a, %d %b'), } - + def parse_index(self): 
try: feeds = [] diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index b75effff4b..fa7b001851 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -12,7 +12,7 @@ from datetime import datetime from dateutil.tz import tzoffset from calibre.constants import plugins -from calibre.utils.date import parse_date, local_tz +from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE from calibre.ebooks.metadata import author_to_author_sort _c_speedup = plugins['speedup'][0] @@ -29,8 +29,11 @@ def _c_convert_timestamp(val): if ret is None: return parse_date(val, as_utc=False) year, month, day, hour, minutes, seconds, tzsecs = ret - return datetime(year, month, day, hour, minutes, seconds, + try: + return datetime(year, month, day, hour, minutes, seconds, tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) + except OverflowError: + return UNDEFINED_DATE.astimezone(local_tz) class Table(object): diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index d26489c42f..a12f37c7eb 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -128,7 +128,7 @@ class ANDROID(USBMS): '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', - 'GT-S5830_CARD'] + 'GT-S5830_CARD', 'GT-S5570_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index d861e69cbf..a848f11355 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -399,6 +399,7 @@ class IndexHeader(object): # {{{ def __init__(self, record): self.record = record raw = self.record.raw + #open('/t/index_header.bin', 
'wb').write(raw) if raw[:4] != b'INDX': raise ValueError('Invalid Primary Index Record') @@ -406,7 +407,7 @@ class IndexHeader(object): # {{{ self.unknown1 = raw[8:16] self.index_type, = struct.unpack('>I', raw[16:20]) self.index_type_desc = {0: 'normal', 2: - 'inflection'}.get(self.index_type, 'unknown') + 'inflection', 6: 'calibre'}.get(self.index_type, 'unknown') self.idxt_start, = struct.unpack('>I', raw[20:24]) self.index_count, = struct.unpack('>I', raw[24:28]) self.index_encoding_num, = struct.unpack('>I', raw[28:32]) @@ -596,10 +597,11 @@ class IndexEntry(object): # {{{ 0x3f : 'article', } - def __init__(self, ident, entry_type, raw, cncx, tagx_entries): + def __init__(self, ident, entry_type, raw, cncx, tagx_entries, flags=0): self.index = ident self.raw = raw self.tags = [] + self.entry_type_raw = entry_type try: self.entry_type = self.TYPES[entry_type] @@ -619,6 +621,27 @@ class IndexEntry(object): # {{{ vals.append(val) self.tags.append(Tag(tag, vals, self.entry_type, cncx)) + if flags & 0b10: + # Look for optional description and author + desc_tag = [t for t in tagx_entries if t.tag == 22] + if desc_tag and raw: + val, consumed = decint(raw) + raw = raw[consumed:] + if val: + self.tags.append(Tag(desc_tag[0], [val], self.entry_type, + cncx)) + if flags & 0b100: + aut_tag = [t for t in tagx_entries if t.tag == 23] + if aut_tag and raw: + val, consumed = decint(raw) + raw = raw[consumed:] + if val: + self.tags.append(Tag(aut_tag[0], [val], self.entry_type, + cncx)) + + if raw.replace(b'\x00', b''): # There can be padding null bytes + raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw)) + @property def label(self): for tag in self.tags: @@ -669,8 +692,8 @@ class IndexEntry(object): # {{{ return -1 def __str__(self): - ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%( - self.index, self.entry_type, len(self.tags))] + ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%( + self.index, self.entry_type, 
bin(self.entry_type_raw)[2:], len(self.tags))] for tag in self.tags: ans.append('\t'+str(tag)) if self.first_child_index != -1: @@ -690,6 +713,7 @@ class IndexRecord(object): # {{{ def __init__(self, record, index_header, cncx): self.record = record raw = self.record.raw + if raw[:4] != b'INDX': raise ValueError('Invalid Primary Index Record') @@ -713,6 +737,9 @@ class IndexRecord(object): # {{{ for i in range(self.idxt_count): off, = u(b'>H', indices[i*2:(i+1)*2]) self.index_offsets.append(off-192) + rest = indices[(i+1)*2:] + if rest.replace(b'\0', ''): # There can be padding null bytes + raise ValueError('Extra bytes after IDXT table: %r'%rest) indxt = raw[192:self.idxt_offset] self.indices = [] @@ -723,8 +750,13 @@ class IndexRecord(object): # {{{ next_off = len(indxt) index, consumed = decode_hex_number(indxt[off:]) entry_type = ord(indxt[off+consumed]) + d, flags = 1, 0 + if index_header.index_type == 6: + flags = ord(indxt[off+consumed+d]) + d += 1 self.indices.append(IndexEntry(index, entry_type, - indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries)) + indxt[off+consumed+d:next_off], cncx, + index_header.tagx_entries, flags=flags)) index = self.indices[-1] def get_parent(self, index): @@ -744,7 +776,7 @@ class IndexRecord(object): # {{{ len(w), not bool(w.replace(b'\0', b'')) )) a('Header length: %d'%self.header_length) u(self.unknown1) - a('Header Type: %d'%self.header_type) + a('Unknown (header type? index record number? 
always 1?): %d'%self.header_type) u(self.unknown2) a('IDXT Offset: %d'%self.idxt_offset) a('IDXT Count: %d'%self.idxt_count) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index c28b91e63a..501b23113f 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -2,6 +2,7 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from __future__ import (unicode_literals, division, absolute_import, print_function) +from future_builtins import filter __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' @@ -12,7 +13,9 @@ from cStringIO import StringIO from collections import OrderedDict from calibre.ebooks import normalize -from calibre.ebooks.mobi.utils import encint +from calibre.ebooks.mobi.writer2 import RECORD_SIZE +from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex) +from calibre.ebooks.mobi.langcodes import iana2mobi def utf8_text(text): ''' @@ -37,7 +40,6 @@ def align_block(raw, multiple=4, pad=b'\0'): if extra == 0: return raw return raw + pad*(multiple - extra) - class CNCX(object): # {{{ ''' @@ -53,17 +55,11 @@ class CNCX(object): # {{{ for item in toc: if item is self.toc: continue - label = item.title - klass = item.klass + self.strings[item.title] = 0 if opts.mobi_periodical: - if item.description: - self.strings[item.description] = 0 - if item.author: - self.string[item.author] = 0 - self.strings[label] = self.strings[klass] = 0 + self.strings[item.klass] = 0 self.records = [] - offset = 0 buf = StringIO() for key in tuple(self.strings.iterkeys()): @@ -90,27 +86,441 @@ class CNCX(object): # {{{ return self.strings[string] # }}} +class IndexEntry(object): # {{{ + + TAG_VALUES = { + 'offset': 1, + 'size': 2, + 'label_offset': 3, + 'depth': 4, + 'class_offset': 5, + 'parent_index': 21, + 'first_child_index': 22, + 'last_child_index': 23, + } + RTAG_MAP = dict(zip(TAG_VALUES.itervalues(), TAG_VALUES.iterkeys())) + + BITMASKS = [1, 2, 3, 4, 5, 21, 
22, 23,] + + def __init__(self, offset, label_offset, depth=0, class_offset=None): + self.offset, self.label_offset = offset, label_offset + self.depth, self.class_offset = depth, class_offset + + self.length = 0 + self.index = 0 + + self.parent_index = None + self.first_child_index = None + self.last_child_index = None + + @classmethod + def tagx_block(cls, for_periodical=True): + buf = bytearray() + + def add_tag(tag, num_values=1): + buf.append(tag) + buf.append(num_values) + # bitmask + buf.append(1 << (cls.BITMASKS.index(tag))) + # eof + buf.append(0) + + for tag in xrange(1, 5): + add_tag(tag) + + if for_periodical: + for tag in (5, 21, 22, 23): + add_tag(tag) + + # End of TAGX record + for i in xrange(3): buf.append(0) + buf.append(1) + + header = b'TAGX' + header += pack(b'>I', len(buf)) # table length + header += pack(b'>I', 1) # control byte count + + return header + bytes(buf) + + @property + def next_offset(self): + return self.offset + self.length + + @property + def tag_nums(self): + for i in range(1, 5): + yield i + for attr in ('class_offset', 'parent_index', 'first_child_index', + 'last_child_index'): + if getattr(self, attr) is not None: + yield self.TAG_VALUES[attr] + + @property + def entry_type(self): + ans = 0 + for tag in self.tag_nums: + ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x + return ans + + @property + def bytestring(self): + buf = StringIO() + buf.write(encode_number_as_hex(self.index)) + et = self.entry_type + buf.write(bytes(bytearray([et]))) + + for tag in self.tag_nums: + attr = self.RTAG_MAP[tag] + val = getattr(self, attr) + buf.write(encint(val)) + + ans = buf.getvalue() + return ans + +# }}} + class Indexer(object): - def __init__(self, serializer, number_of_text_records, opts, oeb): + def __init__(self, serializer, number_of_text_records, + size_of_last_text_record, opts, oeb): self.serializer = serializer self.number_of_text_records = number_of_text_records + self.text_size = (RECORD_SIZE * 
(self.number_of_text_records-1) + + size_of_last_text_record) self.oeb = oeb self.log = oeb.log self.opts = opts - self.cncx = CNCX(oeb.toc, opts) + self.is_periodical = opts.mobi_periodical + self.is_flat_periodical = False + if opts.mobi_periodical: + periodical_node = iter(oeb.toc).next() + sections = tuple(periodical_node) + self.is_flat_periodical = len(sections) == 1 self.records = [] - def create_header(self): - buf = StringIO() + self.cncx = CNCX(oeb.toc, opts) - # Ident + if self.is_periodical: + self.indices = self.create_periodical_index() + else: + self.indices = self.create_book_index() + + self.records.append(self.create_index_record()) + self.records.insert(0, self.create_header()) + self.records.extend(self.cncx.records) + + def create_index_record(self): # {{{ + header_length = 192 + buf = StringIO() + indices = self.indices + + # Write index entries + offsets = [] + for i in indices: + offsets.append(buf.tell()) + buf.write(i.bytestring) + index_block = align_block(buf.getvalue()) + + # Write offsets to index entries as an IDXT block + idxt_block = b'IDXT' + buf.truncate(0) + for offset in offsets: + buf.write(pack(b'>H', header_length+offset)) + idxt_block = align_block(idxt_block + buf.getvalue()) + body = index_block + idxt_block + + header = b'INDX' + buf.truncate(0) + buf.write(pack(b'>I', header_length)) + buf.write(b'\0'*4) # Unknown + buf.write(pack(b'>I', 1)) # Header type? Or index record number? 
+ buf.write(b'\0'*4) # Unknown + # IDXT block offset + buf.write(pack(b'>I', header_length + len(index_block))) + # Number of index entries + buf.write(pack(b'>I', len(offsets))) + # Unknown + buf.write(b'\xff'*8) + # Unknown + buf.write(b'\0'*156) + + header += buf.getvalue() + + ans = header + body + if len(ans) > 0x10000: + raise ValueError('Too many entries (%d) in the TOC'%len(offsets)) + return ans + # }}} + + def create_header(self): # {{{ + buf = StringIO() + tagx_block = IndexEntry.tagx_block(self.is_periodical) + header_length = 192 + + # Ident 0 - 4 buf.write(b'INDX') - # Header length - buf.write(pack(b'>I', 192)) + # Header length 4 - 8 + buf.write(pack(b'>I', header_length)) - # Index type: 0 - normal, 2 - inflection + # Unknown 8-16 + buf.write(b'\0'*8) + + # Index type: 0 - normal, 2 - inflection 16 - 20 buf.write(pack(b'>I', 2)) + + # IDXT offset 20-24 + buf.write(pack(b'>I', 0)) # Filled in later + + # Number of index records 24-28 + buf.write(pack(b'>I', len(self.records))) + + # Index Encoding 28-32 + buf.write(pack(b'>I', 65001)) # utf-8 + + # Index language 32-36 + buf.write(iana2mobi( + str(self.oeb.metadata.language[0]))) + + # Number of index entries 36-40 + buf.write(pack(b'>I', len(self.indices))) + + # ORDT offset 40-44 + buf.write(pack(b'>I', 0)) + + # LIGT offset 44-48 + buf.write(pack(b'>I', 0)) + + # Number of LIGT entries 48-52 + buf.write(pack(b'>I', 0)) + + # Number of CNCX records 52-56 + buf.write(pack(b'>I', len(self.cncx.records))) + + # Unknown 56-180 + buf.write(b'\0'*124) + + # TAGX offset 180-184 + buf.write(pack(b'>I', header_length)) + + # Unknown 184-192 + buf.write(b'\0'*8) + + # TAGX block + buf.write(tagx_block) + + num = len(self.indices) + + # The index of the last entry in the NCX + buf.write(encode_number_as_hex(num-1)) + + # The number of entries in the NCX + buf.write(pack(b'>H', num)) + + # Padding + pad = (4 - (buf.tell()%4))%4 + if pad: + buf.write(b'\0'*pad) + + idxt_offset = buf.tell() + + 
buf.write(b'IDXT') + buf.write(pack(b'>H', header_length + len(tagx_block))) + buf.write(b'\0') + buf.seek(20) + buf.write(pack(b'>I', idxt_offset)) + + return align_block(buf.getvalue()) + # }}} + + def create_book_index(self): # {{{ + indices = [] + seen = set() + id_offsets = self.serializer.id_offsets + + for node in self.oeb.toc.iterdescendants(): + try: + offset = id_offsets[node.href] + label = self.cncx[node.title] + except: + self.log.warn('TOC item %s not found in document'%node.href) + continue + if offset in seen: + continue + seen.add(offset) + index = IndexEntry(offset, label) + indices.append(index) + + indices.sort(key=lambda x:x.offset) + + # Set lengths + for i, index in enumerate(indices): + try: + next_offset = indices[i+1].offset + except: + next_offset = self.serializer.body_end_offset + index.length = next_offset - index.offset + + # Remove empty nodes + indices = [i for i in indices if i.length > 0] + + # Set index values + for i, index in enumerate(indices): + index.index = i + + # Set lengths again to close up any gaps left by filtering + for i, index in enumerate(indices): + try: + next_offset = indices[i+1].offset + except: + next_offset = self.serializer.body_end_offset + index.length = next_offset - index.offset + + return indices + + # }}} + + def create_periodical_index(self): # {{{ + periodical_node = iter(self.oeb.toc).next() + periodical_node_offset = self.serializer.body_start_offset + periodical_node_size = (self.serializer.body_end_offset - + periodical_node_offset) + + normalized_sections = [] + + id_offsets = self.serializer.id_offsets + + periodical = IndexEntry(periodical_node_offset, + self.cncx[periodical_node.title], + class_offset=self.cncx[periodical_node.klass]) + periodical.length = periodical_node_size + periodical.first_child_index = 1 + + seen_sec_offsets = set() + seen_art_offsets = set() + + for sec in periodical_node: + normalized_articles = [] + try: + offset = id_offsets[sec.href] + label = self.cncx[sec.title] + klass = self.cncx[sec.klass] + 
except: + continue + if offset in seen_sec_offsets: + continue + seen_sec_offsets.add(offset) + section = IndexEntry(offset, label, class_offset=klass, depth=1) + section.parent_index = 0 + for art in sec: + try: + offset = id_offsets[art.href] + label = self.cncx[art.title] + klass = self.cncx[art.klass] + except: + continue + if offset in seen_art_offsets: + continue + seen_art_offsets.add(offset) + article = IndexEntry(offset, label, class_offset=klass, + depth=2) + normalized_articles.append(article) + if normalized_articles: + normalized_articles.sort(key=lambda x:x.offset) + normalized_sections.append((section, normalized_articles)) + + normalized_sections.sort(key=lambda x:x[0].offset) + + # Set lengths + for s, x in enumerate(normalized_sections): + sec, normalized_articles = x + try: + sec.length = normalized_sections[s+1][0].offset - sec.offset + except: + sec.length = self.serializer.body_end_offset - sec.offset + for i, art in enumerate(normalized_articles): + try: + art.length = normalized_articles[i+1].offset - art.offset + except: + art.length = sec.offset + sec.length - art.offset + + # Filter + for i, x in list(enumerate(normalized_sections)): + sec, normalized_articles = x + normalized_articles = list(filter(lambda x: x.length > 0, + normalized_articles)) + normalized_sections[i] = (sec, normalized_articles) + + normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1], + normalized_sections)) + + # Set indices + i = 0 + for sec, normalized_articles in normalized_sections: + i += 1 + sec.index = i + + for sec, normalized_articles in normalized_sections: + for art in normalized_articles: + i += 1 + art.index = i + art.parent_index = sec.index + + for sec, normalized_articles in normalized_sections: + sec.first_child_index = normalized_articles[0].index + sec.last_child_index = normalized_articles[-1].index + + # Set lengths again to close up any gaps left by filtering + for s, x in enumerate(normalized_sections): + sec, articles = x + try: + 
next_offset = normalized_sections[s+1][0].offset + except: + next_offset = self.serializer.body_end_offset + sec.length = next_offset - sec.offset + + for a, art in enumerate(articles): + try: + next_offset = articles[a+1].offset + except: + next_offset = sec.next_offset + art.length = next_offset - art.offset + + # Sanity check + for s, x in enumerate(normalized_sections): + sec, articles = x + try: + next_sec = normalized_sections[s+1][0] + except: + if (sec.length == 0 or sec.next_offset != + self.serializer.body_end_offset): + raise ValueError('Invalid section layout') + else: + if next_sec.offset != sec.next_offset or sec.length == 0: + raise ValueError('Invalid section layout') + for a, art in enumerate(articles): + try: + next_art = articles[a+1] + except: + if (art.length == 0 or art.next_offset != + sec.next_offset): + raise ValueError('Invalid article layout') + else: + if art.length == 0 or art.next_offset != next_art.offset: + raise ValueError('Invalid article layout') + + # Flatten + indices = [periodical] + for sec, articles in normalized_sections: + indices.append(sec) + periodical.last_child_index = sec.index + + for sec, articles in normalized_sections: + for a in articles: + indices.append(a) + + return indices + # }}} + diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 088326a876..a031e2e957 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -20,6 +20,7 @@ from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) from calibre.ebooks.mobi.utils import (rescale_image, encint, encode_trailing_data) +from calibre.ebooks.mobi.writer2.indexer import Indexer EXTH_CODES = { 'creator': 100, @@ -87,6 +88,14 @@ class MobiWriter(object): # Indexing {{{ def generate_index(self): self.primary_index_record_idx = None + try: + self.indexer = Indexer(self.serializer, self.last_text_record_idx, + 
len(self.records[self.last_text_record_idx]), self.opts, self.oeb) + except: + self.log.exception('Failed to generate MOBI index:') + else: + self.primary_index_record_idx = len(self.records) + self.records.extend(self.indexer.records) # }}} def write_uncrossable_breaks(self): # {{{ @@ -202,7 +211,6 @@ class MobiWriter(object): record.write(overlap) record.write(pack(b'>B', len(overlap))) - self.last_text_record_idx = nrecords def read_text_record(self, text): @@ -265,8 +273,6 @@ class MobiWriter(object): # EOF record self.records.append('\xE9\x8E\x0D\x0A') - self.generate_end_records() - record0 = StringIO() # The MOBI Header record0.write(pack(b'>HHIHHHH', diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index d6878bee4a..881937ce73 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -143,6 +143,7 @@ class Serializer(object): spine.extend([item for item in self.oeb.spine if not item.linear]) for item in spine: self.serialize_item(item) + self.body_end_offset = buf.tell() buf.write(b'') def serialize_item(self, item): diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 07a963c81b..c65b6b5d14 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -133,6 +133,7 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): authors = [] formatter = EvalFormatter() for aut in mi.authors: + link = '' if mi.author_link_map[aut]: link = mi.author_link_map[aut] elif gprefs.get('default_author_link'): diff --git a/src/calibre/gui2/dialogs/quickview.py b/src/calibre/gui2/dialogs/quickview.py index 2215a3cc95..926c884773 100644 --- a/src/calibre/gui2/dialogs/quickview.py +++ b/src/calibre/gui2/dialogs/quickview.py @@ -183,7 +183,6 @@ class Quickview(QDialog, Ui_Quickview): self.items.blockSignals(False) def indicate_no_items(self): - print 'no items' self.no_valid_items = True self.items.clear() 
self.items.addItem(QListWidgetItem(_('**No items found**'))) diff --git a/src/calibre/gui2/store/__init__.py b/src/calibre/gui2/store/__init__.py index d58ccbda84..ae42d82032 100644 --- a/src/calibre/gui2/store/__init__.py +++ b/src/calibre/gui2/store/__init__.py @@ -6,6 +6,8 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +from calibre.utils.filenames import ascii_filename + class StorePlugin(object): # {{{ ''' A plugin representing an online ebook repository (store). The store can @@ -43,7 +45,7 @@ class StorePlugin(object): # {{{ The easiest way to handle affiliate money payouts is to randomly select between the author's affiliate id and calibre's affiliate id so that 70% of the time the author's id is used. - + See declined.txt for a list of stores that do not want to be included. ''' @@ -53,7 +55,7 @@ class StorePlugin(object): # {{{ self.gui = gui self.name = name self.base_plugin = None - self.config = JSONConfig('store/stores/' + self.name) + self.config = JSONConfig('store/stores/' + ascii_filename(self.name)) def open(self, gui, parent=None, detail_item=None, external=False): ''' diff --git a/src/calibre/gui2/update.py b/src/calibre/gui2/update.py index f76d4b8e65..caa1d3f3dc 100644 --- a/src/calibre/gui2/update.py +++ b/src/calibre/gui2/update.py @@ -15,6 +15,7 @@ from calibre.gui2 import config, dynamic, open_url from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available URL = 'http://status.calibre-ebook.com/latest' +#URL = 'http://localhost:8000/latest' NO_CALIBRE_UPDATE = '-0.0.0' VSEP = '|' diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index a2a85806f5..b5917f1a55 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -17,7 +17,7 @@ from datetime import datetime from functools import partial from calibre.ebooks.metadata import title_sort, author_to_author_sort -from calibre.utils.date import parse_date, isoformat, local_tz 
+from calibre.utils.date import parse_date, isoformat, local_tz, UNDEFINED_DATE from calibre import isbytestring, force_unicode from calibre.constants import iswindows, DEBUG, plugins from calibre.utils.icu import strcmp @@ -39,8 +39,11 @@ def _c_convert_timestamp(val): if ret is None: return parse_date(val, as_utc=False) year, month, day, hour, minutes, seconds, tzsecs = ret - return datetime(year, month, day, hour, minutes, seconds, + try: + return datetime(year, month, day, hour, minutes, seconds, tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) + except OverflowError: + return UNDEFINED_DATE.astimezone(local_tz) def _py_convert_timestamp(val): if val: diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 520206343f..23813a27a8 100755 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -401,7 +401,7 @@ with undefined values in the column. Searching for ``true`` will find all books values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column. Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. -Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.C`, but not the tag `A`. +Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`. Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. 
First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. Examples: