Sync to trunk.

This commit is contained in:
John Schember 2011-07-24 21:57:20 -04:00
commit 1e1562495d
15 changed files with 559 additions and 74 deletions

View File

@ -1,39 +1,34 @@
# -*- coding: utf-8 -*-
__license__ = 'GPLv3'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1255797795(BasicNewsRecipe): class AdvancedUserRecipe1311446032(BasicNewsRecipe):
title = u'Corren' title = 'Corren'
language = 'sv' __author__ = 'Jonas Svensson'
__author__ = 'Jonas Svensson' description = 'News from Sweden'
simultaneous_downloads = 1 publisher = 'Corren'
no_stylesheets = True category = 'news, politics, Sweden'
oldest_article = 7 oldest_article = 2
delay = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_attributes = ['onload'] no_stylesheets = True
timefmt = '' use_embedded_content = False
encoding = 'iso-8859-1'
language = 'sv'
feeds = [ feeds = [
(u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'), (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/')
(u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'), ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/')
(u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'), ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234')
(u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'), ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230')
(u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'), ]
(u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
(u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
(u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
(u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/')
]
def print_version(self, url):
url = url.replace("ekonomi/artikel.aspx", "Print.aspx")
url = url.replace("bostad/artikel.aspx", "Print.aspx")
url = url.replace("kultur/artikel.aspx", "Print.aspx")
url = url.replace("motor/artikel.aspx", "Print.aspx")
url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
url = url.replace("sport/artikel.aspx", "Print.aspx")
url = url.replace("asikter/artikel.aspx", "Print.aspx")
url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx")
url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx")
return url.replace("nyheter/artikel.aspx", "Print.aspx")
keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})]
remove_tags = [
dict(name='ul',attrs={'class':'functions'})
,dict(name='a',attrs={'href':'javascript*'})
,dict(name='div',attrs={'class':'box'})
,dict(name='div',attrs={'class':'functionsbottom'})
]

View File

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
__license__ = 'GPLv3'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311450855(BasicNewsRecipe):
    # Recipe for the single RSS feed published at di.se ("Dagens Industri").

    # Descriptive metadata
    title = u'Dagens Industri'
    __author__ = 'Jonas Svensson'
    description = 'Economy news from Sweden'
    publisher = 'DI'
    category = 'news, politics, Sweden'
    language = 'sv'

    # Download settings
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    feeds = [
        (u'DI', u'http://di.se/rss'),
    ]

    # Only the headline element and the article body are kept ...
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'id': 'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}},
        {'name': 'div', 'attrs': {'id': 'articleBody'}},
    ]

    # ... and these elements are stripped from what remains.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'article-actions clear'}},
        {'name': 'div', 'attrs': {'class': 'article-action-popup'}},
        {'name': 'div', 'attrs': {'class': 'header'}},
        {'name': 'div', 'attrs': {'class': 'content clear'}},
        {'name': 'div', 'attrs': {'id': 'articleAdvertisementDiv'}},
        {'name': 'ul', 'attrs': {'class': 'action-list'}},
    ]

View File

@ -12,7 +12,7 @@ from datetime import date
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):
title = u'The Guardian / The Observer' title = u'The Guardian and The Observer'
if date.today().weekday() == 6: if date.today().weekday() == 6:
base_url = "http://www.guardian.co.uk/theobserver" base_url = "http://www.guardian.co.uk/theobserver"
else: else:
@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe):
# List of section titles to ignore # List of section titles to ignore
# For example: ['Sport'] # For example: ['Sport']
ignore_sections = [] ignore_sections = []
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a, %d %b %Y]'
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
@ -94,7 +94,7 @@ class Guardian(BasicNewsRecipe):
prefix = section_title + ': ' prefix = section_title + ': '
for subsection in s.parent.findAll('a', attrs={'class':'book-section'}): for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
yield (prefix + self.tag_to_string(subsection), subsection['href']) yield (prefix + self.tag_to_string(subsection), subsection['href'])
def find_articles(self, url): def find_articles(self, url):
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
div = soup.find('div', attrs={'class':'book-index'}) div = soup.find('div', attrs={'class':'book-index'})
@ -115,7 +115,7 @@ class Guardian(BasicNewsRecipe):
'title': title, 'url':url, 'description':desc, 'title': title, 'url':url, 'description':desc,
'date' : strftime('%a, %d %b'), 'date' : strftime('%a, %d %b'),
} }
def parse_index(self): def parse_index(self):
try: try:
feeds = [] feeds = []

View File

@ -12,7 +12,7 @@ from datetime import datetime
from dateutil.tz import tzoffset from dateutil.tz import tzoffset
from calibre.constants import plugins from calibre.constants import plugins
from calibre.utils.date import parse_date, local_tz from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
_c_speedup = plugins['speedup'][0] _c_speedup = plugins['speedup'][0]
@ -29,8 +29,11 @@ def _c_convert_timestamp(val):
if ret is None: if ret is None:
return parse_date(val, as_utc=False) return parse_date(val, as_utc=False)
year, month, day, hour, minutes, seconds, tzsecs = ret year, month, day, hour, minutes, seconds, tzsecs = ret
return datetime(year, month, day, hour, minutes, seconds, try:
return datetime(year, month, day, hour, minutes, seconds,
tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
except OverflowError:
return UNDEFINED_DATE.astimezone(local_tz)
class Table(object): class Table(object):

View File

@ -128,7 +128,7 @@ class ANDROID(USBMS):
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD'] 'GT-S5830_CARD', 'GT-S5570_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -399,6 +399,7 @@ class IndexHeader(object): # {{{
def __init__(self, record): def __init__(self, record):
self.record = record self.record = record
raw = self.record.raw raw = self.record.raw
#open('/t/index_header.bin', 'wb').write(raw)
if raw[:4] != b'INDX': if raw[:4] != b'INDX':
raise ValueError('Invalid Primary Index Record') raise ValueError('Invalid Primary Index Record')
@ -406,7 +407,7 @@ class IndexHeader(object): # {{{
self.unknown1 = raw[8:16] self.unknown1 = raw[8:16]
self.index_type, = struct.unpack('>I', raw[16:20]) self.index_type, = struct.unpack('>I', raw[16:20])
self.index_type_desc = {0: 'normal', 2: self.index_type_desc = {0: 'normal', 2:
'inflection'}.get(self.index_type, 'unknown') 'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
self.idxt_start, = struct.unpack('>I', raw[20:24]) self.idxt_start, = struct.unpack('>I', raw[20:24])
self.index_count, = struct.unpack('>I', raw[24:28]) self.index_count, = struct.unpack('>I', raw[24:28])
self.index_encoding_num, = struct.unpack('>I', raw[28:32]) self.index_encoding_num, = struct.unpack('>I', raw[28:32])
@ -596,10 +597,11 @@ class IndexEntry(object): # {{{
0x3f : 'article', 0x3f : 'article',
} }
def __init__(self, ident, entry_type, raw, cncx, tagx_entries): def __init__(self, ident, entry_type, raw, cncx, tagx_entries, flags=0):
self.index = ident self.index = ident
self.raw = raw self.raw = raw
self.tags = [] self.tags = []
self.entry_type_raw = entry_type
try: try:
self.entry_type = self.TYPES[entry_type] self.entry_type = self.TYPES[entry_type]
@ -619,6 +621,27 @@ class IndexEntry(object): # {{{
vals.append(val) vals.append(val)
self.tags.append(Tag(tag, vals, self.entry_type, cncx)) self.tags.append(Tag(tag, vals, self.entry_type, cncx))
if flags & 0b10:
# Look for optional description and author
desc_tag = [t for t in tagx_entries if t.tag == 22]
if desc_tag and raw:
val, consumed = decint(raw)
raw = raw[consumed:]
if val:
self.tags.append(Tag(desc_tag[0], [val], self.entry_type,
cncx))
if flags & 0b100:
aut_tag = [t for t in tagx_entries if t.tag == 23]
if aut_tag and raw:
val, consumed = decint(raw)
raw = raw[consumed:]
if val:
self.tags.append(Tag(aut_tag[0], [val], self.entry_type,
cncx))
if raw.replace(b'\x00', b''): # There can be padding null bytes
raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw))
@property @property
def label(self): def label(self):
for tag in self.tags: for tag in self.tags:
@ -669,8 +692,8 @@ class IndexEntry(object): # {{{
return -1 return -1
def __str__(self): def __str__(self):
ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%( ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%(
self.index, self.entry_type, len(self.tags))] self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))]
for tag in self.tags: for tag in self.tags:
ans.append('\t'+str(tag)) ans.append('\t'+str(tag))
if self.first_child_index != -1: if self.first_child_index != -1:
@ -690,6 +713,7 @@ class IndexRecord(object): # {{{
def __init__(self, record, index_header, cncx): def __init__(self, record, index_header, cncx):
self.record = record self.record = record
raw = self.record.raw raw = self.record.raw
if raw[:4] != b'INDX': if raw[:4] != b'INDX':
raise ValueError('Invalid Primary Index Record') raise ValueError('Invalid Primary Index Record')
@ -713,6 +737,9 @@ class IndexRecord(object): # {{{
for i in range(self.idxt_count): for i in range(self.idxt_count):
off, = u(b'>H', indices[i*2:(i+1)*2]) off, = u(b'>H', indices[i*2:(i+1)*2])
self.index_offsets.append(off-192) self.index_offsets.append(off-192)
rest = indices[(i+1)*2:]
if rest.replace(b'\0', ''): # There can be padding null bytes
raise ValueError('Extra bytes after IDXT table: %r'%rest)
indxt = raw[192:self.idxt_offset] indxt = raw[192:self.idxt_offset]
self.indices = [] self.indices = []
@ -723,8 +750,13 @@ class IndexRecord(object): # {{{
next_off = len(indxt) next_off = len(indxt)
index, consumed = decode_hex_number(indxt[off:]) index, consumed = decode_hex_number(indxt[off:])
entry_type = ord(indxt[off+consumed]) entry_type = ord(indxt[off+consumed])
d, flags = 1, 0
if index_header.index_type == 6:
flags = ord(indxt[off+consumed+d])
d += 1
self.indices.append(IndexEntry(index, entry_type, self.indices.append(IndexEntry(index, entry_type,
indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries)) indxt[off+consumed+d:next_off], cncx,
index_header.tagx_entries, flags=flags))
index = self.indices[-1] index = self.indices[-1]
def get_parent(self, index): def get_parent(self, index):
@ -744,7 +776,7 @@ class IndexRecord(object): # {{{
len(w), not bool(w.replace(b'\0', b'')) )) len(w), not bool(w.replace(b'\0', b'')) ))
a('Header length: %d'%self.header_length) a('Header length: %d'%self.header_length)
u(self.unknown1) u(self.unknown1)
a('Header Type: %d'%self.header_type) a('Unknown (header type? index record number? always 1?): %d'%self.header_type)
u(self.unknown2) u(self.unknown2)
a('IDXT Offset: %d'%self.idxt_offset) a('IDXT Offset: %d'%self.idxt_offset)
a('IDXT Count: %d'%self.idxt_count) a('IDXT Count: %d'%self.idxt_count)

View File

@ -2,6 +2,7 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
from future_builtins import filter
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -12,7 +13,9 @@ from cStringIO import StringIO
from collections import OrderedDict from collections import OrderedDict
from calibre.ebooks import normalize from calibre.ebooks import normalize
from calibre.ebooks.mobi.utils import encint from calibre.ebook.mobi.writer2 import RECORD_SIZE
from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex)
from calibre.ebooks.mobi.langcodes import iana2mobi
def utf8_text(text): def utf8_text(text):
''' '''
@ -37,7 +40,6 @@ def align_block(raw, multiple=4, pad=b'\0'):
if extra == 0: return raw if extra == 0: return raw
return raw + pad*(multiple - extra) return raw + pad*(multiple - extra)
class CNCX(object): # {{{ class CNCX(object): # {{{
''' '''
@ -53,17 +55,11 @@ class CNCX(object): # {{{
for item in toc: for item in toc:
if item is self.toc: continue if item is self.toc: continue
label = item.title self.strings[item.title] = 0
klass = item.klass
if opts.mobi_periodical: if opts.mobi_periodical:
if item.description: self.strings[item.klass] = 0
self.strings[item.description] = 0
if item.author:
self.string[item.author] = 0
self.strings[label] = self.strings[klass] = 0
self.records = [] self.records = []
offset = 0 offset = 0
buf = StringIO() buf = StringIO()
for key in tuple(self.strings.iterkeys()): for key in tuple(self.strings.iterkeys()):
@ -90,27 +86,441 @@ class CNCX(object): # {{{
return self.strings[string] return self.strings[string]
# }}} # }}}
class IndexEntry(object): # {{{

    '''
    One entry of the index table that is written into the INDX record.

    An entry stores where it starts in the text (``offset``), how many
    bytes it spans (``length``), the offsets of its label and class strings
    (``label_offset``, ``class_offset``), its ``depth`` and, optionally, the
    indices of its parent and of its first and last child.
    '''

    # Attribute name -> tag number
    TAG_VALUES = {
            'offset': 1,
            'size': 2,
            'label_offset': 3,
            'depth': 4,
            'class_offset': 5,
            'parent_index': 21,
            'first_child_index': 22,
            'last_child_index': 23,
            }
    # Tag number -> attribute name
    RTAG_MAP = dict((num, name) for name, num in TAG_VALUES.items())

    # The position of a tag number in this list is the bit it occupies in
    # the control byte (see tagx_block() and entry_type)
    BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,]

    def __init__(self, offset, label_offset, depth=0, class_offset=None):
        '''
        :param offset: Offset at which this entry starts
        :param label_offset: Offset of the label string of this entry
        :param depth: Nesting depth of this entry
        :param class_offset: Offset of the class string or None
        '''
        self.offset, self.label_offset = offset, label_offset
        self.depth, self.class_offset = depth, class_offset

        self.length = 0
        self.index = 0

        # Tags that are None are left out of the serialized entry
        self.parent_index = None
        self.first_child_index = None
        self.last_child_index = None

    @property
    def size(self):
        # Alias for length. Tag 2 is named 'size' in TAG_VALUES, so
        # bytestring looks the value up under that name.
        return self.length

    @size.setter
    def size(self, val):
        self.length = val

    @classmethod
    def tagx_block(cls, for_periodical=True):
        '''
        Return the TAGX block describing the tags used by index entries.

        Tags 1-4 are always declared, tags 5, 21, 22 and 23 only when
        for_periodical is True. Every tag is written as four bytes: tag
        number, number of values, bitmask, end of file flag.
        '''
        buf = bytearray()

        def add_tag(tag, num_values=1):
            buf.append(tag)
            buf.append(num_values)
            # bitmask
            buf.append(1 << (cls.BITMASKS.index(tag)))
            # eof
            buf.append(0)

        for tag in range(1, 5):
            add_tag(tag)

        if for_periodical:
            for tag in (5, 21, 22, 23):
                add_tag(tag)

        # End of TAGX record
        for i in range(3): buf.append(0)
        buf.append(1)

        header = b'TAGX'
        # NOTE(review): this is the length of the tag table only, confirm
        # whether the 12 byte TAGX header has to be included as well
        header += pack(b'>I', len(buf)) # table length
        header += pack(b'>I', 1) # control byte count

        return header + bytes(buf)

    @property
    def next_offset(self):
        # Offset of the first byte after this entry
        return self.offset + self.length

    @property
    def tag_nums(self):
        # Tags 1-4 are always present, the others only if they are set
        for i in range(1, 5):
            yield i
        for attr in ('class_offset', 'parent_index', 'first_child_index',
                'last_child_index'):
            if getattr(self, attr) is not None:
                yield self.TAG_VALUES[attr]

    @property
    def entry_type(self):
        # Control byte: one bit per tag present in this entry. The bit
        # positions must agree with the bitmasks written by tagx_block()
        ans = 0
        for tag in self.tag_nums:
            ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x
        return ans

    @property
    def bytestring(self):
        # Serialized form: index, control byte, then the value of every
        # present tag in the order given by tag_nums
        buf = StringIO()
        buf.write(encode_number_as_hex(self.index))
        et = self.entry_type
        buf.write(bytes(bytearray([et])))

        for tag in self.tag_nums:
            attr = self.RTAG_MAP[tag]
            val = getattr(self, attr)
            buf.write(encint(val))

        ans = buf.getvalue()
        return ans

# }}}
class Indexer(object): class Indexer(object):
def __init__(self, serializer, number_of_text_records,
        size_of_last_text_record, opts, oeb):
    '''
    Build all index related records and store them in self.records.

    After construction self.records holds, in order: the header record,
    the index record and the CNCX records.
    '''
    self.serializer = serializer
    self.number_of_text_records = number_of_text_records
    # All text records except the last one are counted as RECORD_SIZE bytes
    full_records = self.number_of_text_records - 1
    self.text_size = RECORD_SIZE * full_records + size_of_last_text_record
    self.oeb, self.log, self.opts = oeb, oeb.log, opts

    periodical = opts.mobi_periodical
    self.is_periodical = periodical
    self.is_flat_periodical = False
    if periodical:
        # A periodical with exactly one section is flat
        root = next(iter(oeb.toc))
        self.is_flat_periodical = len(tuple(root)) == 1

    self.records = []
    self.cncx = CNCX(oeb.toc, opts)

    build_indices = (self.create_periodical_index if self.is_periodical
            else self.create_book_index)
    self.indices = build_indices()

    # The index record has to exist before the header is created, as the
    # header stores the number of index records
    index_record = self.create_index_record()
    self.records.append(index_record)
    header_record = self.create_header()
    self.records.insert(0, header_record)
    self.records.extend(self.cncx.records)
def create_index_record(self): # {{{
    '''
    Serialize self.indices into a single INDX record.

    The record is a 192 byte header, followed by the aligned index
    entries, followed by an aligned IDXT block holding the offset of every
    entry. Raises ValueError if the record is larger than 0x10000 bytes.
    '''
    header_length = 192

    # Serialize the entries, remembering where each one starts
    entry_offsets, chunks, pos = [], [], 0
    for entry in self.indices:
        raw = entry.bytestring
        entry_offsets.append(pos)
        chunks.append(raw)
        pos += len(raw)
    index_block = align_block(b''.join(chunks))

    # IDXT block: offsets of the entries, relative to the record start
    packed_offsets = b''.join(pack(b'>H', header_length + off)
            for off in entry_offsets)
    idxt_block = align_block(b'IDXT' + packed_offsets)

    header = b''.join((
        b'INDX',
        pack(b'>I', header_length),
        b'\0'*4, # Unknown
        pack(b'>I', 1), # Header type? Or index record number?
        b'\0'*4, # Unknown
        pack(b'>I', header_length + len(index_block)), # IDXT block offset
        pack(b'>I', len(entry_offsets)), # Number of index entries
        b'\xff'*8, # Unknown
        b'\0'*156, # Unknown
    ))

    record = header + index_block + idxt_block
    if len(record) > 0x10000:
        raise ValueError('Too many entries (%d) in the TOC'%len(entry_offsets))
    return record
# }}}
def create_header(self): # {{{
    '''
    Create the primary INDX header record.

    The record consists of a 192 byte header, the TAGX block, the index of
    the last entry and the number of entries, padding to a multiple of 4
    bytes and an IDXT block. The IDXT offset in the header is filled in
    once the position of the IDXT block is known.

    Must be called after self.indices, self.cncx and the index record in
    self.records have been created, as their sizes are stored in the
    header.

    :return: The record as a bytestring, padded to a multiple of 4 bytes
    '''
    buf = StringIO()
    tagx_block = IndexEntry.tagx_block(self.is_periodical)
    header_length = 192

    # Ident 0 - 4
    buf.write(b'INDX')

    # Header length 4 - 8
    buf.write(pack(b'>I', header_length))

    # Unknown 8-16
    buf.write(b'\0'*8)

    # Index type: 0 - normal, 2 - inflection 16 - 20
    buf.write(pack(b'>I', 2))

    # IDXT offset 20-24
    buf.write(pack(b'>I', 0)) # Filled in later

    # Number of index records 24-28
    buf.write(pack(b'>I', len(self.records)))

    # Index Encoding 28-32
    buf.write(pack(b'>I', 65001)) # utf-8

    # Index language 32-36
    buf.write(iana2mobi(
        str(self.oeb.metadata.language[0])))

    # Number of index entries 36-40
    buf.write(pack(b'>I', len(self.indices)))

    # ORDT offset 40-44
    buf.write(pack(b'>I', 0))

    # LIGT offset 44-48
    buf.write(pack(b'>I', 0))

    # Number of LIGT entries 48-52
    buf.write(pack(b'>I', 0))

    # Number of CNCX records 52-56
    buf.write(pack(b'>I', len(self.cncx.records)))

    # Unknown 56-180
    buf.write(b'\0'*124)

    # TAGX offset 180-184
    buf.write(pack(b'>I', header_length))

    # Unknown 184-192
    buf.write(b'\0'*8)

    # TAGX block
    buf.write(tagx_block)

    num = len(self.indices)

    # The index of the last entry in the NCX
    buf.write(encode_number_as_hex(num-1))

    # The number of entries in the NCX
    buf.write(pack(b'>H', num))

    # Padding
    pad = (4 - (buf.tell()%4))%4
    if pad:
        buf.write(b'\0'*pad)

    idxt_offset = buf.tell()

    buf.write(b'IDXT')
    # IDXT entries are 2 byte offsets, as in create_index_record(). This
    # one points just past the TAGX block.
    buf.write(pack(b'>H', header_length + len(tagx_block)))
    buf.write(b'\0')

    # Go back and fill in the IDXT offset
    buf.seek(20)
    buf.write(pack(b'>I', idxt_offset))

    return align_block(buf.getvalue())
# }}}
def create_book_index(self): # {{{
    '''
    Create the index entries for a book (non periodical) from the TOC.

    TOC nodes whose href or title cannot be resolved are skipped with a
    warning, as are nodes pointing at an offset that already has an
    entry. Entries are sorted by offset, every entry extends up to the
    start of the next one (the last up to the end of the body) and
    entries of zero length are dropped.

    :return: List of IndexEntry objects, numbered in order
    '''
    indices = []
    seen = set()
    id_offsets = self.serializer.id_offsets
    body_end = self.serializer.body_end_offset

    def set_lengths(entries):
        # Every entry runs up to the start of the next entry, the last
        # entry runs up to the end of the body
        for i, entry in enumerate(entries):
            try:
                next_offset = entries[i+1].offset
            except IndexError:
                next_offset = body_end
            entry.length = next_offset - entry.offset

    for node in self.oeb.toc.iterdescendants():
        try:
            offset = id_offsets[node.href]
            label = self.cncx[node.title]
        except Exception:
            self.log.warn('TOC item %s not found in document'%node.href)
            continue
        if offset in seen:
            continue
        seen.add(offset)
        indices.append(IndexEntry(offset, label))

    indices.sort(key=lambda x:x.offset)

    set_lengths(indices)

    # Remove empty nodes
    indices = [i for i in indices if i.length > 0]

    # Set index values
    for i, index in enumerate(indices):
        index.index = i

    # Set lengths again to close up any gaps left by filtering
    set_lengths(indices)

    return indices
# }}}
def create_periodical_index(self): # {{{
    '''
    Create the index entries for a periodical from the TOC.

    The first child of the TOC root is the periodical, its children are
    the sections and their children are the articles. Nodes that cannot
    be resolved or that duplicate an already seen offset are skipped.
    Articles of zero length and sections that are empty or have no
    articles are dropped.

    :return: A flat list: the periodical entry (index 0), then all
             sections, then all articles
    :raises ValueError: If the remaining sections and articles do not
             tile the text without gaps
    '''
    periodical_node = next(iter(self.oeb.toc))
    periodical_node_offset = self.serializer.body_start_offset
    body_end = self.serializer.body_end_offset
    periodical_node_size = body_end - periodical_node_offset

    normalized_sections = []

    id_offsets = self.serializer.id_offsets

    def set_lengths(sections):
        # A section runs up to the start of the next section (the last
        # one up to the end of the body). An article runs up to the start
        # of the next article (the last one up to the end of its section).
        for s, (sec, articles) in enumerate(sections):
            try:
                # sections holds (section, articles) tuples
                next_offset = sections[s+1][0].offset
            except IndexError:
                next_offset = body_end
            sec.length = next_offset - sec.offset
            for a, art in enumerate(articles):
                try:
                    next_offset = articles[a+1].offset
                except IndexError:
                    next_offset = sec.next_offset
                art.length = next_offset - art.offset

    periodical = IndexEntry(periodical_node_offset,
            self.cncx[periodical_node.title],
            class_offset=self.cncx[periodical_node.klass])
    periodical.length = periodical_node_size
    periodical.first_child_index = 1

    seen_sec_offsets = set()
    seen_art_offsets = set()

    for sec in periodical_node:
        normalized_articles = []
        try:
            offset = id_offsets[sec.href]
            label = self.cncx[sec.title]
            klass = self.cncx[sec.klass]
        except Exception:
            continue
        if offset in seen_sec_offsets:
            continue
        seen_sec_offsets.add(offset)
        section = IndexEntry(offset, label, class_offset=klass, depth=1)
        section.parent_index = 0
        for art in sec:
            try:
                offset = id_offsets[art.href]
                label = self.cncx[art.title]
                klass = self.cncx[art.klass]
            except Exception:
                continue
            if offset in seen_art_offsets:
                continue
            seen_art_offsets.add(offset)
            article = IndexEntry(offset, label, class_offset=klass,
                    depth=2)
            normalized_articles.append(article)
        if normalized_articles:
            normalized_articles.sort(key=lambda x:x.offset)
            normalized_sections.append((section, normalized_articles))

    normalized_sections.sort(key=lambda x:x[0].offset)

    set_lengths(normalized_sections)

    # Filter out empty articles, then empty or article-less sections
    normalized_sections = [
            (sec, [art for art in articles if art.length > 0])
            for sec, articles in normalized_sections]
    normalized_sections = [(sec, articles)
            for sec, articles in normalized_sections
            if sec.length > 0 and articles]

    # Set indices: sections are numbered first, then all articles
    i = 0
    for sec, articles in normalized_sections:
        i += 1
        sec.index = i

    for sec, articles in normalized_sections:
        for art in articles:
            i += 1
            art.index = i
            art.parent_index = sec.index

    for sec, articles in normalized_sections:
        sec.first_child_index = articles[0].index
        sec.last_child_index = articles[-1].index

    # Set lengths again to close up any gaps left by filtering
    set_lengths(normalized_sections)

    # Sanity check: every entry must be non-empty and end exactly where
    # the next one starts
    for s, (sec, articles) in enumerate(normalized_sections):
        try:
            expected_end = normalized_sections[s+1][0].offset
        except IndexError:
            expected_end = body_end
        if sec.length == 0 or sec.next_offset != expected_end:
            raise ValueError('Invalid section layout')
        for a, art in enumerate(articles):
            try:
                expected_end = articles[a+1].offset
            except IndexError:
                expected_end = sec.next_offset
            if art.length == 0 or art.next_offset != expected_end:
                raise ValueError('Invalid article layout')

    # Flatten
    indices = [periodical]
    for sec, articles in normalized_sections:
        indices.append(sec)
        periodical.last_child_index = sec.index

    for sec, articles in normalized_sections:
        for a in articles:
            indices.append(a)

    return indices
# }}}

View File

@ -20,6 +20,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint, from calibre.ebooks.mobi.utils import (rescale_image, encint,
encode_trailing_data) encode_trailing_data)
from calibre.ebooks.mobi.writer2.indexer import Indexer
EXTH_CODES = { EXTH_CODES = {
'creator': 100, 'creator': 100,
@ -87,6 +88,14 @@ class MobiWriter(object):
# Indexing {{{ # Indexing {{{
def generate_index(self):
    '''
    Generate the index records and append them to self.records.

    Index generation is best effort: on failure the error is logged and
    self.primary_index_record_idx is left as None. On success it is set
    to the position of the first index record in self.records.
    '''
    self.primary_index_record_idx = None
    try:
        # Indexer takes the size of the last text record in addition to
        # the number of text records.
        # NOTE(review): this uses the length of the record as stored in
        # self.records, confirm whether trailing bytes have to be
        # excluded
        last_record_size = len(self.records[self.last_text_record_idx])
        self.indexer = Indexer(self.serializer, self.last_text_record_idx,
                last_record_size, self.opts, self.oeb)
    except Exception:
        self.log.exception('Failed to generate MOBI index:')
    else:
        self.primary_index_record_idx = len(self.records)
        self.records.extend(self.indexer.records)
# }}} # }}}
def write_uncrossable_breaks(self): # {{{ def write_uncrossable_breaks(self): # {{{
@ -202,7 +211,6 @@ class MobiWriter(object):
record.write(overlap) record.write(overlap)
record.write(pack(b'>B', len(overlap))) record.write(pack(b'>B', len(overlap)))
self.last_text_record_idx = nrecords self.last_text_record_idx = nrecords
def read_text_record(self, text): def read_text_record(self, text):
@ -265,8 +273,6 @@ class MobiWriter(object):
# EOF record # EOF record
self.records.append('\xE9\x8E\x0D\x0A') self.records.append('\xE9\x8E\x0D\x0A')
self.generate_end_records()
record0 = StringIO() record0 = StringIO()
# The MOBI Header # The MOBI Header
record0.write(pack(b'>HHIHHHH', record0.write(pack(b'>HHIHHHH',

View File

@ -143,6 +143,7 @@ class Serializer(object):
spine.extend([item for item in self.oeb.spine if not item.linear]) spine.extend([item for item in self.oeb.spine if not item.linear])
for item in spine: for item in spine:
self.serialize_item(item) self.serialize_item(item)
self.body_end_offset = buf.tell()
buf.write(b'</body>') buf.write(b'</body>')
def serialize_item(self, item): def serialize_item(self, item):

View File

@ -133,6 +133,7 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
authors = [] authors = []
formatter = EvalFormatter() formatter = EvalFormatter()
for aut in mi.authors: for aut in mi.authors:
link = ''
if mi.author_link_map[aut]: if mi.author_link_map[aut]:
link = mi.author_link_map[aut] link = mi.author_link_map[aut]
elif gprefs.get('default_author_link'): elif gprefs.get('default_author_link'):

View File

@ -183,7 +183,6 @@ class Quickview(QDialog, Ui_Quickview):
self.items.blockSignals(False) self.items.blockSignals(False)
def indicate_no_items(self): def indicate_no_items(self):
print 'no items'
self.no_valid_items = True self.no_valid_items = True
self.items.clear() self.items.clear()
self.items.addItem(QListWidgetItem(_('**No items found**'))) self.items.addItem(QListWidgetItem(_('**No items found**')))

View File

@ -6,6 +6,8 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.utils.filenames import ascii_filename
class StorePlugin(object): # {{{ class StorePlugin(object): # {{{
''' '''
A plugin representing an online ebook repository (store). The store can A plugin representing an online ebook repository (store). The store can
@ -43,7 +45,7 @@ class StorePlugin(object): # {{{
The easiest way to handle affiliate money payouts is to randomly select The easiest way to handle affiliate money payouts is to randomly select
between the author's affiliate id and calibre's affiliate id so that between the author's affiliate id and calibre's affiliate id so that
70% of the time the author's id is used. 70% of the time the author's id is used.
See declined.txt for a list of stores that do not want to be included. See declined.txt for a list of stores that do not want to be included.
''' '''
@ -53,7 +55,7 @@ class StorePlugin(object): # {{{
self.gui = gui self.gui = gui
self.name = name self.name = name
self.base_plugin = None self.base_plugin = None
self.config = JSONConfig('store/stores/' + self.name) self.config = JSONConfig('store/stores/' + ascii_filename(self.name))
def open(self, gui, parent=None, detail_item=None, external=False): def open(self, gui, parent=None, detail_item=None, external=False):
''' '''

View File

@ -15,6 +15,7 @@ from calibre.gui2 import config, dynamic, open_url
from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available
URL = 'http://status.calibre-ebook.com/latest' URL = 'http://status.calibre-ebook.com/latest'
#URL = 'http://localhost:8000/latest'
NO_CALIBRE_UPDATE = '-0.0.0' NO_CALIBRE_UPDATE = '-0.0.0'
VSEP = '|' VSEP = '|'

View File

@ -17,7 +17,7 @@ from datetime import datetime
from functools import partial from functools import partial
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.date import parse_date, isoformat, local_tz from calibre.utils.date import parse_date, isoformat, local_tz, UNDEFINED_DATE
from calibre import isbytestring, force_unicode from calibre import isbytestring, force_unicode
from calibre.constants import iswindows, DEBUG, plugins from calibre.constants import iswindows, DEBUG, plugins
from calibre.utils.icu import strcmp from calibre.utils.icu import strcmp
@ -39,8 +39,11 @@ def _c_convert_timestamp(val):
if ret is None: if ret is None:
return parse_date(val, as_utc=False) return parse_date(val, as_utc=False)
year, month, day, hour, minutes, seconds, tzsecs = ret year, month, day, hour, minutes, seconds, tzsecs = ret
return datetime(year, month, day, hour, minutes, seconds, try:
return datetime(year, month, day, hour, minutes, seconds,
tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
except OverflowError:
return UNDEFINED_DATE.astimezone(local_tz)
def _py_convert_timestamp(val): def _py_convert_timestamp(val):
if val: if val:

View File

@ -401,7 +401,7 @@ with undefined values in the column. Searching for ``true`` will find all books
values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column. values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column.
Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column.
Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.C`, but not the tag `A`. Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`.
Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. Examples: Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. Examples: