Sync to trunk

Alex Stanev 2011-07-20 12:26:11 +03:00
commit c4714d5a82
25 changed files with 1298 additions and 487 deletions

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed
@ -46,4 +45,3 @@ class GC_gl(BasicNewsRecipe):
}
newArticles.append(newArt)
masterFeed.append((feed.title,newArticles))

recipes/icons/losandes.png: new binary file, 285 B (content not shown)

View File

@ -1,4 +1,3 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
@ -9,14 +8,24 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
oldest_article = 365
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':'text_controls_toggle'})
,dict(name='script')
,dict(name='div', attrs={'id':'text_controls'})
,dict(name='div', attrs={'id':'editing_controls'})
,dict(name='div', attrs={'class':'bar bottom'})
]
use_embedded_content = False
needs_subscription = True
INDEX = u'http://www.instapaper.com'
LOGIN = INDEX + u'/user/login'
feeds = [(u'Instapaper Unread', u'http://www.instapaper.com/u'), (u'Instapaper Starred', u'http://www.instapaper.com/starred')]
feeds = [
(u'Instapaper Unread', u'http://www.instapaper.com/u'),
(u'Instapaper Starred', u'http://www.instapaper.com/starred')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -37,18 +46,20 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'titleRow'}):
description = self.tag_to_string(item.div)
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
#description = self.tag_to_string(item.div)
atag = item.a
if atag and atag.has_key('href'):
url = atag['href']
title = self.tag_to_string(atag)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
'url' :url
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return 'http://www.instapaper.com' + url
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('title').contents[0].strip()

View File

@ -1,4 +1,4 @@
import urllib2
import urllib2, re
from calibre.web.feeds.news import BasicNewsRecipe
class JBPress(BasicNewsRecipe):
@ -40,3 +40,12 @@ class JBPress(BasicNewsRecipe):
def print_version(self, url):
url = urllib2.urlopen(url).geturl() # resolve redirect.
return url.replace('/-/', '/print/')
def preprocess_html(self, soup):
# remove breadcrumb
h3s = soup.findAll('h3')
for h3 in h3s:
if h3.string and re.compile('^JBpress>').match(h3.string):
h3.extract()
return soup
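A quick sketch of the print_version rewrite above, assuming article URLs follow the /articles/-/<id> shape implied by the '/-/' to '/print/' substitution (the sample URL is hypothetical):

url = 'http://jbpress.ismedia.jp/articles/-/12345'  # hypothetical URL
print(url.replace('/-/', '/print/'))
# -> http://jbpress.ismedia.jp/articles/print/12345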

recipes/losandes.recipe: new file, 78 lines
View File

@ -0,0 +1,78 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.losandes.com.ar
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LosAndes(BasicNewsRecipe):
title = 'Los Andes'
__author__ = 'Darko Miletic'
description = 'Noticias de Mendoza, Argentina y el resto del mundo'
publisher = 'Los Andes'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.losandes.com.ar/graficos/losandes.png'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
h1,h2{font-family: "Times New Roman",Times,serif}
.fechaNota{font-weight: bold; color: gray}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
dict(name=['meta','link'])
,dict(attrs={'class':['cabecera', 'url']})
]
remove_tags_before=dict(attrs={'class':'cabecera'})
remove_tags_after=dict(attrs={'class':'url'})
feeds = [
(u'Ultimas Noticias' , u'http://www.losandes.com.ar/servicios/rss.asp?r=78' )
,(u'Politica' , u'http://www.losandes.com.ar/servicios/rss.asp?r=68' )
,(u'Economia nacional' , u'http://www.losandes.com.ar/servicios/rss.asp?r=65' )
,(u'Economia internacional' , u'http://www.losandes.com.ar/servicios/rss.asp?r=505')
,(u'Internacionales' , u'http://www.losandes.com.ar/servicios/rss.asp?r=66' )
,(u'Turismo' , u'http://www.losandes.com.ar/servicios/rss.asp?r=502')
,(u'Fincas' , u'http://www.losandes.com.ar/servicios/rss.asp?r=504')
,(u'Isha nos habla' , u'http://www.losandes.com.ar/servicios/rss.asp?r=562')
,(u'Estilo' , u'http://www.losandes.com.ar/servicios/rss.asp?r=81' )
,(u'Cultura' , u'http://www.losandes.com.ar/servicios/rss.asp?r=503')
,(u'Policiales' , u'http://www.losandes.com.ar/servicios/rss.asp?r=70' )
,(u'Deportes' , u'http://www.losandes.com.ar/servicios/rss.asp?r=69' )
,(u'Sociedad' , u'http://www.losandes.com.ar/servicios/rss.asp?r=67' )
,(u'Opinion' , u'http://www.losandes.com.ar/servicios/rss.asp?r=80' )
,(u'Editorial' , u'http://www.losandes.com.ar/servicios/rss.asp?r=76' )
,(u'Mirador' , u'http://www.losandes.com.ar/servicios/rss.asp?r=79' )
]
def print_version(self, url):
artid = url.rpartition('.')[0].rpartition('-')[2]
return "http://www.losandes.com.ar/includes/modulos/imprimir.asp?tipo=noticia&id=" + artid
def get_cover_url(self):
month = strftime("%m").lstrip('0')
day = strftime("%d").lstrip('0')
year = strftime("%Y")
return "http://www.losandes.com.ar/fotografias/fotosnoticias/" + year + "/" + month + "/" + day + "/th_tapa.jpg"
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
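As a sketch of the two URL helpers above, assuming article URLs end in '-<id>.asp' (the sample URL is hypothetical), and using time.strftime in place of calibre's strftime wrapper to show the zero-stripping in get_cover_url:

url = 'http://www.losandes.com.ar/notas/2011/7/20/un-titulo-581213.asp'  # hypothetical
artid = url.rpartition('.')[0].rpartition('-')[2]
print(artid)  # -> 581213, the digits after the last '-'
import time
print(time.strftime('%m').lstrip('0'))  # e.g. '07' -> '7', as in the cover path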

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class LV_gl(BasicNewsRecipe):
title = u'De Luns a Venres (RSS)'
__author__ = u'Susana Sotelo Docío'
description = u'O gratuíto galego'
publisher = u'Galiciaé'
category = u'news'
encoding = 'utf-8'
language = 'gl'
direction = 'ltr'
cover_url = 'http://lv.galiciae.com/new_estilos/lv/logo.gif'
oldest_article = 2
max_articles_per_feed = 200
center_navbar = False
feeds = [
(u'Galicia', u'http://lv.galiciae.com/cache/rss/sec_galicia_gl.rss'),
(u'Cultura', u'http://lv.galiciae.com/cache/rss/sec_cultura_gl.rss'),
(u'Mundo', u'http://lv.galiciae.com/cache/rss/sec_mundo_gl.rss'),
(u'Cidadanía', u'http://lv.galiciae.com/cache/rss/sec_ciudadania_gl.rss'),
(u'Tecnoloxía', u'http://lv.galiciae.com/cache/rss/sec_tecnologia_gl.rss'),
(u'España', u'http://lv.galiciae.com/cache/rss/sec_espana_gl.rss'),
(u'Deportes', u'http://lv.galiciae.com/cache/rss/sec_deportes_gl.rss'),
(u'Economía', u'http://lv.galiciae.com/cache/rss/sec_economia_gl.rss'),
(u'Lercheo', u'http://lv.galiciae.com/cache/rss/sec_gente_gl.rss'),
(u'Medio ambiente', u'http://lv.galiciae.com/cache/rss/sec_medioambiente_gl.rss'),
(u'España/Mundo', u'http://lv.galiciae.com/cache/rss/sec_espanamundo_gl.rss'),
(u'Sociedade', u'http://lv.galiciae.com/cache/rss/sec_sociedad_gl.rss'),
(u'Ciencia', u'http://lv.galiciae.com/cache/rss/sec_ciencia_gl.rss'),
(u'Motor', u'http://lv.galiciae.com/cache/rss/sec_motor_gl.rss'),
(u'Coches', u'http://lv.galiciae.com/cache/rss/sec_coches_gl.rss'),
(u'Motos', u'http://lv.galiciae.com/cache/rss/sec_motos_gl.rss'),
(u'Industriais', u'http://lv.galiciae.com/cache/rss/sec_industriales_gl.rss')
]
extra_css = u' p{text-align:left} '
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\nencoding="' + encoding + '"\ntags="' + category + '"\noverride_css=" p {text-align:left; text-indent: 0cm} "'
def print_version(self, url):
url += '?imprimir&lang=gl'
return url

View File

@ -1,11 +1,10 @@
EMAILADDRESS = 'hoge@foobar.co.jp'
from calibre.web.feeds.news import BasicNewsRecipe
class NBOnline(BasicNewsRecipe):
title = u'Nikkei Business Online'
language = 'ja'
description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
description = u'Nikkei Business Online.\u6CE8\uFF1A\u30E6\u30FC\u30B6\u30FC\u540D\u306Bemail\u30A2\u30C9\u30EC\u30B9\u3068\u30E6\u30FC\u30B6\u30FC\u540D\u3092\u30BB\u30DF\u30B3\u30ED\u30F3\u3067\u533A\u5207\u3063\u3066\u5165\u308C\u3066\u304F\u3060\u3055\u3044\u3002\u4F8B\uFF1Aemail@address.jp;username . PLEASE NOTE: You need to put your email address and username into the username field separated by ; (semicolon).'
__author__ = 'Ado Nishimura'
needs_subscription = True
oldest_article = 7
@ -23,8 +22,8 @@ class NBOnline(BasicNewsRecipe):
if self.username is not None and self.password is not None:
br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
br.select_form(name='loginActionForm')
br['email'] = EMAILADDRESS
br['userId'] = self.username
br['email'] = self.username.split(';')[0]
br['userId'] = self.username.split(';')[1]
br['password'] = self.password
br.submit()
return br
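The new login packs two credentials into the one username field, as the updated description explains; a minimal illustration with placeholder values:

username = 'email@address.jp;username'  # placeholder credentials
email, user_id = username.split(';')
# br['email'] gets 'email@address.jp', br['userId'] gets 'username'.
# A username without ';' would make the split(';')[1] in get_browser fail.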

View File

@ -11,6 +11,7 @@
<link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
<link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />
<link rel="apple-touch-icon" href="/static/calibre.png" />
<script type="text/javascript" src="{prefix}/static/jquery.js"></script>
<script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>

View File

@ -1181,6 +1181,16 @@ class StoreBeWriteStore(StoreBase):
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreBookotekaStore(StoreBase):
name = 'Bookoteka'
author = u'Tomasz Długosz'
description = u'E-booki w Bookotece dostępne są w formacie EPUB oraz PDF. Publikacje sprzedawane w Bookotece są objęte prawami autorskimi. Zobowiązaliśmy się chronić te prawa, ale bez ograniczania dostępu do książki użytkownikowi, który nabył ją w legalny sposób. Dlatego też Bookoteka stosuje tak zwany „watermarking transakcyjny” czyli swego rodzaju znaki wodne.'
actual_plugin = 'calibre.gui2.store.stores.bookoteka_plugin:BookotekaStore'
drm_free_only = True
headquarters = 'PL'
formats = ['EPUB', 'PDF']
class StoreChitankaStore(StoreBase):
name = u'Моята библиотека'
author = 'Alex Stanev'
@ -1218,16 +1228,16 @@ class StoreEbookscomStore(StoreBase):
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
affiliate = True
class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE'
author = 'Charles Haley'
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
drm_free_only = True
headquarters = 'DE'
formats = ['EPUB']
affiliate = True
#class StoreEPubBuyDEStore(StoreBase):
# name = 'EPUBBuy DE'
# author = 'Charles Haley'
# description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
# actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
#
# drm_free_only = True
# headquarters = 'DE'
# formats = ['EPUB']
# affiliate = True
class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK'
@ -1465,12 +1475,13 @@ plugins += [
StoreBNStore,
StoreBeamEBooksDEStore,
StoreBeWriteStore,
StoreBookotekaStore,
StoreChitankaStore,
StoreDieselEbooksStore,
StoreEbookNLStore,
StoreEbookscomStore,
StoreEBookShoppeUKStore,
StoreEPubBuyDEStore,
# StoreEPubBuyDEStore,
StoreEHarlequinStore,
StoreEpubBudStore,
StoreFeedbooksStore,

View File

@ -131,7 +131,7 @@ class AZBOOKA(ALEX):
description = _('Communicate with the Azbooka')
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
MAIN_MEMORY_VOLUME_LABEL = 'Azbooka Internal Memory'

View File

@ -303,6 +303,9 @@ class CSSPreProcessor(object):
class HTMLPreProcessor(object):
PREPROCESS = [
# Remove huge block of contiguous spaces as they slow down
# the following regexes pretty badly
(re.compile(r'\s{10000,}'), lambda m: ''),
# Some idiotic HTML generators (Frontpage I'm looking at you)
# Put all sorts of crap into <head>. This messes up lxml
(re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),

View File

@ -7,9 +7,13 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, datetime, sys, os
import struct, datetime, sys, os, shutil
from collections import OrderedDict
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint,
get_trailing_data)
from calibre.utils.magick.draw import identify_data
# PalmDB {{{
class PalmDOCAttributes(object):
@ -275,6 +279,8 @@ class MOBIHeader(object): # {{{
self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
self.has_fcis_flis = False
self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[180:192]
self.first_content_record, self.last_content_record = \
@ -284,8 +290,11 @@ class MOBIHeader(object): # {{{
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])
self.unknown6 = self.raw[216:240]
self.extra_data_flags = bin(struct.unpack(b'>I',
self.raw[240:244])[0])
self.extra_data_flags = struct.unpack(b'>I',
self.raw[240:244])[0]
self.has_multibytes = bool(self.extra_data_flags & 0b1)
self.has_indexing_bytes = bool(self.extra_data_flags & 0b10)
self.has_uncrossable_breaks = bool(self.extra_data_flags & 0b100)
self.primary_index_record, = struct.unpack(b'>I',
self.raw[244:248])
@ -346,7 +355,10 @@ class MOBIHeader(object): # {{{
ans.append('FLIS number: %d'% self.flis_number)
ans.append('FLIS count: %d'% self.flis_count)
ans.append('Unknown6: %r'% self.unknown6)
ans.append('Extra data flags: %r'%self.extra_data_flags)
ans.append(('Extra data flags: %s (has multibyte: %s) '
'(has indexing: %s) (has uncrossable breaks: %s)')%(
bin(self.extra_data_flags), self.has_multibytes,
self.has_indexing_bytes, self.has_uncrossable_breaks ))
ans.append('Primary index record (null value: %d): %d'%(0xffffffff,
self.primary_index_record))
@ -368,21 +380,20 @@ class TagX(object): # {{{
def __init__(self, raw, control_byte_count):
self.tag = ord(raw[0])
self.num_values = ord(raw[1])
self.bmask = ord(raw[2])
self.bitmask = bin(self.bmask)
self.bitmask = ord(raw[2])
# End of file = 1 iff last entry
# When it is 1 all others are 0
self.eof = ord(raw[3])
self.is_eof = (self.eof == 1 and self.tag == 0 and self.num_values == 0
and self.bmask == 0)
and self.bitmask == 0)
def __repr__(self):
return 'TAGX(tag=%02d, num_values=%d, bitmask=%r (%d), eof=%d)' % (self.tag,
self.num_values, self.bitmask, self.bmask, self.eof)
return 'TAGX(tag=%02d, num_values=%d, bitmask=%r, eof=%d)' % (self.tag,
self.num_values, bin(self.bitmask), self.eof)
# }}}
class PrimaryIndexRecord(object): # {{{
class IndexHeader(object): # {{{
def __init__(self, record):
self.record = record
@ -413,7 +424,7 @@ class PrimaryIndexRecord(object): # {{{
self.ordt_start, = struct.unpack('>I', raw[40:44])
self.ligt_start, = struct.unpack('>I', raw[44:48])
self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
self.num_of_ctoc_blocks, = struct.unpack('>I', raw[52:56])
self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
self.unknown2 = raw[56:180]
self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
if self.tagx_offset != self.header_length:
@ -435,11 +446,11 @@ class PrimaryIndexRecord(object): # {{{
self.tagx_control_byte_count))
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
raise ValueError('TAGX last entry is not EOF')
self.tagx_entries = self.tagx_entries[:-1]
idxt0_pos = self.header_length+self.tagx_header_length
last_name_len, = struct.unpack(b'>B', raw[idxt0_pos])
count_pos = idxt0_pos+1+last_name_len
last_num = int(raw[idxt0_pos+1:count_pos], 16)
last_num, consumed = decode_hex_number(raw[idxt0_pos:])
count_pos = idxt0_pos + consumed
self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
if last_num != self.ncx_count - 1:
@ -457,9 +468,12 @@ class PrimaryIndexRecord(object): # {{{
def __str__(self):
ans = ['*'*20 + ' Index Header '+ '*'*20]
a = ans.append
def u(w):
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
len(w), not bool(w.replace(b'\0', b'')) ))
a('Header length: %d'%self.header_length)
a('Unknown1: %r (%d bytes) (All zeros: %r)'%(self.unknown1,
len(self.unknown1), not bool(self.unknown1.replace(b'\0', '')) ))
u(self.unknown1)
a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
a('Offset to IDXT start: %d'%self.idxt_start)
a('Number of index records: %d'%self.index_count)
@ -471,12 +485,10 @@ class PrimaryIndexRecord(object): # {{{
a('ORDT start: %d'%self.ordt_start)
a('LIGT start: %d'%self.ligt_start)
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks)
a('Unknown2: %r (%d bytes) (All zeros: %r)'%(self.unknown2,
len(self.unknown2), not bool(self.unknown2.replace(b'\0', '')) ))
a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
u(self.unknown2)
a('TAGX offset: %d'%self.tagx_offset)
a('Unknown3: %r (%d bytes) (All zeros: %r)'%(self.unknown3,
len(self.unknown3), not bool(self.unknown3.replace(b'\0', '')) ))
u(self.unknown3)
a('\n\n')
a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
a('Header length: %d'%self.tagx_header_length)
@ -488,6 +500,354 @@ class PrimaryIndexRecord(object): # {{{
return '\n'.join(ans)
# }}}
class Tag(object): # {{{
'''
Index entries are a collection of tags. Each tag is represented by this
class.
'''
TAG_MAP = {
1: ('offset', 'Offset in HTML'),
2: ('size', 'Size in HTML'),
3: ('label_offset', 'Offset to label in CNCX'),
4: ('depth', 'Depth of this entry in TOC'),
# The remaining tag types have to be interpreted subject to the type
# of index entry they are present in
}
INTERPRET_MAP = {
'subchapter': {
5 : ('Parent chapter index', 'parent_index')
},
'article' : {
5 : ('Class offset in cncx', 'class_offset'),
21 : ('Parent section index', 'parent_index'),
22 : ('Description offset in cncx', 'desc_offset'),
23 : ('Author offset in cncx', 'author_offset'),
},
'chapter_with_subchapters' : {
22 : ('First subchapter index', 'first_subchapter_index'),
23 : ('Last subchapter index', 'last_subchapter_index'),
},
'periodical' : {
5 : ('Class offset in cncx', 'class_offset'),
22 : ('First section index', 'first_section_index'),
23 : ('Last section index', 'last_section_index'),
},
'section' : {
5 : ('Class offset in cncx', 'class_offset'),
21 : ('Periodical index', 'periodical_index'),
22 : ('First article index', 'first_article_index'),
23 : ('Last article index', 'last_article_index'),
},
}
def __init__(self, tagx, vals, entry_type, cncx):
self.value = vals if len(vals) > 1 else vals[0]
self.entry_type = entry_type
self.cncx_value = None
if tagx.tag in self.TAG_MAP:
self.attr, self.desc = self.TAG_MAP[tagx.tag]
else:
try:
td = self.INTERPRET_MAP[entry_type]
except:
raise ValueError('Unknown entry type: %s'%entry_type)
try:
self.desc, self.attr = td[tagx.tag]
except:
raise ValueError('Unknown tag: %d for entry type: %s'%(
tagx.tag, entry_type))
if '_offset' in self.attr:
self.cncx_value = cncx[self.value]
def __str__(self):
if self.cncx_value is not None:
return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
return '%s : %r'%(self.desc, self.value)
# }}}
class IndexEntry(object): # {{{
'''
The index is made up of entries, each of which is represented by an
instance of this class. Index entries typically point to offsets in the
HTML, specify HTML sizes and point to text strings in the CNCX that are
used in the navigation UI.
'''
TYPES = {
# Present in book type files
0x0f : 'chapter',
0x6f : 'chapter_with_subchapters',
0x1f : 'subchapter',
# Present in periodicals
0xdf : 'periodical',
0xff : 'section',
0x3f : 'article',
}
def __init__(self, ident, entry_type, raw, cncx, tagx_entries):
self.index = ident
self.raw = raw
self.tags = []
try:
self.entry_type = self.TYPES[entry_type]
except KeyError:
raise ValueError('Unknown Index Entry type: %s'%hex(entry_type))
expected_tags = [tag for tag in tagx_entries if tag.bitmask &
entry_type]
for tag in expected_tags:
vals = []
for i in range(tag.num_values):
if not raw:
raise ValueError('Index entry does not match TAGX header')
val, consumed = decint(raw)
raw = raw[consumed:]
vals.append(val)
self.tags.append(Tag(tag, vals, self.entry_type, cncx))
@property
def label(self):
for tag in self.tags:
if tag.attr == 'label_offset':
return tag.cncx_value
return ''
def __str__(self):
ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%(
self.index, self.entry_type, len(self.tags))]
for tag in self.tags:
ans.append('\t'+str(tag))
return '\n'.join(ans)
# }}}
class IndexRecord(object): # {{{
'''
Represents all indexing information in the MOBI, apart from indexing info
in the trailing data of the text records.
'''
def __init__(self, record, index_header, cncx):
self.record = record
raw = self.record.raw
if raw[:4] != b'INDX':
raise ValueError('Invalid Primary Index Record')
u = struct.unpack
self.header_length, = u('>I', raw[4:8])
self.unknown1 = raw[8:12]
self.header_type, = u('>I', raw[12:16])
self.unknown2 = raw[16:20]
self.idxt_offset, self.idxt_count = u(b'>II', raw[20:28])
if self.idxt_offset < 192:
raise ValueError('Unknown Index record structure')
self.unknown3 = raw[28:36]
self.unknown4 = raw[36:192] # Should be 156 bytes
self.index_offsets = []
indices = raw[self.idxt_offset:]
if indices[:4] != b'IDXT':
raise ValueError("Invalid IDXT index table")
indices = indices[4:]
for i in range(self.idxt_count):
off, = u(b'>H', indices[i*2:(i+1)*2])
self.index_offsets.append(off-192)
indxt = raw[192:self.idxt_offset]
self.indices = []
for i, off in enumerate(self.index_offsets):
try:
next_off = self.index_offsets[i+1]
except:
next_off = len(indxt)
index, consumed = decode_hex_number(indxt[off:])
entry_type = ord(indxt[off+consumed])
self.indices.append(IndexEntry(index, entry_type,
indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries))
def __str__(self):
ans = ['*'*20 + ' Index Record (%d bytes) '%len(self.record.raw)+ '*'*20]
a = ans.append
def u(w):
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
len(w), not bool(w.replace(b'\0', b'')) ))
a('Header length: %d'%self.header_length)
u(self.unknown1)
a('Header Type: %d'%self.header_type)
u(self.unknown2)
a('IDXT Offset: %d'%self.idxt_offset)
a('IDXT Count: %d'%self.idxt_count)
u(self.unknown3)
u(self.unknown4)
a('Index offsets: %r'%self.index_offsets)
a('\nIndex Entries:')
for entry in self.indices:
a(str(entry)+'\n')
return '\n'.join(ans)
# }}}
class CNCX(object) : # {{{
'''
Parses the records that contain the compiled NCX (all strings from the
NCX). Presents a simple offset : string mapping interface to access the
data.
'''
def __init__(self, records, codec):
self.records = OrderedDict()
pos = 0
for record in records:
raw = record.raw
while pos < len(raw):
length, consumed = decint(raw[pos:])
if length > 0:
self.records[pos] = raw[pos+consumed:pos+consumed+length].decode(
codec)
pos += consumed+length
def __getitem__(self, offset):
return self.records.get(offset)
def __str__(self):
ans = ['*'*20 + ' cncx (%d strings) '%len(self.records)+ '*'*20]
for k, v in self.records.iteritems():
ans.append('%10d : %s'%(k, v))
return '\n'.join(ans)
# }}}
class TextRecord(object): # {{{
def __init__(self, idx, record, extra_data_flags, decompress, index_record,
doc_type):
self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
self.raw = decompress(self.raw)
if 0 in self.trailing_data:
self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0)
if 1 in self.trailing_data:
self.trailing_data['indexing'] = self.trailing_data.pop(1)
if 2 in self.trailing_data:
self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2)
self.idx = idx
if 'indexing' in self.trailing_data and index_record is not None:
self.interpret_indexing(doc_type, index_record.indices)
def interpret_indexing(self, doc_type, indices):
raw = self.trailing_data['indexing']
ident, consumed = decint(raw)
raw = raw[consumed:]
entry_type = ident & 0b111
index_entry_idx = ident >> 3
index_entry = None
for i in indices:
if i.index == index_entry_idx:
index_entry = i.label
break
self.trailing_data['interpreted_indexing'] = (
'Type: %s, Index Entry: %s'%(entry_type, index_entry))
if doc_type == 2: # Book
self.interpret_book_indexing(raw, entry_type)
def interpret_book_indexing(self, raw, entry_type):
arg1, consumed = decint(raw)
raw = raw[consumed:]
if arg1 != 0:
raise ValueError('TBS index entry has unknown arg1: %d'%
arg1)
if entry_type == 2:
desc = ('This record has only a single starting or a single'
' ending point')
if raw:
raise ValueError('TBS index entry has unknown extra bytes:'
' %r'%raw)
elif entry_type == 3:
desc = ('This record is spanned by a single node (i.e. it'
' has no start or end points)')
arg2, consumed = decint(raw)
if arg2 != 0:
raise ValueError('TBS index entry has unknown arg2: %d'%
arg2)
elif entry_type == 6:
if len(raw) != 1:
raise ValueError('TBS index entry has unknown extra bytes:'
' %r'%raw)
num = ord(raw[0])
# An unmatched starting or ending point each contributes 1 to
# this count. A matched pair of starting and ending points
# together contribute 1 to this count. Note that you can only
# ever have either 1 unmatched start point or 1 unmatched end
# point, never both (logically impossible).
desc = ('This record has %d starting/ending points and/or complete'
' nodes.')%num
else:
raise ValueError('Unknown TBS index entry type: %d for book'%entry_type)
self.trailing_data['interpreted_indexing'] += ' :: ' + desc
def dump(self, folder):
name = '%06d'%self.idx
with open(os.path.join(folder, name+'.txt'), 'wb') as f:
f.write(self.raw)
with open(os.path.join(folder, name+'.trailing_data'), 'wb') as f:
for k, v in self.trailing_data.iteritems():
raw = '%s : %r\n\n'%(k, v)
f.write(raw.encode('utf-8'))
# }}}
class ImageRecord(object): # {{{
def __init__(self, idx, record, fmt):
self.raw = record.raw
self.fmt = fmt
self.idx = idx
def dump(self, folder):
name = '%06d'%self.idx
with open(os.path.join(folder, name+'.'+self.fmt), 'wb') as f:
f.write(self.raw)
# }}}
class BinaryRecord(object): # {{{
def __init__(self, idx, record):
self.raw = record.raw
sig = self.raw[:4]
name = '%06d'%idx
if sig in (b'FCIS', b'FLIS', b'SRCS'):
name += '-' + sig.decode('ascii')
elif sig == b'\xe9\x8e\r\n':
name += '-' + 'EOF'
self.name = name
def dump(self, folder):
with open(os.path.join(folder, self.name+'.bin'), 'wb') as f:
f.write(self.raw)
# }}}
class MOBIFile(object): # {{{
def __init__(self, stream):
@ -516,10 +876,59 @@ class MOBIFile(object): # {{{
self.mobi_header = MOBIHeader(self.records[0])
self.primary_index_record = None
if 'huff' in self.mobi_header.compression.lower():
huffrecs = [r.raw for r in
xrange(self.mobi_header.huffman_record_offset,
self.mobi_header.huffman_record_offset +
self.mobi_header.huffman_record_count)]
from calibre.ebooks.mobi.huffcdic import HuffReader
huffs = HuffReader(huffrecs)
decompress = huffs.decompress
elif 'palmdoc' in self.mobi_header.compression.lower():
from calibre.ebooks.compression.palmdoc import decompress_doc
decompress = decompress_doc
else:
decompress = lambda x: x
self.index_header = self.index_record = None
self.indexing_record_nums = set()
pir = self.mobi_header.primary_index_record
if pir != 0xffffffff:
self.primary_index_record = PrimaryIndexRecord(self.records[pir])
self.index_header = IndexHeader(self.records[pir])
self.cncx = CNCX(self.records[
pir+2:pir+2+self.index_header.num_of_cncx_blocks],
self.index_header.index_encoding)
self.index_record = IndexRecord(self.records[pir+1],
self.index_header, self.cncx)
self.indexing_record_nums = set(xrange(pir,
pir+2+self.index_header.num_of_cncx_blocks))
ntr = self.mobi_header.number_of_text_records
fntbr = self.mobi_header.first_non_book_record
fii = self.mobi_header.first_image_index
if fntbr == 0xffffffff:
fntbr = len(self.records)
self.text_records = [TextRecord(r, self.records[r],
self.mobi_header.extra_data_flags, decompress, self.index_record,
self.mobi_header.type_raw) for r in xrange(1,
min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], []
for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums:
continue
r = self.records[i]
fmt = None
if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n'):
try:
width, height, fmt = identify_data(r.raw)
except:
pass
if fmt is not None:
self.image_records.append(ImageRecord(i, r, fmt))
else:
self.binary_records.append(BinaryRecord(i, r))
def print_header(self, f=sys.stdout):
@ -533,18 +942,33 @@ class MOBIFile(object): # {{{
print (str(self.mobi_header).encode('utf-8'), file=f)
# }}}
def inspect_mobi(path_or_stream):
def inspect_mobi(path_or_stream, prefix='decompiled'):
stream = (path_or_stream if hasattr(path_or_stream, 'read') else
open(path_or_stream, 'rb'))
f = MOBIFile(stream)
ddir = 'debug_' + os.path.splitext(os.path.basename(stream.name))[0]
if not os.path.exists(ddir):
ddir = prefix + '_' + os.path.splitext(os.path.basename(stream.name))[0]
try:
shutil.rmtree(ddir)
except:
pass
os.mkdir(ddir)
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
f.print_header(f=out)
if f.primary_index_record is not None:
with open(os.path.join(ddir, 'primary_index_record.txt'), 'wb') as out:
print(str(f.primary_index_record), file=out)
if f.index_header is not None:
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
print(str(f.index_header), file=out)
print('\n\n', file=out)
print(str(f.cncx).encode('utf-8'), file=out)
print('\n\n', file=out)
print(str(f.index_record), file=out)
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
('binary', 'binary_records')]:
tdir = os.path.join(ddir, tdir)
os.mkdir(tdir)
for rec in getattr(f, attr):
rec.dump(tdir)
print ('Debug data saved to:', ddir)
def main():
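Two bit conventions in this inspector are worth a worked example: the extra data flags select which trailing-data entries each text record carries, and the first varint of an indexing trailing entry packs the entry type into its low three bits (see interpret_indexing). A minimal sketch with example values:

extra_data_flags = 0b110  # example value: indexing + uncrossable breaks
print(bool(extra_data_flags & 0b1))    # has multibyte overlap  -> False
print(bool(extra_data_flags & 0b10))   # has indexing bytes     -> True
print(bool(extra_data_flags & 0b100))  # has uncrossable breaks -> True

ident = 0b101011  # example first varint of an indexing trailing entry
print(ident & 0b111)  # TBS entry type -> 3 (record spanned by a single node)
print(ident >> 3)     # index entry number -> 5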

View File

@ -933,6 +933,9 @@ class MobiReader(object):
continue
processed_records.append(i)
data = self.sections[i][0]
if data[:4] in (b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n'):
# A FLIS, FCIS, SRCS or EOF record, ignore
continue
buf = cStringIO.StringIO(data)
image_index += 1
try:

View File

@ -1260,11 +1260,11 @@ class MobiWriter(object):
data = compress_doc(data)
record = StringIO()
record.write(data)
# Marshall's utf-8 break code.
if WRITE_PBREAKS :
# Write trailing multi-byte sequence if any
record.write(overlap)
record.write(pack('>B', len(overlap)))
if WRITE_PBREAKS :
nextra = 0
pbreak = 0
running = offset
@ -1642,6 +1642,61 @@ class MobiWriter(object):
for record in self._records:
self._write(record)
def _clean_text_value(self, text):
if text is not None and text.strip() :
text = text.strip()
if not isinstance(text, unicode):
text = text.decode('utf-8', 'replace')
text = normalize(text).encode('utf-8')
else :
text = "(none)".encode('utf-8')
return text
def _compute_offset_length(self, i, node, entries) :
h = node.href
if h not in self._id_offsets:
self._oeb.log.warning('Could not find TOC entry:', node.title)
return -1, -1
offset = self._id_offsets[h]
length = None
# Calculate length based on next entry's offset
for sibling in entries[i+1:]:
h2 = sibling.href
if h2 in self._id_offsets:
offset2 = self._id_offsets[h2]
if offset2 > offset:
length = offset2 - offset
break
if length is None:
length = self._content_length - offset
return offset, length
def _establish_document_structure(self) :
documentType = None
try :
klass = self._ctoc_map[0]['klass']
except :
klass = None
if klass == 'chapter' or klass == None :
documentType = 'book'
if self.opts.verbose > 2 :
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
self._MobiDoc.documentStructure = MobiBook()
elif klass == 'periodical' :
documentType = klass
if self.opts.verbose > 2 :
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
else :
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
return documentType
# Index {{{
def _generate_index(self):
self._oeb.log('Generating INDX ...')
self._primary_index_record = None
@ -1815,276 +1870,7 @@ class MobiWriter(object):
open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
self._oeb.log.debug('Index records dumped to', t)
def _clean_text_value(self, text):
if text is not None and text.strip() :
text = text.strip()
if not isinstance(text, unicode):
text = text.decode('utf-8', 'replace')
text = normalize(text).encode('utf-8')
else :
text = "(none)".encode('utf-8')
return text
def _add_to_ctoc(self, ctoc_str, record_offset):
# Write vwilen + string to ctoc
# Return offset
# Is there enough room for this string in the current ctoc record?
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
# flush this ctoc, start a new one
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
# pad with 00
pad = 0xfbf8 - self._ctoc.tell()
# print "padding %d bytes of 00" % pad
self._ctoc.write('\0' * (pad))
self._ctoc_records.append(self._ctoc.getvalue())
self._ctoc.truncate(0)
self._ctoc_offset += 0x10000
record_offset = self._ctoc_offset
offset = self._ctoc.tell() + record_offset
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
return offset
def _add_flat_ctoc_node(self, node, ctoc, title=None):
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
t = node.title if title is None else title
t = self._clean_text_value(t)
self._last_toc_entry = t
# Create an empty dictionary for this node
ctoc_name_map = {}
# article = chapter
if node.klass == 'article' :
ctoc_name_map['klass'] = 'chapter'
else :
ctoc_name_map['klass'] = node.klass
# Add title offset to name map
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
self._chapterCount += 1
# append this node's name_map to map
self._ctoc_map.append(ctoc_name_map)
return
def _add_structured_ctoc_node(self, node, ctoc, title=None):
# Process 'periodical', 'section' and 'article'
# Fetch the offset referencing the current ctoc_record
if node.klass is None :
return
t = node.title if title is None else title
t = self._clean_text_value(t)
self._last_toc_entry = t
# Create an empty dictionary for this node
ctoc_name_map = {}
# Add the klass of this node
ctoc_name_map['klass'] = node.klass
if node.klass == 'chapter':
# Add title offset to name map
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
self._chapterCount += 1
elif node.klass == 'periodical' :
# Add title offset
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'periodical' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'periodical':
# Use the pre-existing instance
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
self._periodicalCount += 1
elif node.klass == 'section' :
# Add title offset
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'section' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'section':
# Use the pre-existing instance
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
self._sectionCount += 1
elif node.klass == 'article' :
# Add title offset/title
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'article' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'article':
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
# Add description offset/description
if node.description :
d = self._clean_text_value(node.description)
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
else :
ctoc_name_map['descriptionOffset'] = None
# Add author offset/attribution
if node.author :
a = self._clean_text_value(node.author)
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
else :
ctoc_name_map['authorOffset'] = None
self._articleCount += 1
else :
raise NotImplementedError( \
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
(node.title, node.klass, node.play_order))
# append this node's name_map to map
self._ctoc_map.append(ctoc_name_map)
def _generate_ctoc(self):
# Generate the compiled TOC strings
# Each node has 1-4 CTOC entries:
# Periodical (0xDF)
# title, class
# Section (0xFF)
# title, class
# Article (0x3F)
# title, class, description, author
# Chapter (0x0F)
# title, class
# nb: Chapters don't actually have @class, so we synthesize it
# in reader._toc_from_navpoint
toc = self._oeb.toc
reduced_toc = []
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
self._last_toc_entry = None
#ctoc = StringIO()
self._ctoc = StringIO()
# Track the individual node types
self._periodicalCount = 0
self._sectionCount = 0
self._articleCount = 0
self._chapterCount = 0
#first = True
if self._conforming_periodical_toc :
self._oeb.logger.info('Generating structured CTOC ...')
for (child) in toc.iter():
if self.opts.verbose > 2 :
self._oeb.logger.info(" %s" % child)
self._add_structured_ctoc_node(child, self._ctoc)
#first = False
else :
self._oeb.logger.info('Generating flat CTOC ...')
previousOffset = -1
currentOffset = 0
for (i, child) in enumerate(toc.iterdescendants()):
# Only add chapters or articles at depth==1
# no class defaults to 'chapter'
if child.klass is None : child.klass = 'chapter'
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
if self.opts.verbose > 2 :
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
(child.klass, child.depth(), child) )
# Test to see if this child's offset is the same as the previous child's
# offset, skip it
h = child.href
if h is None:
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
child.title)
continue
if h not in self._id_offsets:
self._oeb.logger.warn(' Ignoring missing TOC entry:',
unicode(child))
continue
currentOffset = self._id_offsets[h]
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
if currentOffset != previousOffset :
self._add_flat_ctoc_node(child, self._ctoc)
reduced_toc.append(child)
previousOffset = currentOffset
else :
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
else :
if self.opts.verbose > 2 :
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
(child.klass, child.depth(),i))
# Update the TOC with our edited version
self._oeb.toc.nodes = reduced_toc
# Instantiate a MobiDocument(mobitype)
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
not self.opts.mobi_periodical :
mobiType = 0x002
elif self._periodicalCount:
pt = None
if self._oeb.metadata.publication_type:
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
if len(x) > 1:
pt = x[1]
mobiType = {'newspaper':0x101}.get(pt, 0x103)
else :
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
self._MobiDoc = MobiDocument(mobiType)
if self.opts.verbose > 2 :
structType = 'book'
if mobiType > 0x100 :
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
if mobiType > 0x100 :
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
(self._periodicalCount, self._sectionCount, self._articleCount) )
else :
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
# Apparently the CTOC must end with a null byte
self._ctoc.write('\0')
ctoc = self._ctoc.getvalue()
rec_count = len(self._ctoc_records)
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
(rec_count + 1, 'records, last record' if rec_count else 'record,',
len(ctoc)/655) )
return align_block(ctoc)
# Index nodes {{{
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
pos = 0xc0 + indxt.tell()
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
@ -2176,48 +1962,8 @@ class MobiWriter(object):
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
def _compute_offset_length(self, i, node, entries) :
h = node.href
if h not in self._id_offsets:
self._oeb.log.warning('Could not find TOC entry:', node.title)
return -1, -1
# }}}
offset = self._id_offsets[h]
length = None
# Calculate length based on next entry's offset
for sibling in entries[i+1:]:
h2 = sibling.href
if h2 in self._id_offsets:
offset2 = self._id_offsets[h2]
if offset2 > offset:
length = offset2 - offset
break
if length is None:
length = self._content_length - offset
return offset, length
def _establish_document_structure(self) :
documentType = None
try :
klass = self._ctoc_map[0]['klass']
except :
klass = None
if klass == 'chapter' or klass == None :
documentType = 'book'
if self.opts.verbose > 2 :
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
self._MobiDoc.documentStructure = MobiBook()
elif klass == 'periodical' :
documentType = klass
if self.opts.verbose > 2 :
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
else :
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
return documentType
def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
sectionTitles = list(child.iter())[1:]
@ -2495,6 +2241,270 @@ class MobiWriter(object):
last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)
return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
# }}}
# CTOC {{{
def _add_to_ctoc(self, ctoc_str, record_offset):
# Write vwilen + string to ctoc
# Return offset
# Is there enough room for this string in the current ctoc record?
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
# flush this ctoc, start a new one
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
# pad with 00
pad = 0xfbf8 - self._ctoc.tell()
# print "padding %d bytes of 00" % pad
self._ctoc.write('\0' * (pad))
self._ctoc_records.append(self._ctoc.getvalue())
self._ctoc.truncate(0)
self._ctoc_offset += 0x10000
record_offset = self._ctoc_offset
offset = self._ctoc.tell() + record_offset
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
return offset
def _add_flat_ctoc_node(self, node, ctoc, title=None):
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
t = node.title if title is None else title
t = self._clean_text_value(t)
self._last_toc_entry = t
# Create an empty dictionary for this node
ctoc_name_map = {}
# article = chapter
if node.klass == 'article' :
ctoc_name_map['klass'] = 'chapter'
else :
ctoc_name_map['klass'] = node.klass
# Add title offset to name map
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
self._chapterCount += 1
# append this node's name_map to map
self._ctoc_map.append(ctoc_name_map)
return
def _add_structured_ctoc_node(self, node, ctoc, title=None):
# Process 'periodical', 'section' and 'article'
# Fetch the offset referencing the current ctoc_record
if node.klass is None :
return
t = node.title if title is None else title
t = self._clean_text_value(t)
self._last_toc_entry = t
# Create an empty dictionary for this node
ctoc_name_map = {}
# Add the klass of this node
ctoc_name_map['klass'] = node.klass
if node.klass == 'chapter':
# Add title offset to name map
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
self._chapterCount += 1
elif node.klass == 'periodical' :
# Add title offset
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'periodical' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'periodical':
# Use the pre-existing instance
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
self._periodicalCount += 1
elif node.klass == 'section' :
# Add title offset
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'section' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'section':
# Use the pre-existing instance
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
self._sectionCount += 1
elif node.klass == 'article' :
# Add title offset/title
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
# Look for existing class entry 'article' in _ctoc_map
for entry in self._ctoc_map:
if entry['klass'] == 'article':
ctoc_name_map['classOffset'] = entry['classOffset']
break
else :
continue
else:
# class names should always be in CNCX 0 - no offset
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
# Add description offset/description
if node.description :
d = self._clean_text_value(node.description)
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
else :
ctoc_name_map['descriptionOffset'] = None
# Add author offset/attribution
if node.author :
a = self._clean_text_value(node.author)
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
else :
ctoc_name_map['authorOffset'] = None
self._articleCount += 1
else :
raise NotImplementedError( \
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
(node.title, node.klass, node.play_order))
# append this node's name_map to map
self._ctoc_map.append(ctoc_name_map)
def _generate_ctoc(self):
# Generate the compiled TOC strings
# Each node has 1-4 CTOC entries:
# Periodical (0xDF)
# title, class
# Section (0xFF)
# title, class
# Article (0x3F)
# title, class, description, author
# Chapter (0x0F)
# title, class
# nb: Chapters don't actually have @class, so we synthesize it
# in reader._toc_from_navpoint
toc = self._oeb.toc
reduced_toc = []
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
self._last_toc_entry = None
#ctoc = StringIO()
self._ctoc = StringIO()
# Track the individual node types
self._periodicalCount = 0
self._sectionCount = 0
self._articleCount = 0
self._chapterCount = 0
#first = True
if self._conforming_periodical_toc :
self._oeb.logger.info('Generating structured CTOC ...')
for (child) in toc.iter():
if self.opts.verbose > 2 :
self._oeb.logger.info(" %s" % child)
self._add_structured_ctoc_node(child, self._ctoc)
#first = False
else :
self._oeb.logger.info('Generating flat CTOC ...')
previousOffset = -1
currentOffset = 0
for (i, child) in enumerate(toc.iterdescendants()):
# Only add chapters or articles at depth==1
# no class defaults to 'chapter'
if child.klass is None : child.klass = 'chapter'
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
if self.opts.verbose > 2 :
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
(child.klass, child.depth(), child) )
# Test to see if this child's offset is the same as the previous child's
# offset, skip it
h = child.href
if h is None:
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
child.title)
continue
if h not in self._id_offsets:
self._oeb.logger.warn(' Ignoring missing TOC entry:',
unicode(child))
continue
currentOffset = self._id_offsets[h]
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
if currentOffset != previousOffset :
self._add_flat_ctoc_node(child, self._ctoc)
reduced_toc.append(child)
previousOffset = currentOffset
else :
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
else :
if self.opts.verbose > 2 :
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
(child.klass, child.depth(),i))
# Update the TOC with our edited version
self._oeb.toc.nodes = reduced_toc
# Instantiate a MobiDocument(mobitype)
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
not self.opts.mobi_periodical :
mobiType = 0x002
elif self._periodicalCount:
pt = None
if self._oeb.metadata.publication_type:
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
if len(x) > 1:
pt = x[1]
mobiType = {'newspaper':0x101}.get(pt, 0x103)
else :
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
self._MobiDoc = MobiDocument(mobiType)
if self.opts.verbose > 2 :
structType = 'book'
if mobiType > 0x100 :
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
if mobiType > 0x100 :
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
(self._periodicalCount, self._sectionCount, self._articleCount) )
else :
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
# Apparently the CTOC must end with a null byte
self._ctoc.write('\0')
ctoc = self._ctoc.getvalue()
rec_count = len(self._ctoc_records)
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
(rec_count + 1, 'records, last record' if rec_count else 'record,',
len(ctoc)/655) )
return align_block(ctoc)
# }}}
class HTMLRecordData(object):
""" A data structure containing indexing/navigation data for an HTML record """

View File

@ -15,10 +15,10 @@ from calibre.ebooks import normalize
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
from calibre.ebooks.mobi.writer2.utils import (rescale_image, encint)
EXTH_CODES = {
'creator': 100,
@ -41,87 +41,10 @@ WRITE_UNCROSSABLE_BREAKS = False
RECORD_SIZE = 0x1000 # 4096
IMAGE_MAX_SIZE = 10 * 1024 * 1024
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
# Almost like the one for MS LIT, but not quite.
DECINT_FORWARD = 0
DECINT_BACKWARD = 1
def decint(value, direction):
'''
Some parts of the Mobipocket format encode data as variable-width integers.
These integers are represented big-endian with 7 bits per byte in bits 1-7.
They may be either forward-encoded, in which case only the LSB has bit 8 set,
or backward-encoded, in which case only the MSB has bit 8 set.
For example, the number 0x11111 would be represented forward-encoded as:
0x04 0x22 0x91
And backward-encoded as:
0x84 0x22 0x11
This function encodes the integer ``value`` as a variable width integer and
returns the bytestring corresponding to it.
'''
# Encode vwi
byts = bytearray()
while True:
b = value & 0x7f
value >>= 7
byts.append(b)
if value == 0:
break
if direction == DECINT_FORWARD:
byts[0] |= 0x80
elif direction == DECINT_BACKWARD:
byts[-1] |= 0x80
return bytes(byts)
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
'''
Convert image setting all transparent pixels to white and changing format
to JPEG. Ensure the resultant image has a byte size less than
maxsizeb.
If dimen is not None, generate a thumbnail of width=dimen, height=dimen
Returns the image as a bytestring
'''
if dimen is not None:
data = thumbnail(data, width=dimen, height=dimen,
compression_quality=90)[-1]
else:
# Replace transparent pixels with white pixels and convert to JPEG
data = save_cover_data_to(data, 'img.jpg', return_data=True)
if len(data) <= maxsizeb:
return data
orig_data = data
img = Image()
quality = 95
img.load(data)
while len(data) >= maxsizeb and quality >= 10:
quality -= 5
img.set_compression_quality(quality)
data = img.export('jpg')
if len(data) <= maxsizeb:
return data
orig_data = data
scale = 0.9
while len(data) >= maxsizeb and scale >= 0.05:
img = Image()
img.load(orig_data)
w, h = img.size
img.size = (int(scale*w), int(scale*h))
img.set_compression_quality(quality)
data = img.export('jpg')
scale -= 0.05
return data
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
@ -243,13 +166,13 @@ class MobiWriter(object):
# the next record.
while breaks and (breaks[0] - offset) < RECORD_SIZE:
pbreak = (breaks.pop(0) - running) >> 3
encoded = decint(pbreak, DECINT_FORWARD)
encoded = encint(pbreak)
record.write(encoded)
running += pbreak << 3
nextra += len(encoded)
lsize = 1
while True:
size = decint(nextra + lsize, DECINT_BACKWARD)
size = encint(nextra + lsize, forward=False)
if len(size) == lsize:
break
lsize += 1
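The lsize loop above searches for a self-consistent trailer: the backward-encoded size field is counted in the size it encodes, so lsize grows until encint(nextra + lsize, forward=False) occupies exactly lsize bytes. A small sketch using the encint defined in writer2/utils.py:

from calibre.ebooks.mobi.writer2.utils import encint
nextra = 130  # example: trailing bytes written so far
lsize = 1
while True:
    size = encint(nextra + lsize, forward=False)
    if len(size) == lsize:
        break
    lsize += 1
print(lsize)       # -> 2 (132 needs a two-byte backward varint)
print(repr(size))  # -> '\x81\x04'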

View File

@ -0,0 +1,177 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from collections import OrderedDict
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
IMAGE_MAX_SIZE = 10 * 1024 * 1024
def decode_hex_number(raw):
'''
Decode a variable-length number stored in hexadecimal encoding. The first
byte gives the number of bytes that follow; those bytes are simply the
hexadecimal representation of the number.
:param raw: Raw binary data as a bytestring
:return: The number and the number of bytes from raw that the number
occupies
'''
length, = struct.unpack(b'>B', raw[0])
raw = raw[1:1+length]
consumed = length+1
return int(raw, 16), consumed
def encode_number_as_hex(num):
'''
Encode num as a variable-length hexadecimal number and return the
bytestring containing it. The first byte gives the number of bytes that
follow; those bytes are simply the hexadecimal representation of the
number.
'''
num = bytes(hex(num)[2:])
ans = bytearray(num)
ans.insert(0, len(num))
return bytes(ans)
def encint(value, forward=True):
'''
Some parts of the Mobipocket format encode data as variable-width integers.
These integers are represented big-endian with 7 bits per byte in bits 1-7.
They may be either forward-encoded, in which case only the first byte has bit 8 set,
or backward-encoded, in which case only the last byte has bit 8 set.
For example, the number 0x11111 = 0b10001000100010001 would be represented
forward-encoded as:
0x04 0x22 0x91 = 0b100 0b100010 0b10010001
And backward-encoded as:
0x84 0x22 0x11 = 0b10000100 0b100010 0b10001
This function encodes the integer ``value`` as a variable width integer and
returns the bytestring corresponding to it.
If forward is True the bytes returned are suitable for prepending to the
output buffer, otherwise they must be appended to the output buffer.
'''
# Encode vwi
byts = bytearray()
while True:
b = value & 0b01111111
value >>= 7 # shift value to the right by 7 bits
byts.append(b)
if value == 0:
break
byts[0 if forward else -1] |= 0b10000000
byts.reverse()
return bytes(byts)
def decint(raw, forward=True):
'''
Read a variable width integer from the bytestring raw and return the
integer and the number of bytes read. If forward is True bytes are read
from the start of raw, otherwise from the end of raw.
This function is the inverse of encint above, see its docs for more
details.
'''
val = 0
byts = bytearray()
for byte in raw if forward else reversed(raw):
bnum = ord(byte)
byts.append(bnum & 0b01111111)
if bnum & 0b10000000:
break
if not forward:
byts.reverse()
for byte in byts:
val <<= 7 # Shift value to the left by 7 bits
val |= byte
return val, len(byts)
def test_decint(num):
for d in (True, False):
raw = encint(num, forward=d)
sz = len(raw)
if (num, sz) != decint(raw, forward=d):
raise ValueError('Failed for num %d, forward=%r: %r != %r' % (
num, d, (num, sz), decint(raw, forward=d)))
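As a concrete check of the docstring's example, pushing 0x11111 through both helpers yields (a sketch; byte values are those produced by the algorithm above):

# 0x11111 splits into the 7-bit groups 0b100, 0b0100010, 0b0010001.
assert encint(0x11111) == b'\x04\x22\x91'                 # last byte flagged
assert encint(0x11111, forward=False) == b'\x84\x22\x11'  # first byte flagged
assert decint(b'\x04\x22\x91', forward=True) == (0x11111, 3)
assert decint(b'\x84\x22\x11', forward=False) == (0x11111, 3)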
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
'''
Convert the image, setting all transparent pixels to white and changing the
format to JPEG. Ensure the resultant image has a byte size less than
maxsizeb.
If dimen is not None, generate a thumbnail of width=dimen, height=dimen.
Returns the image as a bytestring.
'''
if dimen is not None:
data = thumbnail(data, width=dimen, height=dimen,
compression_quality=90)[-1]
else:
# Replace transparent pixels with white pixels and convert to JPEG
data = save_cover_data_to(data, 'img.jpg', return_data=True)
if len(data) <= maxsizeb:
return data
orig_data = data
img = Image()
quality = 95
img.load(data)
while len(data) >= maxsizeb and quality >= 10:
quality -= 5
img.set_compression_quality(quality)
data = img.export('jpg')
if len(data) <= maxsizeb:
return data
orig_data = data
scale = 0.9
while len(data) >= maxsizeb and scale >= 0.05:
img = Image()
img.load(orig_data)
w, h = img.size
img.size = (int(scale*w), int(scale*h))
img.set_compression_quality(quality)
data = img.export('jpg')
scale -= 0.05
return data
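A minimal usage sketch ('cover.png' is a hypothetical input file; any format that Image.load() accepts will do):

# Shrink an arbitrary cover image under the 10 MB MOBI limit, trying
# quality reduction before geometric scaling, as implemented above.
with open('cover.png', 'rb') as f:  # hypothetical input file
    jpeg_data = rescale_image(f.read())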
def get_trailing_data(record, extra_data_flags):
'''
Given a text record as a bytestring and the extra data flags from the MOBI
header, return the trailing data as a dictionary, mapping bit number to
data as a bytestring. Also returns the record with all trailing data removed.
:return: Trailing data, record with trailing data removed
'''
data = OrderedDict()
for i in xrange(16, -1, -1):
flag = 2**i
if flag & extra_data_flags:
if i == 0:
# Only the first two bits are used for the size since there can
# never be more than 3 trailing multibyte chars
sz = (ord(record[-1]) & 0b11) + 1
consumed = 1
else:
sz, consumed = decint(record, forward=False)
if sz > consumed:
data[i] = record[-sz:-consumed]
record = record[:-sz]
return data, record
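A hand-built example of the backward-encoded sizes in action (a sketch; the flag value and payload are invented for illustration):

# A record whose bit-1 trailing entry is the two bytes 'xy'. The entry's
# total size (2 payload bytes + 1 size byte) is 3, backward-encoded as 0x83.
record = b'some text' + b'xy' + b'\x83'
data, text = get_trailing_data(record, 0b10)  # only bit 1 set in the flags
assert data == {1: b'xy'}
assert text == b'some text'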

View File

@ -163,6 +163,8 @@ class OEBReader(object):
if item.media_type in check:
try:
item.data
except KeyboardInterrupt:
raise
except:
self.logger.exception('Failed to parse content in %s'%
item.href)
@ -186,8 +188,13 @@ class OEBReader(object):
href, _ = urldefrag(href)
if not href:
continue
try:
href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme
except:
self.oeb.log.exception(
'Skipping invalid href: %r'%href)
continue
if not scheme and href not in known:
new.add(href)
elif item.media_type in OEB_STYLES:

View File

@ -47,15 +47,19 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.add('series', mi.series)
elif override_input_metadata:
m.clear('series')
if not mi.is_null('isbn'):
identifiers = mi.get_identifiers()
set_isbn = False
for typ, val in identifiers.iteritems():
has = False
if typ.lower() == 'isbn':
set_isbn = True
for x in m.identifier:
if x.scheme.lower() == 'isbn':
x.content = mi.isbn
if x.scheme.lower() == typ.lower():
x.content = val
has = True
if not has:
m.add('identifier', mi.isbn, scheme='ISBN')
elif override_input_metadata:
m.add('identifier', val, scheme=typ.upper())
if override_input_metadata and not set_isbn:
m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')
if not mi.is_null('language'):
m.clear('language')

View File

@ -47,7 +47,10 @@ class ManifestTrimmer(object):
item.data is not None:
hrefs = [r[2] for r in iterlinks(item.data)]
for href in hrefs:
try:
href = item.abshref(urlnormalize(href))
except:
continue
if href in oeb.manifest.hrefs:
found = oeb.manifest.hrefs[href]
if found not in used:

View File

@ -7,8 +7,8 @@ __docformat__ = 'restructuredtext en'
import re, os
from PyQt4.QtCore import SIGNAL, Qt, pyqtSignal
from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, \
QBrush, QTextCursor, QTextEdit
from PyQt4.QtGui import (QDialog, QWidget, QDialogButtonBox,
QBrush, QTextCursor, QTextEdit)
from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
@ -16,6 +16,7 @@ from calibre.gui2 import error_dialog, choose_files
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.constants import iswindows
class RegexBuilder(QDialog, Ui_RegexBuilder):
@ -134,8 +135,18 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
_('Cannot build regex using the GUI builder without a book.'),
show=True)
return False
try:
fpath = db.format(book_id, format, index_is_id=True,
as_path=True)
except OSError:
if iswindows:
import traceback
error_dialog(self, _('Could not open file'),
_('Could not open the file, do you have it open in'
' another program?'), show=True,
det_msg=traceback.format_exc())
return False
raise
try:
self.open_book(fpath)
finally:

View File

@ -4,3 +4,4 @@ or asked not to be included in the store integration.
* Borders (http://www.borders.com/).
* Indigo (http://www.chapters.indigo.ca/).
* Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
* EPubBuy DE: reason: too much traffic for too few sales

View File

@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BookotekaStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://bookoteka.pl/ebooki'
detail_url = None
if detail_item:
detail_url = detail_item
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://bookoteka.pl/list?search=' + urllib.quote_plus(query) + '&cat=1&hp=1&type=1'
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//li[@class="EBOOK"]'):
if counter <= 0:
break
id = ''.join(data.xpath('.//a[@class="item_link"]/@href'))
if not id:
continue
cover_url = ''.join(data.xpath('.//a[@class="item_link"]/@style'))
cover_url = re.sub(r'.*\(', '', cover_url)
cover_url = re.sub(r'\).*', '', cover_url)
title = ''.join(data.xpath('.//div[@class="shelf_title"]/a/text()'))
author = ''.join(data.xpath('.//div[@class="shelf_authors"]/text()'))
price = ''.join(data.xpath('.//span[@class="EBOOK"]/text()'))
price = price.replace('.', ',')
formats = ', '.join(data.xpath('.//a[@class="fancybox protected"]/text()'))
counter -= 1
s = SearchResult()
s.cover_url = 'http://bookoteka.pl' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price
s.detail_item = 'http://bookoteka.pl' + id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = formats.strip()
yield s

View File

@ -153,12 +153,22 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,
bookt.append(TR(thumbnail, data))
# }}}
body.append(HR())
body.append(DIV(
A(_('Switch to the full interface (non-mobile interface)'),
href="/browse",
style="text-decoration: none; color: blue",
title=_('The full interface gives you many more features, '
'but it may not work well on a small screen')),
style="text-align:center"))
return HTML(
HEAD(
TITLE(__appname__ + ' Library'),
LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
type='image/x-icon'),
LINK(rel='stylesheet', type='text/css', href=prefix+'/mobile/style.css')
LINK(rel='stylesheet', type='text/css',
href=prefix+'/mobile/style.css'),
LINK(rel='apple-touch-icon', href="/static/calibre.png")
), # End head
body
) # End html

View File

@ -211,9 +211,9 @@ calibre-dev.bat::
Debugging tips
----------------
Running |app| code in a python debugger is not easy unless you install from source on Linux. However, Python is a
Python is a
dynamically typed language with excellent facilities for introspection. Kovid wrote the core |app| code without once
using a debugger. There are two main strategies to debug |app| code:
using a debugger. There are many strategies to debug |app| code:
Using an interactive python interpreter
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -240,6 +240,12 @@ Similarly, you can start the ebook-viewer as::
calibre-debug -w /path/to/file/to/be/viewed
Using the debugger in PyDev
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
It is possible to get the debugger in PyDev working with the |app| development environment;
see the `forum thread <http://www.mobileread.com/forums/showthread.php?t=143208>`_.
Executing arbitrary scripts in the |app| python environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -405,9 +405,9 @@ To those of you that claim that you need access to the filesystem to so that you
If you are worried that someday |app| will cease to be developed, leaving all your books marooned in its folder structure, explore the powerful "Save to Disk" feature in |app| that lets you export all your files into a folder structure of arbitrary complexity based on their metadata.
Since I keep getting asked why there are numbers at the end of the title folder name, the reason is for *robustness*. That number is the id number of the book record in the |app| database. The presence of the number allows you to have multiple records with the same title and author names. More importantly, it is part of what allows |app| to magically regenerate the database with all metadata if the database file gets corrupted. Given that |app|'s mission is to get you to stop storing metadata in filenames and stop using the filesystem to find things, the increased robustness afforded by the id numbers is well worth the uglier folder names.
Finally, the reason there are numbers at the end of every title folder is for *robustness*. That number is the id number of the book record in the |app| database. The presence of the number allows you to have multiple records with the same title and author names. It is also part of what allows |app| to magically regenerate the database with all metadata if the database file gets corrupted. Given that |app|'s mission is to get you to stop storing metadata in filenames and stop using the filesystem to find things, the increased robustness afforded by the id numbers is well worth the uglier folder names.
Finally, if you are irrevocably wedded to using the filesystem to store your metadata, feel free to patch your local copy of |app| to use whatever storage scheme you like. But, do not bother me with requests to change the directory structure, **they will be ignored**.
If you are still not convinced, then I'm afraid |app| is not for you. Look elsewhere for your book cataloguing needs. Just so we're clear, **this is not going to change**. Kindly do not contact us in an attempt to get us to change this.
Why doesn't |app| have a column for foo?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~