mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
6e5016cdaa
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.web.feeds import Feed
|
from calibre.web.feeds import Feed
|
||||||
|
|
||||||
@ -46,4 +45,3 @@ class GC_gl(BasicNewsRecipe):
|
|||||||
}
|
}
|
||||||
newArticles.append(newArt)
|
newArticles.append(newArt)
|
||||||
masterFeed.append((feed.title,newArticles))
|
masterFeed.append((feed.title,newArticles))
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
||||||
@ -9,14 +8,24 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
|||||||
oldest_article = 365
|
oldest_article = 365
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'id':'text_controls_toggle'})
|
||||||
|
,dict(name='script')
|
||||||
|
,dict(name='div', attrs={'id':'text_controls'})
|
||||||
|
,dict(name='div', attrs={'id':'editing_controls'})
|
||||||
|
,dict(name='div', attrs={'class':'bar bottom'})
|
||||||
|
]
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
INDEX = u'http://www.instapaper.com'
|
INDEX = u'http://www.instapaper.com'
|
||||||
LOGIN = INDEX + u'/user/login'
|
LOGIN = INDEX + u'/user/login'
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
feeds = [(u'Instapaper Unread', u'http://www.instapaper.com/u'), (u'Instapaper Starred', u'http://www.instapaper.com/starred')]
|
(u'Instapaper Unread', u'http://www.instapaper.com/u'),
|
||||||
|
(u'Instapaper Starred', u'http://www.instapaper.com/starred')
|
||||||
|
]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
@ -37,18 +46,20 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
|
|||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup(feedurl)
|
soup = self.index_to_soup(feedurl)
|
||||||
for item in soup.findAll('div', attrs={'class':'titleRow'}):
|
for item in soup.findAll('div', attrs={'class':'cornerControls'}):
|
||||||
description = self.tag_to_string(item.div)
|
#description = self.tag_to_string(item.div)
|
||||||
atag = item.a
|
atag = item.a
|
||||||
if atag and atag.has_key('href'):
|
if atag and atag.has_key('href'):
|
||||||
url = atag['href']
|
url = atag['href']
|
||||||
title = self.tag_to_string(atag)
|
|
||||||
date = strftime(self.timefmt)
|
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'url' :url
|
||||||
,'date' :date
|
|
||||||
,'url' :url
|
|
||||||
,'description':description
|
|
||||||
})
|
})
|
||||||
totalfeeds.append((feedtitle, articles))
|
totalfeeds.append((feedtitle, articles))
|
||||||
return totalfeeds
|
return totalfeeds
|
||||||
|
|
||||||
|
def print_version(self, url):
    '''
    Turn the site-relative link collected for an article into an absolute URL.

    :param url: the href read from the listing page; it is appended
                unchanged to the site root
    :return: ``'http://www.instapaper.com'`` immediately followed by ``url``
    '''
    site_root = 'http://www.instapaper.com'
    return ''.join((site_root, url))
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
    '''
    Overwrite the article title with the text of the page's <title> element.

    :param article: object whose ``title`` attribute is replaced
    :param soup: parsed article page; only ``soup.find('title')`` is consulted
    :param first: not used by this implementation
    '''
    title_tag = soup.find('title')
    # A page without a <title>, or with an empty one, keeps whatever title
    # the article already has instead of failing with AttributeError or
    # IndexError.
    if title_tag is None or not title_tag.contents:
        return
    article.title = title_tag.contents[0].strip()
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import urllib2
|
import urllib2, re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class JBPress(BasicNewsRecipe):
|
class JBPress(BasicNewsRecipe):
|
||||||
@ -40,3 +40,12 @@ class JBPress(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
    '''
    Return the address of the printer-friendly version of an article.

    The feed link is opened so that redirects are followed, then the
    ``/-/`` path segment of the final address is swapped for ``/print/``.

    :param url: article link as found in the feed
    :return: the resolved address with ``'/-/'`` replaced by ``'/print/'``
    '''
    response = urllib2.urlopen(url)
    try:
        url = response.geturl()  # resolve redirect.
    finally:
        # Only the final address is needed; release the connection instead
        # of leaving it to the garbage collector.
        response.close()
    return url.replace('/-/', '/print/')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    '''
    Strip the breadcrumb heading from an article page.

    Every <h3> whose text starts with ``JBpress>`` is detached from the tree.

    :param soup: parsed article page
    :return: the same ``soup`` object, modified in place
    '''
    # remove breadcrumb
    breadcrumb = re.compile('^JBpress>')
    for h3 in soup.findAll('h3'):
        text = h3.string
        # ``string`` can be None (e.g. a heading that holds nested tags
        # rather than a single text node); matching None raises TypeError,
        # so such headings are simply left in place.
        if text is not None and breadcrumb.match(text):
            h3.extract()
    return soup
|
||||||
|
|
||||||
|
44
recipes/luns_a_venres.recipe
Normal file
44
recipes/luns_a_venres.recipe
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class LV_gl(BasicNewsRecipe):
    '''Recipe for the RSS section feeds published at lv.galiciae.com.'''

    # Identification
    title = u'De Luns a Venres (RSS)'
    __author__ = u'Susana Sotelo Docío'
    description = u'O gratuíto galego'
    publisher = u'Galiciaé'
    category = u'news'

    # Text handling
    encoding = 'utf-8'
    language = 'gl'
    direction = 'ltr'

    # Download settings
    cover_url = 'http://lv.galiciae.com/new_estilos/lv/logo.gif'
    oldest_article = 2
    max_articles_per_feed = 200
    center_navbar = False

    # One (section title, feed address) pair per section
    feeds = [
        (u'Galicia',
         u'http://lv.galiciae.com/cache/rss/sec_galicia_gl.rss'),
        (u'Cultura',
         u'http://lv.galiciae.com/cache/rss/sec_cultura_gl.rss'),
        (u'Mundo',
         u'http://lv.galiciae.com/cache/rss/sec_mundo_gl.rss'),
        (u'Cidadanía',
         u'http://lv.galiciae.com/cache/rss/sec_ciudadania_gl.rss'),
        (u'Tecnoloxía',
         u'http://lv.galiciae.com/cache/rss/sec_tecnologia_gl.rss'),
        (u'España',
         u'http://lv.galiciae.com/cache/rss/sec_espana_gl.rss'),
        (u'Deportes',
         u'http://lv.galiciae.com/cache/rss/sec_deportes_gl.rss'),
        (u'Economía',
         u'http://lv.galiciae.com/cache/rss/sec_economia_gl.rss'),
        (u'Lercheo',
         u'http://lv.galiciae.com/cache/rss/sec_gente_gl.rss'),
        (u'Medio ambiente',
         u'http://lv.galiciae.com/cache/rss/sec_medioambiente_gl.rss'),
        (u'España/Mundo',
         u'http://lv.galiciae.com/cache/rss/sec_espanamundo_gl.rss'),
        (u'Sociedade',
         u'http://lv.galiciae.com/cache/rss/sec_sociedad_gl.rss'),
        (u'Ciencia',
         u'http://lv.galiciae.com/cache/rss/sec_ciencia_gl.rss'),
        (u'Motor',
         u'http://lv.galiciae.com/cache/rss/sec_motor_gl.rss'),
        (u'Coches',
         u'http://lv.galiciae.com/cache/rss/sec_coches_gl.rss'),
        (u'Motos',
         u'http://lv.galiciae.com/cache/rss/sec_motos_gl.rss'),
        (u'Industriais',
         u'http://lv.galiciae.com/cache/rss/sec_industriales_gl.rss'),
    ]

    extra_css = u' p{text-align:left} '

    # Same text the metadata fields spell out above, one key="value" per line
    html2epub_options = (
        'publisher="%s"\ncomments="%s"\nencoding="%s"\ntags="%s"\n'
        'override_css=" p {text-align:left; text-indent: 0cm} "'
    ) % (publisher, description, encoding, category)

    def print_version(self, url):
        '''
        Return the article address with the print query string appended.

        :param url: article address taken from the feed
        :return: ``url`` followed by ``'?imprimir&lang=gl'``
        '''
        return url + '?imprimir&lang=gl'
|
||||||
|
|
@ -1,11 +1,10 @@
|
|||||||
EMAILADDRESS = 'hoge@foobar.co.jp'
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class NBOnline(BasicNewsRecipe):
|
class NBOnline(BasicNewsRecipe):
|
||||||
title = u'Nikkei Business Online'
|
title = u'Nikkei Business Online'
|
||||||
language = 'ja'
|
language = 'ja'
|
||||||
description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
|
description = u'Nikkei Business Online.\u6CE8\uFF1A\u30E6\u30FC\u30B6\u30FC\u540D\u306Bemail\u30A2\u30C9\u30EC\u30B9\u3068\u30E6\u30FC\u30B6\u30FC\u540D\u3092\u30BB\u30DF\u30B3\u30ED\u30F3\u3067\u533A\u5207\u3063\u3066\u5165\u308C\u3066\u304F\u3060\u3055\u3044\u3002\u4F8B\uFF1Aemail@address.jp;username . PLEASE NOTE: You need to put your email address and username into username filed separeted by ; (semi-colon).'
|
||||||
__author__ = 'Ado Nishimura'
|
__author__ = 'Ado Nishimura'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -23,8 +22,8 @@ class NBOnline(BasicNewsRecipe):
|
|||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
|
br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
|
||||||
br.select_form(name='loginActionForm')
|
br.select_form(name='loginActionForm')
|
||||||
br['email'] = EMAILADDRESS
|
br['email'] = self.username.split(';')[0]
|
||||||
br['userId'] = self.username
|
br['userId'] = self.username.split(';')[1]
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
<link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
|
<link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
|
||||||
<link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
|
<link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
|
||||||
<link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />
|
<link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />
|
||||||
|
<link rel="apple-touch-icon" href="/static/calibre.png" />
|
||||||
|
|
||||||
<script type="text/javascript" src="{prefix}/static/jquery.js"></script>
|
<script type="text/javascript" src="{prefix}/static/jquery.js"></script>
|
||||||
<script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>
|
<script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>
|
||||||
|
@ -131,7 +131,7 @@ class AZBOOKA(ALEX):
|
|||||||
description = _('Communicate with the Azbooka')
|
description = _('Communicate with the Azbooka')
|
||||||
|
|
||||||
VENDOR_NAME = 'LINUX'
|
VENDOR_NAME = 'LINUX'
|
||||||
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
|
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
|
||||||
|
|
||||||
MAIN_MEMORY_VOLUME_LABEL = 'Azbooka Internal Memory'
|
MAIN_MEMORY_VOLUME_LABEL = 'Azbooka Internal Memory'
|
||||||
|
|
||||||
|
@ -7,10 +7,13 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, datetime, sys, os
|
import struct, datetime, sys, os, shutil
|
||||||
|
from collections import OrderedDict
|
||||||
from calibre.utils.date import utc_tz
|
from calibre.utils.date import utc_tz
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
from calibre.ebooks.mobi.writer2.utils import decode_hex_number
|
from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint,
|
||||||
|
get_trailing_data)
|
||||||
|
from calibre.utils.magick.draw import identify_data
|
||||||
|
|
||||||
# PalmDB {{{
|
# PalmDB {{{
|
||||||
class PalmDOCAttributes(object):
|
class PalmDOCAttributes(object):
|
||||||
@ -277,6 +280,7 @@ class MOBIHeader(object): # {{{
|
|||||||
self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
|
self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
|
||||||
self.has_fcis_flis = False
|
self.has_fcis_flis = False
|
||||||
self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
|
self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
|
||||||
|
self.extra_data_flags = 0
|
||||||
if self.has_extra_data_flags:
|
if self.has_extra_data_flags:
|
||||||
self.unknown4 = self.raw[180:192]
|
self.unknown4 = self.raw[180:192]
|
||||||
self.first_content_record, self.last_content_record = \
|
self.first_content_record, self.last_content_record = \
|
||||||
@ -376,18 +380,17 @@ class TagX(object): # {{{
|
|||||||
def __init__(self, raw, control_byte_count):
    '''
    Decode one four-byte TAGX entry.

    :param raw: byte string; bytes 0-3 are read as tag, number of values,
                bitmask and end-of-file marker respectively
    :param control_byte_count: accepted but not used by this method
    '''
    tag, num_values, bitmask, eof = (ord(raw[i]) for i in range(4))
    self.tag = tag
    self.num_values = num_values
    self.bitmask = bitmask
    # End of file = 1 iff last entry
    # When it is 1 all others are 0
    self.eof = eof

    self.is_eof = (eof, tag, num_values, bitmask) == (1, 0, 0, 0)
|
||||||
|
|
||||||
def __repr__(self):
    '''Return a one-line summary; the bitmask is shown in binary notation.'''
    fields = (self.tag, self.num_values, bin(self.bitmask), self.eof)
    return 'TAGX(tag=%02d, num_values=%d, bitmask=%r, eof=%d)' % fields
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class IndexHeader(object): # {{{
|
class IndexHeader(object): # {{{
|
||||||
@ -421,7 +424,7 @@ class IndexHeader(object): # {{{
|
|||||||
self.ordt_start, = struct.unpack('>I', raw[40:44])
|
self.ordt_start, = struct.unpack('>I', raw[40:44])
|
||||||
self.ligt_start, = struct.unpack('>I', raw[44:48])
|
self.ligt_start, = struct.unpack('>I', raw[44:48])
|
||||||
self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
|
self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
|
||||||
self.num_of_ctoc_blocks, = struct.unpack('>I', raw[52:56])
|
self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
|
||||||
self.unknown2 = raw[56:180]
|
self.unknown2 = raw[56:180]
|
||||||
self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
|
self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
|
||||||
if self.tagx_offset != self.header_length:
|
if self.tagx_offset != self.header_length:
|
||||||
@ -443,6 +446,7 @@ class IndexHeader(object): # {{{
|
|||||||
self.tagx_control_byte_count))
|
self.tagx_control_byte_count))
|
||||||
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
||||||
raise ValueError('TAGX last entry is not EOF')
|
raise ValueError('TAGX last entry is not EOF')
|
||||||
|
self.tagx_entries = self.tagx_entries[:-1]
|
||||||
|
|
||||||
idxt0_pos = self.header_length+self.tagx_header_length
|
idxt0_pos = self.header_length+self.tagx_header_length
|
||||||
last_num, consumed = decode_hex_number(raw[idxt0_pos:])
|
last_num, consumed = decode_hex_number(raw[idxt0_pos:])
|
||||||
@ -481,7 +485,7 @@ class IndexHeader(object): # {{{
|
|||||||
a('ORDT start: %d'%self.ordt_start)
|
a('ORDT start: %d'%self.ordt_start)
|
||||||
a('LIGT start: %d'%self.ligt_start)
|
a('LIGT start: %d'%self.ligt_start)
|
||||||
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
|
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
|
||||||
a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks)
|
a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
|
||||||
u(self.unknown2)
|
u(self.unknown2)
|
||||||
a('TAGX offset: %d'%self.tagx_offset)
|
a('TAGX offset: %d'%self.tagx_offset)
|
||||||
u(self.unknown3)
|
u(self.unknown3)
|
||||||
@ -496,16 +500,141 @@ class IndexHeader(object): # {{{
|
|||||||
return '\n'.join(ans)
|
return '\n'.join(ans)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class Tag(object): # {{{

    '''
    Index entries are a collection of tags. Each tag is represented by this
    class.

    After construction an instance carries:

    ``value``       the single decoded value, or the whole list when the
                    tag holds more than one value
    ``attr``        short attribute-style name of the tag
    ``desc``        human readable description, used by ``__str__``
    ``cncx_value``  ``cncx[value]`` when ``attr`` contains ``_offset``,
                    otherwise None
    '''

    # Tags whose meaning does not depend on the entry type.
    # Values are (attr, desc) -- note the order is the reverse of
    # INTERPRET_MAP below.
    TAG_MAP = {
        1: ('offset', 'Offset in HTML'),
        2: ('size', 'Size in HTML'),
        3: ('label_offset', 'Offset to label in CNCX'),
        4: ('depth', 'Depth of this entry in TOC'),

        # The remaining tag types have to be interpreted subject to the type
        # of index entry they are present in
    }

    # entry type -> {tag number: (desc, attr)}
    INTERPRET_MAP = {
        'subchapter': {
            5  : ('Parent chapter index', 'parent_index')
        },

        'article'   : {
            5  : ('Class offset in cncx', 'class_offset'),
            21 : ('Parent section index', 'parent_index'),
            22 : ('Description offset in cncx', 'desc_offset'),
            23 : ('Author offset in cncx', 'author_offset'),
        },

        'chapter_with_subchapters' : {
            22 : ('First subchapter index', 'first_subchapter_index'),
            23 : ('Last subchapter index', 'last_subchapter_index'),
        },

        'periodical' : {
            5  : ('Class offset in cncx', 'class_offset'),
            22 : ('First section index', 'first_section_index'),
            23 : ('Last section index', 'last_section_index'),
        },

        'section' : {
            5  : ('Class offset in cncx', 'class_offset'),
            21 : ('Periodical index', 'periodical_index'),
            22 : ('First article index', 'first_article_index'),
            23 : ('Last article index', 'last_article_index'),
        },
    }

    def __init__(self, tagx, vals, entry_type, cncx):
        '''
        :param tagx: object with a ``tag`` attribute holding the tag number
        :param vals: non-empty list of the values decoded for this tag
        :param entry_type: name of the index entry type the tag belongs to
        :param cncx: object indexable by offset; consulted only for tags
                     whose attribute name contains ``_offset``
        :raises ValueError: if the tag is not in TAG_MAP and either the
                            entry type or the tag number is unknown
        '''
        self.value = vals if len(vals) > 1 else vals[0]
        self.entry_type = entry_type
        self.cncx_value = None
        if tagx.tag in self.TAG_MAP:
            self.attr, self.desc = self.TAG_MAP[tagx.tag]
        else:
            # Only a missing key means "unknown"; any other failure is a
            # real error and must not be masked by the ValueError below.
            try:
                td = self.INTERPRET_MAP[entry_type]
            except KeyError:
                raise ValueError('Unknown entry type: %s'%entry_type)
            try:
                self.desc, self.attr = td[tagx.tag]
            except KeyError:
                raise ValueError('Unknown tag: %d for entry type: %s'%(
                    tagx.tag, entry_type))
        if '_offset' in self.attr:
            self.cncx_value = cncx[self.value]

    def __str__(self):
        if self.cncx_value is not None:
            return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
        return '%s : %r'%(self.desc, self.value)

# }}}
|
||||||
|
|
||||||
class IndexEntry(object): # {{{

    '''
    The index is made up of entries, each of which is represented by an
    instance of this class. Index entries typically point to offsets in the
    HTML, specify HTML sizes and point to text strings in the CNCX that are
    used in the navigation UI.
    '''

    TYPES = {
        # Present in book type files
        0x0f : 'chapter',
        0x6f : 'chapter_with_subchapters',
        0x1f : 'subchapter',
        # Present in periodicals
        0xdf : 'periodical',
        0xff : 'section',
        0x3f : 'article',
    }

    def __init__(self, ident, entry_type, raw, cncx, tagx_entries):
        '''
        :param ident: identifier of the entry, stored as ``index``
        :param entry_type: numeric type byte; must be a key of TYPES
        :param raw: bytes holding the encoded tag values of this entry
        :param cncx: passed through to every Tag that is created
        :param tagx_entries: TAGX entries; those whose bitmask shares a bit
                             with ``entry_type`` are read from ``raw``
        :raises ValueError: for an unknown type byte, or when ``raw`` runs
                            out before all expected values were read
        '''
        self.index = ident
        self.raw = raw
        self.tags = []

        if entry_type not in self.TYPES:
            raise ValueError('Unknown Index Entry type: %s'%hex(entry_type))
        self.entry_type = self.TYPES[entry_type]

        remaining = raw
        for tagx in tagx_entries:
            if not (tagx.bitmask & entry_type):
                continue
            values = []
            for _ in range(tagx.num_values):
                if not remaining:
                    raise ValueError('Index entry does not match TAGX header')
                value, used = decint(remaining)
                remaining = remaining[used:]
                values.append(value)
            self.tags.append(Tag(tagx, values, self.entry_type, cncx))

    def __str__(self):
        header = 'Index Entry(index=%s, entry_type=%s, length=%d)'%(
                self.index, self.entry_type, len(self.tags))
        return '\n'.join([header] + ['\t'+str(tag) for tag in self.tags])

# }}}
|
||||||
|
|
||||||
class IndexRecord(object): # {{{
|
class IndexRecord(object): # {{{
|
||||||
|
|
||||||
def __init__(self, record):
|
'''
|
||||||
|
Represents all indexing information in the MOBI, apart from indexing info
|
||||||
|
in the trailing data of the text records.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, record, index_header, cncx):
|
||||||
self.record = record
|
self.record = record
|
||||||
raw = self.record.raw
|
raw = self.record.raw
|
||||||
if raw[:4] != b'INDX':
|
if raw[:4] != b'INDX':
|
||||||
@ -534,16 +663,19 @@ class IndexRecord(object): # {{{
|
|||||||
|
|
||||||
indxt = raw[192:self.idxt_offset]
|
indxt = raw[192:self.idxt_offset]
|
||||||
self.indices = []
|
self.indices = []
|
||||||
for off in self.index_offsets:
|
for i, off in enumerate(self.index_offsets):
|
||||||
index = indxt[off:]
|
try:
|
||||||
ident, consumed = decode_hex_number(index)
|
next_off = self.index_offsets[i+1]
|
||||||
index = index[consumed:]
|
except:
|
||||||
entry_type = u(b'>B', index[0])
|
next_off = len(indxt)
|
||||||
self.indices.append(IndexEntry(ident, entry_type, index[1:]))
|
index, consumed = decode_hex_number(indxt[off:])
|
||||||
|
entry_type = ord(indxt[off+consumed])
|
||||||
|
self.indices.append(IndexEntry(index, entry_type,
|
||||||
|
indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries))
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = ['*'*20 + ' Index Record (%d bytes)'%len(self.record.raw)+ '*'*20]
|
ans = ['*'*20 + ' Index Record (%d bytes) '%len(self.record.raw)+ '*'*20]
|
||||||
a = ans.append
|
a = ans.append
|
||||||
def u(w):
|
def u(w):
|
||||||
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
|
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
|
||||||
@ -557,11 +689,103 @@ class IndexRecord(object): # {{{
|
|||||||
u(self.unknown3)
|
u(self.unknown3)
|
||||||
u(self.unknown4)
|
u(self.unknown4)
|
||||||
a('Index offsets: %r'%self.index_offsets)
|
a('Index offsets: %r'%self.index_offsets)
|
||||||
|
a('\nIndex Entries:')
|
||||||
|
for entry in self.indices:
|
||||||
|
a(str(entry)+'\n')
|
||||||
|
|
||||||
return '\n'.join(ans)
|
return '\n'.join(ans)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class CNCX(object) : # {{{

    '''
    Parses the records that contain the compiled NCX (all strings from the
    NCX). Presents a simple offset : string mapping interface to access the
    data.
    '''

    def __init__(self, records, codec):
        '''
        :param records: iterable of objects whose ``raw`` attribute holds the
                        bytes of one CNCX record
        :param codec: name of the encoding used to decode every string
        '''
        self.records = OrderedDict()
        record_offset = 0
        for record in records:
            raw = record.raw
            # Every record is parsed from its own first byte. Carrying the
            # position over from the previous record would start in the
            # middle of this one, or skip it entirely.
            pos = 0
            while pos < len(raw):
                length, consumed = decint(raw[pos:])
                if length > 0:
                    self.records[record_offset + pos] = raw[
                        pos+consumed:pos+consumed+length].decode(codec)
                pos += consumed+length
            # NOTE(review): strings in the second and later records are keyed
            # as 0x10000 * record number + position, on the assumption that
            # this is how index tags address them -- confirm against the
            # MOBI format notes.
            record_offset += 0x10000

    def __getitem__(self, offset):
        # Unknown offsets yield None rather than raising KeyError
        return self.records.get(offset)

    def __str__(self):
        ans = ['*'*20 + ' cncx (%d strings) '%len(self.records)+ '*'*20]
        for k, v in self.records.items():
            ans.append('%10d : %s'%(k, v))
        return '\n'.join(ans)

# }}}
|
||||||
|
|
||||||
|
class TextRecord(object): # {{{

    '''
    One text record: the decompressed text plus the trailing data that was
    split off it, with trailing entries 0, 1 and 2 re-keyed by name.
    '''

    # trailing data bit -> name it is stored under
    TRAILING_NAMES = (
        (0, 'multibyte_overlap'),
        (1, 'indexing'),
        (2, 'uncrossable_breaks'),
    )

    def __init__(self, idx, record, extra_data_flags, decompress):
        '''
        :param idx: record number, used to name the dumped files
        :param record: object whose ``raw`` attribute holds the record bytes
        :param extra_data_flags: passed to ``get_trailing_data``
        :param decompress: callable applied to the text once the trailing
                           data has been removed
        '''
        trailing, text = get_trailing_data(record.raw, extra_data_flags)
        self.trailing_data = trailing
        self.raw = decompress(text)
        for bit, label in self.TRAILING_NAMES:
            if bit in trailing:
                trailing[label] = trailing.pop(bit)

        self.idx = idx

    def dump(self, folder):
        '''
        Write ``<idx>.txt`` (the text) and ``<idx>.trailing_data`` (one
        ``key : repr(value)`` paragraph per trailing entry) into ``folder``.
        '''
        stem = os.path.join(folder, '%06d'%self.idx)
        with open(stem+'.txt', 'wb') as out:
            out.write(self.raw)
        with open(stem+'.trailing_data', 'wb') as out:
            for key, val in self.trailing_data.items():
                line = '%s : %r\n\n'%(key, val)
                out.write(line.encode('utf-8'))

# }}}
|
||||||
|
|
||||||
|
class ImageRecord(object): # {{{

    '''A record holding image data, together with its detected format.'''

    def __init__(self, idx, record, fmt):
        '''
        :param idx: record number, used to name the dumped file
        :param record: object whose ``raw`` attribute holds the image bytes
        :param fmt: format name; becomes the extension of the dumped file
        '''
        self.idx, self.fmt = idx, fmt
        self.raw = record.raw

    def dump(self, folder):
        '''Write the image bytes to ``<idx>.<fmt>`` inside ``folder``.'''
        filename = '%06d'%self.idx + '.' + self.fmt
        with open(os.path.join(folder, filename), 'wb') as out:
            out.write(self.raw)

# }}}
|
||||||
|
|
||||||
|
class BinaryRecord(object): # {{{

    '''
    A record that is dumped verbatim. The file name is the record number,
    followed by the record signature when it is one of the known ones.
    '''

    def __init__(self, idx, record):
        '''
        :param idx: record number, zero padded to six digits in the name
        :param record: object whose ``raw`` attribute holds the record bytes
        '''
        self.raw = record.raw
        signature = self.raw[:4]
        if signature == b'\xe9\x8e\r\n':
            suffix = '-' + 'EOF'
        elif signature in (b'FCIS', b'FLIS', b'SRCS'):
            suffix = '-' + signature.decode('ascii')
        else:
            suffix = ''
        self.name = '%06d%s'%(idx, suffix)

    def dump(self, folder):
        '''Write the raw bytes to ``<name>.bin`` inside ``folder``.'''
        target = os.path.join(folder, self.name+'.bin')
        with open(target, 'wb') as out:
            out.write(self.raw)

# }}}
|
||||||
|
|
||||||
class MOBIFile(object): # {{{
|
class MOBIFile(object): # {{{
|
||||||
|
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
@ -590,11 +814,58 @@ class MOBIFile(object): # {{{
|
|||||||
|
|
||||||
self.mobi_header = MOBIHeader(self.records[0])
|
self.mobi_header = MOBIHeader(self.records[0])
|
||||||
|
|
||||||
|
if 'huff' in self.mobi_header.compression.lower():
|
||||||
|
huffrecs = [r.raw for r in
|
||||||
|
xrange(self.mobi_header.huffman_record_offset,
|
||||||
|
self.mobi_header.huffman_record_offset +
|
||||||
|
self.mobi_header.huffman_record_count)]
|
||||||
|
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||||
|
huffs = HuffReader(huffrecs)
|
||||||
|
decompress = huffs.decompress
|
||||||
|
elif 'palmdoc' in self.mobi_header.compression.lower():
|
||||||
|
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
|
decompress = decompress_doc
|
||||||
|
else:
|
||||||
|
decompress = lambda x: x
|
||||||
|
|
||||||
self.index_header = None
|
self.index_header = None
|
||||||
|
self.indexing_record_nums = set()
|
||||||
pir = self.mobi_header.primary_index_record
|
pir = self.mobi_header.primary_index_record
|
||||||
if pir != 0xffffffff:
|
if pir != 0xffffffff:
|
||||||
self.index_header = IndexHeader(self.records[pir])
|
self.index_header = IndexHeader(self.records[pir])
|
||||||
self.index_record = IndexRecord(self.records[pir+1])
|
self.cncx = CNCX(self.records[
|
||||||
|
pir+2:pir+2+self.index_header.num_of_cncx_blocks],
|
||||||
|
self.index_header.index_encoding)
|
||||||
|
self.index_record = IndexRecord(self.records[pir+1],
|
||||||
|
self.index_header, self.cncx)
|
||||||
|
self.indexing_record_nums = set(xrange(pir,
|
||||||
|
pir+2+self.index_header.num_of_cncx_blocks))
|
||||||
|
|
||||||
|
|
||||||
|
ntr = self.mobi_header.number_of_text_records
|
||||||
|
fntbr = self.mobi_header.first_non_book_record
|
||||||
|
fii = self.mobi_header.first_image_index
|
||||||
|
if fntbr == 0xffffffff:
|
||||||
|
fntbr = len(self.records)
|
||||||
|
self.text_records = [TextRecord(r, self.records[r],
|
||||||
|
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
|
||||||
|
min(len(self.records), ntr+1))]
|
||||||
|
self.image_records, self.binary_records = [], []
|
||||||
|
for i in xrange(fntbr, len(self.records)):
|
||||||
|
if i in self.indexing_record_nums:
|
||||||
|
continue
|
||||||
|
r = self.records[i]
|
||||||
|
fmt = None
|
||||||
|
if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS',
|
||||||
|
b'\xe9\x8e\r\n'):
|
||||||
|
try:
|
||||||
|
width, height, fmt = identify_data(r.raw)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
if fmt is not None:
|
||||||
|
self.image_records.append(ImageRecord(i, r, fmt))
|
||||||
|
else:
|
||||||
|
self.binary_records.append(BinaryRecord(i, r))
|
||||||
|
|
||||||
|
|
||||||
def print_header(self, f=sys.stdout):
|
def print_header(self, f=sys.stdout):
|
||||||
@ -608,12 +879,15 @@ class MOBIFile(object): # {{{
|
|||||||
print (str(self.mobi_header).encode('utf-8'), file=f)
|
print (str(self.mobi_header).encode('utf-8'), file=f)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def inspect_mobi(path_or_stream):
|
def inspect_mobi(path_or_stream, prefix='decompiled'):
|
||||||
stream = (path_or_stream if hasattr(path_or_stream, 'read') else
|
stream = (path_or_stream if hasattr(path_or_stream, 'read') else
|
||||||
open(path_or_stream, 'rb'))
|
open(path_or_stream, 'rb'))
|
||||||
f = MOBIFile(stream)
|
f = MOBIFile(stream)
|
||||||
ddir = 'debug_' + os.path.splitext(os.path.basename(stream.name))[0]
|
ddir = prefix + '_' + os.path.splitext(os.path.basename(stream.name))[0]
|
||||||
if not os.path.exists(ddir):
|
try:
|
||||||
|
shutil.rmtree(ddir)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
os.mkdir(ddir)
|
os.mkdir(ddir)
|
||||||
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
||||||
f.print_header(f=out)
|
f.print_header(f=out)
|
||||||
@ -621,8 +895,17 @@ def inspect_mobi(path_or_stream):
|
|||||||
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
||||||
print(str(f.index_header), file=out)
|
print(str(f.index_header), file=out)
|
||||||
print('\n\n', file=out)
|
print('\n\n', file=out)
|
||||||
|
print(str(f.cncx).encode('utf-8'), file=out)
|
||||||
|
print('\n\n', file=out)
|
||||||
print(str(f.index_record), file=out)
|
print(str(f.index_record), file=out)
|
||||||
|
|
||||||
|
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
|
||||||
|
('binary', 'binary_records')]:
|
||||||
|
tdir = os.path.join(ddir, tdir)
|
||||||
|
os.mkdir(tdir)
|
||||||
|
for rec in getattr(f, attr):
|
||||||
|
rec.dump(tdir)
|
||||||
|
|
||||||
print ('Debug data saved to:', ddir)
|
print ('Debug data saved to:', ddir)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -933,6 +933,9 @@ class MobiReader(object):
|
|||||||
continue
|
continue
|
||||||
processed_records.append(i)
|
processed_records.append(i)
|
||||||
data = self.sections[i][0]
|
data = self.sections[i][0]
|
||||||
|
if data[:4] in (b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n'):
|
||||||
|
# A FLIS, FCIS, SRCS or EOF record, ignore
|
||||||
|
continue
|
||||||
buf = cStringIO.StringIO(data)
|
buf = cStringIO.StringIO(data)
|
||||||
image_index += 1
|
image_index += 1
|
||||||
try:
|
try:
|
||||||
|
@ -8,6 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
||||||
|
|
||||||
@ -150,4 +151,26 @@ def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
|
|||||||
scale -= 0.05
|
scale -= 0.05
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def get_trailing_data(record, extra_data_flags):
|
||||||
|
'''
|
||||||
|
Given a text record as a bytestring and the extra data flags from the MOBI
|
||||||
|
header, return the trailing data as a dictionary, mapping bit number to
|
||||||
|
data as bytestring. Also returns the record - all trailing data.
|
||||||
|
|
||||||
|
:return: Trailing data, record - trailing data
|
||||||
|
'''
|
||||||
|
data = OrderedDict()
|
||||||
|
for i in xrange(16, -1, -1):
|
||||||
|
flag = 2**i
|
||||||
|
if flag & extra_data_flags:
|
||||||
|
if i == 0:
|
||||||
|
# Only the first two bits are used for the size since there can
|
||||||
|
# never be more than 3 trailing multibyte chars
|
||||||
|
sz = ord(record[-1]) & 0b11
|
||||||
|
consumed = 1
|
||||||
|
else:
|
||||||
|
sz, consumed = decint(record, forward=False)
|
||||||
|
data[i] = record[-(sz+consumed):-consumed]
|
||||||
|
record = record[:-(sz+consumed)]
|
||||||
|
return data, record
|
||||||
|
|
||||||
|
@ -188,8 +188,13 @@ class OEBReader(object):
|
|||||||
href, _ = urldefrag(href)
|
href, _ = urldefrag(href)
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
|
try:
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
scheme = urlparse(href).scheme
|
scheme = urlparse(href).scheme
|
||||||
|
except:
|
||||||
|
self.oeb.log.exception(
|
||||||
|
'Skipping invalid href: %r'%href)
|
||||||
|
continue
|
||||||
if not scheme and href not in known:
|
if not scheme and href not in known:
|
||||||
new.add(href)
|
new.add(href)
|
||||||
elif item.media_type in OEB_STYLES:
|
elif item.media_type in OEB_STYLES:
|
||||||
|
@ -47,7 +47,10 @@ class ManifestTrimmer(object):
|
|||||||
item.data is not None:
|
item.data is not None:
|
||||||
hrefs = [r[2] for r in iterlinks(item.data)]
|
hrefs = [r[2] for r in iterlinks(item.data)]
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
|
try:
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
|
except:
|
||||||
|
continue
|
||||||
if href in oeb.manifest.hrefs:
|
if href in oeb.manifest.hrefs:
|
||||||
found = oeb.manifest.hrefs[href]
|
found = oeb.manifest.hrefs[href]
|
||||||
if found not in used:
|
if found not in used:
|
||||||
|
@ -153,12 +153,22 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,
|
|||||||
bookt.append(TR(thumbnail, data))
|
bookt.append(TR(thumbnail, data))
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
body.append(HR())
|
||||||
|
body.append(DIV(
|
||||||
|
A(_('Switch to the full interface (non-mobile interface)'),
|
||||||
|
href="/browse",
|
||||||
|
style="text-decoration: none; color: blue",
|
||||||
|
title=_('The full interface gives you many more features, '
|
||||||
|
'but it may not work well on a small screen')),
|
||||||
|
style="text-align:center"))
|
||||||
return HTML(
|
return HTML(
|
||||||
HEAD(
|
HEAD(
|
||||||
TITLE(__appname__ + ' Library'),
|
TITLE(__appname__ + ' Library'),
|
||||||
LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
|
LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
|
||||||
type='image/x-icon'),
|
type='image/x-icon'),
|
||||||
LINK(rel='stylesheet', type='text/css', href=prefix+'/mobile/style.css')
|
LINK(rel='stylesheet', type='text/css',
|
||||||
|
href=prefix+'/mobile/style.css'),
|
||||||
|
LINK(rel='apple-touch-icon', href="/static/calibre.png")
|
||||||
), # End head
|
), # End head
|
||||||
body
|
body
|
||||||
) # End html
|
) # End html
|
||||||
|
@ -405,9 +405,9 @@ To those of you that claim that you need access to the filesystem to so that you
|
|||||||
|
|
||||||
If you are worried that someday |app| will cease to be developed, leaving all your books marooned in its folder structure, explore the powerful "Save to Disk" feature in |app| that lets you export all your files into a folder structure of arbitrary complexity based on their metadata.
|
If you are worried that someday |app| will cease to be developed, leaving all your books marooned in its folder structure, explore the powerful "Save to Disk" feature in |app| that lets you export all your files into a folder structure of arbitrary complexity based on their metadata.
|
||||||
|
|
||||||
Since I keep getting asked why there are numbers at the end of the title folder name, the reason is for *robustness*. That number is the id number of the book record in the |app| database. The presence of the number allows you to have multiple records with the same title and author names. More importantly, it is part of what allows |app| to magically regenerate the database with all metadata if the database file gets corrupted. Given that |app|'s mission is to get you to stop storing metadata in filenames and stop using the filesystem to find things, the increased robustness afforded by the id numbers is well worth the uglier folder names.
|
Finally, the reason there are numbers at the end of every title folder, is for *robustness*. That number is the id number of the book record in the |app| database. The presence of the number allows you to have multiple records with the same title and author names. It is also part of what allows |app| to magically regenerate the database with all metadata if the database file gets corrupted. Given that |app|'s mission is to get you to stop storing metadata in filenames and stop using the filesystem to find things, the increased robustness afforded by the id numbers is well worth the uglier folder names.
|
||||||
|
|
||||||
Finally, if you are irrevocably wedded to using the filesystem to store your metadata, feel free to patch your local copy of |app| to use whatever storage scheme you like. But, do not bother me with requests to change the directory structure, **they will be ignored**.
|
If you are still not convinced, then I'm afraid |app| is not for you. Look elsewhere for your book cataloguing needs. Just so we're clear, **this is not going to change**. Kindly do not contact us in an attempt to get us to change this.
|
||||||
|
|
||||||
Why doesn't |app| have a column for foo?
|
Why doesn't |app| have a column for foo?
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
Loading…
x
Reference in New Issue
Block a user