GwR add metadata editing for DRM files, fix catalog CLI options

This commit is contained in:
GRiker 2010-02-18 04:38:13 -07:00
commit 7d69048a4b
11 changed files with 615 additions and 92 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 647 B

View File

@ -0,0 +1,37 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Kathimerini(BasicNewsRecipe):
    '''
    News recipe for the Greek newspaper Kathimerini.

    Articles are collected from the per-section XML feeds published under
    wk.kathimerini.gr/xml_files/.
    '''

    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True

    # Both trimming rules target the same <td class="news"> element.
    remove_tags_before = dict(name='td', attrs={'class': 'news'})
    remove_tags_after = dict(name='td', attrs={'class': 'news'})
    remove_attributes = ['width', 'src', 'header', 'footer']

    # (section title, feed URL) pairs. The titles are Greek, written as \u
    # escapes so the file stays pure ASCII. URLs must not carry leading or
    # trailing whitespace.
    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',  # Politics
         'http://wk.kathimerini.gr/xml_files/politics.xml'),
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',  # Greece
         'http://wk.kathimerini.gr/xml_files/ell.xml'),
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',  # World
         'http://wk.kathimerini.gr/xml_files/world.xml'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',  # Economy
         'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
        (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',  # Business
         'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',  # International economy
         'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',  # Culture
         'http://wk.kathimerini.gr/xml_files/civ.xml'),
        (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',  # Regular columns
         'http://wk.kathimerini.gr/xml_files/st.xml'),
    ]

    def print_version(self, url):
        '''
        Return the URL to download for the article at ``url``.

        The ``4dcgi/`` path segment of news.kathimerini.gr article URLs is
        doubled; URLs that do not start with that prefix are returned
        unchanged.
        '''
        return url.replace('http://news.kathimerini.gr/4dcgi/',
                           'http://news.kathimerini.gr/4dcgi/4dcgi/')

View File

@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_UK(BasicNewsRecipe):
    '''
    News recipe for the UK edition of Wired magazine.

    The article list is scraped from the magazine index page instead of
    being read from RSS feeds.
    '''

    title = 'Wired Magazine - UK edition'
    __author__ = 'Darko Miletic'
    description = 'Gaming news'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    oldest_article = 32
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    language = 'en_GB'
    extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
    index = 'http://www.wired.co.uk/wired-magazine.aspx'

    # Metadata handed to the conversion pipeline, taken from the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'class': 'article-box'})]
    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'link']),
        dict(attrs={'class': ['opts', 'comment', 'stories']}),
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'stories'})
    remove_attributes = ['height', 'width']

    def parse_index(self):
        '''
        Build the feed list from the links in the "most-wired-box" element
        of the index page.

        Returns an empty list when the page has no "main-content" div,
        otherwise a single ('Articles', [article dicts]) entry.
        '''
        page = self.index_to_soup(self.index)
        main = page.find('div', attrs={'class': 'main-content'})
        if not main:
            return []

        articles = []
        box = main.find(attrs={'class': 'most-wired-box'})
        if box:
            for link in box.findAll('a', href=True):
                # hrefs are site-relative, so prefix the host.
                article_url = 'http://www.wired.co.uk' + link['href']
                article_title = self.tag_to_string(link)
                article_date = strftime(self.timefmt)
                articles.append({
                    'title': article_title,
                    'date': article_date,
                    'url': article_url,
                    'description': '',
                })
        return [('Articles', articles)]

    def get_cover_url(self):
        '''
        Return the src of the image inside <span class="cover"> on the
        index page, or None when no such span exists.
        '''
        page = self.index_to_soup(self.index)
        cover_span = page.find('span', attrs={'class': 'cover'})
        return cover_span.img['src'] if cover_span else None

    def print_version(self, url):
        '''Return ``url`` with the "?page=all" query string appended.'''
        return url + '?page=all'

30
resources/tanea.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class TaNea(BasicNewsRecipe):
    '''
    News recipe for the Greek newspaper Ta Nea.

    Articles are collected from the per-section pages of www.tanea.gr.
    '''

    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    remove_tags_before = dict(name='div', attrs={'id': 'print-body'})
    remove_tags_after = dict(name='div', attrs={'id': 'text'})

    # (section title, feed URL) pairs. The titles are Greek and must be
    # written as \u code points: inside a unicode literal, \xNN denotes the
    # single character U+00NN, so spelling out the UTF-8 bytes with \x
    # escapes yields mojibake rather than Greek letters.
    feeds = [
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',  # Greece
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',  # World
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',  # Economy
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',  # Culture
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',  # Opinions
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',  # "Ripes"
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',  # "Aichmes"
         u'http://www.tanea.gr/default.asp?pid=81&la=1'),
    ]

    def print_version(self, url):
        '''
        Return the URL to download for the article at ``url``.

        The leading ``pid=2`` query prefix is rewritten to ``pid=96``; URLs
        that do not start with that prefix are returned unchanged.
        '''
        return url.replace('http://www.tanea.gr/default.asp?pid=2',
                           'http://www.tanea.gr/default.asp?pid=96')

View File

@ -249,6 +249,7 @@ class CatalogPlugin(Plugin):
#: dest = 'catalog_title',
#: help = (_('Title of generated catalog. \nDefault:') + " '" +
#: '%default' + "'"))]
#: cli_options parsed in library.cli:catalog_option_parser()
cli_options = []
@ -275,9 +276,10 @@ class CatalogPlugin(Plugin):
def get_output_fields(self, opts):
# Return a list of requested fields, with opts.sort_by first
all_fields = set(
['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
['author_sort','authors','comments','cover','formats',
'id','isbn','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
fields = all_fields
if opts.fields != 'all':

View File

@ -0,0 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Used for chm input
'''

View File

@ -0,0 +1,384 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, shutil, uuid
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
from BeautifulSoup import BeautifulSoup
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
def match_string(s1, s2_already_lowered):
    '''
    Case-insensitively compare ``s1`` against an already lower-cased string.

    Returns True only when both arguments are not None and ``s1.lower()``
    equals ``s2_already_lowered``; returns False otherwise.
    '''
    return (s1 is not None
            and s2_already_lowered is not None
            and s1.lower() == s2_already_lowered)
def option_parser():
    '''
    Create the command line option parser.

    Registers the output directory, verbosity and metadata options and
    returns the populated OptionParser.
    '''
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    add = parser.add_option

    # Output location and verbosity
    add('--output-dir', '-d', dest='output', default='.',
        help=_('Output directory. Defaults to current directory'))
    add('--verbose', dest='verbose', action='store_true', default=False)

    # Title / author metadata
    add("-t", "--title", dest="title", action="store", type="string",
        help=_("Set the book title"))
    add('--title-sort', dest='title_sort', action='store', type='string',
        default=None, help=_('Set sort key for the title'))
    add("-a", "--author", dest="author", action="store", type="string",
        help=_("Set the author"))
    add('--author-sort', dest='author_sort', action='store', type='string',
        default=None, help=_('Set sort key for the author'))
    add("-c", "--category", dest="category", action="store", type="string",
        help=_("The category this book belongs"
               " to. E.g.: History"))

    # Thumbnail and comment
    add("--thumbnail", dest="thumbnail", action="store", type="string",
        help=_("Path to a graphic that will be"
               " set as this files' thumbnail"))
    add("--comment", dest="freetext", action="store", type="string",
        help=_("Path to a txt file containing a comment."))
    add("--get-thumbnail", dest="get_thumbnail", action="store_true",
        default=False, help=_("Extract thumbnail from LRF file"))

    # Remaining metadata fields
    add('--publisher', default=None, help=_('Set the publisher'))
    add('--classification', default=None,
        help=_('Set the book classification'))
    add('--creator', default=None, help=_('Set the book creator'))
    add('--producer', default=None, help=_('Set the book producer'))
    add('--get-cover', action='store_true', default=False,
        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    add('--bookid', dest='book_id', action='store', type='string',
        default=None, help=_('Set book ID'))
    add('--font-delta', dest='font_delta', action='store', type='int',
        default=0, help=_('Set font delta'))
    return parser
class CHMError(Exception):
    '''Raised when a CHM file cannot be opened or an entry cannot be read.'''
class CHMReader(CHMFile):
    '''
    CHMFile subclass that can list and extract the contents of a CHM
    archive and clean up the extracted HTML pages.
    '''

    def __init__(self, input, log):
        '''
        Open the CHM file at path ``input``.

        :param input: path of the CHM file
        :param log: logger used for diagnostics
        :raises CHMError: if the file cannot be loaded
        '''
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None   # cache filled by Contents()
        self._playorder = 0
        self._metadata = False
        self._extracted = False

        # location of '.hhc' file, which is the CHM TOC.
        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
        self.hhc_path = self.root + ".hhc"

    def _parse_toc(self, ul, basedir=None):
        '''
        Recursively convert a parsed <ul> element of the .hhc file into a
        TOC tree.

        :param ul: parsed <ul> node whose direct <li> children are entries
        :param basedir: base path for the TOC; defaults to the current
                        working directory at call time
        '''
        if basedir is None:
            # Resolved per call: a default of os.getcwdu() in the signature
            # would be frozen at import time.
            basedir = os.getcwdu()
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            href = li.object('param', {'name': 'Local'})[0]['value']
            if '#' in href:
                # Split on the first '#' only so further '#' characters in
                # the fragment do not break the unpacking.
                href, frag = href.split('#', 1)
            else:
                frag = None
            # NOTE(review): _deentity is not defined in this class;
            # presumably provided elsewhere -- confirm.
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                # Pass basedir down so nested levels share the same base.
                child = self._parse_toc(li.ul, basedir)
                child.parent = toc
                toc.append(child)
        return toc

    def GetFile(self, path):
        '''
        Return the raw data of the archive member ``path``.

        :raises CHMError: if the member cannot be resolved or is empty
        '''
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if not path.startswith('/'):
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=None):
        '''
        Write every archive member below ``output_dir`` (default: the
        current working directory at call time). Members guessed to be
        text/html are passed through _reformat() first.
        '''
        if output_dir is None:
            output_dir = os.getcwdu()
        # Prefix (with trailing separator) every extracted path must have.
        root = os.path.join(os.path.abspath(output_dir), '')
        for path in self.Contents():
            lpath = os.path.join(output_dir, path)
            if not os.path.abspath(lpath).startswith(root):
                # Member names come from the archive; refuse entries such
                # as '../x' that would land outside the output directory.
                self.log.warn("Skipping '%s': outside of output directory" % (path,))
                continue
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            if guess_mimetype(path)[0] == 'text/html':
                data = self._reformat(data)
            with open(lpath, 'wb') as f:
                f.write(data)
        self._extracted = True

    def _reformat(self, data):
        '''
        Clean up one HTML page: remove scripts, remove a leading/trailing
        navigation table and normalise <img> src attributes. Returns the
        prettified HTML, or ``data`` unchanged if it cannot be parsed.
        '''
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            self.log.warn("Unable to parse html for cleaning, leaving it :(")
            return data
        # nuke javascript...
        for script in soup('script'):
            script.extract()
        # remove forward and back nav bars from the top/bottom of each page:
        # they interrupt the flow of the text and waste space. The first and
        # last tables are treated as nav bars when they sit at the very edge
        # of their siblings.
        tables = soup('table')
        if tables:
            first, last = tables[0], tables[-1]
            if (first.previousSibling is None
                    or first.previousSibling.previousSibling is None):
                first.extract()
            if (last.nextSibling is None
                    or last.nextSibling.nextSibling is None):
                last.extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                src = img['src']
            except KeyError:
                # and some don't even have a src= ?!
                continue
            # some are supposedly "relative"... lies.
            while src.startswith('../'):
                src = src[3:]
            # some have ";<junk>" at the end.
            img['src'] = src.split(';')[0]
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        '''
        Return the list of file paths inside the archive, relative (no
        leading '/') and without directory entries. The list is computed
        once and cached.
        '''
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if not ui.path.endswith('/'):
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        '''Create the parent directory of ``path`` if it does not exist.'''
        dirname = os.path.dirname(path)
        # An empty dirname means "current directory": nothing to create.
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)

    def extract_content(self, output_dir=None):
        '''Extract all archive members below ``output_dir``.'''
        self.ExtractFiles(output_dir=output_dir)
class CHMInput(InputFormatPlugin):
    '''
    Input plugin for CHM files.

    The archive is extracted to a temporary directory, an HTML index page
    linking to every chapter listed in the .hhc table of contents is
    generated, and that page is handed to the HTML input plugin.
    '''

    name        = 'CHM Input'
    author      = 'Kovid Goyal and Alex Bramley'
    description = 'Convert CHM files to OEB'
    file_types  = set(['chm'])

    options = set([
        OptionRecommendation(name='dummy_option', recommended_value=False,
            help=_('dummy option until real options are determined.')),
    ])

    def _chmtohtml(self, output_dir, chm_path, no_images, log):
        '''
        Extract the CHM at ``chm_path`` into ``output_dir`` and return the
        archive-relative path of its .hhc file. ``no_images`` is accepted
        but not used.
        '''
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
        log.debug('Extracting CHM to %s' % output_dir)
        rdr.extract_content(output_dir)
        return rdr.hhc_path

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Convert the CHM file behind ``stream`` and return the resulting OEB
        book. The temporary extraction directory is removed and
        ``options.debug_pipeline`` is restored even when conversion fails.
        '''
        from calibre.ebooks.metadata.chm import get_metadata_
        from calibre.customize.ui import plugin_for_input_format

        log.debug('Processing CHM...')
        tdir = mkdtemp(prefix='chm2oeb_')
        try:
            # Start from the recommended values of the HTML input plugin.
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
            options.input_encoding = 'utf-8'
            no_images = False #options.no_images
            chm_name = stream.name
            #closing stream so CHM can be opened by external library
            stream.close()
            log.debug('tdir=%s' % tdir)
            log.debug('stream.name=%s' % stream.name)
            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
            mainpath = os.path.join(tdir, mainname)

            metadata = get_metadata_(tdir)

            # debug_pipeline is cleared for the nested HTML conversion only.
            odi = options.debug_pipeline
            options.debug_pipeline = None
            try:
                htmlpath = self._create_html_root(mainpath, log)
                oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
            finally:
                options.debug_pipeline = odi
        finally:
            shutil.rmtree(tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
        '''Build the OEB book from ``htmlpath`` using the HTMLInput plugin.'''
        from calibre.ebooks.html.input import HTMLInput
        opts.breadth_first = True
        htmlinput = HTMLInput(None)
        oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
        return oeb

    def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
        '''
        Build an OEB book directly from the chapters listed in the .hhc
        file at ``hhcpath``, taking metadata from ``mi``.
        '''
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        if mi.title:
            metadata.add('title', mi.title)
        if mi.authors:
            for a in mi.authors:
                metadata.add('creator', a, attrib={'role':'aut'})
        if mi.publisher:
            metadata.add('publisher', mi.publisher)
        if mi.isbn:
            metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})

        # Fill in anything still missing with defaults.
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang())
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', _('Unknown'))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', _('Unknown'))

        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): assigns identifier[0], not ``ident`` --
                # confirm this is intended.
                self.oeb.uid = metadata.identifier[0]
                break

        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        log.debug('Found %d section nodes' % len(chapters))

        if chapters:
            # The directory of the first chapter is used as the container
            # root for all chapters.
            path0 = chapters[0][1]
            subpath = os.path.dirname(path0)
            htmlpath = os.path.join(basedir, subpath)

            oeb.container = DirContainer(htmlpath, log)
            for chapter in chapters:
                title = chapter[0]
                basename = os.path.basename(chapter[1])
                self._add_item(oeb, title, basename)

            oeb.container = DirContainer(htmlpath, oeb.log)
        return oeb

    def _create_html_root(self, hhcpath, log):
        '''
        Write an HTML page next to ``hhcpath`` (same name, ".html"
        extension) containing one link per chapter found in the .hhc file,
        and return the path of that page.
        '''
        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        log.debug('Found %d section nodes' % len(chapters))

        htmlpath = os.path.splitext(hhcpath)[0] + ".html"
        with open(htmlpath, 'wb') as f:
            f.write("<HTML><HEAD></HEAD><BODY>\r\n")
            if chapters:
                # Every link is resolved against the directory of the first
                # chapter.
                path0 = chapters[0][1]
                subpath = os.path.dirname(path0)
                for chapter in chapters:
                    title = chapter[0]
                    rsrcname = os.path.basename(chapter[1])
                    rsrcpath = os.path.join(subpath, rsrcname)
                    # title should already be url encoded
                    url = "<br /><a href=" + rsrcpath + ">" + title + " </a>\r\n"
                    f.write(url)
            f.write("</BODY></HTML>")
        return htmlpath

    def _read_file(self, name):
        '''Return the complete binary contents of the file ``name``.'''
        with open(name, 'rb') as f:
            return f.read()

    def _visit_node(self, node, chapters, depth):
        '''
        If ``node`` is an <object type="text/sitemap"> element carrying both
        a "Name" and a "Local" <param>, append
        ``[title, path, depth]`` to ``chapters``.
        '''
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
        # (normal nodes have string tags)
        if not isinstance(node.tag, basestring):
            return
        if not (match_string(node.tag, 'object')
                and match_string(node.attrib.get('type'), 'text/sitemap')):
            return

        # Start from None so that an object lacking one of the params is
        # skipped below instead of raising on an unbound local.
        chapter_title = None
        chapter_path = None
        for child in node:
            # Comments have non-string tags; ignore them.
            if not isinstance(child.tag, basestring):
                continue
            if not match_string(child.tag, 'param'):
                continue
            param_name = child.attrib.get('name')
            if match_string(param_name, 'name'):
                chapter_title = child.attrib.get('value')
            elif match_string(param_name, 'local'):
                chapter_path = child.attrib.get('value')
        if chapter_title is not None and chapter_path is not None:
            chapters.append([chapter_title, chapter_path, depth])
        # NOTE: ``depth`` is a plain int argument, so adjusting it here could
        # never reach the caller; chapters are recorded with the depth given.

    def _process_nodes(self, root):
        '''
        Walk every node below ``root`` and return the collected chapters as
        a list of ``[title, path, depth]`` entries (depth is always 0).
        '''
        chapters = []
        depth = 0
        for node in root.iter():
            self._visit_node(node, chapters, depth)
        return chapters

    def _add_item(self, oeb, title, path):
        '''
        Add the HTML file ``path`` to the manifest, spine and TOC of ``oeb``
        under the given ``title``.
        '''
        bname = os.path.basename(path)
        item_id, href = oeb.manifest.generate(id='html',
                href=ascii_filename(bname))
        item = oeb.manifest.add(item_id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)
        oeb.toc.add(title, item.href)

View File

@ -107,7 +107,13 @@ class MetadataUpdater(object):
self.cover_record = self.thumbnail_record = None
self.timestamp = None
self.pdbrecords = self.get_pdbrecords()
self.drm_block = self.fetchDRMdata()
self.drm_block = None
if self.encryption_type != 0:
if self.have_exth:
self.drm_block = self.fetchDRMdata()
else:
raise MobiError('Unable to set metadata on DRM file without EXTH header')
self.original_exth_records = {}
if not have_exth:
@ -117,38 +123,14 @@ class MetadataUpdater(object):
self.fetchEXTHFields()
def fetchDRMdata(self):
    '''
    Fetch the DRM keys.

    Reads the DRM offset from bytes 0xa8-0xab and the key count from bytes
    0xac-0xaf of record 0 (both big-endian unsigned 32-bit), stores the
    count in ``self.drm_key_count`` and returns the concatenation of
    ``drm_key_count`` consecutive blocks of ``self.DRM_KEY_SIZE`` bytes
    starting at the DRM offset.
    '''
    drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
    self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
    key_size = self.DRM_KEY_SIZE
    # Collect the key blocks and join once instead of growing a string.
    drm_keys = []
    for x in range(self.drm_key_count):
        base_addr = drm_offset + (x * key_size)
        drm_keys.append(self.record0[base_addr:base_addr + key_size])
    return b''.join(drm_keys)
def fetchEXTHFields(self):
stream = self.stream
@ -224,7 +206,8 @@ class MetadataUpdater(object):
def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream
# self.hexdump(self.record0)
if isinstance(new_title, unicode):
new_title = new_title.encode(self.codec, 'replace')
# Fetch the existing title
title_offset, = unpack('>L', self.record0[0x54:0x58])
@ -248,12 +231,13 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', 12, 0), pad]
exth = ''.join(exth)
# Update drm_offset
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
if True:
# Update drm_offset(0xa8), title_offset(0x54)
if self.encryption_type != 0:
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block))
# Update title_offset
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
else:
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
if new_title:
self.record0[0x58:0x5c] = pack('>L', len(new_title))
@ -262,21 +246,15 @@ class MetadataUpdater(object):
new_record0 = StringIO()
new_record0.write(self.record0[:0x10 + mobi_header_length])
new_record0.write(exth)
new_record0.write(self.drm_block)
if new_title:
#new_record0.write(new_title.encode(self.codec, 'replace'))
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
new_record0.write(new_title)
else:
new_record0.write(title_in_file)
if self.encryption_type != 0:
new_record0.write(self.drm_block)
new_record0.write(new_title if new_title else title_in_file)
# Pad to a 4-byte boundary
trail = len(new_record0.getvalue()) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
new_record0.write(pad)
#self.hexdump(new_record0.getvalue())
# Rebuild the stream, update the pdbrecords pointers
self.patchSection(0,new_record0.getvalue())
@ -386,11 +364,7 @@ class MetadataUpdater(object):
recs.append((202, pack('>I', self.thumbnail_rindex)))
pop_exth_record(202)
if getattr(self, 'encryption_type', -1) != 0:
prints(u"Setting metadata for '%s' (DRM)" % mi.title)
# raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
# Restore any original EXTH fields that weren't modified/updated
# Restore any original EXTH fields that weren't updated
for id in sorted(self.original_exth_records):
recs.append((id, self.original_exth_records[id]))
recs = sorted(recs, key=lambda x:(x[0],x[0]))

View File

@ -1376,7 +1376,7 @@ class MobiWriter(object):
self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = str(metadata.title[0])
title = unicode(metadata.title[0]).encode('utf-8')
# The MOBI Header
# 0x0 - 0x3

View File

@ -1,4 +1,4 @@
import datetime, htmlentitydefs, os, re, shutil, time
import datetime, htmlentitydefs, os, re, shutil
from collections import namedtuple
from copy import deepcopy
@ -21,7 +21,7 @@ FIELDS = ['all', 'author_sort', 'authors', 'comments',
class CSV_XML(CatalogPlugin):
'CSV/XML catalog generator'
Option = namedtuple('Option', 'option, default, dest, help')
Option = namedtuple('Option', 'option, default, dest, action, help')
name = 'Catalog_CSV_XML'
description = 'CSV/XML catalog generator'
@ -34,6 +34,7 @@ class CSV_XML(CatalogPlugin):
Option('--fields',
default = 'all',
dest = 'fields',
action = None,
help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n'
@ -43,6 +44,7 @@ class CSV_XML(CatalogPlugin):
Option('--sort-by',
default = 'id',
dest = 'sort_by',
action = None,
help = _('Output field to sort on.\n'
'Available fields: author_sort, id, rating, size, timestamp, title.\n'
"Default: '%default'\n"
@ -241,7 +243,7 @@ class CSV_XML(CatalogPlugin):
class EPUB_MOBI(CatalogPlugin):
'ePub catalog generator'
Option = namedtuple('Option', 'option, default, dest, help')
Option = namedtuple('Option', 'option, default, dest, action, help')
name = 'Catalog_EPUB_MOBI'
description = 'EPUB/MOBI catalog generator'
@ -254,12 +256,14 @@ class EPUB_MOBI(CatalogPlugin):
cli_options = [Option('--catalog-title',
default = 'My Books',
dest = 'catalog_title',
action = None,
help = _('Title of generated catalog used as title in metadata.\n'
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--debug-pipeline',
default=None,
dest='debug_pipeline',
action = None,
help=_("Save the output from different stages of the conversion "
"pipeline to the specified "
"directory. Useful if you are unsure at which stage "
@ -269,48 +273,56 @@ class EPUB_MOBI(CatalogPlugin):
Option('--exclude-genre',
default='\[[\w ]*\]',
dest='exclude_genre',
action = None,
help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n"
"Applies to: ePub, MOBI output formats")),
Option('--exclude-tags',
default=('~,'+_('Catalog')),
dest='exclude_tags',
action = None,
help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n"
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--generate-titles',
default=True,
default=False,
dest='generate_titles',
action = 'store_true',
help=_("Include 'Titles' section in catalog.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--generate-recently-added',
default=True,
default=False,
dest='generate_recently_added',
action = 'store_true',
help=_("Include 'Recently Added' section in catalog.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--note-tag',
default='*',
dest='note_tag',
action = None,
help=_("Tag prefix for user notes, e.g. '*Jeff might enjoy reading this'.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--numbers-as-text',
default=False,
dest='numbers_as_text',
action = None,
help=_("Sort titles with leading numbers as text, e.g.,\n'2001: A Space Odyssey' sorts as \n'Two Thousand One: A Space Odyssey'.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--output-profile',
default=None,
dest='output_profile',
action = None,
help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--read-tag',
default='+',
dest='read_tag',
action = None,
help=_("Tag indicating book has been read.\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
]
@ -3418,13 +3430,12 @@ class EPUB_MOBI(CatalogPlugin):
def run(self, path_to_output, opts, db, notification=DummyReporter()):
opts.log = log = Log()
opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
self.opts = opts
# Add local options
opts.creator = "calibre"
# Finalize output_profile
op = self.opts.output_profile
op = opts.output_profile
if op is None:
op = 'default'
if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
@ -3434,13 +3445,30 @@ class EPUB_MOBI(CatalogPlugin):
op = "kindle"
opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100
opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60
self.opts.output_profile = op
opts.output_profile = op
opts.basename = "Catalog"
opts.cli_environment = not hasattr(opts,'sync')
# GwR *** hardwired to sort by author, could be an option if passed in opts
opts.sort_descriptions_by_author = True
# If exclude_genre is blank, assume user wants all genre tags included
if opts.exclude_genre.strip() == '':
opts.exclude_genre = '\[^.\]'
log(" converting empty exclude_genre to '\[^.\]'")
if opts.connected_device['name']:
if opts.connected_device['serial']:
log(" connected_device: '%s' #%s%s " % \
(opts.connected_device['name'],
opts.connected_device['serial'][0:4],
'x' * (len(opts.connected_device['serial']) - 4)))
else:
log(" connected_device: '%s'" % opts.connected_device['name'])
for storage in opts.connected_device['storage']:
if storage:
log(" mount point: %s" % storage)
if opts.verbose:
opts_dict = vars(opts)
log(u"%s(): Generating %s %sin %s environment" %
@ -3458,26 +3486,6 @@ class EPUB_MOBI(CatalogPlugin):
sections_list.append('Genres')
log(u"Creating Sections for %s" % ', '.join(sections_list))
# If exclude_genre is blank, assume user wants all genre tags included
if opts.exclude_genre.strip() == '':
opts.exclude_genre = '\[^.\]'
log(" converting empty exclude_genre to '\[^.\]'")
if opts.connected_device['name']:
if opts.connected_device['serial']:
log(" connected_device: '%s' #%s%s " % \
(opts.connected_device['name'],
opts.connected_device['serial'][0:4],
'x' * (len(opts.connected_device['serial']) - 4)))
else:
log(" connected_device: '%s'" % opts.connected_device['name'])
for storage in opts.connected_device['storage']:
if storage:
log(" mount point: %s" % storage)
# for book in opts.connected_device['books']:
# log("%s: %s" % (book.title, book.path))
# Display opts
keys = opts_dict.keys()
keys.sort()
@ -3488,6 +3496,8 @@ class EPUB_MOBI(CatalogPlugin):
'search_text','sort_by','sort_descriptions_by_author','sync']:
log(" %s: %s" % (key, opts_dict[key]))
self.opts = opts
# Launch the Catalog builder
catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
if opts.verbose:

View File

@ -587,9 +587,6 @@ def command_export(args, dbpath):
do_export(get_db(dbpath, opts), ids, dir, opts)
return 0
# GR additions
def catalog_option_parser(args):
from calibre.customize.ui import available_catalog_formats, plugin_for_catalog_format
from calibre.utils.logging import Log
@ -599,10 +596,17 @@ def catalog_option_parser(args):
# Fetch the extension-specific CLI options from the plugin
plugin = plugin_for_catalog_format(fmt)
for option in plugin.cli_options:
parser.add_option(option.option,
default=option.default,
dest=option.dest,
help=option.help)
if option.action:
parser.add_option(option.option,
default=option.default,
dest=option.dest,
action=option.action,
help=option.help)
else:
parser.add_option(option.option,
default=option.default,
dest=option.dest,
help=option.help)
return plugin