mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
GwR add metadata editing for DRM files, fix catalog CLI options
This commit is contained in:
commit
7d69048a4b
BIN
resources/images/news/wired_uk.png
Normal file
BIN
resources/images/news/wired_uk.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 647 B |
37
resources/kathemerini.recipe
Normal file
37
resources/kathemerini.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Kathimerini(BasicNewsRecipe):
    '''
    News recipe for the Greek newspaper Kathimerini, built from the
    per-section XML feeds published on wk.kathimerini.gr.
    '''

    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True
    # Both markers point at the same <td class="news"> element.
    remove_tags_before = dict(name='td', attrs={'class': 'news'})
    remove_tags_after = dict(name='td', attrs={'class': 'news'})
    remove_attributes = ['width', 'src', 'header', 'footer']

    # (section title, feed URL) pairs. The titles are Greek text spelled
    # with unicode escapes. The URLs must not carry leading whitespace:
    # two of them previously started with a stray space.
    feeds = [
        # Politics
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
         'http://wk.kathimerini.gr/xml_files/politics.xml'),
        # Greece
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         'http://wk.kathimerini.gr/xml_files/ell.xml'),
        # World
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         'http://wk.kathimerini.gr/xml_files/world.xml'),
        # Economy
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
        # Business
        (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
         'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
        # International economy
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
        # Culture
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         'http://wk.kathimerini.gr/xml_files/civ.xml'),
        # Regular columns
        (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
         'http://wk.kathimerini.gr/xml_files/st.xml'),
    ]

    def print_version(self, url):
        '''
        Return ``url`` with the ``/4dcgi/`` path segment of
        news.kathimerini.gr doubled to ``/4dcgi/4dcgi/``. URLs that do not
        contain the prefix are returned unchanged.
        '''
        return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')
|
||||
|
||||
|
74
resources/recipes/wired_uk.recipe
Normal file
74
resources/recipes/wired_uk.recipe
Normal file
@ -0,0 +1,74 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.wired.co.uk
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Wired_UK(BasicNewsRecipe):
    '''
    News recipe for the UK edition of Wired magazine. The article list is
    scraped from the magazine index page rather than taken from feeds.
    '''

    title = 'Wired Magazine - UK edition'
    __author__ = 'Darko Miletic'
    description = 'Gaming news'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    oldest_article = 32
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    language = 'en_GB'
    extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
    # Page that is scraped for both the article list and the cover image.
    index = 'http://www.wired.co.uk/wired-magazine.aspx'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'class': 'article-box'})]
    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'link']),
        dict(attrs={'class': ['opts', 'comment', 'stories']}),
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'stories'})
    remove_attributes = ['height', 'width']

    def parse_index(self):
        '''
        Build the article list from the links found inside the
        ``most-wired-box`` element of the index page.

        :return: a one-element list ``[('Articles', articles)]`` where each
                 article is a dict with ``title``, ``date``, ``url`` and
                 ``description`` keys. ``articles`` is empty when the
                 expected page elements are missing.
        '''
        articles = []
        soup = self.index_to_soup(self.index)
        maincontent = soup.find('div', attrs={'class': 'main-content'})
        if maincontent:
            box = maincontent.find(attrs={'class': 'most-wired-box'})
            if box:
                # Every entry gets the same download-time stamp, so format
                # it once instead of once per link.
                date = strftime(self.timefmt)
                for link in box.findAll('a', href=True):
                    articles.append({
                        'title': self.tag_to_string(link),
                        'date': date,
                        # hrefs are joined onto the site root as-is
                        'url': 'http://www.wired.co.uk' + link['href'],
                        'description': '',
                    })
        return [('Articles', articles)]

    def get_cover_url(self):
        '''
        Return the ``src`` of the image inside ``<span class="cover">`` on
        the index page, or None when the span, its ``<img>`` or the ``src``
        attribute is missing.
        '''
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('span', attrs={'class': 'cover'})
        if not cover_item:
            return None
        img = cover_item.img
        # The markup may lack the image or its src; previously either case
        # raised instead of simply yielding no cover.
        if img is None:
            return None
        return img.get('src', None)

    def print_version(self, url):
        '''Return ``url`` with ``?page=all`` appended.'''
        return url + '?page=all'
|
30
resources/tanea.recipe
Normal file
30
resources/tanea.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class TaNea(BasicNewsRecipe):
    '''News recipe for the Greek newspaper Ta Nea (www.tanea.gr).'''

    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    remove_tags_before = dict(name='div', attrs={'id': 'print-body'})
    remove_tags_after = dict(name='div', attrs={'id': 'text'})

    # (section title, feed URL) pairs. The titles used to be written as
    # UTF-8 byte values inside unicode literals (u'\xce\x95...'), which
    # produces Latin-1 mojibake rather than Greek. They are now spelled
    # with the real code points.
    feeds = [
        # Greece
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        # World
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        # Economy
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        # Culture
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        # Opinions
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        # "Ripes" column
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        # "Aichmes" column
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=81&la=1'),
    ]

    def print_version(self, url):
        '''
        Return ``url`` with the ``pid=2`` page prefix swapped for
        ``pid=96``. URLs without that prefix are returned unchanged.
        '''
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
|
@ -249,6 +249,7 @@ class CatalogPlugin(Plugin):
|
||||
#: dest = 'catalog_title',
|
||||
#: help = (_('Title of generated catalog. \nDefault:') + " '" +
|
||||
#: '%default' + "'"))]
|
||||
#: cli_options parsed in library.cli:catalog_option_parser()
|
||||
|
||||
cli_options = []
|
||||
|
||||
@ -275,9 +276,10 @@ class CatalogPlugin(Plugin):
|
||||
def get_output_fields(self, opts):
|
||||
# Return a list of requested fields, with opts.sort_by first
|
||||
all_fields = set(
|
||||
['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating',
|
||||
'series_index','series','size','tags','timestamp',
|
||||
'title','uuid'])
|
||||
['author_sort','authors','comments','cover','formats',
|
||||
'id','isbn','pubdate','publisher','rating',
|
||||
'series_index','series','size','tags','timestamp',
|
||||
'title','uuid'])
|
||||
|
||||
fields = all_fields
|
||||
if opts.fields != 'all':
|
||||
|
8
src/calibre/ebooks/chm/__init__.py
Normal file
8
src/calibre/ebooks/chm/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Used for chm input
|
||||
'''
|
384
src/calibre/ebooks/chm/input.py
Normal file
384
src/calibre/ebooks/chm/input.py
Normal file
@ -0,0 +1,384 @@
|
||||
from __future__ import with_statement
|
||||
''' CHM File decoding support '''
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, shutil, uuid
|
||||
from tempfile import mkdtemp
|
||||
from mimetypes import guess_type as guess_mimetype
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from lxml import html
|
||||
from pychm.chm import CHMFile
|
||||
from pychm.chmlib import (
|
||||
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
|
||||
chm_enumerate,
|
||||
)
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
def match_string(s1, s2_already_lowered):
    '''
    Case-insensitive equality test against an already lower-cased string.

    :param s1: string to compare; it is lower-cased here
    :param s2_already_lowered: reference string, compared as given
    :return: True when both arguments are not None and ``s1.lower()``
             equals ``s2_already_lowered``, otherwise False
    '''
    if s1 is None or s2_already_lowered is None:
        return False
    return s1.lower() == s2_already_lowered
|
||||
|
||||
def option_parser():
    '''
    Create the command line option parser for ``%prog [options] mybook.chm``.

    NOTE(review): two help strings below mention LRF files, so this option
    set looks copied from an LRF tool -- confirm they are meant for CHM.

    :return: the populated ``OptionParser``
    '''
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    add = parser.add_option

    # Output and diagnostics
    add('--output-dir', '-d', default='.', dest='output',
        help=_('Output directory. Defaults to current directory'))
    add('--verbose', default=False, action='store_true', dest='verbose')

    # Title and author
    add("-t", "--title", action="store", type="string",
        dest="title", help=_("Set the book title"))
    add('--title-sort', action='store', type='string', default=None,
        dest='title_sort', help=_('Set sort key for the title'))
    add("-a", "--author", action="store", type="string",
        dest="author", help=_("Set the author"))
    add('--author-sort', action='store', type='string', default=None,
        dest='author_sort', help=_('Set sort key for the author'))

    # Category, thumbnail and comment
    add("-c", "--category", action="store", type="string",
        dest="category", help=_("The category this book belongs"
                                " to. E.g.: History"))
    add("--thumbnail", action="store", type="string",
        dest="thumbnail", help=_("Path to a graphic that will be"
                                 " set as this files' thumbnail"))
    add("--comment", action="store", type="string",
        dest="freetext", help=_("Path to a txt file containing a comment."))
    add("--get-thumbnail", action="store_true",
        dest="get_thumbnail", default=False,
        help=_("Extract thumbnail from LRF file"))

    # Remaining metadata fields
    add('--publisher', default=None, help=_('Set the publisher'))
    add('--classification', default=None, help=_('Set the book classification'))
    add('--creator', default=None, help=_('Set the book creator'))
    add('--producer', default=None, help=_('Set the book producer'))
    add('--get-cover', action='store_true', default=False,
        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    add('--bookid', action='store', type='string', default=None,
        dest='book_id', help=_('Set book ID'))
    add('--font-delta', action='store', type='int', default=0,
        dest='font_delta', help=_('Set font delta'))
    return parser
|
||||
|
||||
class CHMError(Exception):
    '''Error raised by the CHM reading code in this module.'''
|
||||
|
||||
class CHMReader(CHMFile):
    '''
    Opens a CHM archive through pychm's ``CHMFile`` and extracts its
    entries to a directory, cleaning up HTML entries on the way.
    '''

    def __init__(self, input, log):
        '''
        :param input: path of the CHM file, handed to ``LoadCHM``
        :param log: logger, kept as ``self.log``
        :raises CHMError: when ``LoadCHM`` reports failure
        '''
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None    # cache filled by Contents()
        self._playorder = 0      # running counter used by _parse_toc()
        self._metadata = False
        self._extracted = False  # set once ExtractFiles() has finished

        # location of '.hhc' file, which is the CHM TOC.
        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
        self.hhc_path = self.root + ".hhc"

    def _parse_toc(self, ul, basedir=None):
        '''
        Recursively turn a parsed ``<ul>`` of sitemap entries into a TOC.

        :param ul: parsed ``<ul>`` element whose direct ``<li>`` children
                   each hold an ``<object>`` with ``Local`` and ``Name``
                   params
        :param basedir: base path for the TOC; defaults to the working
                        directory at call time
        :return: the populated ``TOC``
        '''
        # Resolve the default at call time. A default of os.getcwdu() in
        # the signature is frozen when the module is imported.
        if basedir is None:
            basedir = os.getcwdu()
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            href = li.object('param', {'name': 'Local'})[0]['value']
            # Split on the first '#' only, so a fragment that itself
            # contains '#' cannot break the unpacking.
            href, sep, frag = href.partition('#')
            if not sep:
                frag = None
            # NOTE(review): _deentity is not defined in this class;
            # presumably it is expected from CHMFile -- confirm.
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                # Keep the same base path for nested levels.
                child = self._parse_toc(li.ul, basedir)
                child.parent = toc
                toc.append(child)
        return toc

    def GetFile(self, path):
        '''
        Return the data of one entry of the CHM archive.

        :param path: path inside the CHM; a leading '/' is added if missing
        :raises CHMError: when the entry cannot be resolved or is zero
                          bytes long
        '''
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if not path.startswith('/'):
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=None):
        '''
        Write every entry listed by ``Contents()`` below ``output_dir``.

        Entries with a ``text/html`` mimetype go through ``_reformat``
        first. Entries that cannot be read, or whose path would land
        outside ``output_dir``, are logged and skipped.

        :param output_dir: destination directory; defaults to the working
                           directory at call time
        '''
        if output_dir is None:
            output_dir = os.getcwdu()
        base = os.path.abspath(output_dir)
        prefix = base if base.endswith(os.sep) else base + os.sep
        for path in self.Contents():
            lpath = os.path.abspath(os.path.join(base, path))
            # Entry names come from the archive and may contain '..';
            # never write outside the requested directory.
            if not lpath.startswith(prefix):
                self.log.warn("Skipping '%s': it resolves outside of the output directory" % path)
                continue
            try:
                data = self.GetFile(path)
            except CHMError as err:
                # One unreadable or empty entry should not abort the
                # extraction of the rest of the book.
                self.log.warn("Failed to extract '%s' from CHM, ignoring: %s" % (path, err))
                continue
            if guess_mimetype(path)[0] == 'text/html':
                data = self._reformat(data)
            self._ensure_dir(lpath)
            with open(lpath, 'wb') as f:
                f.write(data)
        self._extracted = True

    def _reformat(self, data):
        '''
        Clean up one HTML page: drop scripts, drop a leading/trailing
        navigation table and normalise ``<img src>`` values.

        :param data: raw HTML
        :return: prettified HTML, or ``data`` unchanged when parsing fails
                 with ``UnicodeEncodeError``
        '''
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            self.log.warn("Unable to parse html for cleaning, leaving it :(")
            return data
        # nuke javascript...
        for script in soup('script'):
            script.extract()
        # Remove forward/back navigation bars from the top and bottom of
        # each page: they interrupt the reading flow and waste space.
        # A table counts as a nav bar when it is (nearly) the first or
        # last sibling at its level.
        t = soup('table')
        if t:
            if (t[0].previousSibling is None
                    or t[0].previousSibling.previousSibling is None):
                t[0].extract()
            if (t[-1].nextSibling is None
                    or t[-1].nextSibling.nextSibling is None):
                t[-1].extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                # some are supposedly "relative"... lies.
                while img['src'].startswith('../'):
                    img['src'] = img['src'][3:]
                # some have ";<junk>" at the end.
                img['src'] = img['src'].split(';')[0]
            except KeyError:
                # and some don't even have a src= ?!
                pass
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        '''
        Return the relative paths of all non-directory entries of the CHM.
        The list is computed once and cached in ``self._contents``.
        '''
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if not ui.path.endswith('/'):
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        '''Create the parent directory of ``path`` when it does not exist.'''
        parent = os.path.dirname(path)
        # An empty dirname means "current directory": nothing to create.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def extract_content(self, output_dir=None):
        '''
        Extract the whole archive; thin wrapper around ``ExtractFiles``.

        :param output_dir: destination directory; defaults to the working
                           directory at call time
        '''
        self.ExtractFiles(output_dir=output_dir)
|
||||
|
||||
|
||||
class CHMInput(InputFormatPlugin):
    '''
    Input plugin that extracts a CHM file to a temporary directory, builds
    an HTML index page from its .hhc table of contents and hands that page
    to the HTML input plugin.
    '''

    name = 'CHM Input'
    author = 'Kovid Goyal and Alex Bramley'
    description = 'Convert CHM files to OEB'
    file_types = set(['chm'])

    options = set([
        OptionRecommendation(name='dummy_option', recommended_value=False,
            help=_('dummy option until real options are determined.')),
    ])

    def _chmtohtml(self, output_dir, chm_path, no_images, log):
        '''
        Extract the CHM at ``chm_path`` into ``output_dir``.

        :param no_images: accepted but not used here
        :return: the reader's ``hhc_path`` (path of the .hhc TOC file)
        '''
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
        log.debug('Extracting CHM to %s' % output_dir)
        rdr.extract_content(output_dir)
        return rdr.hhc_path

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Convert the CHM behind ``stream`` into an OEB book.

        The stream is closed so the CHM library can open the file by name.
        The temporary extraction directory is removed and
        ``options.debug_pipeline`` is restored even when conversion fails.

        :return: the book created by the HTML input plugin
        '''
        from calibre.ebooks.metadata.chm import get_metadata_
        from calibre.customize.ui import plugin_for_input_format

        log.debug('Processing CHM...')
        tdir = mkdtemp(prefix='chm2oeb_')
        try:
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
            options.input_encoding = 'utf-8'
            no_images = False #options.no_images
            chm_name = stream.name

            # closing stream so CHM can be opened by external library
            stream.close()
            log.debug('tdir=%s' % tdir)
            log.debug('stream.name=%s' % stream.name)
            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
            mainpath = os.path.join(tdir, mainname)

            metadata = get_metadata_(tdir)

            # debug_pipeline is cleared only for the nested HTML conversion
            odi = options.debug_pipeline
            options.debug_pipeline = None
            try:
                htmlpath = self._create_html_root(mainpath, log)
                oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
            finally:
                options.debug_pipeline = odi
        finally:
            # Do not leave the extracted book behind when anything fails.
            shutil.rmtree(tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
        '''Create the book from ``htmlpath`` with the HTMLInput plugin.'''
        # use HTMLInput plugin to generate book
        from calibre.ebooks.html.input import HTMLInput
        opts.breadth_first = True
        htmlinput = HTMLInput(None)
        oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
        return oeb

    def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
        '''
        Alternative conversion path that builds the book directly from the
        .hhc entries (not called by ``convert``).

        :param hhcpath: path of the extracted .hhc file
        :param basedir: directory holding the extracted CHM
        :param mi: metadata object providing title, authors, publisher, isbn
        :return: the created book
        '''
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        if mi.title:
            metadata.add('title', mi.title)
        if mi.authors:
            for a in mi.authors:
                metadata.add('creator', a, attrib={'role':'aut'})
        if mi.publisher:
            metadata.add('publisher', mi.publisher)
        if mi.isbn:
            metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang())
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', _('Unknown'))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', _('Unknown'))

        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): this picks identifier[0] rather than the
                # matching ``ident`` -- confirm that is intended.
                self.oeb.uid = metadata.identifier[0]
                break

        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        log.debug('Found %d section nodes' % len(chapters))

        # Everything below needs the directory of the first chapter, so it
        # only runs when at least one chapter exists.
        if chapters:
            path0 = chapters[0][1]
            subpath = os.path.dirname(path0)
            htmlpath = os.path.join(basedir, subpath)

            oeb.container = DirContainer(htmlpath, log)
            for chapter in chapters:
                title = chapter[0]
                basename = os.path.basename(chapter[1])
                self._add_item(oeb, title, basename)

            oeb.container = DirContainer(htmlpath, oeb.log)
        return oeb

    def _create_html_root(self, hhcpath, log):
        '''
        Write an HTML page next to ``hhcpath`` holding one link per chapter
        found in the .hhc file.

        :param hhcpath: path of the extracted .hhc file
        :return: path of the written file (``hhcpath`` with an ``.html``
                 extension)
        '''
        from xml.sax.saxutils import escape, quoteattr

        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        log.debug('Found %d section nodes' % len(chapters))
        htmlpath = os.path.splitext(hhcpath)[0] + ".html"

        lines = ["<HTML><HEAD></HEAD><BODY>\r\n"]
        if chapters:
            # NOTE(review): every link is placed in the directory of the
            # first chapter; chapters living elsewhere would get a wrong
            # path -- confirm all chapters share one directory.
            path0 = chapters[0][1]
            subpath = os.path.dirname(path0)

            for chapter in chapters:
                title = chapter[0]
                rsrcname = os.path.basename(chapter[1])
                rsrcpath = os.path.join(subpath, rsrcname)
                # Quote the attribute and escape the text so paths with
                # spaces and titles with markup characters survive.
                lines.append("<br /><a href=" + quoteattr(rsrcpath) + ">"
                             + escape(title) + " </a>\r\n")
        lines.append("</BODY></HTML>")

        with open(htmlpath, 'wb') as f:
            for line in lines:
                # Parsed values may be unicode; the page is later read as
                # utf-8 (see convert), so encode explicitly.
                if isinstance(line, unicode):
                    line = line.encode('utf-8')
                f.write(line)
        return htmlpath

    def _read_file(self, name):
        '''Return the raw bytes of the file ``name``.'''
        with open(name, 'rb') as f:
            return f.read()

    def _visit_node(self, node, chapters, depth):
        '''
        Append ``[title, path, depth]`` to ``chapters`` when ``node`` is an
        ``<object type="text/sitemap">`` holding both a ``name`` and a
        ``local`` ``<param>``. Any other node is ignored.
        '''
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
        # (normal nodes have string tags)
        if not isinstance(node.tag, basestring):
            return
        if not (match_string(node.tag, 'object')
                and match_string(node.attrib.get('type'), 'text/sitemap')):
            return
        # Start from "not found" so an entry lacking one of the params is
        # skipped instead of raising on an unbound name.
        chapter_title = None
        chapter_path = None
        for child in node:
            if not isinstance(child.tag, basestring):
                continue
            if not match_string(child.tag, 'param'):
                continue
            pname = child.attrib.get('name')
            if match_string(pname, 'name'):
                chapter_title = child.attrib.get('value')
            elif match_string(pname, 'local'):
                chapter_path = child.attrib.get('value')
        if chapter_title is not None and chapter_path is not None:
            chapters.append([chapter_title, chapter_path, depth])

    def _process_nodes(self, root):
        '''
        Walk the parsed .hhc tree in document order.

        :return: list of ``[title, path, depth]`` entries, where ``depth``
                 is the number of ``<ul>`` ancestors of the entry
        '''
        chapters = []
        for node in root.iter():
            depth = 0
            if isinstance(node.tag, basestring):
                # Nesting is carried by the tree structure, so count the
                # enclosing lists instead of tracking open/close tags.
                depth = sum(1 for anc in node.iterancestors()
                            if match_string(anc.tag, 'ul'))
            self._visit_node(node, chapters, depth)
        return chapters

    def _add_item(self, oeb, title, path):
        '''
        Register the HTML file ``path`` in the manifest, spine and TOC of
        ``oeb`` under ``title``.
        '''
        bname = os.path.basename(path)
        item_id, href = oeb.manifest.generate(id='html',
                href=ascii_filename(bname))
        item = oeb.manifest.add(item_id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)
        oeb.toc.add(title, item.href)
|
||||
|
@ -107,7 +107,13 @@ class MetadataUpdater(object):
|
||||
self.cover_record = self.thumbnail_record = None
|
||||
self.timestamp = None
|
||||
self.pdbrecords = self.get_pdbrecords()
|
||||
self.drm_block = self.fetchDRMdata()
|
||||
|
||||
self.drm_block = None
|
||||
if self.encryption_type != 0:
|
||||
if self.have_exth:
|
||||
self.drm_block = self.fetchDRMdata()
|
||||
else:
|
||||
raise MobiError('Unable to set metadata on DRM file without EXTH header')
|
||||
|
||||
self.original_exth_records = {}
|
||||
if not have_exth:
|
||||
@ -117,38 +123,14 @@ class MetadataUpdater(object):
|
||||
self.fetchEXTHFields()
|
||||
|
||||
def fetchDRMdata(self):
|
||||
''' Grab everything between end of EXTH and title '''
|
||||
'''
|
||||
if False and self.have_exth:
|
||||
print "incoming file has EXTH header"
|
||||
# 20:24 = mobiHeaderLength, 16=PDBHeader size, 4 = len('EXTH')
|
||||
exth_off = int(unpack('>I', self.record0[20:24])[0] + 16)
|
||||
print "exth_off = 0x%x" % exth_off
|
||||
exth_len_offset = exth_off + 4
|
||||
print "exth_len_offset = 0x%x" % exth_len_offset
|
||||
exth_len = int(unpack('>I', self.record0[exth_len_offset:exth_len_offset+4])[0])
|
||||
print "len(EXTH) = 0x%x" % exth_len
|
||||
title_offset = int(unpack('>I', self.record0[0x54:0x58])[0])
|
||||
print "offset of full title = 0x%x" % title_offset
|
||||
drm_off = exth_off + exth_len
|
||||
print "DRM data begins at 0x%x" % drm_off
|
||||
print "DRM len is 0x%x bytes" % (title_offset - drm_off)
|
||||
return self.record0[drm_off:drm_off + (title_offset - drm_off)]
|
||||
else:
|
||||
'''
|
||||
if True:
|
||||
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
|
||||
self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
|
||||
drm_string = ''
|
||||
for x in range(self.drm_key_count):
|
||||
base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
|
||||
drm_string += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
|
||||
return drm_string
|
||||
else:
|
||||
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
|
||||
title_offset = int(unpack('>I', self.record0[0x54:0x58])[0])
|
||||
drm_blocklen = title_offset - drm_offset
|
||||
return self.record0[drm_offset:drm_offset + drm_blocklen]
|
||||
''' Fetch the DRM keys '''
|
||||
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
|
||||
self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
|
||||
drm_keys = ''
|
||||
for x in range(self.drm_key_count):
|
||||
base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
|
||||
drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
|
||||
return drm_keys
|
||||
|
||||
def fetchEXTHFields(self):
|
||||
stream = self.stream
|
||||
@ -224,7 +206,8 @@ class MetadataUpdater(object):
|
||||
|
||||
def create_exth(self, new_title=None, exth=None):
|
||||
# Add an EXTH block to record 0, rewrite the stream
|
||||
# self.hexdump(self.record0)
|
||||
if isinstance(new_title, unicode):
|
||||
new_title = new_title.encode(self.codec, 'replace')
|
||||
|
||||
# Fetch the existing title
|
||||
title_offset, = unpack('>L', self.record0[0x54:0x58])
|
||||
@ -248,12 +231,13 @@ class MetadataUpdater(object):
|
||||
exth = ['EXTH', pack('>II', 12, 0), pad]
|
||||
exth = ''.join(exth)
|
||||
|
||||
# Update drm_offset
|
||||
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
||||
if True:
|
||||
# Update drm_offset(0xa8), title_offset(0x54)
|
||||
if self.encryption_type != 0:
|
||||
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
||||
self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block))
|
||||
# Update title_offset
|
||||
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
|
||||
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
|
||||
else:
|
||||
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
||||
|
||||
if new_title:
|
||||
self.record0[0x58:0x5c] = pack('>L', len(new_title))
|
||||
@ -262,21 +246,15 @@ class MetadataUpdater(object):
|
||||
new_record0 = StringIO()
|
||||
new_record0.write(self.record0[:0x10 + mobi_header_length])
|
||||
new_record0.write(exth)
|
||||
new_record0.write(self.drm_block)
|
||||
if new_title:
|
||||
#new_record0.write(new_title.encode(self.codec, 'replace'))
|
||||
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
|
||||
new_record0.write(new_title)
|
||||
else:
|
||||
new_record0.write(title_in_file)
|
||||
if self.encryption_type != 0:
|
||||
new_record0.write(self.drm_block)
|
||||
new_record0.write(new_title if new_title else title_in_file)
|
||||
|
||||
# Pad to a 4-byte boundary
|
||||
trail = len(new_record0.getvalue()) % 4
|
||||
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
|
||||
new_record0.write(pad)
|
||||
|
||||
#self.hexdump(new_record0.getvalue())
|
||||
|
||||
# Rebuild the stream, update the pdbrecords pointers
|
||||
self.patchSection(0,new_record0.getvalue())
|
||||
|
||||
@ -386,11 +364,7 @@ class MetadataUpdater(object):
|
||||
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
||||
pop_exth_record(202)
|
||||
|
||||
if getattr(self, 'encryption_type', -1) != 0:
|
||||
prints(u"Setting metadata for '%s' (DRM)" % mi.title)
|
||||
# raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
||||
|
||||
# Restore any original EXTH fields that weren't modified/updated
|
||||
# Restore any original EXTH fields that weren't updated
|
||||
for id in sorted(self.original_exth_records):
|
||||
recs.append((id, self.original_exth_records[id]))
|
||||
recs = sorted(recs, key=lambda x:(x[0],x[0]))
|
||||
|
@ -1376,7 +1376,7 @@ class MobiWriter(object):
|
||||
self._text_length,
|
||||
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
|
||||
uid = random.randint(0, 0xffffffff)
|
||||
title = str(metadata.title[0])
|
||||
title = unicode(metadata.title[0]).encode('utf-8')
|
||||
# The MOBI Header
|
||||
|
||||
# 0x0 - 0x3
|
||||
|
@ -1,4 +1,4 @@
|
||||
import datetime, htmlentitydefs, os, re, shutil, time
|
||||
import datetime, htmlentitydefs, os, re, shutil
|
||||
|
||||
from collections import namedtuple
|
||||
from copy import deepcopy
|
||||
@ -21,7 +21,7 @@ FIELDS = ['all', 'author_sort', 'authors', 'comments',
|
||||
class CSV_XML(CatalogPlugin):
|
||||
'CSV/XML catalog generator'
|
||||
|
||||
Option = namedtuple('Option', 'option, default, dest, help')
|
||||
Option = namedtuple('Option', 'option, default, dest, action, help')
|
||||
|
||||
name = 'Catalog_CSV_XML'
|
||||
description = 'CSV/XML catalog generator'
|
||||
@ -34,6 +34,7 @@ class CSV_XML(CatalogPlugin):
|
||||
Option('--fields',
|
||||
default = 'all',
|
||||
dest = 'fields',
|
||||
action = None,
|
||||
help = _('The fields to output when cataloging books in the '
|
||||
'database. Should be a comma-separated list of fields.\n'
|
||||
'Available fields: %s.\n'
|
||||
@ -43,6 +44,7 @@ class CSV_XML(CatalogPlugin):
|
||||
Option('--sort-by',
|
||||
default = 'id',
|
||||
dest = 'sort_by',
|
||||
action = None,
|
||||
help = _('Output field to sort on.\n'
|
||||
'Available fields: author_sort, id, rating, size, timestamp, title.\n'
|
||||
"Default: '%default'\n"
|
||||
@ -241,7 +243,7 @@ class CSV_XML(CatalogPlugin):
|
||||
class EPUB_MOBI(CatalogPlugin):
|
||||
'ePub catalog generator'
|
||||
|
||||
Option = namedtuple('Option', 'option, default, dest, help')
|
||||
Option = namedtuple('Option', 'option, default, dest, action, help')
|
||||
|
||||
name = 'Catalog_EPUB_MOBI'
|
||||
description = 'EPUB/MOBI catalog generator'
|
||||
@ -254,12 +256,14 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
cli_options = [Option('--catalog-title',
|
||||
default = 'My Books',
|
||||
dest = 'catalog_title',
|
||||
action = None,
|
||||
help = _('Title of generated catalog used as title in metadata.\n'
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--debug-pipeline',
|
||||
default=None,
|
||||
dest='debug_pipeline',
|
||||
action = None,
|
||||
help=_("Save the output from different stages of the conversion "
|
||||
"pipeline to the specified "
|
||||
"directory. Useful if you are unsure at which stage "
|
||||
@ -269,48 +273,56 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
Option('--exclude-genre',
|
||||
default='\[[\w ]*\]',
|
||||
dest='exclude_genre',
|
||||
action = None,
|
||||
help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--exclude-tags',
|
||||
default=('~,'+_('Catalog')),
|
||||
dest='exclude_tags',
|
||||
action = None,
|
||||
help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n"
|
||||
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--generate-titles',
|
||||
default=True,
|
||||
default=False,
|
||||
dest='generate_titles',
|
||||
action = 'store_true',
|
||||
help=_("Include 'Titles' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--generate-recently-added',
|
||||
default=True,
|
||||
default=False,
|
||||
dest='generate_recently_added',
|
||||
action = 'store_true',
|
||||
help=_("Include 'Recently Added' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--note-tag',
|
||||
default='*',
|
||||
dest='note_tag',
|
||||
action = None,
|
||||
help=_("Tag prefix for user notes, e.g. '*Jeff might enjoy reading this'.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--numbers-as-text',
|
||||
default=False,
|
||||
dest='numbers_as_text',
|
||||
action = None,
|
||||
help=_("Sort titles with leading numbers as text, e.g.,\n'2001: A Space Odyssey' sorts as \n'Two Thousand One: A Space Odyssey'.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--output-profile',
|
||||
default=None,
|
||||
dest='output_profile',
|
||||
action = None,
|
||||
help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
Option('--read-tag',
|
||||
default='+',
|
||||
dest='read_tag',
|
||||
action = None,
|
||||
help=_("Tag indicating book has been read.\n" "Default: '%default'\n"
|
||||
"Applies to: ePub, MOBI output formats")),
|
||||
]
|
||||
@ -3418,13 +3430,12 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
def run(self, path_to_output, opts, db, notification=DummyReporter()):
|
||||
opts.log = log = Log()
|
||||
opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
|
||||
self.opts = opts
|
||||
|
||||
# Add local options
|
||||
opts.creator = "calibre"
|
||||
|
||||
# Finalize output_profile
|
||||
op = self.opts.output_profile
|
||||
op = opts.output_profile
|
||||
if op is None:
|
||||
op = 'default'
|
||||
if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
|
||||
@ -3434,13 +3445,30 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
op = "kindle"
|
||||
opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100
|
||||
opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60
|
||||
self.opts.output_profile = op
|
||||
opts.output_profile = op
|
||||
|
||||
opts.basename = "Catalog"
|
||||
opts.cli_environment = not hasattr(opts,'sync')
|
||||
# GwR *** hardwired to sort by author, could be an option if passed in opts
|
||||
opts.sort_descriptions_by_author = True
|
||||
|
||||
# If exclude_genre is blank, assume user wants all genre tags included
|
||||
if opts.exclude_genre.strip() == '':
|
||||
opts.exclude_genre = '\[^.\]'
|
||||
log(" converting empty exclude_genre to '\[^.\]'")
|
||||
|
||||
if opts.connected_device['name']:
|
||||
if opts.connected_device['serial']:
|
||||
log(" connected_device: '%s' #%s%s " % \
|
||||
(opts.connected_device['name'],
|
||||
opts.connected_device['serial'][0:4],
|
||||
'x' * (len(opts.connected_device['serial']) - 4)))
|
||||
else:
|
||||
log(" connected_device: '%s'" % opts.connected_device['name'])
|
||||
for storage in opts.connected_device['storage']:
|
||||
if storage:
|
||||
log(" mount point: %s" % storage)
|
||||
|
||||
if opts.verbose:
|
||||
opts_dict = vars(opts)
|
||||
log(u"%s(): Generating %s %sin %s environment" %
|
||||
@ -3458,26 +3486,6 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
sections_list.append('Genres')
|
||||
log(u"Creating Sections for %s" % ', '.join(sections_list))
|
||||
|
||||
# If exclude_genre is blank, assume user wants all genre tags included
|
||||
if opts.exclude_genre.strip() == '':
|
||||
opts.exclude_genre = '\[^.\]'
|
||||
log(" converting empty exclude_genre to '\[^.\]'")
|
||||
|
||||
if opts.connected_device['name']:
|
||||
if opts.connected_device['serial']:
|
||||
log(" connected_device: '%s' #%s%s " % \
|
||||
(opts.connected_device['name'],
|
||||
opts.connected_device['serial'][0:4],
|
||||
'x' * (len(opts.connected_device['serial']) - 4)))
|
||||
else:
|
||||
log(" connected_device: '%s'" % opts.connected_device['name'])
|
||||
|
||||
for storage in opts.connected_device['storage']:
|
||||
if storage:
|
||||
log(" mount point: %s" % storage)
|
||||
# for book in opts.connected_device['books']:
|
||||
# log("%s: %s" % (book.title, book.path))
|
||||
|
||||
# Display opts
|
||||
keys = opts_dict.keys()
|
||||
keys.sort()
|
||||
@ -3488,6 +3496,8 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
'search_text','sort_by','sort_descriptions_by_author','sync']:
|
||||
log(" %s: %s" % (key, opts_dict[key]))
|
||||
|
||||
self.opts = opts
|
||||
|
||||
# Launch the Catalog builder
|
||||
catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
|
||||
if opts.verbose:
|
||||
|
@ -587,9 +587,6 @@ def command_export(args, dbpath):
|
||||
do_export(get_db(dbpath, opts), ids, dir, opts)
|
||||
return 0
|
||||
|
||||
|
||||
# GR additions
|
||||
|
||||
def catalog_option_parser(args):
|
||||
from calibre.customize.ui import available_catalog_formats, plugin_for_catalog_format
|
||||
from calibre.utils.logging import Log
|
||||
@ -599,10 +596,17 @@ def catalog_option_parser(args):
|
||||
# Fetch the extension-specific CLI options from the plugin
|
||||
plugin = plugin_for_catalog_format(fmt)
|
||||
for option in plugin.cli_options:
|
||||
parser.add_option(option.option,
|
||||
default=option.default,
|
||||
dest=option.dest,
|
||||
help=option.help)
|
||||
if option.action:
|
||||
parser.add_option(option.option,
|
||||
default=option.default,
|
||||
dest=option.dest,
|
||||
action=option.action,
|
||||
help=option.help)
|
||||
else:
|
||||
parser.add_option(option.option,
|
||||
default=option.default,
|
||||
dest=option.dest,
|
||||
help=option.help)
|
||||
|
||||
return plugin
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user