diff --git a/resources/images/news/wired_uk.png b/resources/images/news/wired_uk.png new file mode 100644 index 0000000000..c807e36d1f Binary files /dev/null and b/resources/images/news/wired_uk.png differ diff --git a/resources/kathemerini.recipe b/resources/kathemerini.recipe new file mode 100644 index 0000000000..b68a35d0a8 --- /dev/null +++ b/resources/kathemerini.recipe @@ -0,0 +1,37 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe + +class Kathimerini(BasicNewsRecipe): + title = 'Kathimerini' + __author__ = 'Pan' + description = 'News from Greece' + max_articles_per_feed = 100 + oldest_article = 100 + publisher = 'Kathimerini' + category = 'news, GR' + language = 'el' + no_stylesheets = True + remove_tags_before = dict(name='td',attrs={'class':'news'}) + remove_tags_after = dict(name='td',attrs={'class':'news'}) + remove_attributes = ['width', 'src','header','footer'] + + feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', + 'http://wk.kathimerini.gr/xml_files/politics.xml'), + (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1', + ' http://wk.kathimerini.gr/xml_files/ell.xml'), + (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2', + ' http://wk.kathimerini.gr/xml_files/world.xml'), + (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', + 'http://wk.kathimerini.gr/xml_files/economy_1.xml'), + (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2', + 'http://wk.kathimerini.gr/xml_files/economy_2.xml'), + (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', + 'http://wk.kathimerini.gr/xml_files/economy_3.xml'), + (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', + 'http://wk.kathimerini.gr/xml_files/civ.xml'), + (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2', + 'http://wk.kathimerini.gr/xml_files/st.xml')] + + def print_version(self, url): + return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/') + + diff --git a/resources/recipes/wired_uk.recipe b/resources/recipes/wired_uk.recipe new file mode 100644 index 0000000000..4c682feef2 --- /dev/null +++ b/resources/recipes/wired_uk.recipe @@ -0,0 +1,74 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +www.wired.co.uk +''' + +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Wired_UK(BasicNewsRecipe): + title = 'Wired Magazine - UK edition' + __author__ = 'Darko Miletic' + description = 'Gaming news' + publisher = 'Conde Nast Digital' + category = 'news, games, IT, gadgets' + oldest_article = 32 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif' + language = 'en_GB' + extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} ' + index = 'http://www.wired.co.uk/wired-magazine.aspx' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [dict(name='div', attrs={'class':'article-box'})] + remove_tags = [ + dict(name=['object','embed','iframe','link']) + ,dict(attrs={'class':['opts','comment','stories']}) + ] + remove_tags_after = dict(name='div',attrs={'class':'stories'}) + remove_attributes = ['height','width'] + + + def parse_index(self): + totalfeeds = [] + soup = self.index_to_soup(self.index) + maincontent = soup.find('div',attrs={'class':'main-content'}) + mfeed = [] + if maincontent: + st = maincontent.find(attrs={'class':'most-wired-box'}) + if st: + for itt in st.findAll('a',href=True): + url = 'http://www.wired.co.uk' + itt['href'] + title = self.tag_to_string(itt) + description = '' + date = strftime(self.timefmt) + mfeed.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append(('Articles', mfeed)) + return totalfeeds + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.index) + cover_item = soup.find('span', attrs={'class':'cover'}) + if cover_item: + cover_url = cover_item.img['src'] + return cover_url + + def print_version(self, url): + return url + '?page=all' diff --git a/resources/tanea.recipe b/resources/tanea.recipe new file mode 100644 index 0000000000..4e6cd09c3f --- /dev/null +++ b/resources/tanea.recipe @@ -0,0 +1,30 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe + +class TaNea(BasicNewsRecipe): + title = u'Ta Nea' + __author__ = 'Pan' + oldest_article = 1 + max_articles_per_feed = 100 + no_stylesheets = True + + remove_tags_before = dict(name='div',attrs={'id':'print-body'}) + remove_tags_after = dict(name='div',attrs={'id':'text'}) + + feeds = [ + (u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1', + u'http://www.tanea.gr/default.asp?pid=66&la=1'), + (u'\xce\x9a\xcf\x8c\xcf\x83\xce\xbc\xce\xbf\xcf\x82', + u'http://www.tanea.gr/default.asp?pid=67&la=1'), + (u'\xce\x9f\xce\xb9\xce\xba\xce\xbf\xce\xbd\xce\xbf\xce\xbc\xce\xaf\xce\xb1', + u'http://www.tanea.gr/default.asp?pid=68&la=1'), + (u'\xce\xa0\xce\xbf\xce\xbb\xce\xb9\xcf\x84\xce\xb9\xcf\x83\xce\xbc\xcf\x8c\xcf\x82', + u'http://www.tanea.gr/default.asp?pid=69&la=1'), + (u'\xce\x93\xce\xbd\xcf\x8e\xce\xbc\xce\xb5\xcf\x82', + u'http://www.tanea.gr/default.asp?pid=79&la=1'), + (u'\xce\xa1\xce\xb9\xcf\x80\xce\xad\xcf\x82', + u'http://www.tanea.gr/default.asp?pid=80&la=1'), + (u'\xce\x91\xce\xb9\xcf\x87\xce\xbc\xce\xad\xcf\x82', + u'http://www.tanea.gr/default.asp?pid=81&la=1')] + + def print_version(self, url): + return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96') diff --git a/src/calibre/customize/__init__.py b/src/calibre/customize/__init__.py index 42b379fa0b..84cc3d04ee 100644 --- a/src/calibre/customize/__init__.py +++ b/src/calibre/customize/__init__.py @@ -249,6 +249,7 @@ class CatalogPlugin(Plugin): #: dest = 'catalog_title', #: help = (_('Title of generated catalog. \nDefault:') + " '" + #: '%default' + "'"))] + #: cli_options parsed in library.cli:catalog_option_parser() cli_options = [] @@ -275,9 +276,10 @@ class CatalogPlugin(Plugin): def get_output_fields(self, opts): # Return a list of requested fields, with opts.sort_by first all_fields = set( - ['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating', - 'series_index','series','size','tags','timestamp', - 'title','uuid']) + ['author_sort','authors','comments','cover','formats', + 'id','isbn','pubdate','publisher','rating', + 'series_index','series','size','tags','timestamp', + 'title','uuid']) fields = all_fields if opts.fields != 'all': diff --git a/src/calibre/ebooks/chm/__init__.py b/src/calibre/ebooks/chm/__init__.py new file mode 100644 index 0000000000..d7d77da4b6 --- /dev/null +++ b/src/calibre/ebooks/chm/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +Used for chm input +''' diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py new file mode 100644 index 0000000000..ecb54dffdb --- /dev/null +++ b/src/calibre/ebooks/chm/input.py @@ -0,0 +1,384 @@ +from __future__ import with_statement +''' CHM File decoding support ''' +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ,' \ + ' and Alex Bramley .' + +import os, shutil, uuid +from tempfile import mkdtemp +from mimetypes import guess_type as guess_mimetype + +from BeautifulSoup import BeautifulSoup +from lxml import html +from pychm.chm import CHMFile +from pychm.chmlib import ( + CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL, + chm_enumerate, +) + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre.utils.config import OptionParser +from calibre.ebooks.metadata.toc import TOC +from calibre.utils.localization import get_lang +from calibre.utils.filenames import ascii_filename + + +def match_string(s1, s2_already_lowered): + if s1 is not None and s2_already_lowered is not None: + if s1.lower()==s2_already_lowered: + return True + return False + +def option_parser(): + parser = OptionParser(usage=_('%prog [options] mybook.chm')) + parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output') + parser.add_option('--verbose', default=False, action='store_true', dest='verbose') + parser.add_option("-t", "--title", action="store", type="string", \ + dest="title", help=_("Set the book title")) + parser.add_option('--title-sort', action='store', type='string', default=None, + dest='title_sort', help=_('Set sort key for the title')) + parser.add_option("-a", "--author", action="store", type="string", \ + dest="author", help=_("Set the author")) + parser.add_option('--author-sort', action='store', type='string', default=None, + dest='author_sort', help=_('Set sort key for the author')) + parser.add_option("-c", "--category", action="store", type="string", \ + dest="category", help=_("The category this book belongs" + " to. E.g.: History")) + parser.add_option("--thumbnail", action="store", type="string", \ + dest="thumbnail", help=_("Path to a graphic that will be" + " set as this files' thumbnail")) + parser.add_option("--comment", action="store", type="string", \ + dest="freetext", help=_("Path to a txt file containing a comment.")) + parser.add_option("--get-thumbnail", action="store_true", \ + dest="get_thumbnail", default=False, \ + help=_("Extract thumbnail from LRF file")) + parser.add_option('--publisher', default=None, help=_('Set the publisher')) + parser.add_option('--classification', default=None, help=_('Set the book classification')) + parser.add_option('--creator', default=None, help=_('Set the book creator')) + parser.add_option('--producer', default=None, help=_('Set the book producer')) + parser.add_option('--get-cover', action='store_true', default=False, + help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.')) + parser.add_option('--bookid', action='store', type='string', default=None, + dest='book_id', help=_('Set book ID')) + parser.add_option('--font-delta', action='store', type='int', default=0, + dest='font_delta', help=_('Set font delta')) + return parser + +class CHMError(Exception): + pass + +class CHMReader(CHMFile): + def __init__(self, input, log): + CHMFile.__init__(self) + if not self.LoadCHM(input): + raise CHMError("Unable to open CHM file '%s'"%(input,)) + self.log = log + self._sourcechm = input + self._contents = None + self._playorder = 0 + self._metadata = False + self._extracted = False + + # location of '.hhc' file, which is the CHM TOC. + self.root, ext = os.path.splitext(self.topics.lstrip('/')) + self.hhc_path = self.root + ".hhc" + + + def _parse_toc(self, ul, basedir=os.getcwdu()): + toc = TOC(play_order=self._playorder, base_path=basedir, text='') + self._playorder += 1 + for li in ul('li', recursive=False): + href = li.object('param', {'name': 'Local'})[0]['value'] + if href.count('#'): + href, frag = href.split('#') + else: + frag = None + name = self._deentity(li.object('param', {'name': 'Name'})[0]['value']) + #print "========>", name + toc.add_item(href, frag, name, play_order=self._playorder) + self._playorder += 1 + if li.ul: + child = self._parse_toc(li.ul) + child.parent = toc + toc.append(child) + #print toc + return toc + + + def GetFile(self, path): + # have to have abs paths for ResolveObject, but Contents() deliberately + # makes them relative. So we don't have to worry, re-add the leading /. + # note this path refers to the internal CHM structure + if path[0] != '/': + path = '/' + path + res, ui = self.ResolveObject(path) + if res != CHM_RESOLVE_SUCCESS: + raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename)) + size, data = self.RetrieveObject(ui) + if size == 0: + raise CHMError("'%s' is zero bytes in length!"%(path,)) + return data + + def ExtractFiles(self, output_dir=os.getcwdu()): + for path in self.Contents(): + lpath = os.path.join(output_dir, path) + self._ensure_dir(lpath) + data = self.GetFile(path) + with open(lpath, 'wb') as f: + if guess_mimetype(path)[0] == ('text/html'): + data = self._reformat(data) + f.write(data) + #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir]) + self._extracted = True + + def _reformat(self, data): + try: + soup = BeautifulSoup(data) + except UnicodeEncodeError: + # hit some strange encoding problems... + print "Unable to parse html for cleaning, leaving it :(" + return data + # nuke javascript... + [s.extract() for s in soup('script')] + # remove forward and back nav bars from the top/bottom of each page + # cos they really fuck with the flow of things and generally waste space + # since we can't use [a,b] syntax to select arbitrary items from a list + # we'll have to do this manually... + t = soup('table') + if t: + if (t[0].previousSibling is None + or t[0].previousSibling.previousSibling is None): + t[0].extract() + if (t[-1].nextSibling is None + or t[-1].nextSibling.nextSibling is None): + t[-1].extract() + # for some very odd reason each page's content appears to be in a table + # too. and this table has sub-tables for random asides... grr. + + # some images seem to be broken in some chm's :/ + for img in soup('img'): + try: + # some are supposedly "relative"... lies. + while img['src'].startswith('../'): img['src'] = img['src'][3:] + # some have ";" at the end. + img['src'] = img['src'].split(';')[0] + except KeyError: + # and some don't even have a src= ?! + pass + # now give back some pretty html. + return soup.prettify() + + def Contents(self): + if self._contents is not None: + return self._contents + paths = [] + def get_paths(chm, ui, ctx): + # skip directories + # note this path refers to the internal CHM structure + if ui.path[-1] != '/': + # and make paths relative + paths.append(ui.path.lstrip('/')) + chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None) + self._contents = paths + return self._contents + + def _ensure_dir(self, path): + dir = os.path.dirname(path) + if not os.path.isdir(dir): + os.makedirs(dir) + + def extract_content(self, output_dir=os.getcwdu()): + self.ExtractFiles(output_dir=output_dir) + + +class CHMInput(InputFormatPlugin): + + name = 'CHM Input' + author = 'Kovid Goyal and Alex Bramley' + description = 'Convert CHM files to OEB' + file_types = set(['chm']) + + options = set([ + OptionRecommendation(name='dummy_option', recommended_value=False, + help=_('dummy option until real options are determined.')), + ]) + + def _chmtohtml(self, output_dir, chm_path, no_images, log): + log.debug('Opening CHM file') + rdr = CHMReader(chm_path, log) + log.debug('Extracting CHM to %s' % output_dir) + rdr.extract_content(output_dir) + return rdr.hhc_path + + + def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.metadata.chm import get_metadata_ + + log.debug('Processing CHM...') + tdir = mkdtemp(prefix='chm2oeb_') + from calibre.customize.ui import plugin_for_input_format + html_input = plugin_for_input_format('html') + for opt in html_input.options: + setattr(options, opt.option.name, opt.recommended_value) + options.input_encoding = 'utf-8' + no_images = False #options.no_images + chm_name = stream.name + #chm_data = stream.read() + + #closing stream so CHM can be opened by external library + stream.close() + log.debug('tdir=%s' % tdir) + log.debug('stream.name=%s' % stream.name) + mainname = self._chmtohtml(tdir, chm_name, no_images, log) + mainpath = os.path.join(tdir, mainname) + + metadata = get_metadata_(tdir) + + odi = options.debug_pipeline + options.debug_pipeline = None + # try a custom conversion: + #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata) + # try using html converter: + htmlpath = self._create_html_root(mainpath, log) + oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata) + options.debug_pipeline = odi + #log.debug('DEBUG: Not removing tempdir %s' % tdir) + shutil.rmtree(tdir) + return oeb + + def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi): + # use HTMLInput plugin to generate book + from calibre.ebooks.html.input import HTMLInput + opts.breadth_first = True + htmlinput = HTMLInput(None) + oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi) + return oeb + + + def _create_oebbook(self, hhcpath, basedir, opts, log, mi): + from calibre.ebooks.conversion.plumber import create_oebbook + from calibre.ebooks.oeb.base import DirContainer + oeb = create_oebbook(log, None, opts, self, + encoding=opts.input_encoding, populate=False) + self.oeb = oeb + + metadata = oeb.metadata + if mi.title: + metadata.add('title', mi.title) + if mi.authors: + for a in mi.authors: + metadata.add('creator', a, attrib={'role':'aut'}) + if mi.publisher: + metadata.add('publisher', mi.publisher) + if mi.isbn: + metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'}) + if not metadata.language: + oeb.logger.warn(u'Language not specified') + metadata.add('language', get_lang()) + if not metadata.creator: + oeb.logger.warn('Creator not specified') + metadata.add('creator', _('Unknown')) + if not metadata.title: + oeb.logger.warn('Title not specified') + metadata.add('title', _('Unknown')) + + bookid = str(uuid.uuid4()) + metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') + for ident in metadata.identifier: + if 'id' in ident.attrib: + self.oeb.uid = metadata.identifier[0] + break + + hhcdata = self._read_file(hhcpath) + hhcroot = html.fromstring(hhcdata) + chapters = self._process_nodes(hhcroot) + #print "=============================" + #print "Printing hhcroot" + #print etree.tostring(hhcroot, pretty_print=True) + #print "=============================" + log.debug('Found %d section nodes' % len(chapters)) + + if len(chapters) > 0: + path0 = chapters[0][1] + subpath = os.path.dirname(path0) + htmlpath = os.path.join(basedir, subpath) + + oeb.container = DirContainer(htmlpath, log) + for chapter in chapters: + title = chapter[0] + basename = os.path.basename(chapter[1]) + self._add_item(oeb, title, basename) + + oeb.container = DirContainer(htmlpath, oeb.log) + return oeb + + def _create_html_root(self, hhcpath, log): + hhcdata = self._read_file(hhcpath) + hhcroot = html.fromstring(hhcdata) + chapters = self._process_nodes(hhcroot) + #print "=============================" + #print "Printing hhcroot" + #print etree.tostring(hhcroot, pretty_print=True) + #print "=============================" + log.debug('Found %d section nodes' % len(chapters)) + htmlpath = os.path.splitext(hhcpath)[0] + ".html" + f = open(htmlpath, 'wb') + f.write("\r\n") + + if chapters: + path0 = chapters[0][1] + subpath = os.path.dirname(path0) + + for chapter in chapters: + title = chapter[0] + rsrcname = os.path.basename(chapter[1]) + rsrcpath = os.path.join(subpath, rsrcname) + # title should already be url encoded + url = "
" + title + " \r\n" + f.write(url) + + f.write("") + f.close() + return htmlpath + + + def _read_file(self, name): + f = open(name, 'rb') + data = f.read() + f.close() + return data + + def _visit_node(self, node, chapters, depth): + # check that node is a normal node (not a comment, DOCTYPE, etc.) + # (normal nodes have string tags) + if isinstance(node.tag, basestring): + if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'): + for child in node: + if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'): + chapter_title = child.attrib['value'] + if match_string(child.tag,'param') and match_string(child.attrib['name'],'local'): + chapter_path = child.attrib['value'] + if chapter_title is not None and chapter_path is not None: + chapter = [chapter_title, chapter_path, depth] + chapters.append(chapter) + if node.tag=="UL": + depth = depth + 1 + if node.tag=="/UL": + depth = depth - 1 + + def _process_nodes(self, root): + chapters = [] + depth = 0 + for node in root.iter(): + self._visit_node(node, chapters, depth) + return chapters + + def _add_item(self, oeb, title, path): + bname = os.path.basename(path) + id, href = oeb.manifest.generate(id='html', + href=ascii_filename(bname)) + item = oeb.manifest.add(id, href, 'text/html') + item.html_input_href = bname + oeb.spine.add(item, True) + oeb.toc.add(title, item.href) + diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index e0a3dfc28b..86a78b34eb 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -107,7 +107,13 @@ class MetadataUpdater(object): self.cover_record = self.thumbnail_record = None self.timestamp = None self.pdbrecords = self.get_pdbrecords() - self.drm_block = self.fetchDRMdata() + + self.drm_block = None + if self.encryption_type != 0: + if self.have_exth: + self.drm_block = self.fetchDRMdata() + else: + raise MobiError('Unable to set metadata on DRM file without EXTH header') self.original_exth_records = {} if not have_exth: @@ -117,38 +123,14 @@ class MetadataUpdater(object): self.fetchEXTHFields() def fetchDRMdata(self): - ''' Grab everything between end of EXTH and title ''' - ''' - if False and self.have_exth: - print "incoming file has EXTH header" - # 20:24 = mobiHeaderLength, 16=PDBHeader size, 4 = len('EXTH') - exth_off = int(unpack('>I', self.record0[20:24])[0] + 16) - print "exth_off = 0x%x" % exth_off - exth_len_offset = exth_off + 4 - print "exth_len_offset = 0x%x" % exth_len_offset - exth_len = int(unpack('>I', self.record0[exth_len_offset:exth_len_offset+4])[0]) - print "len(EXTH) = 0x%x" % exth_len - title_offset = int(unpack('>I', self.record0[0x54:0x58])[0]) - print "offset of full title = 0x%x" % title_offset - drm_off = exth_off + exth_len - print "DRM data begins at 0x%x" % drm_off - print "DRM len is 0x%x bytes" % (title_offset - drm_off) - return self.record0[drm_off:drm_off + (title_offset - drm_off)] - else: - ''' - if True: - drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0]) - self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0]) - drm_string = '' - for x in range(self.drm_key_count): - base_addr = drm_offset + (x * self.DRM_KEY_SIZE) - drm_string += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE] - return drm_string - else: - drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0]) - title_offset = int(unpack('>I', self.record0[0x54:0x58])[0]) - drm_blocklen = title_offset - drm_offset - return self.record0[drm_offset:drm_offset + drm_blocklen] + ''' Fetch the DRM keys ''' + drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0]) + self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0]) + drm_keys = '' + for x in range(self.drm_key_count): + base_addr = drm_offset + (x * self.DRM_KEY_SIZE) + drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE] + return drm_keys def fetchEXTHFields(self): stream = self.stream @@ -224,7 +206,8 @@ class MetadataUpdater(object): def create_exth(self, new_title=None, exth=None): # Add an EXTH block to record 0, rewrite the stream - # self.hexdump(self.record0) + if isinstance(new_title, unicode): + new_title = new_title.encode(self.codec, 'replace') # Fetch the existing title title_offset, = unpack('>L', self.record0[0x54:0x58]) @@ -248,12 +231,13 @@ class MetadataUpdater(object): exth = ['EXTH', pack('>II', 12, 0), pad] exth = ''.join(exth) - # Update drm_offset - self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth)) - if True: + # Update drm_offset(0xa8), title_offset(0x54) + if self.encryption_type != 0: + self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth)) self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block)) - # Update title_offset - self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block)) + self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block)) + else: + self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth)) if new_title: self.record0[0x58:0x5c] = pack('>L', len(new_title)) @@ -262,21 +246,15 @@ class MetadataUpdater(object): new_record0 = StringIO() new_record0.write(self.record0[:0x10 + mobi_header_length]) new_record0.write(exth) - new_record0.write(self.drm_block) - if new_title: - #new_record0.write(new_title.encode(self.codec, 'replace')) - new_title = (new_title or _('Unknown')).encode(self.codec, 'replace') - new_record0.write(new_title) - else: - new_record0.write(title_in_file) + if self.encryption_type != 0: + new_record0.write(self.drm_block) + new_record0.write(new_title if new_title else title_in_file) # Pad to a 4-byte boundary trail = len(new_record0.getvalue()) % 4 pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte new_record0.write(pad) - #self.hexdump(new_record0.getvalue()) - # Rebuild the stream, update the pdbrecords pointers self.patchSection(0,new_record0.getvalue()) @@ -386,11 +364,7 @@ class MetadataUpdater(object): recs.append((202, pack('>I', self.thumbnail_rindex))) pop_exth_record(202) - if getattr(self, 'encryption_type', -1) != 0: - prints(u"Setting metadata for '%s' (DRM)" % mi.title) - # raise MobiError('Setting metadata in DRMed MOBI files is not supported.') - - # Restore any original EXTH fields that weren't modified/updated + # Restore any original EXTH fields that weren't updated for id in sorted(self.original_exth_records): recs.append((id, self.original_exth_records[id])) recs = sorted(recs, key=lambda x:(x[0],x[0])) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 8de702a617..0f26e25609 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -1376,7 +1376,7 @@ class MobiWriter(object): self._text_length, self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) uid = random.randint(0, 0xffffffff) - title = str(metadata.title[0]) + title = unicode(metadata.title[0]).encode('utf-8') # The MOBI Header # 0x0 - 0x3 diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index ea306162ae..2032785ae9 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1,4 +1,4 @@ -import datetime, htmlentitydefs, os, re, shutil, time +import datetime, htmlentitydefs, os, re, shutil from collections import namedtuple from copy import deepcopy @@ -21,7 +21,7 @@ FIELDS = ['all', 'author_sort', 'authors', 'comments', class CSV_XML(CatalogPlugin): 'CSV/XML catalog generator' - Option = namedtuple('Option', 'option, default, dest, help') + Option = namedtuple('Option', 'option, default, dest, action, help') name = 'Catalog_CSV_XML' description = 'CSV/XML catalog generator' @@ -34,6 +34,7 @@ class CSV_XML(CatalogPlugin): Option('--fields', default = 'all', dest = 'fields', + action = None, help = _('The fields to output when cataloging books in the ' 'database. Should be a comma-separated list of fields.\n' 'Available fields: %s.\n' @@ -43,6 +44,7 @@ class CSV_XML(CatalogPlugin): Option('--sort-by', default = 'id', dest = 'sort_by', + action = None, help = _('Output field to sort on.\n' 'Available fields: author_sort, id, rating, size, timestamp, title.\n' "Default: '%default'\n" @@ -241,7 +243,7 @@ class CSV_XML(CatalogPlugin): class EPUB_MOBI(CatalogPlugin): 'ePub catalog generator' - Option = namedtuple('Option', 'option, default, dest, help') + Option = namedtuple('Option', 'option, default, dest, action, help') name = 'Catalog_EPUB_MOBI' description = 'EPUB/MOBI catalog generator' @@ -254,12 +256,14 @@ class EPUB_MOBI(CatalogPlugin): cli_options = [Option('--catalog-title', default = 'My Books', dest = 'catalog_title', + action = None, help = _('Title of generated catalog used as title in metadata.\n' "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--debug-pipeline', default=None, dest='debug_pipeline', + action = None, help=_("Save the output from different stages of the conversion " "pipeline to the specified " "directory. Useful if you are unsure at which stage " @@ -269,48 +273,56 @@ class EPUB_MOBI(CatalogPlugin): Option('--exclude-genre', default='\[[\w ]*\]', dest='exclude_genre', + action = None, help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[]'\n" "Applies to: ePub, MOBI output formats")), Option('--exclude-tags', default=('~,'+_('Catalog')), dest='exclude_tags', + action = None, help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n" "--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--generate-titles', - default=True, + default=False, dest='generate_titles', + action = 'store_true', help=_("Include 'Titles' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--generate-recently-added', - default=True, + default=False, dest='generate_recently_added', + action = 'store_true', help=_("Include 'Recently Added' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--note-tag', default='*', dest='note_tag', + action = None, help=_("Tag prefix for user notes, e.g. '*Jeff might enjoy reading this'.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--numbers-as-text', default=False, dest='numbers_as_text', + action = None, help=_("Sort titles with leading numbers as text, e.g.,\n'2001: A Space Odyssey' sorts as \n'Two Thousand One: A Space Odyssey'.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--output-profile', default=None, dest='output_profile', + action = None, help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--read-tag', default='+', dest='read_tag', + action = None, help=_("Tag indicating book has been read.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), ] @@ -3418,13 +3430,12 @@ class EPUB_MOBI(CatalogPlugin): def run(self, path_to_output, opts, db, notification=DummyReporter()): opts.log = log = Log() opts.fmt = self.fmt = path_to_output.rpartition('.')[2] - self.opts = opts # Add local options opts.creator = "calibre" # Finalize output_profile - op = self.opts.output_profile + op = opts.output_profile if op is None: op = 'default' if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower(): @@ -3434,13 +3445,30 @@ class EPUB_MOBI(CatalogPlugin): op = "kindle" opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100 opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60 - self.opts.output_profile = op + opts.output_profile = op opts.basename = "Catalog" opts.cli_environment = not hasattr(opts,'sync') # GwR *** hardwired to sort by author, could be an option if passed in opts opts.sort_descriptions_by_author = True + # If exclude_genre is blank, assume user wants all genre tags included + if opts.exclude_genre.strip() == '': + opts.exclude_genre = '\[^.\]' + log(" converting empty exclude_genre to '\[^.\]'") + + if opts.connected_device['name']: + if opts.connected_device['serial']: + log(" connected_device: '%s' #%s%s " % \ + (opts.connected_device['name'], + opts.connected_device['serial'][0:4], + 'x' * (len(opts.connected_device['serial']) - 4))) + else: + log(" connected_device: '%s'" % opts.connected_device['name']) + for storage in opts.connected_device['storage']: + if storage: + log(" mount point: %s" % storage) + if opts.verbose: opts_dict = vars(opts) log(u"%s(): Generating %s %sin %s environment" % @@ -3458,26 +3486,6 @@ class EPUB_MOBI(CatalogPlugin): sections_list.append('Genres') log(u"Creating Sections for %s" % ', '.join(sections_list)) - # If exclude_genre is blank, assume user wants all genre tags included - if opts.exclude_genre.strip() == '': - opts.exclude_genre = '\[^.\]' - log(" converting empty exclude_genre to '\[^.\]'") - - if opts.connected_device['name']: - if opts.connected_device['serial']: - log(" connected_device: '%s' #%s%s " % \ - (opts.connected_device['name'], - opts.connected_device['serial'][0:4], - 'x' * (len(opts.connected_device['serial']) - 4))) - else: - log(" connected_device: '%s'" % opts.connected_device['name']) - - for storage in opts.connected_device['storage']: - if storage: - log(" mount point: %s" % storage) -# for book in opts.connected_device['books']: -# log("%s: %s" % (book.title, book.path)) - # Display opts keys = opts_dict.keys() keys.sort() @@ -3488,6 +3496,8 @@ class EPUB_MOBI(CatalogPlugin): 'search_text','sort_by','sort_descriptions_by_author','sync']: log(" %s: %s" % (key, opts_dict[key])) + self.opts = opts + # Launch the Catalog builder catalog = self.CatalogBuilder(db, opts, self, report_progress=notification) if opts.verbose: diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index f474590f7d..1c0c6d5808 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -587,9 +587,6 @@ def command_export(args, dbpath): do_export(get_db(dbpath, opts), ids, dir, opts) return 0 - -# GR additions - def catalog_option_parser(args): from calibre.customize.ui import available_catalog_formats, plugin_for_catalog_format from calibre.utils.logging import Log @@ -599,10 +596,17 @@ def catalog_option_parser(args): # Fetch the extension-specific CLI options from the plugin plugin = plugin_for_catalog_format(fmt) for option in plugin.cli_options: - parser.add_option(option.option, - default=option.default, - dest=option.dest, - help=option.help) + if option.action: + parser.add_option(option.option, + default=option.default, + dest=option.dest, + action=option.action, + help=option.help) + else: + parser.add_option(option.option, + default=option.default, + dest=option.dest, + help=option.help) return plugin