0.8.42+, iBooks Section layout revision
commit ffc0b3fcc0
@@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IDGse(BasicNewsRecipe):
title = 'IDG'
__author__ = 'zapt0'
__author__ = 'Stanislav Khromov'
language = 'sv'
description = 'IDG.se'
oldest_article = 1

@@ -15,6 +15,9 @@ class IDGse(BasicNewsRecipe):
feeds = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]
def get_article_url(self, article):
return article.get('guid', None)
def print_version(self,url):
return url + '?articleRenderMode=print&m=print'

@@ -15,7 +15,7 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class Microwave_and_RF(BasicNewsRecipe):
class Microwaves_and_RF(BasicNewsRecipe):
Convert_Grayscale = False # Convert images to gray scale or not

@@ -25,9 +25,9 @@ class Microwave_and_RF(BasicNewsRecipe):
# Add sections that want to be included from the magazine
include_sections = []
title = u'Microwave and RF'
__author__ = 'kiavash'
description = u'Microwave and RF Montly Magazine'
title = u'Microwaves and RF'
__author__ = u'kiavash'
description = u'Microwaves and RF Montly Magazine'
publisher = 'Penton Media, Inc.'
publication_type = 'magazine'
site = 'http://mwrf.com'

@@ -96,9 +96,16 @@ class Microwave_and_RF(BasicNewsRecipe):
def parse_index(self):
# Fetches the main page of Microwave and RF
# Fetches the main page of Microwaves and RF
soup = self.index_to_soup(self.site)
# First page has the ad, Let's find the redirect address.
url = soup.find('span', attrs={'class':'commonCopy'}).find('a').get('href')
if url.startswith('/'):
url = self.site + url
soup = self.index_to_soup(url)
# Searches the site for Issue ID link then returns the href address
# pointing to the latest issue
latest_issue = soup.find('a', attrs={'href':lambda x: x and 'IssueID' in x}).get('href')
recipes/satmagazine.recipe (new file, 155 lines)

@@ -0,0 +1,155 @@
#!/usr/bin/env python
##
## Title: SatMagazine
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
##
## Written: Feb 2012
## Last Edited: Mar 2012
##
# Feb 2012: Initial release
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
'''
satmagazine.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class SatMagazine(BasicNewsRecipe):
title = u'SatMagazine'
description = u'North American Satellite Markets...'
publisher = 'Satnews Publishers'
publication_type = 'magazine'
INDEX = 'http://www.satmagazine.com/cgi-bin/display_edition.cgi'
__author__ = 'kiavash'
language = 'en'
asciiize = True
timeout = 120
simultaneous_downloads = 2
# Flattens all the tables to make it compatible with Nook
conversion_options = {'linearize_tables' : True}
keep_only_tags = [dict(name='span', attrs={'class':'story'})]
no_stylesheets = True
remove_javascript = True
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
.introduction, .first { font-weight: bold; } \
.cross-head { font-weight: bold; font-size: 125%; } \
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
font-size: 80%; font-style: italic; margin: 1px auto; } \
.story-date, .published { font-size: 80%; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { font-size: 175%; font-weight: bold; } \
h2 { font-size: 150%; font-weight: bold; } \
h3 { font-size: 125%; font-weight: bold; } \
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
# Remove the line breaks, href links and float left/right and picture width/height.
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<a.*?>'), lambda h1: ''),
(re.compile(r'</a>'), lambda h2: ''),
(re.compile(r'float:.*?'), lambda h3: ''),
(re.compile(r'width:.*?px'), lambda h4: ''),
(re.compile(r'height:.*?px'), lambda h5: '')
]
def parse_index(self):
article_info = []
feeds = []
soup = self.index_to_soup(self.INDEX)
# Find Cover image
cover = soup.find('img', src=True, alt='Cover Image')
if cover is not None:
self.cover_url = cover['src']
self.log('Found Cover image:', self.cover_url)
soup = soup.find('div', attrs={'id':'middlecontent'}) # main part of the site that has the articles
#Find the Magazine date
ts = soup.find('span', attrs={'class':'master_heading'}) # contains the string with the date
ds = ' '.join(self.tag_to_string(ts).strip().split()[:2])
self.log('Found Current Issue:', ds)
self.timefmt = ' [%s]'%ds
#sections = soup.findAll('span', attrs={'class':'upper_heading'})
articles = soup.findAll('span', attrs={'class':'heading'})
descriptions = soup.findAll('span', attrs={'class':'story'})
title_number = 0
# Goes thru all the articles one by one and sort them out
for article in articles:
title = self.tag_to_string(article)
url = article.find('a').get('href')
self.log('\tFound article:', title, 'at', url)
desc = self.tag_to_string(descriptions[title_number])
#self.log('\t\t', desc)
article_info.append({'title':title, 'url':url, 'description':desc,
'date':self.timefmt})
title_number = title_number + 1
if article_info:
feeds.append((self.title, article_info))
return feeds
def preprocess_html(self, soup):
# Finds all the images
for figure in soup.findAll('img', attrs = {'src' : True}):
# if the image is an ad then remove it.
if (figure['alt'].find('_ad_') >=0) or (figure['alt'].find('_snipe_') >=0):
del figure['src']
del figure['alt']
del figure['border']
del figure['hspace']
del figure['vspace']
del figure['align']
del figure['size']
figure.name = 'font'
continue
figure['style'] = 'display:block' # adds /n before and after the image
# Makes the title standing out
for title in soup.findAll('b'):
title.name = 'h3'
# Removes all unrelated links
for link in soup.findAll('a', attrs = {'href': True}):
link.name = 'font'
del link['href']
del link['target']
return soup
@@ -11,7 +11,7 @@ class Sueddeutsche(BasicNewsRecipe):
title = u'Süddeutsche.de' # 2012-01-26 AGe Correct Title
description = 'News from Germany, Access to online content' # 2012-01-26 AGe
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
publisher = 'Süddeutsche Zeitung' # 2012-01-26 AGe add
publisher = u'Süddeutsche Zeitung' # 2012-01-26 AGe add
category = 'news, politics, Germany' # 2012-01-26 AGe add
timefmt = ' [%a, %d %b %Y]' # 2012-01-26 AGe add %a
oldest_article = 7

@@ -9,10 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
class SueddeutcheZeitung(BasicNewsRecipe):
title = 'Süddeutsche Zeitung'
title = u'Süddeutsche Zeitung'
__author__ = 'Darko Miletic'
description = 'News from Germany. Access to paid content.'
publisher = 'Süddeutsche Zeitung'
publisher = u'Süddeutsche Zeitung'
category = 'news, politics, Germany'
no_stylesheets = True
oldest_article = 2
@@ -502,3 +502,13 @@ tweak_book_prefer = 'epub'
# negative number to increase or decrease the font size.
change_book_details_font_size_by = 0
#: Compile General Program Mode templates to Python
# Compiled general program mode templates are significantly faster than
# interpreted templates. Setting this tweak to True causes calibre to compile
# (in most cases) general program mode templates. Setting it to False causes
# calibre to use the old behavior -- interpreting the templates. Set the tweak
# to False if some compiled templates produce incorrect values.
# Default: compile_gpm_templates = True
# No compile: compile_gpm_templates = False
compile_gpm_templates = True

Binary file not shown.
Before size: 30 KiB, after size: 85 KiB
@@ -81,7 +81,7 @@ class ANDROID(USBMS):
0x4e11 : [0x0100, 0x226, 0x227],
0x4e12 : [0x0100, 0x226, 0x227],
0x4e21 : [0x0100, 0x226, 0x227, 0x231],
0x4e22 : [0x0100, 0x226, 0x227],
0x4e22 : [0x0100, 0x226, 0x227, 0x231],
0xb058 : [0x0222, 0x226, 0x227],
0x0ff9 : [0x0226],
0xdddd : [0x216],

@@ -194,7 +194,8 @@ class ANDROID(USBMS):
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC']
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET']
OSX_MAIN_MEM = 'Android Device Main Memory'

@@ -7,6 +7,22 @@ import os
from calibre.customize.conversion import InputFormatPlugin
def run_mobi_unpack(stream, options, log, accelerators):
from mobiunpack.mobi_unpack import Mobi8Reader
from calibre.customize.ui import plugin_for_input_format
from calibre.ptempfile import PersistentTemporaryDirectory
wdir = PersistentTemporaryDirectory('_unpack_space')
m8r = Mobi8Reader(stream, wdir)
if m8r.isK8():
epub_path = m8r.processMobi8()
epub_input = plugin_for_input_format('epub')
for opt in epub_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = m8r.getCodec()
return epub_input.convert(open(epub_path,'rb'), options,
'epub', log, accelerators)
class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'

@@ -18,23 +34,12 @@ class MOBIInput(InputFormatPlugin):
accelerators):
if os.environ.get('USE_MOBIUNPACK', None) is not None:
from calibre.ptempfile import PersistentTemporaryDirectory
pos = stream.tell()
try:
from mobiunpack.mobi_unpack import Mobi8Reader
from calibre.customize.ui import plugin_for_input_format
wdir = PersistentTemporaryDirectory('_unpack_space')
m8r = Mobi8Reader(stream, wdir)
if m8r.isK8():
epub_path = m8r.processMobi8()
epub_input = plugin_for_input_format('epub')
for opt in epub_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = m8r.getCodec()
return epub_input.convert(open(epub_path,'rb'), options,
'epub', log, accelerators)
return run_mobi_unpack(stream, options, log, accelerators)
except Exception:
log.exception('mobi_unpack code not working')
stream.seek(pos)
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
from lxml import html

@@ -52,7 +57,7 @@ class MOBIInput(InputFormatPlugin):
mr.extract_content(u'.', parse_cache)
if mr.kf8_type is not None:
log('Found KF8 MOBI')
log('Found KF8 MOBI of type %r'%mr.kf8_type)
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
return os.path.abspath(Mobi8Reader(mr, log)())
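Not part of the commit: a minimal sketch of how the USE_MOBIUNPACK switch that gates the new run_mobi_unpack() helper is enabled, based only on the environment check visible in the hunk above.

import os
# Any value enables the experimental mobiunpack path; if run_mobi_unpack()
# raises, the plugin logs 'mobi_unpack code not working', rewinds the stream
# and falls back to calibre's built-in MOBI reader.
os.environ['USE_MOBIUNPACK'] = '1'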
@@ -18,9 +18,6 @@ class MOBIOutput(OutputFormatPlugin):
file_type = 'mobi'
options = set([
OptionRecommendation(name='rescale_images', recommended_value=False,
help=_('Modify images to meet Palm device size limitations.')
),
OptionRecommendation(name='prefer_author_sort',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('When present, use author sort field as author.')

@@ -59,7 +56,16 @@ class MOBIOutput(OutputFormatPlugin):
help=_('Enable sharing of book content via Facebook etc. '
' on the Kindle. WARNING: Using this feature means that '
' the book will not auto sync its last read position '
' on multiple devices. Complain to Amazon.'))
' on multiple devices. Complain to Amazon.')
),
OptionRecommendation(name='mobi_keep_original_images',
recommended_value=False,
help=_('By default calibre converts all images to JPEG format '
'in the output MOBI file. This is for maximum compatibility '
'as some older MOBI viewers have problems with other image '
'formats. This option tells calibre not to do this. '
'Useful if your document contains lots of GIF/PNG images that '
'become very large when converted to JPEG.')),
])
def check_for_periodical(self):

@@ -167,12 +173,7 @@ class MOBIOutput(OutputFormatPlugin):
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.utils.config import tweaks
if tweaks.get('new_mobi_writer', True):
from calibre.ebooks.mobi.writer2.main import MobiWriter
MobiWriter
else:
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.mobi.writer2.main import MobiWriter
writer = MobiWriter(opts,
write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path)
@@ -9,16 +9,19 @@ import copy, traceback
from calibre import prints
from calibre.constants import DEBUG
from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import isoformat, format_date
from calibre.utils.icu import sort_key
from calibre.utils.formatter import TemplateFormatter
# Special sets used to optimize the performance of getting and setting
# attributes on Metadata objects
SIMPLE_GET = frozenset(STANDARD_METADATA_FIELDS - TOP_LEVEL_IDENTIFIERS)
SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
def human_readable(size, precision=2):
""" Convert a size in bytes into megabytes """
return ('%.'+str(precision)+'f'+ 'MB') % ((size/(1024.*1024.)),)

@@ -136,6 +139,8 @@ class Metadata(object):
def __getattribute__(self, field):
_data = object.__getattribute__(self, '_data')
if field in SIMPLE_GET:
return _data.get(field, None)
if field in TOP_LEVEL_IDENTIFIERS:
return _data.get('identifiers').get(field, None)
if field == 'language':

@@ -143,8 +148,6 @@ class Metadata(object):
return _data.get('languages', [])[0]
except:
return NULL_VALUES['language']
if field in STANDARD_METADATA_FIELDS:
return _data.get(field, None)
try:
return object.__getattribute__(self, field)
except AttributeError:

@@ -173,7 +176,11 @@ class Metadata(object):
def __setattr__(self, field, val, extra=None):
_data = object.__getattribute__(self, '_data')
if field in TOP_LEVEL_IDENTIFIERS:
if field in SIMPLE_SET:
if val is None:
val = copy.copy(NULL_VALUES.get(field, None))
_data[field] = val
elif field in TOP_LEVEL_IDENTIFIERS:
field, val = self._clean_identifier(field, val)
identifiers = _data['identifiers']
identifiers.pop(field, None)

@@ -188,10 +195,6 @@ class Metadata(object):
if val and val.lower() != 'und':
langs = [val]
_data['languages'] = langs
elif field in STANDARD_METADATA_FIELDS:
if val is None:
val = copy.copy(NULL_VALUES.get(field, None))
_data[field] = val
elif field in _data['user_metadata'].iterkeys():
_data['user_metadata'][field]['#value#'] = val
_data['user_metadata'][field]['#extra#'] = extra

@@ -404,9 +407,19 @@ class Metadata(object):
'''
if metadata is None:
traceback.print_stack()
else:
for key in metadata:
self.set_user_metadata(key, metadata[key])
return
um = {}
for key, meta in metadata.iteritems():
m = meta.copy()
if '#value#' not in m:
if m['datatype'] == 'text' and m['is_multiple']:
m['#value#'] = []
else:
m['#value#'] = None
um[key] = m
_data = object.__getattribute__(self, '_data')
_data['user_metadata'].update(um)
def set_user_metadata(self, field, metadata):
'''

@@ -420,9 +433,11 @@ class Metadata(object):
if metadata is None:
traceback.print_stack()
return
m = {}
for k in metadata:
m[k] = copy.copy(metadata[k])
m = dict(metadata)
# Copying the elements should not be necessary. The objects referenced
# in the dict should not change. Of course, they can be replaced.
# for k,v in metadata.iteritems():
# m[k] = copy.copy(v)
if '#value#' not in m:
if m['datatype'] == 'text' and m['is_multiple']:
m['#value#'] = []

@@ -543,6 +558,7 @@ class Metadata(object):
# Happens if x is not a text, is_multiple field
# on self
lstags = []
self_tags = []
ot, st = map(frozenset, (lotags, lstags))
for t in st.intersection(ot):
sidx = lstags.index(t)
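Not part of the commit: a small illustrative sketch of the fast paths that the new SIMPLE_GET/SIMPLE_SET frozensets enable in Metadata.__getattribute__ and __setattr__ (assumes a working calibre checkout on the import path).

from calibre.ebooks.metadata.book.base import Metadata

mi = Metadata('A Study in Scarlet')
mi.title = 'A Study in Scarlet (annotated)'  # 'title' is in SIMPLE_SET: stored straight into _data
print(mi.title)                              # 'title' is in SIMPLE_GET: read straight from _data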
@@ -9,16 +9,21 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
'Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
import os, cStringIO
import os, cStringIO, imghdr
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.utils import rescale_image
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.date import now as nowf
def is_image(ss):
if ss is None:
return False
return imghdr.what(None, ss[:200]) is not None
class StreamSlicer(object):
def __init__(self, stream, start=0, stop=None):

@@ -161,11 +166,10 @@ class MetadataUpdater(object):
if id == 106:
self.timestamp = content
elif id == 201:
rindex, = self.cover_rindex, = unpack('>i', content)
if rindex > 0 :
self.cover_record = self.record(rindex + image_base)
rindex, = self.cover_rindex, = unpack('>I', content)
self.cover_record = self.record(rindex + image_base)
elif id == 202:
rindex, = self.thumbnail_rindex, = unpack('>i', content)
rindex, = self.thumbnail_rindex, = unpack('>I', content)
if rindex > 0 :
self.thumbnail_record = self.record(rindex + image_base)

@@ -416,17 +420,17 @@ class MetadataUpdater(object):
except:
pass
else:
if self.cover_record is not None:
if is_image(self.cover_record):
size = len(self.cover_record)
cover = rescale_image(data, size)
if len(cover) <= size:
cover += '\0' * (size - len(cover))
cover += b'\0' * (size - len(cover))
self.cover_record[:] = cover
if self.thumbnail_record is not None:
if is_image(self.thumbnail_record):
size = len(self.thumbnail_record)
thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
if len(thumbnail) <= size:
thumbnail += '\0' * (size - len(thumbnail))
thumbnail += b'\0' * (size - len(thumbnail))
self.thumbnail_record[:] = thumbnail
return

@@ -1148,7 +1148,8 @@ class OPFCreator(Metadata):
self.manifest = Manifest.from_paths(entries)
self.manifest.set_basedir(self.base_path)
def create_manifest_from_files_in(self, files_and_dirs):
def create_manifest_from_files_in(self, files_and_dirs,
exclude=lambda x:False):
entries = []
def dodir(dir):

@@ -1156,7 +1157,7 @@ class OPFCreator(Metadata):
root, files = spec[0], spec[-1]
for name in files:
path = os.path.join(root, name)
if os.path.isfile(path):
if os.path.isfile(path) and not exclude(path):
entries.append((path, None))
for i in files_and_dirs:

@@ -6,3 +6,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class MobiError(Exception):
pass
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
@@ -14,8 +14,9 @@ from lxml import html
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
get_trailing_data, decode_tbs)
get_trailing_data, decode_tbs, read_font_record)
from calibre.utils.magick.draw import identify_data
def format_bytes(byts):

@@ -151,6 +152,10 @@ class EXTHRecord(object):
117 : 'adult',
118 : 'retailprice',
119 : 'retailpricecurrency',
121 : 'KF8 header section index',
125 : 'KF8 resources (images/fonts) count',
129 : 'KF8 cover URI',
131 : 'KF8 unknown count',
201 : 'coveroffset',
202 : 'thumboffset',
203 : 'hasfakecover',

@@ -169,9 +174,10 @@ class EXTHRecord(object):
503 : 'updatedtitle',
}.get(self.type, repr(self.type))
if self.name in ('coveroffset', 'thumboffset', 'hasfakecover',
if (self.name in {'coveroffset', 'thumboffset', 'hasfakecover',
'Creator Major Version', 'Creator Minor Version',
'Creator Build Number', 'Creator Software', 'startreading'):
'Creator Build Number', 'Creator Software', 'startreading'} or
self.type in {121, 125, 131}):
self.data, = struct.unpack(b'>I', self.data)
def __str__(self):

@@ -338,9 +344,9 @@ class MOBIHeader(object): # {{{
ans.append('File version: %d'%self.file_version)
ans.append('Reserved: %r'%self.reserved)
ans.append('Secondary index record: %d (null val: %d)'%(
self.secondary_index_record, 0xffffffff))
self.secondary_index_record, NULL_INDEX))
ans.append('Reserved2: %r'%self.reserved2)
ans.append('First non-book record (null value: %d): %d'%(0xffffffff,
ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
self.first_non_book_record))
ans.append('Full name offset: %d'%self.fullname_offset)
ans.append('Full name length: %d bytes'%self.fullname_length)

@@ -379,7 +385,7 @@ class MOBIHeader(object): # {{{
'(has indexing: %s) (has uncrossable breaks: %s)')%(
bin(self.extra_data_flags), self.has_multibytes,
self.has_indexing_bytes, self.has_uncrossable_breaks ))
ans.append('Primary index record (null value: %d): %d'%(0xffffffff,
ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
self.primary_index_record))
ans = '\n'.join(ans)

@@ -1149,6 +1155,25 @@ class BinaryRecord(object): # {{{
# }}}
class FontRecord(object): # {{{
def __init__(self, idx, record):
self.raw = record.raw
name = '%06d'%idx
self.font = read_font_record(self.raw)
if self.font['err']:
raise ValueError('Failed to read font record: %s Headers: %s'%(
self.font['err'], self.font['headers']))
self.payload = (self.font['font_data'] if self.font['font_data'] else
self.font['raw_data'])
self.name = '%s.%s'%(name, self.font['ext'])
def dump(self, folder):
with open(os.path.join(folder, self.name), 'wb') as f:
f.write(self.payload)
# }}}
class TBSIndexing(object): # {{{
def __init__(self, text_records, indices, doc_type):

@@ -1382,7 +1407,7 @@ class MOBIFile(object): # {{{
self.index_header = self.index_record = None
self.indexing_record_nums = set()
pir = self.mobi_header.primary_index_record
if pir != 0xffffffff:
if pir != NULL_INDEX:
self.index_header = IndexHeader(self.records[pir])
self.cncx = CNCX(self.records[
pir+2:pir+2+self.index_header.num_of_cncx_blocks],

@@ -1393,7 +1418,7 @@ class MOBIFile(object): # {{{
pir+2+self.index_header.num_of_cncx_blocks))
self.secondary_index_record = self.secondary_index_header = None
sir = self.mobi_header.secondary_index_record
if sir != 0xffffffff:
if sir != NULL_INDEX:
self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
self.indexing_record_nums.add(sir)
self.secondary_index_record = SecondaryIndexRecord(

@@ -1404,12 +1429,13 @@ class MOBIFile(object): # {{{
ntr = self.mobi_header.number_of_text_records
fntbr = self.mobi_header.first_non_book_record
fii = self.mobi_header.first_image_index
if fntbr == 0xffffffff:
if fntbr == NULL_INDEX:
fntbr = len(self.records)
self.text_records = [TextRecord(r, self.records[r],
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], []
self.font_records = []
image_index = 0
for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums or i in self.huffman_record_nums:

@@ -1419,13 +1445,15 @@ class MOBIFile(object): # {{{
fmt = None
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
b'AUDI', b'VIDE'}:
b'AUDI', b'VIDE', b'FONT'}:
try:
width, height, fmt = identify_data(r.raw)
except:
pass
if fmt is not None:
self.image_records.append(ImageRecord(image_index, r, fmt))
elif r.raw[:4] == b'FONT':
self.font_records.append(FontRecord(i, r))
else:
self.binary_records.append(BinaryRecord(i, r))

@@ -1465,10 +1493,11 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
of.write(rec.raw)
alltext += rec.raw
of.seek(0)
root = html.fromstring(alltext.decode('utf-8'))
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
include_meta_content_type=True))
if f.mobi_header.file_version < 8:
root = html.fromstring(alltext.decode('utf-8'))
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
include_meta_content_type=True))
if f.index_header is not None:

@@ -1490,7 +1519,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
f.tbs_indexing.dump(ddir)
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
('binary', 'binary_records')]:
('binary', 'binary_records'), ('font', 'font_records')]:
tdir = os.path.join(ddir, tdir)
os.mkdir(tdir)
for rec in getattr(f, attr):
@@ -27,29 +27,30 @@ class EXTHHeader(object): # {{{
self.has_fake_cover = True
self.start_offset = None
left = self.num_items
self.kf8_header = None
while left > 0:
left -= 1
id, size = struct.unpack('>LL', raw[pos:pos + 8])
idx, size = struct.unpack('>LL', raw[pos:pos + 8])
content = raw[pos + 8:pos + size]
pos += size
if id >= 100 and id < 200:
self.process_metadata(id, content, codec)
elif id == 203:
if idx >= 100 and idx < 200:
self.process_metadata(idx, content, codec)
elif idx == 203:
self.has_fake_cover = bool(struct.unpack('>L', content)[0])
elif id == 201:
elif idx == 201:
co, = struct.unpack('>L', content)
if co < NULL_INDEX:
self.cover_offset = co
elif id == 202:
elif idx == 202:
self.thumbnail_offset, = struct.unpack('>L', content)
elif id == 501:
elif idx == 501:
# cdetype
pass
elif id == 502:
elif idx == 502:
# last update time
pass
elif id == 503: # Long title
elif idx == 503: # Long title
# Amazon seems to regard this as the definitive book title
# rather than the title from the PDB header. In fact when
# sending MOBI files through Amazon's email service if the

@@ -60,43 +61,45 @@ class EXTHHeader(object): # {{{
except:
pass
#else:
# print 'unknown record', id, repr(content)
# print 'unknown record', idx, repr(content)
if title:
self.mi.title = replace_entities(title)
def process_metadata(self, id, content, codec):
if id == 100:
if self.mi.authors == [_('Unknown')]:
def process_metadata(self, idx, content, codec):
if idx == 100:
if self.mi.is_null('authors'):
self.mi.authors = []
au = content.decode(codec, 'ignore').strip()
self.mi.authors.append(au)
if re.match(r'\S+?\s*,\s+\S+', au.strip()):
self.mi.author_sort = au.strip()
elif id == 101:
elif idx == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103:
elif idx == 103:
self.mi.comments = content.decode(codec, 'ignore')
elif id == 104:
elif idx == 104:
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
elif id == 105:
elif idx == 105:
if not self.mi.tags:
self.mi.tags = []
self.mi.tags.extend([x.strip() for x in content.decode(codec,
'ignore').split(';')])
self.mi.tags = list(set(self.mi.tags))
elif id == 106:
elif idx == 106:
try:
self.mi.pubdate = parse_date(content, as_utc=False)
except:
pass
elif id == 108:
pass # Producer
elif id == 113:
elif idx == 108:
self.mi.book_producer = content.decode(codec, 'ignore').strip()
elif idx == 113:
pass # ASIN or UUID
elif id == 116:
elif idx == 116:
self.start_offset, = struct.unpack(b'>L', content)
elif idx == 121:
self.kf8_header, = struct.unpack(b'>L', content)
#else:
# print 'unhandled metadata record', id, repr(content)
# print 'unhandled metadata record', idx, repr(content)
# }}}
class BookHeader(object):
@@ -154,6 +154,8 @@ def update_flow_links(mobi8_reader, resource_map, log):
'valid font in %s' % (num, tag))
else:
replacement = '"%s"'%('../'+ href)
if href.endswith('.failed'):
replacement = '"%s"'%('failed-'+href)
tag = font_index_pattern.sub(replacement, tag, 1)
# process links to other css pieces

@@ -105,29 +105,29 @@ class MobiReader(object):
user_encoding, self.log, try_extra_data_fix=try_extra_data_fix)
self.name = self.name.decode(self.book_header.codec, 'replace')
self.kf8_type = None
is_kf8 = self.book_header.mobi_version == 8
if is_kf8:
k8i = getattr(self.book_header.exth, 'kf8_header', None)
if self.book_header.mobi_version == 8:
self.kf8_type = 'standalone'
else: # Check for joint mobi 6 and kf 8 file
KF8_BOUNDARY = b'BOUNDARY'
for i, x in enumerate(self.sections[:-1]):
sec = x[0]
if (len(sec) == len(KF8_BOUNDARY) and sec ==
KF8_BOUNDARY):
try:
self.book_header = BookHeader(self.sections[i+1][0],
self.ident, user_encoding, self.log)
# The following are only correct in the Mobi 6
# header not the Mobi 8 header
for x in ('first_image_index',):
setattr(self.book_header, x, getattr(bh, x))
if hasattr(self.book_header, 'huff_offset'):
self.book_header.huff_offset += i + 1
self.kf8_type = 'joint'
self.kf8_boundary = i
except:
self.book_header = bh
break
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
try:
raw = self.sections[k8i-1][0]
except:
raw = None
if raw == b'BOUNDARY':
try:
self.book_header = BookHeader(self.sections[k8i][0],
self.ident, user_encoding, self.log)
# The following are only correct in the Mobi 6
# header not the Mobi 8 header
for x in ('first_image_index',):
setattr(self.book_header, x, getattr(bh, x))
if hasattr(self.book_header, 'huff_offset'):
self.book_header.huff_offset += k8i
self.kf8_type = 'joint'
self.kf8_boundary = k8i-1
except:
self.book_header = bh
def check_for_drm(self):
if self.book_header.encryption_type != 0:
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, re, os, zlib, imghdr
import struct, re, os, imghdr
from collections import namedtuple
from itertools import repeat

@@ -16,6 +16,7 @@ from calibre.ebooks.mobi.reader.index import read_index
from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
from calibre.ebooks.mobi.utils import read_font_record
Part = namedtuple('Part',
'num type filename start end aid')

@@ -339,23 +340,16 @@ class Mobi8Reader(object):
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
pass # Ignore these records
elif typ == b'FONT':
# fonts only exist in K8 ebooks
# Format:
# bytes 0 - 3: 'FONT'
# bytes 4 - 7: ?? Expanded size in bytes ??
# bytes 8 - 11: ?? number of files ??
# bytes 12 - 15: ?? offset to start of compressed data ?? (typically 0x00000018 = 24)
# bytes 16 - 23: ?? typically all 0x00 ?? Are these compression flags from zlib?
# The compressed data begins with 2 bytes of header and has 4 bytes of checksum at the end
data = data[26:-4]
uncompressed_data = zlib.decompress(data, -15)
hdr = uncompressed_data[0:4]
ext = 'dat'
if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf':
ext = 'ttf'
href = "fonts/%05d.%s" % (fname_idx, ext)
font = read_font_record(data)
href = "fonts/%05d.%s" % (fname_idx, font['ext'])
if font['err']:
self.log.warn('Reading font record %d failed: %s'%(
fname_idx, font['err']))
if font['headers']:
self.log.debug('Font record headers: %s'%font['headers'])
with open(href.replace('/', os.sep), 'wb') as f:
f.write(uncompressed_data)
f.write(font['font_data'] if font['font_data'] else
font['raw_data'])
else:
imgtype = imghdr.what(None, data)
if imgtype is None:

@@ -379,7 +373,11 @@ class Mobi8Reader(object):
opf = OPFCreator(os.getcwdu(), mi)
opf.guide = guide
opf.create_manifest_from_files_in([os.getcwdu()])
def exclude(path):
return os.path.basename(path) == 'debug-raw.html'
opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
opf.create_spine(spine)
opf.set_toc(toc)
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, string
import struct, string, imghdr, zlib
from collections import OrderedDict
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail

@@ -124,12 +124,18 @@ def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
to JPEG. Ensure the resultant image has a byte size less than
maxsizeb.
If dimen is not None, generate a thumbnail of width=dimen, height=dimen
If dimen is not None, generate a thumbnail of
width=dimen, height=dimen or width, height = dimen (depending on the type
of dimen)
Returns the image as a bytestring
'''
if dimen is not None:
data = thumbnail(data, width=dimen, height=dimen,
if hasattr(dimen, '__len__'):
width, height = dimen
else:
width = height = dimen
data = thumbnail(data, width=width, height=height,
compression_quality=90)[-1]
else:
# Replace transparent pixels with white pixels and convert to JPEG

@@ -363,3 +369,127 @@ def to_base(num, base=32):
ans.reverse()
return ''.join(ans)
def mobify_image(data):
'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
what = imghdr.what(None, data)
if what == 'png':
im = Image()
im.load(data)
data = im.export('gif')
return data
def read_zlib_header(header):
header = bytearray(header)
# See sec 2.2 of RFC 1950 for the zlib stream format
# http://www.ietf.org/rfc/rfc1950.txt
if (header[0]*256 + header[1])%31 != 0:
return None, 'Bad zlib header, FCHECK failed'
cmf = header[0] & 0b1111
cinfo = header[0] >> 4
if cmf != 8:
return None, 'Unknown zlib compression method: %d'%cmf
if cinfo > 7:
return None, 'Invalid CINFO field in zlib header: %d'%cinfo
fdict = (header[1]&0b10000)>>5
if fdict != 0:
return None, 'FDICT based zlib compression not supported'
wbits = cinfo + 8
return wbits, None
def read_font_record(data, extent=1040): # {{{
'''
Return the font encoded in the MOBI FONT record represented by data.
The return value in a dict with fields raw_data, font_data, err, ext,
headers.
:param extent: The number of obfuscated bytes. So far I have only
encountered files with 1040 obfuscated bytes. If you encounter an
obfuscated record for which this function fails, try different extent
values (easily automated).
raw_data is the raw data in the font record
font_data is the decoded font_data or None if an error occurred
err is not None if some error occurred
ext is the font type (ttf for TrueType, dat for unknown and failed if an
error occurred)
headers is the list of decoded headers from the font record or None if
decoding failed
'''
# Format:
# bytes 0 - 3: 'FONT'
# bytes 4 - 7: Uncompressed size
# bytes 8 - 11: flags
# bit 1 - zlib compression
# bit 2 - XOR obfuscated
# bytes 12 - 15: offset to start of compressed data
# bytes 16 - 19: length of XOR string
# bytes 19 - 23: offset to start of XOR data
# The zlib compressed data begins with 2 bytes of header and
# has 4 bytes of checksum at the end
ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
'headers':None}
try:
usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
b'>LLLLL', data, 4)
except:
ans['err'] = 'Failed to read font record header fields'
return ans
font_data = data[dstart:]
ans['headers'] = {'usize':usize, 'flags':bin(flags), 'xor_len':xor_len,
'xor_start':xor_start, 'dstart':dstart}
if flags & 0b10:
# De-obfuscate the data
key = bytearray(data[xor_start:xor_start+xor_len])
buf = bytearray(font_data)
extent = len(font_data) if extent is None else extent
extent = min(extent, len(font_data))
for n in xrange(extent):
buf[n] ^= key[n%xor_len] # XOR of buf and key
font_data = bytes(buf)
if flags & 0b1:
# ZLIB compressed data
wbits, err = read_zlib_header(font_data[:2])
if err is not None:
ans['err'] = err
return ans
adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4)
try:
# remove two bytes of zlib header and 4 bytes of trailing checksum
# negative wbits indicates no standard gzip header
font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
except Exception as e:
ans['err'] = 'Failed to zlib decompress font data (%s)'%e
return ans
if len(font_data) != usize:
ans['err'] = 'Uncompressed font size mismatch'
return ans
if False:
# For some reason these almost never match, probably Amazon has a
# buggy Adler32 implementation
sig = (zlib.adler32(font_data) & 0xffffffff)
if sig != adler32:
ans['err'] = ('Adler checksum did not match. Stored: %d '
'Calculated: %d')%(adler32, sig)
return ans
ans['font_data'] = font_data
sig = font_data[:4]
ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'}
else 'otf' if sig == b'OTTO' else 'dat')
return ans
# }}}
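Not part of the commit: a minimal sketch of consuming read_font_record(), modelled on the FontRecord class added to mobi/debug.py earlier in this diff; the file names are hypothetical.

from calibre.ebooks.mobi.utils import read_font_record

with open('font_record.bin', 'rb') as f:      # raw bytes of a single FONT record
    font = read_font_record(f.read())
if font['err']:
    raise ValueError('Failed to read font record: %s' % font['err'])
payload = font['font_data'] if font['font_data'] else font['raw_data']
with open('extracted.%s' % font['ext'], 'wb') as out:  # ext is 'ttf', 'otf' or 'dat'
    out.write(payload)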
File diff suppressed because it is too large
@@ -18,9 +18,10 @@ from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint,
from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
EXTH_CODES = {
'creator': 100,

@@ -46,9 +47,6 @@ EXTH_CODES = {
# Disabled as I dont care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

@@ -181,7 +179,11 @@ class MobiWriter(object):
for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = rescale_image(item.data)
data = item.data
if self.opts.mobi_keep_original_images:
data = mobify_image(data)
else:
data = rescale_image(data)
except:
oeb.logger.warn('Bad image file %r' % item.href)
continue

@@ -36,7 +36,9 @@ class RescaleImages(object):
ext = 'JPEG'
raw = item.data
if not raw: continue
if hasattr(raw, 'xpath') or not raw:
# Probably an svg image
continue
try:
img = Image()
img.load(raw)

@@ -21,7 +21,8 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['prefer_author_sort', 'rescale_images', 'toc_title',
['prefer_author_sort', 'toc_title',
'mobi_keep_original_images',
'mobi_ignore_margins', 'mobi_toc_at_start',
'dont_compress', 'no_inline_toc', 'share_not_sync',
'personal_doc']#, 'mobi_navpoints_only_deepest']
@@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>521</width>
<width>588</width>
<height>342</height>
</rect>
</property>

@@ -14,47 +14,6 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&Title for Table of Contents:</string>
</property>
<property name="buddy">
<cstring>opt_toc_title</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_rescale_images">
<property name="text">
<string>Rescale images for &Palm devices</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &sort for author</string>
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
<item row="8" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">

@@ -125,6 +84,47 @@
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &sort for author</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&Title for Table of Contents:</string>
</property>
<property name="buddy">
<cstring>opt_toc_title</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_keep_original_images">
<property name="text">
<string>Do not convert all images to &JPEG (may result in images not working in older viewers)</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
@@ -5,11 +5,14 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from PyQt4.Qt import (Qt, QApplication, QStyle, QIcon, QDoubleSpinBox,
QVariant, QSpinBox, QStyledItemDelegate, QComboBox, QTextDocument,
QAbstractTextDocumentLayout, QFont, QFontInfo)
from calibre.gui2 import UNDEFINED_QDATETIME, error_dialog, rating_font
from calibre.constants import iswindows
from calibre.gui2.widgets import EnLineEdit
from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
from calibre.utils.date import now, format_date, qt_to_dt

@@ -27,7 +30,10 @@ class RatingDelegate(QStyledItemDelegate): # {{{
QStyledItemDelegate.__init__(self, *args, **kwargs)
self.rf = QFont(rating_font())
self.em = Qt.ElideMiddle
self.rf.setPointSize(QFontInfo(QApplication.font()).pointSize())
delta = 0
if iswindows and sys.getwindowsversion().major >= 6:
delta = 2
self.rf.setPointSize(QFontInfo(QApplication.font()).pointSize()+delta)
def createEditor(self, parent, option, index):
sb = QStyledItemDelegate.createEditor(self, parent, option, index)

@@ -170,7 +170,7 @@
<item row="8" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remember_window_size">
<property name="text">
<string>Remember last used &window size</string>
<string>Remember last used &window size and layout</string>
</property>
</widget>
</item>

@@ -117,7 +117,10 @@ class Rule(object): # {{{
'lt': ('1', '', ''),
'gt': ('', '', '1')
}[action]
return "cmp(raw_field('%s'), %s, '%s', '%s', '%s')" % (col, val, lt, eq, gt)
if col == 'size':
return "cmp(booksize(), %s, '%s', '%s', '%s')" % (val, lt, eq, gt)
else:
return "cmp(raw_field('%s'), %s, '%s', '%s', '%s')" % (col, val, lt, eq, gt)
def rating_condition(self, col, action, val):
lt, eq, gt = {
@ -227,6 +227,25 @@ class CustomColumns(object):
|
||||
return self.conn.get('''SELECT extra FROM %s
|
||||
WHERE book=?'''%lt, (idx,), all=False)
|
||||
|
||||
def get_custom_and_extra(self, idx, label=None, num=None, index_is_id=False):
|
||||
if label is not None:
|
||||
data = self.custom_column_label_map[label]
|
||||
if num is not None:
|
||||
data = self.custom_column_num_map[num]
|
||||
idx = idx if index_is_id else self.id(idx)
|
||||
row = self.data._data[idx]
|
||||
ans = row[self.FIELD_MAP[data['num']]]
|
||||
if data['is_multiple'] and data['datatype'] == 'text':
|
||||
ans = ans.split(data['multiple_seps']['cache_to_list']) if ans else []
|
||||
if data['display'].get('sort_alpha', False):
|
||||
ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
|
||||
if data['datatype'] != 'series':
|
||||
return (ans, None)
|
||||
ign,lt = self.custom_table_names(data['num'])
|
||||
extra = self.conn.get('''SELECT extra FROM %s
|
||||
WHERE book=?'''%lt, (idx,), all=False)
|
||||
return (ans, extra)
|
||||
|
||||
# convenience methods for tag editing
|
||||
def get_custom_items_with_ids(self, label=None, num=None):
|
||||
if label is not None:
|
||||
|
@ -910,7 +910,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
Convenience method to return metadata as a :class:`Metadata` object.
|
||||
Note that the list of formats is not verified.
|
||||
'''
|
||||
row = self.data._data[idx] if index_is_id else self.data[idx]
|
||||
idx = idx if index_is_id else self.id(idx)
|
||||
try:
|
||||
row = self.data._data[idx]
|
||||
except:
|
||||
row = None
|
||||
|
||||
if row is None:
|
||||
raise ValueError('No book with id: %d'%idx)
|
||||
|
||||
fm = self.FIELD_MAP
|
||||
mi = Metadata(None, template_cache=self.formatter_template_cache)
|
||||
|
||||
@ -948,14 +956,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
mi.book_size = row[fm['size']]
|
||||
mi.ondevice_col= row[fm['ondevice']]
|
||||
mi.last_modified = row[fm['last_modified']]
|
||||
id = idx if index_is_id else self.id(idx)
|
||||
formats = row[fm['formats']]
|
||||
mi.format_metadata = {}
|
||||
if not formats:
|
||||
good_formats = None
|
||||
else:
|
||||
formats = sorted(formats.split(','))
|
||||
mi.format_metadata = FormatMetadata(self, id, formats)
|
||||
mi.format_metadata = FormatMetadata(self, idx, formats)
|
||||
good_formats = FormatsList(formats, mi.format_metadata)
|
||||
mi.formats = good_formats
|
||||
tags = row[fm['tags']]
|
||||
@ -968,19 +975,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
if mi.series:
|
||||
mi.series_index = row[fm['series_index']]
|
||||
mi.rating = row[fm['rating']]
|
||||
mi.set_identifiers(self.get_identifiers(id, index_is_id=True))
|
||||
mi.application_id = id
|
||||
mi.id = id
|
||||
mi.set_identifiers(self.get_identifiers(idx, index_is_id=True))
|
||||
mi.application_id = idx
|
||||
mi.id = idx
|
||||
|
||||
mi.set_all_user_metadata(self.field_metadata.custom_field_metadata())
|
||||
for key, meta in self.field_metadata.custom_iteritems():
|
||||
mi.set_user_metadata(key, meta)
|
||||
if meta['datatype'] == 'composite':
|
||||
mi.set(key, val=row[meta['rec_index']])
|
||||
else:
|
||||
mi.set(key, val=self.get_custom(idx, label=meta['label'],
|
||||
index_is_id=index_is_id),
|
||||
extra=self.get_custom_extra(idx, label=meta['label'],
|
||||
index_is_id=index_is_id))
|
||||
val, extra = self.get_custom_and_extra(idx, label=meta['label'],
|
||||
index_is_id=True)
|
||||
mi.set(key, val=val, extra=extra)
|
||||
|
||||
user_cats = self.prefs['user_categories']
|
||||
user_cat_vals = {}
|
||||
@ -999,12 +1005,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
|
||||
if get_cover:
|
||||
if cover_as_data:
|
||||
cdata = self.cover(id, index_is_id=True)
|
||||
cdata = self.cover(idx, index_is_id=True)
|
||||
if cdata:
|
||||
mi.cover_data = ('jpeg', cdata)
|
||||
else:
|
||||
mi.cover = self.cover(id, index_is_id=True, as_path=True)
|
||||
mi.has_cover = _('Yes') if self.has_cover(id) else ''
|
||||
mi.cover = self.cover(idx, index_is_id=True, as_path=True)
|
||||
mi.has_cover = _('Yes') if self.has_cover(idx) else ''
|
||||
return mi
|
||||
|
||||
def has_book(self, mi):
|
||||
|
@ -388,6 +388,7 @@ class FieldMetadata(dict):
def __init__(self):
self._field_metadata = copy.deepcopy(self._field_metadata_prototype)
self._tb_cats = OrderedDict()
self._tb_custom_fields = {}
self._search_term_map = {}
self.custom_label_to_key_map = {}
for k,v in self._field_metadata:
@ -477,10 +478,8 @@ class FieldMetadata(dict):
yield (key, self._tb_cats[key])

def custom_iteritems(self):
for key in self._tb_cats:
fm = self._tb_cats[key]
if fm['is_custom']:
yield (key, self._tb_cats[key])
for key, meta in self._tb_custom_fields.iteritems():
yield (key, meta)

def items(self):
return list(self.iteritems())
@ -516,6 +515,8 @@ class FieldMetadata(dict):
return l

def custom_field_metadata(self, include_composites=True):
if include_composites:
return self._tb_custom_fields
l = {}
for k in self.custom_field_keys(include_composites):
l[k] = self._tb_cats[k]
@ -537,6 +538,7 @@ class FieldMetadata(dict):
'is_custom':True, 'is_category':is_category,
'link_column':'value','category_sort':'value',
'is_csp' : is_csp, 'is_editable': is_editable,}
self._tb_custom_fields[key] = self._tb_cats[key]
self._add_search_terms_to_map(key, [key])
self.custom_label_to_key_map[label] = key
if datatype == 'series':

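A rough usage sketch of the new ``_tb_custom_fields`` cache (not part of this commit; the helper name and the ``field_metadata`` argument are illustrative)::

    # custom_iteritems() now yields (key, meta) pairs straight from the
    # _tb_custom_fields dict that the hunk above keeps up to date.
    def editable_custom_labels(field_metadata):
        labels = []
        for key, meta in field_metadata.custom_iteritems():
            # composite columns are computed from a template, skip them
            if meta['datatype'] != 'composite' and meta['is_editable']:
                labels.append(meta['label'])
        return labels
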
@ -29,6 +29,7 @@ It can convert every input format in the following list, to every output format.
PRC is a generic format; |app| supports PRC files with TextRead and MOBIBook headers.
PDB is also a generic format. |app| supports eReader, Plucker, PML and zTxt PDB files.
DJVU support is only for converting DJVU files that contain embedded text. These are typically generated by OCR software.
MOBI books can be of two types, Mobi6 and KF8. |app| currently fully supports Mobi6 and supports conversion from, but not to, KF8.

.. _best-source-formats:

@ -57,7 +57,7 @@ For example, assume you want to use the template::

{series} - {series_index} - {title}

If the book has no series, the answer will be ``- - title``. Many people would rather the result be simply ``title``, without the hyphens. To do this, use the extended syntax ``{field:|prefix_text|suffix_text}``. When you use this syntax, if field has the value SERIES then the result will be ``prefix_textSERIESsuffix_text``. If field has no value, then the result will be the empty string (nothing); the prefix and suffix are ignored. The prefix and suffix can contain blanks.
If the book has no series, the answer will be ``- - title``. Many people would rather the result be simply ``title``, without the hyphens. To do this, use the extended syntax ``{field:|prefix_text|suffix_text}``. When you use this syntax, if field has the value SERIES then the result will be ``prefix_textSERIESsuffix_text``. If field has no value, then the result will be the empty string (nothing); the prefix and suffix are ignored. The prefix and suffix can contain blanks. **Do not use subtemplates (`{ ... }`) or functions (see below) as the prefix or the suffix.**

Using this syntax, we can solve the above series problem with the template::

@ -65,7 +65,7 @@ Using this syntax, we can solve the above series problem with the template::

The hyphens will be included only if the book has a series index, which it will have only if it has a series.

Notes: you must include the : character if you want to use a prefix or a suffix. You must either use no \| characters or both of them; using one, as in ``{field:| - }``, is not allowed. It is OK not to provide any text for one side or the other, such as in ``{series:|| - }``. Using ``{title:||}`` is the same as using ``{title}``.
Notes: you must include the : character if you want to use a prefix or a suffix. You must either use no \| characters or both of them; using one, as in ``{field:| - }``, is not allowed. It is OK not to provide any text for one side or the other, such as in ``{series:|| - }``. Using ``{title:||}`` is the same as using ``{title}``.

Second: formatting. Suppose you wanted to ensure that the series_index is always formatted as three digits with leading zeros. This would do the trick::

@ -112,7 +112,7 @@ Functions are always applied before format specifications. See further down for

The syntax for using functions is ``{field:function(arguments)}``, or ``{field:function(arguments)|prefix|suffix}``. Arguments are separated by commas. Commas inside arguments must be preceded by a backslash ( '\\' ). The last (or only) argument cannot contain a closing parenthesis ( ')' ). Functions return the value of the field used in the template, suitably modified.

If you have programming experience, please note that the syntax in this mode (single function) is not what you might expect. Strings are not quoted. Spaces are significant. All arguments must be constants; there is no sub-evaluation. **Do not use subtemplates (`{ ... }`) as function arguments.** Instead, use :ref:`template program mode <template_mode>` and :ref:`general program mode <general_mode>`.
Important: If you have programming experience, please note that the syntax in this mode (single function) is not what you might expect. Strings are not quoted. Spaces are significant. All arguments must be constants; there is no sub-evaluation. **Do not use subtemplates (`{ ... }`) as function arguments.** Instead, use :ref:`template program mode <template_mode>` and :ref:`general program mode <general_mode>`.

Many functions use regular expressions. In all cases, regular expression matching is case-insensitive.

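The prefix/suffix rule described above reduces to a simple conditional; here is a plain-Python sketch of that behaviour (illustrative only, the helper name is hypothetical)::

    # apply_prefix_suffix('', ' - ', ' - ')    ->  ''          (empty field: both dropped)
    # apply_prefix_suffix('LotR', '', ' - ')   ->  'LotR - '
    def apply_prefix_suffix(value, prefix, suffix):
        if not value:
            # the field has no value: prefix and suffix are ignored entirely
            return ''
        return prefix + value + suffix
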
@ -245,6 +245,7 @@ The following functions are available in addition to those described in single-f
* ``current_library_name()`` -- return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
* ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
* ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables.
* ``field(name)`` -- returns the metadata field named by ``name``.
* ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want.
* ``format_date(val, format_string)`` -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are::
@ -269,7 +270,19 @@ The following functions are available in addition to those described in single-f
AP : use a 12-hour clock instead of a 24-hour clock, with 'AP' replaced by the localized string for AM or PM.
iso : the date with time and timezone. Must be the only format present.

* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables.
* ``finish_formatting(val, fmt, prefix, suffix)`` -- apply the format, prefix, and suffix to a value in the same way as done in a template like ``{series_index:05.2f| - |- }``. This function is provided to ease conversion of complex single-function- or template-program-mode templates to :ref:`general program mode <general_mode>` (see below) to take advantage of GPM template compilation. For example, the following program produces the same output as the above template::

program: finish_formatting(field("series_index"), "05.2f", " - ", " - ")

Another example: for the template ``{series:re(([^\s])[^\s]+(\s|$),\1)}{series_index:0>2s| - | - }{title}`` use::

program:
strcat(
re(field('series'), '([^\s])[^\s]+(\s|$)', '\1'),
finish_formatting(field('series_index'), '0>2s', ' - ', ' - '),
field('title')
)

* ``formats_modtimes(date_format)`` -- return a comma-separated list of colon-separated items representing modification times for the formats of a book. The date_format parameter specifies how the date is to be formatted. See the date_format function for details. You can use the select function to get the mod time for a specific format. Note that format names are always uppercase, as in EPUB.
* ``formats_sizes()`` -- return a comma-separated list of colon-separated items representing sizes in bytes of the formats of a book. You can use the select function to get the size for a specific format. Note that format names are always uppercase, as in EPUB.
* ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string

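In Python terms, the new ``finish_formatting`` function behaves roughly like the sketch below; it mirrors the ``BuiltinFinishFormatting.evaluate`` method added later in this commit, with Python's built-in ``format`` used as a stand-in for calibre's internal format handling (an assumption made for illustration)::

    # finish_formatting('7', '0>2s', ' - ', ' - ')  ->  ' - 07 - '
    def finish_formatting(val, fmt, prefix, suffix):
        if not val:
            return val   # empty values pass through; no prefix/suffix is added
        return prefix + format(val, fmt) + suffix
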
@ -312,7 +325,7 @@ Using general program mode

For more complicated template programs, it is sometimes easier to avoid template syntax (all the `{` and `}` characters), instead writing a more classical-looking program. You can do this in |app| by beginning the template with `program:`. In this case, no template processing is done. The special variable `$` is not set. It is up to your program to produce the correct results.

One advantage of `program:` mode is that the brackets are no longer special. For example, it is not necessary to use `[[` and `]]` when using the `template()` function.
One advantage of `program:` mode is that the brackets are no longer special. For example, it is not necessary to use `[[` and `]]` when using the `template()` function. Another advantage is that program mode templates are compiled to Python and can run much faster than templates in the other two modes. The speed improvement depends on the complexity of the templates; the more complicated the template, the greater the improvement. Compilation is turned on or off using the tweak ``compile_gpm_templates`` (Compile General Program Mode templates to Python). The main reason to turn off compilation is if a compiled template does not work, in which case please file a bug report.

The following example is a `program:` mode implementation of a recipe on the MobileRead forum: "Put series into the title, using either initials or a shortened form. Strip leading articles from the series name (any)." For example, for the book The Two Towers in the Lord of the Rings series, the recipe gives `LotR [02] The Two Towers`. Using standard templates, the recipe requires three custom columns and a plugboard, as explained in the following:

@ -11,7 +11,8 @@ __docformat__ = 'restructuredtext en'
import re, string, traceback

from calibre.constants import DEBUG
from calibre.utils.formatter_functions import formatter_functions
from calibre.utils.formatter_functions import formatter_functions, compile_user_function
from calibre.utils.config import tweaks

class _Parser(object):
LEX_OP = 1
@ -172,6 +173,138 @@ class _Parser(object):
self.error(_('expression is not function or constant'))

class _CompileParser(_Parser):
def __init__(self, val, prog, parent, compile_text):
self.lex_pos = 0
self.prog = prog[0]
self.prog_len = len(self.prog)
if prog[1] != '':
self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
self.parent = parent
parent.locals = {'$':val}
self.parent_kwargs = parent.kwargs
self.parent_book = parent.book
self.parent_locals = parent.locals
self.compile_text = compile_text

def program(self):
if self.compile_text:
t = self.compile_text
self.compile_text = '\n'
self.max_level = 0
val = self.statement()
if not self.token_is_eof():
self.error(_('syntax error - program ends before EOF'))
if self.compile_text:
t += "\targs=[[]"
for i in range(0, self.max_level):
t += ", None"
t += ']'
self.compile_text = t + self.compile_text + "\treturn args[0][0]\n"
return val

def statement(self, level=0):
while True:
val = self.expr(level)
if self.token_is_eof():
return val
if not self.token_op_is_a_semicolon():
return val
if self.compile_text:
self.compile_text += "\targs[%d] = list()\n"%(level,)
self.consume()
if self.token_is_eof():
return val

def expr(self, level):
if self.compile_text:
self.max_level = max(level, self.max_level)

if self.token_is_id():
funcs = formatter_functions().get_functions()
# We have an identifier. Determine if it is a function
id = self.token()
if not self.token_op_is_a_lparen():
if self.token_op_is_a_equals():
# classic assignment statement
self.consume()
cls = funcs['assign']
if self.compile_text:
self.compile_text += '\targs[%d] = list()\n'%(level+1,)
val = cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.parent_locals, id, self.expr(level+1))
if self.compile_text:
self.compile_text += "\tlocals['%s'] = args[%d][0]\n"%(id, level+1)
self.compile_text += "\targs[%d].append(args[%d][0])\n"%(level, level+1)
return val
val = self.parent.locals.get(id, None)
if val is None:
self.error(_('Unknown identifier ') + id)
if self.compile_text:
self.compile_text += "\targs[%d].append(locals.get('%s'))\n"%(level, id)
return val
# We have a function.
# Check if it is a known one. We do this here so error reporting is
# better, as it can identify the tokens near the problem.
if id not in funcs:
self.error(_('unknown function {0}').format(id))

# Eat the paren
self.consume()
args = list()
if self.compile_text:
self.compile_text += '\targs[%d] = list()\n'%(level+1, )
if id == 'field':
val = self.expr(level+1)
val = self.parent.get_value(val, [], self.parent_kwargs)
if self.compile_text:
self.compile_text += "\targs[%d].append(formatter.get_value(args[%d][0], [], kwargs))\n"%(level, level+1)
if self.token() != ')':
self.error(_('missing closing parenthesis'))
return val
while not self.token_op_is_a_rparen():
if id == 'assign' and len(args) == 0:
# Must handle the lvalue semantics of the assign function.
# The first argument is the name of the destination, not
# the value.
if not self.token_is_id():
self.error('assign requires the first parameter be an id')
t = self.token()
args.append(t)
if self.compile_text:
self.compile_text += "\targs[%d].append('%s')\n"%(level+1, t)
else:
# evaluate the argument (recursive call)
args.append(self.statement(level=level+1))
if not self.token_op_is_a_comma():
break
self.consume()
if self.token() != ')':
self.error(_('missing closing parenthesis'))

# Evaluate the function
cls = funcs[id]
if cls.arg_count != -1 and len(args) != cls.arg_count:
self.error('incorrect number of arguments for function {}'.format(id))
if self.compile_text:
self.compile_text += (
"\targs[%d].append(self.__funcs__['%s']"
".evaluate(formatter, kwargs, book, locals, *args[%d]))\n")%(level, id, level+1)
return cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.parent_locals, *args)
elif self.token_is_constant():
# String or number
v = self.token()
if self.compile_text:
tv = v.replace("\\", "\\\\")
tv = tv.replace("'", "\\'")
self.compile_text += "\targs[%d].append('%s')\n"%(level, tv)
return v
else:
self.error(_('expression is not function or constant'))

compile_counter = 0

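To make the compile path above easier to follow, this is roughly the Python text that ``_CompileParser`` accumulates in ``compile_text`` for the one-line program ``program: field('title')``. It is a reconstruction for illustration, not output copied from calibre; the two header lines come from ``TemplateFormatter`` below, and the whole text is then handed to ``compile_user_function``, which wraps it in a ``FormatterUserFunction`` subclass::

    __funcs__ = formatter_functions().get_functions()
    def evaluate(self, formatter, kwargs, book, locals):
        args=[[], None]
        args[1] = list()
        args[1].append('title')
        args[0].append(formatter.get_value(args[1][0], [], kwargs))
        return args[0][0]
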
class TemplateFormatter(string.Formatter):
'''
Provides a format function that substitutes '' for any missing value
@ -249,15 +382,36 @@ class TemplateFormatter(string.Formatter):
# keep a cache of the lex'ed program under the theory that re-lexing
# is much more expensive than the cache lookup. This is certainly true
# for more than a few tokens, but it isn't clear for simple programs.
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog:
if tweaks['compile_gpm_templates']:
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if lprog:
return lprog.evaluate(self, self.kwargs, self.book, self.locals)
lprog = self.lex_scanner.scan(prog)
self.template_cache[column_name] = lprog
compile_text = ('__funcs__ = formatter_functions().get_functions()\n'
'def evaluate(self, formatter, kwargs, book, locals):\n'
)
else:
lprog = self.lex_scanner.scan(prog)
compile_text = None
parser = _CompileParser(val, lprog, self, compile_text)
val = parser.program()
if parser.compile_text:
global compile_counter
compile_counter += 1
f = compile_user_function("__A" + str(compile_counter), 'doc', -1, parser.compile_text)
self.template_cache[column_name] = f
else:
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog:
lprog = self.lex_scanner.scan(prog)
self.template_cache[column_name] = lprog
else:
lprog = self.lex_scanner.scan(prog)
parser = _Parser(val, lprog, self)
return parser.program()
parser = _Parser(val, lprog, self)
val = parser.program()
return val

################## Override parent classes methods #####################

@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
import inspect, re, traceback

from calibre import human_readable
from calibre.constants import DEBUG
from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, strcmp, sort_key
from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE
@ -1118,12 +1119,28 @@ class BuiltinCurrentLibraryName(BuiltinFormatterFunction):
from calibre.library import current_library_name
return current_library_name()

class BuiltinFinishFormatting(BuiltinFormatterFunction):
name = 'finish_formatting'
arg_count = 4
category = 'Formatting values'
__doc__ = doc = _('finish_formatting(val, fmt, prefix, suffix) -- apply the '
'format, prefix, and suffix to a value in the same way as '
'done in a template like {series_index:05.2f| - |- }. For '
'example, the following program produces the same output '
'as the above template: '
'program: finish_formatting(field("series_index"), "05.2f", " - ", " - ")')

def evaluate(self, formatter, kwargs, mi, locals_, val, fmt, prefix, suffix):
if not val:
return val
return prefix + formatter._do_format(val, fmt) + suffix

_formatter_builtins = [
BuiltinAdd(), BuiltinAnd(), BuiltinAssign(), BuiltinBooksize(),
BuiltinCapitalize(), BuiltinCmp(), BuiltinContains(), BuiltinCount(),
BuiltinCurrentLibraryName(),
BuiltinDaysBetween(), BuiltinDivide(), BuiltinEval(),
BuiltinFirstNonEmpty(), BuiltinField(), BuiltinFormatDate(),
BuiltinDaysBetween(), BuiltinDivide(), BuiltinEval(), BuiltinFirstNonEmpty(),
BuiltinField(), BuiltinFinishFormatting(), BuiltinFormatDate(),
BuiltinFormatNumber(), BuiltinFormatsModtimes(), BuiltinFormatsSizes(),
BuiltinHasCover(), BuiltinHumanReadable(), BuiltinIdentifierInList(),
BuiltinIfempty(), BuiltinLanguageCodes(), BuiltinLanguageStrings(),
@ -1156,11 +1173,14 @@ def compile_user_function(name, doc, arg_count, eval_func):
for line in eval_func.splitlines()])
prog = '''
from calibre.utils.formatter_functions import FormatterUserFunction
from calibre.utils.formatter_functions import formatter_functions
class UserFunction(FormatterUserFunction):
''' + func
locals = {}
exec prog in locals
cls = locals['UserFunction'](name, doc, arg_count, eval_func)
locals_ = {}
if DEBUG:
print prog
exec prog in locals_
cls = locals_['UserFunction'](name, doc, arg_count, eval_func)
return cls

def load_user_template_functions(funcs):

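Putting the pieces together, the flow is: the parser emits the source text sketched earlier, ``compile_user_function`` wraps it in a ``FormatterUserFunction`` subclass, and the resulting instance is cached so later evaluations skip lexing and parsing entirely. A simplified sketch; the names ``formatter``, ``generated_text`` and ``column_name`` are illustrative, not taken from the diff::

    f = compile_user_function("__A1", 'doc', -1, generated_text)
    formatter.template_cache[column_name] = f
    # subsequent renders of the same column short-circuit to:
    result = f.evaluate(formatter, formatter.kwargs, formatter.book, formatter.locals)
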
@ -14,7 +14,7 @@ from PIL import Image
from cStringIO import StringIO

from calibre import browser, relpath, unicode_path
from calibre.constants import filesystem_encoding
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
@ -213,6 +213,8 @@ class RecursiveFetcher(object):
is_local = 5
if is_local > 0:
url = url[is_local:]
if iswindows and url.startswith('/'):
url = url[1:]
with open(url, 'rb') as f:
data = response(f.read())
data.newurl = 'file:'+url # This is what mechanize does for
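The new ``iswindows`` check above exists because a local ``file:///C:/...`` URL keeps a spurious leading slash in front of the drive letter once the scheme prefix has been stripped. A minimal sketch of the effect, with an assumed example path::

    from calibre.constants import iswindows

    url = '/C:/temp/page.html'        # hypothetical leftover after stripping the scheme
    if iswindows and url.startswith('/'):
        url = url[1:]                 # -> 'C:/temp/page.html'
    with open(url, 'rb') as f:
        raw = f.read()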