Sync to ldolse heuristics branch.

This commit is contained in:
John Schember 2011-01-16 08:54:31 -05:00
commit 51e7a555e1
28 changed files with 450 additions and 249 deletions

View File

@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1295081935(BasicNewsRecipe):
    """Recipe for the Mail & Guardian (South Africa) RSS feeds.

    Articles are listed from the RSS feeds in ``feeds`` and fetched through
    the site's print-format URL (see ``print_version``).
    """

    # Recipe metadata and download options read by BasicNewsRecipe.
    title = u'Mail & Guardian ZA News'
    __author__ = '77ja65'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 30
    no_stylesheets = True
    masthead_url = 'http://c1608832.cdn.cloudfiles.rackspacecloud.com/mg_logo.gif'
    remove_tags_after = [dict(id='content')]

    # (section title, feed URL) pairs.
    feeds = [
        (u'National News', u'http://www.mg.co.za/rss/national'),
        (u'Top Stories', u'http://www.mg.co.za/rss'),
        (u'Africa News', u'http://www.mg.co.za/rss/africa'),
        (u'Sport', u'http://www.mg.co.za/rss/sport'),
        (u'Business', u'http://www.mg.co.za/rss/business'),
        (u'And In Other News', u'http://www.mg.co.za/rss/and-in-other-news'),
        (u'World News', u'http://www.mg.co.za/rss/world'),
    ]

    def print_version(self, url):
        """Return the print-format URL for an article URL.

        The ``/article/`` prefix is swapped for ``/printformat/single/``;
        a URL that does not contain the article prefix is returned unchanged.
        """
        return url.replace('http://www.mg.co.za/article/',
                           'http://www.mg.co.za/printformat/single/')

    # Heading styles. Each declaration is kept on one line: the property name
    # ``font-weight`` must not be broken by a newline, otherwise the
    # declaration is invalid CSS and is ignored.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        '''

View File

@ -1,5 +1,5 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup #from calibre.ebooks.BeautifulSoup import BeautifulSoup
from urllib import quote from urllib import quote
class SportsIllustratedRecipe(BasicNewsRecipe) : class SportsIllustratedRecipe(BasicNewsRecipe) :
@ -91,7 +91,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
# expire : no idea what value to use # expire : no idea what value to use
# All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js # All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js
def preprocess_html(self, soup): '''def preprocess_html(self, soup):
header = soup.find('div', attrs = {'class' : 'siv_artheader'}) header = soup.find('div', attrs = {'class' : 'siv_artheader'})
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>') homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
body = homeMadeSoup.body body = homeMadeSoup.body
@ -115,4 +115,5 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
body.append(para) body.append(para)
return homeMadeSoup return homeMadeSoup
'''

View File

@ -0,0 +1,28 @@
{
"contains": "def evaluate(self, formatter, kwargs, mi, locals,\n val, test, value_if_present, value_if_not):\n if re.search(test, val):\n return value_if_present\n else:\n return value_if_not\n",
"divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
"uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
"shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val)\n",
"add": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x + y)\n",
"lookup": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if len(args) == 2: # here for backwards compatibility\n if val:\n return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)\n else:\n return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)\n if (len(args) % 2) != 1:\n raise ValueError(_('lookup requires either 2 or an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)\n if re.search(args[i], val):\n return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)\n i += 2\n",
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val):\n return args[i+1]\n i += 2\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x else 0)\n y = float(y if y else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
}

View File

@ -84,6 +84,23 @@ class Resources(Command):
cPickle.dump(complete, open(dest, 'wb'), -1) cPickle.dump(complete, open(dest, 'wb'), -1)
self.info('\tCreating template-functions.json')
dest = self.j(self.RESOURCES, 'template-functions.json')
function_dict = {}
import inspect
from calibre.utils.formatter_functions import all_builtin_functions
for obj in all_builtin_functions:
eval_func = inspect.getmembers(obj,
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
try:
lines = [l[4:] for l in inspect.getsourcelines(eval_func[0][1])[0]]
except:
continue
lines = ''.join(lines)
function_dict[obj.name] = lines
import json
json.dump(function_dict, open(dest, 'wb'), indent=4)
def clean(self): def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'): for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle') x = self.j(self.RESOURCES, x+'.pickle')

View File

@ -33,6 +33,6 @@ class SNE(USBMS):
STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card' STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books' EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = False

View File

@ -174,13 +174,19 @@ class Dehyphenator(object):
retain hyphens. retain hyphens.
''' '''
def __init__(self): def __init__(self, verbose=0, log=None):
self.log = default_log if log is None else log
self.verbose = verbose
# Add common suffixes to the regex below to increase the likelihood of a match - # Add common suffixes to the regex below to increase the likelihood of a match -
# don't add suffixes which are also complete words, such as 'able' or 'sex' # don't add suffixes which are also complete words, such as 'able' or 'sex'
self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE) # only remove if it's not already the point of hyphenation
self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
# remove prefixes if the prefix was not already the point of hyphenation # remove prefixes if the prefix was not already the point of hyphenation
self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE) self.prefix_string = '^(dis|re|un|in|ex)'
self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE) self.prefixes = re.compile(r'%s$' % self.prefix_string, re.IGNORECASE)
self.removeprefix = re.compile(r'%s' % self.prefix_string, re.IGNORECASE)
def dehyphenate(self, match): def dehyphenate(self, match):
firsthalf = match.group('firstpart') firsthalf = match.group('firstpart')
@ -191,31 +197,44 @@ class Dehyphenator(object):
wraptags = '' wraptags = ''
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf) hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
dehyphenated = unicode(firsthalf) + unicode(secondhalf) dehyphenated = unicode(firsthalf) + unicode(secondhalf)
lookupword = self.removesuffixes.sub('', dehyphenated) if self.suffixes.match(secondhalf) is None:
if self.prefixes.match(firsthalf) is None: lookupword = self.removesuffixes.sub('', dehyphenated)
else:
lookupword = dehyphenated
if len(firsthalf) > 3 and self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword) lookupword = self.removeprefix.sub('', lookupword)
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) if self.verbose > 2:
self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
try: try:
searchresult = self.html.find(lookupword.lower()) searchresult = self.html.find(lookupword.lower())
except: except:
return hyphenated return hyphenated
if self.format == 'html_cleanup' or self.format == 'txt_cleanup': if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
#print "Cleanup:returned dehyphenated word: " + str(dehyphenated) if self.verbose > 2:
self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated))
return dehyphenated return dehyphenated
elif self.html.find(hyphenated) != -1: elif self.html.find(hyphenated) != -1:
#print "Cleanup:returned hyphenated word: " + str(hyphenated) if self.verbose > 2:
self.log(" Cleanup:returned hyphenated word: " + str(hyphenated))
return hyphenated return hyphenated
else: else:
#print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf) if self.verbose > 2:
self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
return firsthalf+u'\u2014'+wraptags+secondhalf return firsthalf+u'\u2014'+wraptags+secondhalf
else: else:
if len(firsthalf) <= 2 and len(secondhalf) <= 2:
if self.verbose > 2:
self.log("too short, returned hyphenated word: " + str(hyphenated))
return hyphenated
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
#print "returned dehyphenated word: " + str(dehyphenated) if self.verbose > 2:
self.log(" returned dehyphenated word: " + str(dehyphenated))
return dehyphenated return dehyphenated
else: else:
#print " returned hyphenated word: " + str(hyphenated) if self.verbose > 2:
self.log(" returned hyphenated word: " + str(hyphenated))
return hyphenated return hyphenated
def __call__(self, html, format, length=1): def __call__(self, html, format, length=1):
@ -228,7 +247,7 @@ class Dehyphenator(object):
elif format == 'txt': elif format == 'txt':
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length) intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
elif format == 'individual_words': elif format == 'individual_words':
intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet intextmatch = re.compile(u'(?!<)(?P<firstpart>\w+)(-|)\s*(?P<secondpart>\w+)(?![^<]*?>)')
elif format == 'html_cleanup': elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)') intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
elif format == 'txt_cleanup': elif format == 'txt_cleanup':
@ -512,7 +531,7 @@ class HTMLPreProcessor(object):
if is_pdftohtml and length > -1: if is_pdftohtml and length > -1:
# Dehyphenate # Dehyphenate
dehyphenator = Dehyphenator() dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html', length) html = dehyphenator(html,'html', length)
if is_pdftohtml: if is_pdftohtml:

View File

@ -322,11 +322,11 @@ class HeuristicProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Delete microsoft 'smart' tags # Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\w+>', '', html) html = re.sub('(?i)</?st1:\w+>', '', html)
# Get rid of empty span, bold, font, & italics tags # Get rid of empty span, bold, font, em, & italics tags
html = re.sub(r'\s*<font[^>]*>\s*</font>\s*', '', html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html) html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
self.deleted_nbsps = True self.deleted_nbsps = True
return html return html
@ -376,27 +376,31 @@ class HeuristicProcessor(object):
except: except:
self.log("Can't get wordcount") self.log("Can't get wordcount")
if 0 < self.totalwords < 50: print "found "+unicode(self.totalwords)+" words in the flow"
if self.totalwords < 50:
self.log("flow is too short, not running heuristics") self.log("flow is too short, not running heuristics")
return html return html
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = self.arrange_htm_line_endings(html) html = self.arrange_htm_line_endings(html)
###### Check Markup ###### if self.cleanup_required():
# ###### Check Markup ######
# some lit files don't have any <p> tags or equivalent (generally just plain text between #
# <pre> tags), check and mark up line endings if required before proceeding # some lit files don't have any <p> tags or equivalent (generally just plain text between
if self.no_markup(html, 0.1): # <pre> tags), check and mark up line endings if required before proceeding
self.log("not enough paragraph markers, adding now") # fix indents must run after this step
# markup using text processing if self.no_markup(html, 0.1):
html = self.markup_pre(html) self.log("not enough paragraph markers, adding now")
# markup using text processing
html = self.markup_pre(html)
# Replace series of non-breaking spaces with text-indent # Replace series of non-breaking spaces with text-indent
if getattr(self.extra_opts, 'fix_indents', False): if getattr(self.extra_opts, 'fix_indents', False):
html = self.fix_nbsp_indents(html) html = self.fix_nbsp_indents(html)
if self.cleanup_required(): if self.cleanup_required():
# fix indents must run before this step, as it removes non-breaking spaces
html = self.cleanup_markup(html) html = self.cleanup_markup(html)
# ADE doesn't render <br />, change to empty paragraphs # ADE doesn't render <br />, change to empty paragraphs
@ -420,26 +424,26 @@ class HeuristicProcessor(object):
self.log("deleting blank lines") self.log("deleting blank lines")
html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html) html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = self.blankreg.sub('', html) html = self.blankreg.sub('', html)
# Determine line ending type
# Some OCR sourced files have line breaks in the html using a combination of span & p tags
# span are used for hard line breaks, p for new paragraphs. Determine which is used so
# that lines can be un-wrapped across page boundaries
format = self.analyze_line_endings(html)
# Check Line histogram to determine if the document uses hard line breaks, If 50% or
# more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50)
self.log("Hard line breaks check returned "+unicode(hardbreaks))
# Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
###### Unwrap lines ###### ###### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False): if getattr(self.extra_opts, 'unwrap_lines', False):
# Determine line ending type
# Some OCR sourced files have line breaks in the html using a combination of span & p tags
# span are used for hard line breaks, p for new paragraphs. Determine which is used so
# that lines can be un-wrapped across page boundaries
format = self.analyze_line_endings(html)
# Check Line histogram to determine if the document uses hard line breaks, If 50% or
# more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50)
self.log("Hard line breaks check returned "+unicode(hardbreaks))
# Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
if hardbreaks or unwrap_factor < 0.4: if hardbreaks or unwrap_factor < 0.4:
self.log("Unwrapping required, unwrapping Lines") self.log("Unwrapping required, unwrapping Lines")
@ -447,15 +451,16 @@ class HeuristicProcessor(object):
dehyphenator = Dehyphenator() dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length) html = dehyphenator(html,'html', length)
html = self.punctuation_unwrap(length, html, 'html') html = self.punctuation_unwrap(length, html, 'html')
#check any remaining hyphens, but only unwrap if there is a match # unwrap remaining hyphens based on line length, but only remove if there is a match
dehyphenator = Dehyphenator() dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html_cleanup', length) html = dehyphenator(html,'html_cleanup', length)
if getattr(self.extra_opts, 'dehyphenate', False): if getattr(self.extra_opts, 'dehyphenate', False):
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed # dehyphenate in cleanup mode to fix anything previous conversions/editing missed
self.log("Fixing hyphenated content") self.log("Fixing hyphenated content")
dehyphenator = Dehyphenator() dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html_cleanup', length) html = dehyphenator(html,'html_cleanup', length)
html = dehyphenator(html, 'individual_words', length)
# If still no sections after unwrapping mark split points on lines with no punctuation # If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):

View File

@ -285,7 +285,6 @@ class RTFInput(InputFormatPlugin):
try: try:
xml = self.generate_xml(stream.name) xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e: except RtfInvalidCodeException, e:
raise
raise ValueError(_('This RTF file has a feature calibre does not ' raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e) 'support. Convert it to HTML first and then try it.\n%s')%e)

View File

@ -226,10 +226,6 @@ class ParseRtf:
try: try:
return_value = process_tokens_obj.process_tokens() return_value = process_tokens_obj.process_tokens()
except InvalidRtfException, msg: except InvalidRtfException, msg:
try:
os.remove(self.__temp_file)
except OSError:
pass
#Check to see if the file is correctly encoded #Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding( encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file, in_file = self.__temp_file,
@ -241,14 +237,17 @@ class ParseRtf:
check_encoding_obj = check_encoding.CheckEncoding( check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
enc = encode_obj.get_codepage() enc = 'cp' + encode_obj.get_codepage()
if enc != 'mac_roman': msg = 'Exception in token processing'
enc = 'cp' + enc
if check_encoding_obj.check_encoding(self.__file, enc): if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \ file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8') else self.__file.encode('utf-8')
msg = 'File %s does not appear to be correctly encoded.\n' % file_name msg = 'File %s does not appear to be correctly encoded.\n' % file_name
raise InvalidRtfException, msg try:
os.remove(self.__temp_file)
except OSError:
pass
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo( delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file, in_file = self.__temp_file,
copy = self.__copy, copy = self.__copy,

View File

@ -74,9 +74,6 @@ class DefaultEncoding:
if not self.__datafetched: if not self.__datafetched:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
if self.__platform == 'Macintosh':
code_page = self.__code_page
else:
code_page = 'ansicpg' + self.__code_page code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num return self.__platform, code_page, self.__default_num
@ -94,49 +91,60 @@ class DefaultEncoding:
def _encoding(self): def _encoding(self):
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
cpfound = False
if not self.__fetchraw: if not self.__fetchraw:
for line in read_obj: for line in read_obj:
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'mi<mk<rtfhed-end': if self.__token_info == 'mi<mk<rtfhed-end':
break break
if self.__token_info == 'cw<ri<ansi-codpg':
#cw<ri<ansi-codpg<nu<10000
self.__code_page = line[20:-1] if int(line[20:-1]) \
else '1252'
if self.__token_info == 'cw<ri<macintosh_': if self.__token_info == 'cw<ri<macintosh_':
self.__platform = 'Macintosh' self.__platform = 'Macintosh'
self.__code_page = 'mac_roman'
elif self.__token_info == 'cw<ri<pc________': elif self.__token_info == 'cw<ri<pc________':
self.__platform = 'IBMPC' self.__platform = 'IBMPC'
self.__code_page = '437'
elif self.__token_info == 'cw<ri<pca_______': elif self.__token_info == 'cw<ri<pca_______':
self.__platform = 'OS/2' self.__platform = 'OS/2'
self.__code_page = '850' if self.__token_info == 'cw<ri<ansi-codpg' \
and int(line[20:-1]):
self.__code_page = line[20:-1]
if self.__token_info == 'cw<ri<deflt-font': if self.__token_info == 'cw<ri<deflt-font':
self.__default_num = line[20:-1] self.__default_num = line[20:-1]
cpfound = True
#cw<ri<deflt-font<nu<0 #cw<ri<deflt-font<nu<0
if self.__platform != 'Windows' and \
not cpfound:
if self.__platform == 'Macintosh':
self.__code_page = '10000'
elif self.__platform == 'IBMPC':
self.__code_page = '437'
elif self.__platform == 'OS/2':
self.__code_page = '850'
else: else:
fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+') fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+') fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
for line in read_obj: for line in read_obj:
if fenc.search(line):
enc = fenc.search(line).group(1)
if fenccp.search(line): if fenccp.search(line):
cp = fenccp.search(line).group(1) cp = fenccp.search(line).group(1)
if not int(cp): if not int(cp):
self.__code_page = cp self.__code_page = cp
cpfound = True
break break
if fenc.search(line): if self.__platform != 'Windows' and \
enc = fenc.search(line).group(1) not cpfound:
if enc == 'mac': if enc == 'mac':
self.__code_page = 'mac_roman' self.__code_page = '10000'
elif enc == 'pc': elif enc == 'pc':
self.__code_page = '437' self.__code_page = '437'
elif enc == 'pca': elif enc == 'pca':
self.__code_page = '850' self.__code_page = '850'
# if __name__ == '__main__': if __name__ == '__main__':
# encode_obj = DefaultEncoding( import sys
# in_file = sys.argv[1], encode_obj = DefaultEncoding(
# bug_handler = Exception, in_file = sys.argv[1],
# check_raw = True, bug_handler = Exception,
# ) check_raw = True,
# print encode_obj.get_codepage() )
print encode_obj.get_codepage()

View File

@ -20,7 +20,7 @@ import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class DeleteInfo: class DeleteInfo:
"""Delelet unecessary destination groups""" """Delete unecessary destination groups"""
def __init__(self, def __init__(self,
in_file , in_file ,
bug_handler, bug_handler,
@ -31,17 +31,14 @@ class DeleteInfo:
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level
self.__initiate_allow()
self.__bracket_count= 0 self.__bracket_count= 0
self.__ob_count = 0 self.__ob_count = 0
self.__cb_count = 0 self.__cb_count = 0
# self.__after_asterisk = False
# self.__delete = 0
self.__initiate_allow()
self.__ob = 0 self.__ob = 0
self.__write_cb = False self.__write_cb = False
self.__run_level = run_level
self.__found_delete = False self.__found_delete = False
# self.__list = False
def __initiate_allow(self): def __initiate_allow(self):
""" """
@ -57,6 +54,8 @@ class DeleteInfo:
'cw<an<annotation', 'cw<an<annotation',
'cw<cm<comment___', 'cw<cm<comment___',
'cw<it<lovr-table', 'cw<it<lovr-table',
# info table
'cw<di<company___',
# 'cw<ls<list______', # 'cw<ls<list______',
) )
self.__not_allowable = ( self.__not_allowable = (
@ -116,7 +115,6 @@ class DeleteInfo:
""" """
# Test for {\*}, in which case don't enter # Test for {\*}, in which case don't enter
# delete state # delete state
# self.__after_asterisk = False # only enter this function once
self.__found_delete = True self.__found_delete = True
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
if self.__delete_count == self.__cb_count: if self.__delete_count == self.__cb_count:
@ -128,7 +126,7 @@ class DeleteInfo:
# not sure what happens here! # not sure what happens here!
# believe I have a '{\*} # believe I have a '{\*}
if self.__run_level > 3: if self.__run_level > 3:
msg = 'flag problem\n' msg = 'Flag problem\n'
raise self.__bug_handler, msg raise self.__bug_handler, msg
return True return True
elif self.__token_info in self.__allowable : elif self.__token_info in self.__allowable :
@ -173,8 +171,8 @@ class DeleteInfo:
Return True for all control words. Return True for all control words.
Return False otherwise. Return False otherwise.
""" """
if self.__delete_count == self.__cb_count and self.__token_info ==\ if self.__delete_count == self.__cb_count and \
'cb<nu<clos-brack': self.__token_info == 'cb<nu<clos-brack':
self.__state = 'default' self.__state = 'default'
if self.__write_cb: if self.__write_cb:
self.__write_cb = False self.__write_cb = False
@ -186,32 +184,24 @@ class DeleteInfo:
return False return False
def delete_info(self): def delete_info(self):
"""Main method for handling other methods. Read one line in at """Main method for handling other methods. Read one line at
a time, and determine whether to print the line based on the state.""" a time, and determine whether to print the line based on the state."""
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj: with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj: for line in read_obj:
#ob<nu<open-brack<0001 #ob<nu<open-brack<0001
to_print = True
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
# Get action to perform
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if not action: if not action:
sys.stderr.write(_('No action in dictionary state is "%s" \n') sys.stderr.write('No action in dictionary state is "%s" \n'
% self.__state) % self.__state)
to_print = action(line) # Print if allowed by action
# if self.__after_asterisk: if action(line):
# to_print = self.__asterisk_func(line)
# elif self.__list:
# self.__in_list_func(line)
# elif self.__delete:
# to_print = self.__delete_func(line)
# else:
# to_print = self.__default_func(line)
if to_print:
self.__write_obj.write(line) self.__write_obj.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:

View File

@ -15,8 +15,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Info: class Info:
""" """
Make tags for document-information Make tags for document-information
@ -42,12 +44,14 @@ class Info:
self.__copy = copy self.__copy = copy
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
""" """
self.__text_string = '' self.__text_string = ''
self.__state = 'before_info_table' self.__state = 'before_info_table'
self.rmspace = re.compile(r'\s+')
self.__state_dict = { self.__state_dict = {
'before_info_table': self.__before_info_table_func, 'before_info_table': self.__before_info_table_func,
'after_info_table': self.__after_info_table_func, 'after_info_table': self.__after_info_table_func,
@ -58,27 +62,49 @@ class Info:
self.__info_table_dict = { self.__info_table_dict = {
'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'), 'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'),
'cw<di<author____' : (self.__found_tag_with_text_func, 'author'), 'cw<di<author____' : (self.__found_tag_with_text_func, 'author'),
'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
'cw<di<manager___' : (self.__found_tag_with_text_func, 'manager'),
'cw<di<company___' : (self.__found_tag_with_text_func, 'company'),
'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'), 'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'),
'cw<di<category__' : (self.__found_tag_with_text_func, 'category'),
'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'), 'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'),
'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'), 'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'),
'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'), 'cw<di<linkbase__' : (self.__found_tag_with_text_func, 'hyperlink-base'),
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'), 'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'), 'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'), 'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'), 'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'),
'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'), 'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'),
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'), 'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
'cw<di<version___' : (self.__single_field_func, 'version'),
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
} }
self.__token_dict = { self.__token_dict = {
'year______' : 'year', 'year______' : 'year',
'month_____' : 'month', 'month_____' : 'month',
'day_______' : 'day', 'day_______' : 'day',
'minute____' : 'minute', 'minute____' : 'minute',
'second____' : 'second',
'revis-time' : 'revision-time', 'revis-time' : 'revision-time',
'create-tim' : 'creation-time',
'edit-time_' : 'editing-time',
'print-time' : 'printing-time',
'backuptime' : 'backup-time',
'num-of-wor' : 'number-of-words', 'num-of-wor' : 'number-of-words',
'num-of-chr' : 'number-of-characters', 'num-of-chr' : 'number-of-characters',
'numofchrws' : 'number-of-characters-without-space',
'num-of-pag' : 'number-of-pages', 'num-of-pag' : 'number-of-pages',
'version___' : 'version',
'intern-ver' : 'internal-version-number',
'internalID' : 'internal-id-number',
} }
def __before_info_table_func(self, line): def __before_info_table_func(self, line):
""" """
Required: Required:
@ -92,6 +118,7 @@ class Info:
if self.__token_info == 'mi<mk<doc-in-beg': if self.__token_info == 'mi<mk<doc-in-beg':
self.__state = 'in_info_table' self.__state = 'in_info_table'
self.__write_obj.write(line) self.__write_obj.write(line)
def __in_info_table_func(self, line): def __in_info_table_func(self, line):
""" """
Requires: Requires:
@ -112,6 +139,7 @@ class Info:
action(line, tag) action(line, tag)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_tag_with_text_func(self, line, tag): def __found_tag_with_text_func(self, line, tag):
""" """
Requires: Requires:
@ -126,6 +154,7 @@ class Info:
""" """
self.__tag = tag self.__tag = tag
self.__state = 'collect_text' self.__state = 'collect_text'
def __collect_text_func(self, line): def __collect_text_func(self, line):
""" """
Requires: Requires:
@ -139,14 +168,17 @@ class Info:
""" """
if self.__token_info == 'mi<mk<docinf-end': if self.__token_info == 'mi<mk<docinf-end':
self.__state = 'in_info_table' self.__state = 'in_info_table'
self.__write_obj.write( #Don't print empty tags
'mi<tg<open______<%s\n' if len(self.rmspace.sub('',self.__text_string)):
'tx<nu<__________<%s\n' self.__write_obj.write(
'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag) 'mi<tg<open______<%s\n'
) 'tx<nu<__________<%s\n'
'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
)
self.__text_string = '' self.__text_string = ''
elif line[0:2] == 'tx': elif line[0:2] == 'tx':
self.__text_string += line[17:-1] self.__text_string += line[17:-1]
def __found_tag_with_tokens_func(self, line, tag): def __found_tag_with_tokens_func(self, line, tag):
""" """
Requires: Requires:
@ -163,6 +195,7 @@ class Info:
self.__state = 'collect_tokens' self.__state = 'collect_tokens'
self.__text_string = 'mi<tg<empty-att_<%s' % tag self.__text_string = 'mi<tg<empty-att_<%s' % tag
#mi<tg<empty-att_<page-definition<margin>33\n #mi<tg<empty-att_<page-definition<margin>33\n
def __collect_tokens_func(self, line): def __collect_tokens_func(self, line):
""" """
Requires: Requires:
@ -194,18 +227,19 @@ class Info:
att = line[6:16] att = line[6:16]
value = line[20:-1] value = line[20:-1]
att_changed = self.__token_dict.get(att) att_changed = self.__token_dict.get(att)
if att_changed == None: if att_changed is None:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'no dictionary match for %s\n' % att msg = 'No dictionary match for %s\n' % att
raise self.__bug_handler, msg raise self.__bug_handler, msg
else: else:
self.__text_string += '<%s>%s' % (att_changed, value) self.__text_string += '<%s>%s' % (att_changed, value)
# Handle an info-table control word that carries one value: the value is read
# from line[20:-1] (everything after the first 20 characters, minus the last
# character -- presumably the trailing newline) and written to the output as a
# single 'mi<tg<empty-att_<TAG<TAG>VALUE' line.
# Diff row: both columns write the same string; the right-hand column only
# merges the two adjacent string literals of the left-hand column into one.
def __single_field_func(self, line, tag): def __single_field_func(self, line, tag):
value = line[20:-1] value = line[20:-1]
self.__write_obj.write( self.__write_obj.write(
'mi<tg<empty-att_<%s' 'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
'<%s>%s\n' % (tag, tag, value)
) )
def __after_info_table_func(self, line): def __after_info_table_func(self, line):
""" """
Requires: Requires:
@ -217,6 +251,7 @@ class Info:
the file. the file.
""" """
self.__write_obj.write(line) self.__write_obj.write(line)
def fix_info(self): def fix_info(self):
""" """
Requires: Requires:
@ -234,20 +269,15 @@ class Info:
information table, simply write the line to the output file. information table, simply write the line to the output file.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'wb') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read: self.__token_info = line[:16]
line_to_read = read_obj.readline() action = self.__state_dict.get(self.__state)
line = line_to_read if action is None:
self.__token_info = line[:16] sys.stderr.write('No matching state in module styles.py\n')
action = self.__state_dict.get(self.__state) sys.stderr.write(self.__state + '\n')
if action == None: action(line)
sys.stderr.write('no no matching state in module styles.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "info.data") copy_obj.copy_file(self.__write_to, "info.data")

View File

@ -70,7 +70,7 @@ class ProcessTokens:
';' : ('mc', ';', self.ms_sub_func), ';' : ('mc', ';', self.ms_sub_func),
# this must be wrong # this must be wrong
'-' : ('mc', '-', self.ms_sub_func), '-' : ('mc', '-', self.ms_sub_func),
'line' : ('mi', 'hardline-break', self.hardline_func), #calibre 'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
# misc => ml # misc => ml
'*' : ('ml', 'asterisk__', self.default_func), '*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func), ':' : ('ml', 'colon_____', self.default_func),
@ -78,7 +78,6 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func), 'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func), 'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func), 'cb' : ('nu', '}', self.text_func),
#'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf # paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func), 'page' : ('pf', 'page-break', self.default_func),
'par' : ('pf', 'par-end___', self.default_func), 'par' : ('pf', 'par-end___', self.default_func),
@ -231,11 +230,15 @@ class ProcessTokens:
'trhdr' : ('tb', 'row-header', self.default_func), 'trhdr' : ('tb', 'row-header', self.default_func),
# preamble => pr # preamble => pr
# document information => di # document information => di
# TODO integrate \userprops
'info' : ('di', 'doc-info__', self.default_func), 'info' : ('di', 'doc-info__', self.default_func),
'title' : ('di', 'title_____', self.default_func),
'author' : ('di', 'author____', self.default_func), 'author' : ('di', 'author____', self.default_func),
'operator' : ('di', 'operator__', self.default_func), 'operator' : ('di', 'operator__', self.default_func),
'title' : ('di', 'title_____', self.default_func), 'manager' : ('di', 'manager___', self.default_func),
'company' : ('di', 'company___', self.default_func),
'keywords' : ('di', 'keywords__', self.default_func), 'keywords' : ('di', 'keywords__', self.default_func),
'category' : ('di', 'category__', self.default_func),
'doccomm' : ('di', 'doc-notes_', self.default_func), 'doccomm' : ('di', 'doc-notes_', self.default_func),
'comment' : ('di', 'doc-notes_', self.default_func), 'comment' : ('di', 'doc-notes_', self.default_func),
'subject' : ('di', 'subject___', self.default_func), 'subject' : ('di', 'subject___', self.default_func),
@ -244,11 +247,19 @@ class ProcessTokens:
'mo' : ('di', 'month_____', self.default_func), 'mo' : ('di', 'month_____', self.default_func),
'dy' : ('di', 'day_______', self.default_func), 'dy' : ('di', 'day_______', self.default_func),
'min' : ('di', 'minute____', self.default_func), 'min' : ('di', 'minute____', self.default_func),
'sec' : ('di', 'second____', self.default_func),
'revtim' : ('di', 'revis-time', self.default_func), 'revtim' : ('di', 'revis-time', self.default_func),
'edmins' : ('di', 'edit-time_', self.default_func),
'printim' : ('di', 'print-time', self.default_func),
'buptim' : ('di', 'backuptime', self.default_func),
'nofwords' : ('di', 'num-of-wor', self.default_func), 'nofwords' : ('di', 'num-of-wor', self.default_func),
'nofchars' : ('di', 'num-of-chr', self.default_func), 'nofchars' : ('di', 'num-of-chr', self.default_func),
'nofcharsws' : ('di', 'numofchrws', self.default_func),
'nofpages' : ('di', 'num-of-pag', self.default_func), 'nofpages' : ('di', 'num-of-pag', self.default_func),
'edmins' : ('di', 'edit-time_', self.default_func), 'version' : ('di', 'version___', self.default_func),
'vern' : ('di', 'intern-ver', self.default_func),
'hlinkbase' : ('di', 'linkbase__', self.default_func),
'id' : ('di', 'internalID', self.default_func),
# headers and footers => hf # headers and footers => hf
'headerf' : ('hf', 'head-first', self.default_func), 'headerf' : ('hf', 'head-first', self.default_func),
'headerl' : ('hf', 'head-left_', self.default_func), 'headerl' : ('hf', 'head-left_', self.default_func),
@ -605,7 +616,7 @@ class ProcessTokens:
# Return one 'tx<mc<__________<TOKEN' line (newline-terminated) for `token`.
# `pre` and `num` are accepted but not used by this function.
def ms_sub_func(self, pre, token, num): def ms_sub_func(self, pre, token, num):
return 'tx<mc<__________<%s\n' % token return 'tx<mc<__________<%s\n' % token
# Diff row: the left column is the old name (hardline_func), the right column
# the new name (direct_conv_func); the body is the same on both sides.
# Return one 'mi<tg<empty_____<TOKEN' line (newline-terminated) for `token`.
# `pre` and `num` are accepted but not used by this function.
def hardline_func(self, pre, token, num): def direct_conv_func(self, pre, token, num):
return 'mi<tg<empty_____<%s\n' % token return 'mi<tg<empty_____<%s\n' % token
def default_func(self, pre, token, num): def default_func(self, pre, token, num):

View File

@ -27,11 +27,13 @@ class Tokenize:
bug_handler, bug_handler,
copy = None, copy = None,
run_level = 1, run_level = 1,
): # out_file = None,
):
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
# self.__out_file = out_file
self.__compile_expressions() self.__compile_expressions()
#variables #variables
self.__uc_char = 0 self.__uc_char = 0
@ -113,6 +115,8 @@ class Tokenize:
def __sub_reg_split(self,input_file): def __sub_reg_split(self,input_file):
input_file = self.__replace_spchar.mreplace(input_file) input_file = self.__replace_spchar.mreplace(input_file)
# this is for older RTF
input_file = self.__par_exp.sub('\n\\par \n', input_file)
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file) input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file) input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data #remove \n in bin data
@ -127,7 +131,7 @@ class Tokenize:
# this is for older RTF # this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line) #line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \ #return filter(lambda x: len(x) > 0, \
#(self.__remove_line.sub('', x) for x in tokens)) #(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self): def __compile_expressions(self):
SIMPLE_RPL = { SIMPLE_RPL = {
@ -153,8 +157,6 @@ class Tokenize:
# put a backslash in front of to eliminate special cases and # put a backslash in front of to eliminate special cases and
# make processing easier # make processing easier
"}": "\\}", "}": "\\}",
# this is for older RTF
r'\\$': '\\par ',
} }
self.__replace_spchar = MReplace(SIMPLE_RPL) self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u #add ;? in case of char following \u
@ -168,10 +170,12 @@ class Tokenize:
#why keep backslash whereas \is replaced before? #why keep backslash whereas \is replaced before?
#remove \n from endline char #remove \n from endline char
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)") self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF
self.__par_exp = re.compile(r'\\\n+')
# self.__par_exp = re.compile(r'\\$')
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}") #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})") #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)") #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__par_exp = re.compile(r'\\$')
#self.__remove_line = re.compile(r'\n+') #self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)") #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)") ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@ -199,7 +203,24 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data") copy_obj.copy_file(self.__write_to, "tokenize.data")
# if self.__out_file:
# self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
#self.__special_tokens = [ '_', '~', "'", '{', '}' ] #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
# import sys
# def main(args=sys.argv):
# if len(args) < 1:
# print 'No file'
# return
# file = 'data_tokens.txt'
# if len(args) == 3:
# file = args[2]
# to = Tokenize(args[1], Exception, out_file = file)
# to.tokenize()
# if __name__ == '__main__':
# sys.exit(main())

View File

@ -106,7 +106,7 @@ class TXTInput(InputFormatPlugin):
log.debug('Auto detected paragraph type as %s' % options.paragraph_type) log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
# Dehyphenate # Dehyphenate
dehyphenator = Dehyphenator() dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
txt = dehyphenator(txt,'txt', length) txt = dehyphenator(txt,'txt', length)
# We don't check for block because the processor assumes block. # We don't check for block because the processor assumes block.
@ -137,11 +137,6 @@ class TXTInput(InputFormatPlugin):
setattr(options, 'format_scene_breaks', True) setattr(options, 'format_scene_breaks', True)
setattr(options, 'dehyphenate', True) setattr(options, 'dehyphenate', True)
# Dehyphenate in cleanup mode for missed txt and markdown conversion
dehyphenator = Dehyphenator()
html = dehyphenator(html,'txt_cleanup', length)
html = dehyphenator(html,'html_cleanup', length)
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html') html_input = plugin_for_input_format('html')
for opt in html_input.options: for opt in html_input.options:

View File

@ -505,7 +505,7 @@ class FileDialog(QObject):
self.selected_files = [] self.selected_files = []
if mode == QFileDialog.AnyFile: if mode == QFileDialog.AnyFile:
f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, "")) f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, ""))
if f and os.path.exists(f): if f:
self.selected_files.append(f) self.selected_files.append(f)
elif mode == QFileDialog.ExistingFile: elif mode == QFileDialog.ExistingFile:
f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, "")) f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, ""))

View File

@ -28,7 +28,7 @@ class GenerateCatalogAction(InterfaceAction):
if not ids: if not ids:
return error_dialog(self.gui, _('No books selected'), return error_dialog(self.gui, _('No books selected'),
_('No books selected to generate catalog for'), _('No books selected for catalog generation'),
show=True) show=True)
db = self.gui.library_view.model().db db = self.gui.library_view.model().db
@ -55,9 +55,9 @@ class GenerateCatalogAction(InterfaceAction):
def catalog_generated(self, job): def catalog_generated(self, job):
if job.result: if job.result:
# Search terms nulled catalog results # Error during catalog generation
return error_dialog(self.gui, _('No books found'), return error_dialog(self.gui, _('Catalog generation terminated'),
_("No books to catalog\nCheck job details"), job.result,
show=True) show=True)
if job.failed: if job.failed:
return self.gui.job_exception(job) return self.gui.job_exception(job)

View File

@ -94,7 +94,7 @@ class EditMetadataAction(InterfaceAction):
get_social_metadata = config['get_social_metadata'] get_social_metadata = config['get_social_metadata']
else: else:
get_social_metadata = set_social_metadata get_social_metadata = set_social_metadata
from calibre.gui2.metadata import DoDownload from calibre.gui2.metadata.bulk_download import DoDownload
if set_social_metadata is not None and set_social_metadata: if set_social_metadata is not None and set_social_metadata:
x = _('social metadata') x = _('social metadata')
else: else:

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -730,7 +730,7 @@ class TagsModel(QAbstractItemModel): # {{{
else: else:
collapse_model = 'partition' collapse_model = 'partition'
collapse_template = tweaks['categories_collapsed_popularity_template'] collapse_template = tweaks['categories_collapsed_popularity_template']
collapse_letter = None collapse_letter = collapse_letter_sk = None
for i, r in enumerate(self.row_map): for i, r in enumerate(self.row_map):
if self.hidden_categories and self.categories[i] in self.hidden_categories: if self.hidden_categories and self.categories[i] in self.hidden_categories:
@ -782,8 +782,17 @@ class TagsModel(QAbstractItemModel): # {{{
ts = tag.sort ts = tag.sort
if not ts: if not ts:
ts = ' ' ts = ' '
if upper(ts[0]) != collapse_letter: try:
sk = sort_key(ts)[0]
except:
sk = ts[0]
if sk != collapse_letter_sk:
collapse_letter = upper(ts[0]) collapse_letter = upper(ts[0])
try:
collapse_letter_sk = sort_key(collapse_letter)[0]
except:
collapse_letter_sk = collapse_letter
sub_cat = TagTreeItem(parent=category, sub_cat = TagTreeItem(parent=category,
data = collapse_letter, data = collapse_letter,
category_icon = category_node.icon, category_icon = category_node.icon,

View File

@ -386,11 +386,13 @@ class LineEditECM(object):
action_lower_case = case_menu.addAction(_('Lower Case')) action_lower_case = case_menu.addAction(_('Lower Case'))
action_swap_case = case_menu.addAction(_('Swap Case')) action_swap_case = case_menu.addAction(_('Swap Case'))
action_title_case = case_menu.addAction(_('Title Case')) action_title_case = case_menu.addAction(_('Title Case'))
action_capitalize = case_menu.addAction(_('Capitalize'))
self.connect(action_upper_case, SIGNAL('triggered()'), self.upper_case) self.connect(action_upper_case, SIGNAL('triggered()'), self.upper_case)
self.connect(action_lower_case, SIGNAL('triggered()'), self.lower_case) self.connect(action_lower_case, SIGNAL('triggered()'), self.lower_case)
self.connect(action_swap_case, SIGNAL('triggered()'), self.swap_case) self.connect(action_swap_case, SIGNAL('triggered()'), self.swap_case)
self.connect(action_title_case, SIGNAL('triggered()'), self.title_case) self.connect(action_title_case, SIGNAL('triggered()'), self.title_case)
self.connect(action_capitalize, SIGNAL('triggered()'), self.capitalize)
menu.addMenu(case_menu) menu.addMenu(case_menu)
menu.exec_(event.globalPos()) menu.exec_(event.globalPos())
@ -408,6 +410,10 @@ class LineEditECM(object):
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
self.setText(titlecase(unicode(self.text()))) self.setText(titlecase(unicode(self.text())))
# Replace the widget's text with the result of calibre.utils.icu.capitalize()
# applied to the current text (converted with unicode() first).
# The import is done inside the method rather than at module level.
def capitalize(self):
from calibre.utils.icu import capitalize
self.setText(capitalize(unicode(self.text())))
class EnLineEdit(LineEditECM, QLineEdit): class EnLineEdit(LineEditECM, QLineEdit):

View File

@ -1144,7 +1144,9 @@ class EPUB_MOBI(CatalogPlugin):
def error(self): def error(self):
def fget(self): def fget(self):
return self.__error return self.__error
return property(fget=fget) def fset(self, val):
self.__error = val
return property(fget=fget,fset=fset)
@dynamic_property @dynamic_property
def generateForKindle(self): def generateForKindle(self):
def fget(self): def fget(self):
@ -1411,6 +1413,88 @@ class EPUB_MOBI(CatalogPlugin):
except: except:
pass pass
# Populate self.booksByAuthor and self.authors from self.booksByTitle.
# Returns True after storing the (author, title-cased author_sort, book_count)
# tuples in self.authors. Returns False -- after logging a warning and storing
# the message in self.error -- when consecutive entries share an author name
# but differ in author_sort.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# if/elif/else branches in the loop below must be checked against the real file.
def fetchBooksByAuthor(self):
'''
Generate a list of titles sorted by author from the database
return = Success
'''
self.updateProgressFullStep("Sorting database")
# The triple-quoted block that follows is a bare string holding an earlier
# sorted()-based version of the sort; its contents are not executed.
'''
# Sort titles case-insensitive, by author
self.booksByAuthor = sorted(self.booksByTitle,
key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
'''
# Sort a copy of the list; author_compare is passed positionally to
# list.sort(), i.e. as the Python 2 comparison function.
self.booksByAuthor = list(self.booksByTitle)
self.booksByAuthor.sort(self.author_compare)
# Debug dump; the leading `False and` keeps this condition from ever being true.
if False and self.verbose:
self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
for title in self.booksByAuthor:
self.opts.log.info((u" %-30s %-20s%5s " % \
(title['title'][:30],
title['series'][:20] if title['series'] else '',
title['series_index'],
)).encode('utf-8'))
raise SystemExit
# Build the unique_authors set from existing data
authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
# authors[]: (([0]:friendly [1]:sort))
# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
books_by_current_author = 0
# NOTE(review): authors[0] raises IndexError when authors is empty --
# presumably this is only called once books have been fetched; confirm.
current_author = authors[0]
multiple_authors = False
unique_authors = []
for (i,author) in enumerate(authors):
if author != current_author:
# Note that current_author and author are tuples: (friendly, sort)
multiple_authors = True
if author != current_author and i:
# Warn, exit if friendly matches previous, but sort doesn't
if author[0] == current_author[0]:
error_msg = _('''
\n*** Metadata error ***
Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
then rebuild the catalog.\n''').format(author[0])
self.opts.log.warn(error_msg)
# Keep the message on the instance (via the `error` property setter)
# in addition to logging it.
self.error = error_msg
return False
# New author, save the previous author/sort/count
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
current_author = author
books_by_current_author = 1
elif i==0 and len(authors) == 1:
# Allow for single-book lists
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
else:
books_by_current_author += 1
else:
# Add final author to list or single-author dataset
if (current_author == author and len(authors) > 1) or not multiple_authors:
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
# Debug dump; the leading `False and` keeps this condition from ever being true.
if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8'))
self.authors = unique_authors
return True
def fetchBooksByTitle(self): def fetchBooksByTitle(self):
self.updateProgressFullStep("Fetching database") self.updateProgressFullStep("Fetching database")
@ -1562,90 +1646,9 @@ class EPUB_MOBI(CatalogPlugin):
title['title_sort'][0:40])).decode('mac-roman')) title['title_sort'][0:40])).decode('mac-roman'))
return True return True
else: else:
self.error = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.")
return False return False
# Removed side of the diff: the copy of fetchBooksByAuthor that the commit
# deletes from this position in the file.
# Populates self.booksByAuthor and self.authors from self.booksByTitle and
# returns True; returns False after logging a warning when consecutive entries
# share an author name but differ in author_sort. This copy does not assign
# self.error before returning False.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# if/elif/else branches in the loop below must be checked against the real file.
def fetchBooksByAuthor(self):
'''
Generate a list of titles sorted by author from the database
return = Success
'''
self.updateProgressFullStep("Sorting database")
# The triple-quoted block that follows is a bare string holding an earlier
# sorted()-based version of the sort; its contents are not executed.
'''
# Sort titles case-insensitive, by author
self.booksByAuthor = sorted(self.booksByTitle,
key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
'''
# Sort a copy of the list; author_compare is passed positionally to
# list.sort(), i.e. as the Python 2 comparison function.
self.booksByAuthor = list(self.booksByTitle)
self.booksByAuthor.sort(self.author_compare)
# Debug dump; the leading `False and` keeps this condition from ever being true.
if False and self.verbose:
self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
for title in self.booksByAuthor:
self.opts.log.info((u" %-30s %-20s%5s " % \
(title['title'][:30],
title['series'][:20] if title['series'] else '',
title['series_index'],
)).encode('utf-8'))
raise SystemExit
# Build the unique_authors set from existing data
authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
# authors[]: (([0]:friendly [1]:sort))
# unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
books_by_current_author = 0
# NOTE(review): authors[0] raises IndexError when authors is empty --
# presumably this is only called once books have been fetched; confirm.
current_author = authors[0]
multiple_authors = False
unique_authors = []
for (i,author) in enumerate(authors):
if author != current_author:
# Note that current_author and author are tuples: (friendly, sort)
multiple_authors = True
if author != current_author and i:
# Warn, exit if friendly matches previous, but sort doesn't
if author[0] == current_author[0]:
error_msg = _('''
\n*** Metadata error ***
Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
then rebuild the catalog.
*** Terminating catalog generation ***\n''').format(author[0])
self.opts.log.warn(error_msg)
return False
# New author, save the previous author/sort/count
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
current_author = author
books_by_current_author = 1
elif i==0 and len(authors) == 1:
# Allow for single-book lists
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
else:
books_by_current_author += 1
else:
# Add final author to list or single-author dataset
if (current_author == author and len(authors) > 1) or not multiple_authors:
unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author))
# Debug dump; the leading `False and` keeps this condition from ever being true.
if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8'))
self.authors = unique_authors
return True
def fetchBookmarks(self): def fetchBookmarks(self):
''' '''
Collect bookmarks for catalog entries Collect bookmarks for catalog entries
@ -5069,6 +5072,8 @@ then rebuild the catalog.
abort_after_input_dump=False) abort_after_input_dump=False)
plumber.merge_ui_recommendations(recommendations) plumber.merge_ui_recommendations(recommendations)
plumber.run() plumber.run()
return 0 # returns to gui2.actions.catalog:catalog_generated()
return None
else: else:
return 1 # returns to gui2.actions.catalog:catalog_generated()
return catalog.error

View File

@ -693,8 +693,12 @@ def command_catalog(args, dbpath):
} }
with plugin: with plugin:
plugin.run(args[1], opts, get_db(dbpath, opts)) ret = plugin.run(args[1], opts, get_db(dbpath, opts))
return 0 if ret is None:
ret = 0
else:
ret = 1
return ret
# end of GR additions # end of GR additions

View File

@ -690,11 +690,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
mi = Metadata(None) mi = Metadata(None)
aut_list = row[fm['au_map']] aut_list = row[fm['au_map']]
aut_list = [p.split(':::') for p in aut_list.split(':#:')] if aut_list:
aut_list = [p.split(':::') for p in aut_list.split(':#:') if p]
else:
aut_list = []
aum = [] aum = []
aus = {} aus = {}
for (author, author_sort) in aut_list: for (author, author_sort) in aut_list:
aum.append(author) aum.append(author.replace('|', ','))
aus[author] = author_sort.replace('|', ',') aus[author] = author_sort.replace('|', ',')
mi.title = row[fm['title']] mi.title = row[fm['title']]
mi.authors = aum mi.authors = aum

View File

@ -437,6 +437,15 @@ My antivirus program claims |app| is a virus/trojan?
Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it. Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
How do I back up |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The most important thing to back up is the |app| library folder, which contains all your books and metadata. This is the folder you chose for your |app| library when you ran |app| for the first time. You can get the path to the library folder by clicking the |app| icon on the main toolbar. You must back up this complete folder with all its files and sub-folders.
You can switch |app| to use a backed-up library folder by simply clicking the |app| icon on the toolbar and choosing your backup library folder.
If you want to back up the |app| configuration/plugins, you have to back up the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. To restore, just copy the contents of the backup directory into the current configuration directory.
How do I use purchased EPUB books with |app|? How do I use purchased EPUB books with |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|. Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.

View File

@ -77,7 +77,7 @@ class FormatterFunction(object):
exc_traceback)[-2:]).replace('\n', '') exc_traceback)[-2:]).replace('\n', '')
return _('Exception ' + info) return _('Exception ' + info)
all_builtin_functions = []
class BuiltinFormatterFunction(FormatterFunction): class BuiltinFormatterFunction(FormatterFunction):
def __init__(self): def __init__(self):
formatter_functions.register_builtin(self) formatter_functions.register_builtin(self)
@ -88,6 +88,7 @@ class BuiltinFormatterFunction(FormatterFunction):
except: except:
lines = [] lines = []
self.program_text = ''.join(lines) self.program_text = ''.join(lines)
all_builtin_functions.append(self)
class BuiltinStrcmp(BuiltinFormatterFunction): class BuiltinStrcmp(BuiltinFormatterFunction):
name = 'strcmp' name = 'strcmp'

View File

@ -80,7 +80,7 @@ def icu_case_sensitive_strcmp(collator, a, b):
def icu_capitalize(s): def icu_capitalize(s):
s = lower(s) s = lower(s)
return s.replace(s[0], upper(s[0]), 1) return s.replace(s[0], upper(s[0]), 1) if s else s
load_icu() load_icu()
load_collator() load_collator()