Sync to ldolse heuristics branch.

commit 51e7a555e1

resources/recipes/mail_and_guardian.recipe | 32 (new file)
--- /dev/null
+++ b/resources/recipes/mail_and_guardian.recipe
@@ -0,0 +1,32 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1295081935(BasicNewsRecipe):
+    title          = u'Mail & Guardian ZA News'
+    __author__     = '77ja65'
+    language       = 'en'
+    oldest_article = 7
+    max_articles_per_feed = 30
+    no_stylesheets = True
+    masthead_url   = 'http://c1608832.cdn.cloudfiles.rackspacecloud.com/mg_logo.gif'
+    remove_tags_after = [dict(id='content')]
+
+    feeds = [
+        (u'National News', u'http://www.mg.co.za/rss/national'),
+        (u'Top Stories', u'http://www.mg.co.za/rss'),
+        (u'Africa News', u'http://www.mg.co.za/rss/africa'),
+        (u'Sport', u'http://www.mg.co.za/rss/sport'),
+        (u'Business', u'http://www.mg.co.za/rss/business'),
+        (u'And In Other News', u'http://www.mg.co.za/rss/and-in-other-news'),
+        (u'World News', u'http://www.mg.co.za/rss/world')
+    ]
+
+    def print_version(self, url):
+        return url.replace('http://www.mg.co.za/article/',
+                'http://www.mg.co.za/printformat/single/')
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+    '''
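
A quick illustration of what the recipe's print_version hook does; the article slug below is made up for the example (Python 2, as in the codebase of the time):

    url = 'http://www.mg.co.za/article/2011-01-17-example-story'
    print url.replace('http://www.mg.co.za/article/',
            'http://www.mg.co.za/printformat/single/')
    # http://www.mg.co.za/printformat/single/2011-01-17-example-story
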
Sports Illustrated recipe:
@@ -1,5 +1,5 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+#from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from urllib import quote
 
 class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -91,7 +91,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
         # expire : no idea what value to use
         # All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js
 
-    def preprocess_html(self, soup):
+    '''def preprocess_html(self, soup):
         header = soup.find('div', attrs = {'class' : 'siv_artheader'})
         homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
         body = homeMadeSoup.body
@@ -115,4 +115,5 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
             body.append(para)
 
         return homeMadeSoup
+        '''
 
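
Note on the change above: wrapping the whole method in a triple-quoted string turns it into an inert string literal, so the class no longer defines preprocess_html at all. A minimal sketch of the same trick (hypothetical class):

    class Demo(object):
        '''def preprocess_html(self, soup):
            ...body kept for reference, never executed...
        '''

    print hasattr(Demo(), 'preprocess_html')   # False
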
resources/template-functions.json | 28 (new file)

--- /dev/null
+++ b/resources/template-functions.json
@@ -0,0 +1,28 @@
+{
+    "contains": "def evaluate(self, formatter, kwargs, mi, locals,\n val, test, value_if_present, value_if_not):\n if re.search(test, val):\n return value_if_present\n else:\n return value_if_not\n",
+    "divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
+    "uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
+    "strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
+    "substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
+    "ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
+    "field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
+    "capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
+    "list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
+    "shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
+    "re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val)\n",
+    "add": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x + y)\n",
+    "lookup": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if len(args) == 2: # here for backwards compatibility\n if val:\n return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)\n else:\n return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)\n if (len(args) % 2) != 1:\n raise ValueError(_('lookup requires either 2 or an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)\n if re.search(args[i], val):\n return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)\n i += 2\n",
+    "template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.safe_format(template, kwargs, 'TEMPLATE', mi)\n",
+    "print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
+    "titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
+    "test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
+    "eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
+    "multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
+    "subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
+    "count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
+    "lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
+    "assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
+    "switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val):\n return args[i+1]\n i += 2\n",
+    "strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
+    "cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x else 0)\n y = float(y if y else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
+}
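
Each value in this file is the dedented source of a builtin template function's evaluate() method, generated by the setup step in the next hunk. A sketch (Python 2; the file path is illustrative) of loading one entry and calling it standalone:

    import json, re

    funcs = json.load(open('resources/template-functions.json'))
    ns = {'re': re}                    # re is needed by some of the entries
    exec funcs['count'] in ns          # defines evaluate() in ns
    print ns['evaluate'](None, None, None, None, None, 'a,b,c', ',')   # u'3'
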
Resources command (setup):
@@ -84,6 +84,23 @@ class Resources(Command):
 
         cPickle.dump(complete, open(dest, 'wb'), -1)
 
+        self.info('\tCreating template-functions.json')
+        dest = self.j(self.RESOURCES, 'template-functions.json')
+        function_dict = {}
+        import inspect
+        from calibre.utils.formatter_functions import all_builtin_functions
+        for obj in all_builtin_functions:
+            eval_func = inspect.getmembers(obj,
+                    lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
+            try:
+                lines = [l[4:] for l in inspect.getsourcelines(eval_func[0][1])[0]]
+            except:
+                continue
+            lines = ''.join(lines)
+            function_dict[obj.name] = lines
+        import json
+        json.dump(function_dict, open(dest, 'wb'), indent=4)
+
     def clean(self):
         for x in ('scripts', 'recipes', 'ebook-convert-complete'):
             x = self.j(self.RESOURCES, x+'.pickle')
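
The l[4:] slice above strips the one level of class-body indentation from each source line, so the stored string is a top-level def. A self-contained sketch (Python 2; run as a script so inspect can read the source; the Upper class is made up):

    import inspect

    class Upper(object):
        name = 'uppercase'
        def evaluate(self, formatter, kwargs, mi, locals, val):
            return val.upper()

    lines = [l[4:] for l in inspect.getsourcelines(Upper.evaluate)[0]]
    print ''.join(lines)
    # def evaluate(self, formatter, kwargs, mi, locals, val):
    #     return val.upper()
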
SNE device driver:
@@ -33,6 +33,6 @@ class SNE(USBMS):
     STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'
 
     EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
-    SUPPORTS_SUB_DIRS = True
+    SUPPORTS_SUB_DIRS = False
 
 
Conversion preprocessing (Dehyphenator, HTMLPreProcessor):
@@ -174,13 +174,19 @@ class Dehyphenator(object):
     retain hyphens.
     '''
 
-    def __init__(self):
+    def __init__(self, verbose=0, log=None):
+        self.log = default_log if log is None else log
+        self.verbose = verbose
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
+        # only remove if it's not already the point of hyphenation
+        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+        self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
+        self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
         # remove prefixes if the prefix was not already the point of hyphenation
-        self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
-        self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
+        self.prefix_string = '^(dis|re|un|in|ex)'
+        self.prefixes = re.compile(r'%s$' % self.prefix_string, re.IGNORECASE)
+        self.removeprefix = re.compile(r'%s' % self.prefix_string, re.IGNORECASE)
 
     def dehyphenate(self, match):
         firsthalf = match.group('firstpart')
@@ -191,31 +197,44 @@ class Dehyphenator(object):
             wraptags = ''
         hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
         dehyphenated = unicode(firsthalf) + unicode(secondhalf)
-        lookupword = self.removesuffixes.sub('', dehyphenated)
-        if self.prefixes.match(firsthalf) is None:
+        if self.suffixes.match(secondhalf) is None:
+            lookupword = self.removesuffixes.sub('', dehyphenated)
+        else:
+            lookupword = dehyphenated
+        if len(firsthalf) > 3 and self.prefixes.match(firsthalf) is None:
             lookupword = self.removeprefix.sub('', lookupword)
-        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+        if self.verbose > 2:
+            self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
        try:
            searchresult = self.html.find(lookupword.lower())
        except:
            return hyphenated
        if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
            if self.html.find(lookupword) != -1 or searchresult != -1:
-                #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
+                if self.verbose > 2:
+                    self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated))
                return dehyphenated
            elif self.html.find(hyphenated) != -1:
-                #print "Cleanup:returned hyphenated word: " + str(hyphenated)
+                if self.verbose > 2:
+                    self.log(" Cleanup:returned hyphenated word: " + str(hyphenated))
                return hyphenated
            else:
-                #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
+                if self.verbose > 2:
+                    self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
                return firsthalf+u'\u2014'+wraptags+secondhalf
 
        else:
+            if len(firsthalf) <= 2 and len(secondhalf) <= 2:
+                if self.verbose > 2:
+                    self.log("too short, returned hyphenated word: " + str(hyphenated))
+                return hyphenated
            if self.html.find(lookupword) != -1 or searchresult != -1:
-                #print "returned dehyphenated word: " + str(dehyphenated)
+                if self.verbose > 2:
+                    self.log(" returned dehyphenated word: " + str(dehyphenated))
                return dehyphenated
            else:
-                #print " returned hyphenated word: " + str(hyphenated)
+                if self.verbose > 2:
+                    self.log(" returned hyphenated word: " + str(hyphenated))
                return hyphenated
 
     def __call__(self, html, format, length=1):
@@ -228,7 +247,7 @@ class Dehyphenator(object):
         elif format == 'txt':
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
         elif format == 'individual_words':
-            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet
+            intextmatch = re.compile(u'(?!<)(?P<firstpart>\w+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
         elif format == 'html_cleanup':
             intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
         elif format == 'txt_cleanup':
@@ -512,7 +531,7 @@ class HTMLPreProcessor(object):
 
         if is_pdftohtml and length > -1:
             # Dehyphenate
-            dehyphenator = Dehyphenator()
+            dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
             html = dehyphenator(html,'html', length)
 
         if is_pdftohtml:
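
The reworked suffix/prefix handling above is easier to see in isolation. A simplified standalone sketch (Python 2; much shorter word lists than the real regexes):

    import re

    suffixes = re.compile(r"^(ing|ed|ly|s)$", re.IGNORECASE)
    removesuffixes = re.compile(r"(ing|ed|ly|s)$", re.IGNORECASE)
    prefixes = re.compile(r"^(dis|re|un|in|ex)$", re.IGNORECASE)
    removeprefix = re.compile(r"^(dis|re|un|in|ex)", re.IGNORECASE)

    def lookup_word(firsthalf, secondhalf):
        dehyphenated = firsthalf + secondhalf
        # strip a suffix only if the line break was not already at the suffix
        if suffixes.match(secondhalf) is None:
            lookupword = removesuffixes.sub('', dehyphenated)
        else:
            lookupword = dehyphenated
        # strip a prefix only if the break was not right after it
        if len(firsthalf) > 3 and prefixes.match(firsthalf) is None:
            lookupword = removeprefix.sub('', lookupword)
        return lookupword

    print lookup_word('cele', 'brated')   # 'celebrat': stem searched in the text
    print lookup_word('unwrapp', 'ed')    # 'wrapped': suffix kept, prefix stripped
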
Heuristic processing (HeuristicProcessor):
@@ -322,11 +322,11 @@ class HeuristicProcessor(object):
         html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
         # Delete microsoft 'smart' tags
         html = re.sub('(?i)</?st1:\w+>', '', html)
-        # Get rid of empty span, bold, font, & italics tags
-        html = re.sub(r'\s*<font[^>]*>\s*</font>\s*', '', html)
+        # Get rid of empty span, bold, font, em, & italics tags
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
         self.deleted_nbsps = True
         return html
 
@@ -376,27 +376,31 @@ class HeuristicProcessor(object):
         except:
             self.log("Can't get wordcount")
 
-        if 0 < self.totalwords < 50:
+        print "found "+unicode(self.totalwords)+" words in the flow"
+        if self.totalwords < 50:
             self.log("flow is too short, not running heuristics")
             return html
 
         # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
         html = self.arrange_htm_line_endings(html)
 
-        ###### Check Markup ######
-        #
-        # some lit files don't have any <p> tags or equivalent (generally just plain text between
-        # <pre> tags), check and mark up line endings if required before proceeding
-        if self.no_markup(html, 0.1):
-            self.log("not enough paragraph markers, adding now")
-            # markup using text processing
-            html = self.markup_pre(html)
+        if self.cleanup_required():
+            ###### Check Markup ######
+            #
+            # some lit files don't have any <p> tags or equivalent (generally just plain text between
+            # <pre> tags), check and mark up line endings if required before proceeding
+            # fix indents must run after this step
+            if self.no_markup(html, 0.1):
+                self.log("not enough paragraph markers, adding now")
+                # markup using text processing
+                html = self.markup_pre(html)
 
         # Replace series of non-breaking spaces with text-indent
         if getattr(self.extra_opts, 'fix_indents', False):
             html = self.fix_nbsp_indents(html)
 
         if self.cleanup_required():
+            # fix indents must run before this step, as it removes non-breaking spaces
             html = self.cleanup_markup(html)
 
         # ADE doesn't render <br />, change to empty paragraphs
@@ -420,26 +424,26 @@ class HeuristicProcessor(object):
             self.log("deleting blank lines")
             html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
             html = self.blankreg.sub('', html)
 
+        # Determine line ending type
+        # Some OCR sourced files have line breaks in the html using a combination of span & p tags
+        # span are used for hard line breaks, p for new paragraphs.  Determine which is used so
+        # that lines can be un-wrapped across page boundaries
+        format = self.analyze_line_endings(html)
+
+        # Check Line histogram to determine if the document uses hard line breaks, If 50% or
+        # more of the lines break in the same region of the document then unwrapping is required
+        docanalysis = DocAnalysis(format, html)
+        hardbreaks = docanalysis.line_histogram(.50)
+        self.log("Hard line breaks check returned "+unicode(hardbreaks))
+
+        # Calculate Length
+        unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
+        length = docanalysis.line_length(unwrap_factor)
+        self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
+
         ###### Unwrap lines ######
         if getattr(self.extra_opts, 'unwrap_lines', False):
-            # Determine line ending type
-            # Some OCR sourced files have line breaks in the html using a combination of span & p tags
-            # span are used for hard line breaks, p for new paragraphs.  Determine which is used so
-            # that lines can be un-wrapped across page boundaries
-            format = self.analyze_line_endings(html)
-
-            # Check Line histogram to determine if the document uses hard line breaks, If 50% or
-            # more of the lines break in the same region of the document then unwrapping is required
-            docanalysis = DocAnalysis(format, html)
-            hardbreaks = docanalysis.line_histogram(.50)
-            self.log("Hard line breaks check returned "+unicode(hardbreaks))
-
-            # Calculate Length
-            unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
-            length = docanalysis.line_length(unwrap_factor)
-            self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
-
             # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
             if hardbreaks or unwrap_factor < 0.4:
                 self.log("Unwrapping required, unwrapping Lines")
@@ -447,15 +451,16 @@ class HeuristicProcessor(object):
                 dehyphenator = Dehyphenator()
                 html = dehyphenator(html,'html', length)
                 html = self.punctuation_unwrap(length, html, 'html')
-                #check any remaining hyphens, but only unwrap if there is a match
-                dehyphenator = Dehyphenator()
+                # unwrap remaining hyphens based on line length, but only remove if there is a match
+                dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
                 html = dehyphenator(html,'html_cleanup', length)
 
         if getattr(self.extra_opts, 'dehyphenate', False):
             # dehyphenate in cleanup mode to fix anything previous conversions/editing missed
             self.log("Fixing hyphenated content")
-            dehyphenator = Dehyphenator()
+            dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
             html = dehyphenator(html,'html_cleanup', length)
+            html = dehyphenator(html, 'individual_words', length)
 
         # If still no sections after unwrapping mark split points on lines with no punctuation
         if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
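
DocAnalysis.line_histogram itself is not shown in this diff; as a rough sketch of the idea the check relies on (my simplification, not calibre's implementation), hard line breaks show up as most line lengths clustering near one column:

    def line_histogram(lines, percent=0.50, buckets=20):
        lengths = [len(l) for l in lines if l.strip()]
        if not lengths:
            return False
        maxlen = max(lengths)
        counts = [0] * buckets
        for n in lengths:
            counts[min(n * buckets // maxlen, buckets - 1)] += 1
        # hard line breaks produce one dominant bucket
        return max(counts) >= percent * len(lengths)

    wrapped = ['x' * 60] * 9 + ['x' * 12]
    print line_histogram(wrapped)   # True: most lines wrap near the same column
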
RTF input plugin (RTFInput):
@@ -285,7 +285,6 @@ class RTFInput(InputFormatPlugin):
         try:
             xml = self.generate_xml(stream.name)
         except RtfInvalidCodeException, e:
-            raise
             raise ValueError(_('This RTF file has a feature calibre does not '
                 'support. Convert it to HTML first and then try it.\n%s')%e)
 
rtf2xml ParseRtf:
@@ -226,10 +226,6 @@ class ParseRtf:
         try:
             return_value = process_tokens_obj.process_tokens()
         except InvalidRtfException, msg:
-            try:
-                os.remove(self.__temp_file)
-            except OSError:
-                pass
             #Check to see if the file is correctly encoded
             encode_obj = default_encoding.DefaultEncoding(
                 in_file = self.__temp_file,
@@ -241,14 +237,17 @@ class ParseRtf:
             check_encoding_obj = check_encoding.CheckEncoding(
                 bug_handler = RtfInvalidCodeException,
                 )
-            enc = encode_obj.get_codepage()
-            if enc != 'mac_roman':
-                enc = 'cp' + enc
+            enc = 'cp' + encode_obj.get_codepage()
+            msg = 'Exception in token processing'
             if check_encoding_obj.check_encoding(self.__file, enc):
                 file_name = self.__file if isinstance(self.__file, str) \
                     else self.__file.encode('utf-8')
                 msg = 'File %s does not appear to be correctly encoded.\n' % file_name
-            raise InvalidRtfException, msg
+            try:
+                os.remove(self.__temp_file)
+            except OSError:
+                pass
+            raise InvalidRtfException, msg
         delete_info_obj = delete_info.DeleteInfo(
             in_file = self.__temp_file,
             copy = self.__copy,
rtf2xml DefaultEncoding:
@@ -74,9 +74,6 @@ class DefaultEncoding:
         if not self.__datafetched:
             self._encoding()
             self.__datafetched = True
-        if self.__platform == 'Macintosh':
-            code_page = self.__code_page
-        else:
-            code_page = 'ansicpg' + self.__code_page
+        code_page = 'ansicpg' + self.__code_page
         return self.__platform, code_page, self.__default_num
 
@@ -94,49 +91,60 @@ class DefaultEncoding:
 
     def _encoding(self):
        with open(self.__file, 'r') as read_obj:
+            cpfound = False
            if not self.__fetchraw:
                for line in read_obj:
                    self.__token_info = line[:16]
                    if self.__token_info == 'mi<mk<rtfhed-end':
                        break
-                    if self.__token_info == 'cw<ri<ansi-codpg':
-                        #cw<ri<ansi-codpg<nu<10000
-                        self.__code_page = line[20:-1] if int(line[20:-1]) \
-                            else '1252'
                    if self.__token_info == 'cw<ri<macintosh_':
                        self.__platform = 'Macintosh'
-                        self.__code_page = 'mac_roman'
                    elif self.__token_info == 'cw<ri<pc________':
                        self.__platform = 'IBMPC'
-                        self.__code_page = '437'
                    elif self.__token_info == 'cw<ri<pca_______':
                        self.__platform = 'OS/2'
-                        self.__code_page = '850'
+                    if self.__token_info == 'cw<ri<ansi-codpg' \
+                        and int(line[20:-1]):
+                        self.__code_page = line[20:-1]
                    if self.__token_info == 'cw<ri<deflt-font':
                        self.__default_num = line[20:-1]
+                        cpfound = True
                        #cw<ri<deflt-font<nu<0
+                if self.__platform != 'Windows' and \
+                    not cpfound:
+                    if self.__platform == 'Macintosh':
+                        self.__code_page = '10000'
+                    elif self.__platform == 'IBMPC':
+                        self.__code_page = '437'
+                    elif self.__platform == 'OS/2':
+                        self.__code_page = '850'
            else:
                fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
                fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
+
                for line in read_obj:
+                    if fenc.search(line):
+                        enc = fenc.search(line).group(1)
                    if fenccp.search(line):
                        cp = fenccp.search(line).group(1)
                        if not int(cp):
                            self.__code_page = cp
+                            cpfound = True
                            break
-                    if fenc.search(line):
-                        enc = fenc.search(line).group(1)
+                if self.__platform != 'Windows' and \
+                    not cpfound:
                    if enc == 'mac':
-                        self.__code_page = 'mac_roman'
+                        self.__code_page = '10000'
                    elif enc == 'pc':
                        self.__code_page = '437'
                    elif enc == 'pca':
                        self.__code_page = '850'
 
-# if __name__ == '__main__':
-#     encode_obj = DefaultEncoding(
-#         in_file = sys.argv[1],
-#         bug_handler = Exception,
-#         check_raw = True,
-#         )
-#     print encode_obj.get_codepage()
+if __name__ == '__main__':
+    import sys
+    encode_obj = DefaultEncoding(
+        in_file = sys.argv[1],
+        bug_handler = Exception,
+        check_raw = True,
+        )
+    print encode_obj.get_codepage()
|
|||||||
from calibre.ebooks.rtf2xml import copy
|
from calibre.ebooks.rtf2xml import copy
|
||||||
|
|
||||||
class DeleteInfo:
|
class DeleteInfo:
|
||||||
"""Delelet unecessary destination groups"""
|
"""Delete unecessary destination groups"""
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
in_file ,
|
in_file ,
|
||||||
bug_handler,
|
bug_handler,
|
||||||
@ -31,17 +31,14 @@ class DeleteInfo:
|
|||||||
self.__bug_handler = bug_handler
|
self.__bug_handler = bug_handler
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
|
self.__run_level = run_level
|
||||||
|
self.__initiate_allow()
|
||||||
self.__bracket_count= 0
|
self.__bracket_count= 0
|
||||||
self.__ob_count = 0
|
self.__ob_count = 0
|
||||||
self.__cb_count = 0
|
self.__cb_count = 0
|
||||||
# self.__after_asterisk = False
|
|
||||||
# self.__delete = 0
|
|
||||||
self.__initiate_allow()
|
|
||||||
self.__ob = 0
|
self.__ob = 0
|
||||||
self.__write_cb = False
|
self.__write_cb = False
|
||||||
self.__run_level = run_level
|
|
||||||
self.__found_delete = False
|
self.__found_delete = False
|
||||||
# self.__list = False
|
|
||||||
|
|
||||||
def __initiate_allow(self):
|
def __initiate_allow(self):
|
||||||
"""
|
"""
|
||||||
@ -57,6 +54,8 @@ class DeleteInfo:
|
|||||||
'cw<an<annotation',
|
'cw<an<annotation',
|
||||||
'cw<cm<comment___',
|
'cw<cm<comment___',
|
||||||
'cw<it<lovr-table',
|
'cw<it<lovr-table',
|
||||||
|
# info table
|
||||||
|
'cw<di<company___',
|
||||||
# 'cw<ls<list______',
|
# 'cw<ls<list______',
|
||||||
)
|
)
|
||||||
self.__not_allowable = (
|
self.__not_allowable = (
|
||||||
@ -116,7 +115,6 @@ class DeleteInfo:
|
|||||||
"""
|
"""
|
||||||
# Test for {\*}, in which case don't enter
|
# Test for {\*}, in which case don't enter
|
||||||
# delete state
|
# delete state
|
||||||
# self.__after_asterisk = False # only enter this function once
|
|
||||||
self.__found_delete = True
|
self.__found_delete = True
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__delete_count == self.__cb_count:
|
if self.__delete_count == self.__cb_count:
|
||||||
@ -128,7 +126,7 @@ class DeleteInfo:
|
|||||||
# not sure what happens here!
|
# not sure what happens here!
|
||||||
# believe I have a '{\*}
|
# believe I have a '{\*}
|
||||||
if self.__run_level > 3:
|
if self.__run_level > 3:
|
||||||
msg = 'flag problem\n'
|
msg = 'Flag problem\n'
|
||||||
raise self.__bug_handler, msg
|
raise self.__bug_handler, msg
|
||||||
return True
|
return True
|
||||||
elif self.__token_info in self.__allowable :
|
elif self.__token_info in self.__allowable :
|
||||||
@ -173,8 +171,8 @@ class DeleteInfo:
|
|||||||
Return True for all control words.
|
Return True for all control words.
|
||||||
Return False otherwise.
|
Return False otherwise.
|
||||||
"""
|
"""
|
||||||
if self.__delete_count == self.__cb_count and self.__token_info ==\
|
if self.__delete_count == self.__cb_count and \
|
||||||
'cb<nu<clos-brack':
|
self.__token_info == 'cb<nu<clos-brack':
|
||||||
self.__state = 'default'
|
self.__state = 'default'
|
||||||
if self.__write_cb:
|
if self.__write_cb:
|
||||||
self.__write_cb = False
|
self.__write_cb = False
|
||||||
@ -186,32 +184,24 @@ class DeleteInfo:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def delete_info(self):
|
def delete_info(self):
|
||||||
"""Main method for handling other methods. Read one line in at
|
"""Main method for handling other methods. Read one line at
|
||||||
a time, and determine whether to print the line based on the state."""
|
a time, and determine whether to print the line based on the state."""
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
with open(self.__write_to, 'w') as self.__write_obj:
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
#ob<nu<open-brack<0001
|
#ob<nu<open-brack<0001
|
||||||
to_print = True
|
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
self.__ob_count = line[-5:-1]
|
self.__ob_count = line[-5:-1]
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
self.__cb_count = line[-5:-1]
|
self.__cb_count = line[-5:-1]
|
||||||
|
# Get action to perform
|
||||||
action = self.__state_dict.get(self.__state)
|
action = self.__state_dict.get(self.__state)
|
||||||
if not action:
|
if not action:
|
||||||
sys.stderr.write(_('No action in dictionary state is "%s" \n')
|
sys.stderr.write('No action in dictionary state is "%s" \n'
|
||||||
% self.__state)
|
% self.__state)
|
||||||
to_print = action(line)
|
# Print if allowed by action
|
||||||
# if self.__after_asterisk:
|
if action(line):
|
||||||
# to_print = self.__asterisk_func(line)
|
|
||||||
# elif self.__list:
|
|
||||||
# self.__in_list_func(line)
|
|
||||||
# elif self.__delete:
|
|
||||||
# to_print = self.__delete_func(line)
|
|
||||||
# else:
|
|
||||||
# to_print = self.__default_func(line)
|
|
||||||
if to_print:
|
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
|
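
The rewritten loop folds the old to_print flag into the handler's return value. A minimal sketch of the same state-dispatch pattern (hypothetical handlers):

    state_dict = {
        'default': lambda line: True,     # copy the line through
        'delete':  lambda line: False,    # swallow it
    }
    state = 'default'
    out = []
    for line in ['keep me\n', 'and me\n']:
        action = state_dict.get(state)
        if action(line):                  # print only if the handler allows it
            out.append(line)
    print ''.join(out),
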
rtf2xml Info:
@@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os, tempfile
+import sys, os, tempfile, re
+
 from calibre.ebooks.rtf2xml import copy
+
 class Info:
     """
     Make tags for document-information
@@ -42,12 +44,14 @@ class Info:
         self.__copy = copy
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+
     def __initiate_values(self):
         """
         Initiate all values.
         """
         self.__text_string = ''
         self.__state = 'before_info_table'
+        self.rmspace = re.compile(r'\s+')
         self.__state_dict = {
             'before_info_table': self.__before_info_table_func,
             'after_info_table': self.__after_info_table_func,
@@ -58,27 +62,49 @@ class Info:
         self.__info_table_dict = {
             'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'),
             'cw<di<author____' : (self.__found_tag_with_text_func, 'author'),
+            'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+            'cw<di<manager___' : (self.__found_tag_with_text_func, 'manager'),
+            'cw<di<company___' : (self.__found_tag_with_text_func, 'company'),
             'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'),
+            'cw<di<category__' : (self.__found_tag_with_text_func, 'category'),
             'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'),
             'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'),
-            'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
+            'cw<di<linkbase__' : (self.__found_tag_with_text_func, 'hyperlink-base'),
+
             'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
             'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
-            'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
+            'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
+            'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
+            'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
+
             'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'),
             'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'),
+            'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
             'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
+            'cw<di<version___' : (self.__single_field_func, 'version'),
+            'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
+            'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
             }
         self.__token_dict = {
             'year______' : 'year',
             'month_____' : 'month',
             'day_______' : 'day',
             'minute____' : 'minute',
+            'second____' : 'second',
             'revis-time' : 'revision-time',
+            'create-tim' : 'creation-time',
+            'edit-time_' : 'editing-time',
+            'print-time' : 'printing-time',
+            'backuptime' : 'backup-time',
             'num-of-wor' : 'number-of-words',
             'num-of-chr' : 'number-of-characters',
+            'numofchrws' : 'number-of-characters-without-space',
             'num-of-pag' : 'number-of-pages',
+            'version___' : 'version',
+            'intern-ver' : 'internal-version-number',
+            'internalID' : 'internal-id-number',
             }
 
     def __before_info_table_func(self, line):
         """
         Required:
@@ -92,6 +118,7 @@ class Info:
         if self.__token_info == 'mi<mk<doc-in-beg':
             self.__state = 'in_info_table'
         self.__write_obj.write(line)
+
     def __in_info_table_func(self, line):
         """
         Requires:
@@ -112,6 +139,7 @@ class Info:
             action(line, tag)
         else:
             self.__write_obj.write(line)
+
     def __found_tag_with_text_func(self, line, tag):
         """
         Requires:
@@ -126,6 +154,7 @@ class Info:
         """
         self.__tag = tag
         self.__state = 'collect_text'
+
     def __collect_text_func(self, line):
         """
         Requires:
@@ -139,14 +168,17 @@ class Info:
         """
         if self.__token_info == 'mi<mk<docinf-end':
             self.__state = 'in_info_table'
-            self.__write_obj.write(
-            'mi<tg<open______<%s\n'
-            'tx<nu<__________<%s\n'
-            'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
-            )
+            #Don't print empty tags
+            if len(self.rmspace.sub('',self.__text_string)):
+                self.__write_obj.write(
+                'mi<tg<open______<%s\n'
+                'tx<nu<__________<%s\n'
+                'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
+                )
             self.__text_string = ''
         elif line[0:2] == 'tx':
             self.__text_string += line[17:-1]
+
     def __found_tag_with_tokens_func(self, line, tag):
         """
         Requires:
@@ -163,6 +195,7 @@ class Info:
         self.__state = 'collect_tokens'
         self.__text_string = 'mi<tg<empty-att_<%s' % tag
         #mi<tg<empty-att_<page-definition<margin>33\n
+
     def __collect_tokens_func(self, line):
         """
         Requires:
@@ -194,18 +227,19 @@ class Info:
         att = line[6:16]
         value = line[20:-1]
         att_changed = self.__token_dict.get(att)
-        if att_changed == None:
+        if att_changed is None:
             if self.__run_level > 3:
-                msg = 'no dictionary match for %s\n' % att
+                msg = 'No dictionary match for %s\n' % att
                 raise self.__bug_handler, msg
         else:
             self.__text_string += '<%s>%s' % (att_changed, value)
+
     def __single_field_func(self, line, tag):
         value = line[20:-1]
         self.__write_obj.write(
-            'mi<tg<empty-att_<%s'
-            '<%s>%s\n' % (tag, tag, value)
+            'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
             )
+
     def __after_info_table_func(self, line):
         """
         Requires:
@@ -217,6 +251,7 @@ class Info:
         the file.
         """
         self.__write_obj.write(line)
+
     def fix_info(self):
         """
         Requires:
@@ -234,20 +269,15 @@ class Info:
         information table, simply write the line to the output file.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module styles.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'wb') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('No matching state in module styles.py\n')
+                        sys.stderr.write(self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "info.data")
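
The new rmspace guard above skips writing a tag when the collected text is only whitespace; a trivial check (Python 2):

    import re
    rmspace = re.compile(r'\s+')
    for text in ['Calibre', '   ', '']:
        print bool(len(rmspace.sub('', text)))   # True, False, False
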
rtf2xml ProcessTokens:
@@ -70,7 +70,7 @@ class ProcessTokens:
         ';' : ('mc', ';', self.ms_sub_func),
         # this must be wrong
         '-' : ('mc', '-', self.ms_sub_func),
-        'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
+        'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
         # misc => ml
         '*' : ('ml', 'asterisk__', self.default_func),
         ':' : ('ml', 'colon_____', self.default_func),
@@ -78,7 +78,6 @@ class ProcessTokens:
         'backslash' : ('nu', '\\', self.text_func),
         'ob' : ('nu', '{', self.text_func),
         'cb' : ('nu', '}', self.text_func),
-        #'line' : ('nu', ' ', self.text_func), calibre
         # paragraph formatting => pf
         'page' : ('pf', 'page-break', self.default_func),
         'par' : ('pf', 'par-end___', self.default_func),
@@ -231,11 +230,15 @@ class ProcessTokens:
         'trhdr' : ('tb', 'row-header', self.default_func),
         # preamble => pr
         # document information => di
+        # TODO integrate \userprops
         'info' : ('di', 'doc-info__', self.default_func),
+        'title' : ('di', 'title_____', self.default_func),
         'author' : ('di', 'author____', self.default_func),
         'operator' : ('di', 'operator__', self.default_func),
-        'title' : ('di', 'title_____', self.default_func),
+        'manager' : ('di', 'manager___', self.default_func),
+        'company' : ('di', 'company___', self.default_func),
         'keywords' : ('di', 'keywords__', self.default_func),
+        'category' : ('di', 'category__', self.default_func),
         'doccomm' : ('di', 'doc-notes_', self.default_func),
         'comment' : ('di', 'doc-notes_', self.default_func),
         'subject' : ('di', 'subject___', self.default_func),
@@ -244,11 +247,19 @@ class ProcessTokens:
         'mo' : ('di', 'month_____', self.default_func),
         'dy' : ('di', 'day_______', self.default_func),
         'min' : ('di', 'minute____', self.default_func),
+        'sec' : ('di', 'second____', self.default_func),
         'revtim' : ('di', 'revis-time', self.default_func),
+        'edmins' : ('di', 'edit-time_', self.default_func),
+        'printim' : ('di', 'print-time', self.default_func),
+        'buptim' : ('di', 'backuptime', self.default_func),
         'nofwords' : ('di', 'num-of-wor', self.default_func),
         'nofchars' : ('di', 'num-of-chr', self.default_func),
+        'nofcharsws' : ('di', 'numofchrws', self.default_func),
         'nofpages' : ('di', 'num-of-pag', self.default_func),
-        'edmins' : ('di', 'edit-time_', self.default_func),
+        'version' : ('di', 'version___', self.default_func),
+        'vern' : ('di', 'intern-ver', self.default_func),
+        'hlinkbase' : ('di', 'linkbase__', self.default_func),
+        'id' : ('di', 'internalID', self.default_func),
         # headers and footers => hf
         'headerf' : ('hf', 'head-first', self.default_func),
         'headerl' : ('hf', 'head-left_', self.default_func),
@@ -605,7 +616,7 @@ class ProcessTokens:
     def ms_sub_func(self, pre, token, num):
         return 'tx<mc<__________<%s\n' % token
 
-    def hardline_func(self, pre, token, num):
+    def direct_conv_func(self, pre, token, num):
         return 'mi<tg<empty_____<%s\n' % token
 
     def default_func(self, pre, token, num):
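
direct_conv_func (renamed from hardline_func) emits a single empty-tag token, so for example \line becomes a hardline-break element in one step; standalone (Python 2):

    def direct_conv_func(pre, token, num):
        return 'mi<tg<empty_____<%s\n' % token

    print direct_conv_func('mi', 'hardline-break', None),
    # mi<tg<empty_____<hardline-break
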
@ -27,11 +27,13 @@ class Tokenize:
|
|||||||
bug_handler,
|
bug_handler,
|
||||||
copy = None,
|
copy = None,
|
||||||
run_level = 1,
|
run_level = 1,
|
||||||
):
|
# out_file = None,
|
||||||
|
):
|
||||||
self.__file = in_file
|
self.__file = in_file
|
||||||
self.__bug_handler = bug_handler
|
self.__bug_handler = bug_handler
|
||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
|
# self.__out_file = out_file
|
||||||
self.__compile_expressions()
|
self.__compile_expressions()
|
||||||
#variables
|
#variables
|
||||||
self.__uc_char = 0
|
self.__uc_char = 0
|
||||||
@ -113,6 +115,8 @@ class Tokenize:
|
|||||||
|
|
||||||
def __sub_reg_split(self,input_file):
|
def __sub_reg_split(self,input_file):
|
||||||
input_file = self.__replace_spchar.mreplace(input_file)
|
input_file = self.__replace_spchar.mreplace(input_file)
|
||||||
|
# this is for older RTF
|
||||||
|
input_file = self.__par_exp.sub('\n\\par \n', input_file)
|
||||||
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
|
||||||
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
|
||||||
#remove \n in bin data
|
#remove \n in bin data
|
||||||
@ -127,7 +131,7 @@ class Tokenize:
|
|||||||
# this is for older RTF
|
# this is for older RTF
|
||||||
#line = re.sub(self.__par_exp, '\\par ', line)
|
#line = re.sub(self.__par_exp, '\\par ', line)
|
||||||
#return filter(lambda x: len(x) > 0, \
|
#return filter(lambda x: len(x) > 0, \
|
||||||
#(self.__remove_line.sub('', x) for x in tokens))
|
#(self.__remove_line.sub('', x) for x in tokens))
|
||||||
|
|
||||||
def __compile_expressions(self):
|
def __compile_expressions(self):
|
||||||
SIMPLE_RPL = {
|
SIMPLE_RPL = {
|
||||||
@ -153,8 +157,6 @@ class Tokenize:
|
|||||||
# put a backslash in front of to eliminate special cases and
|
# put a backslash in front of to eliminate special cases and
|
||||||
# make processing easier
|
# make processing easier
|
||||||
"}": "\\}",
|
"}": "\\}",
|
||||||
# this is for older RTF
|
|
||||||
r'\\$': '\\par ',
|
|
||||||
}
|
}
|
||||||
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
self.__replace_spchar = MReplace(SIMPLE_RPL)
|
||||||
#add ;? in case of char following \u
|
#add ;? in case of char following \u
|
||||||
@ -168,10 +170,12 @@ class Tokenize:
         #why keep backslash whereas \is replaced before?
         #remove \n from endline char
         self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+        #this is for old RTF
+        self.__par_exp = re.compile(r'\\\n+')
+        # self.__par_exp = re.compile(r'\\$')
         #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
         #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
         #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
-        #self.__par_exp = re.compile(r'\\$')
         #self.__remove_line = re.compile(r'\n+')
         #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
         ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@ -199,7 +203,24 @@ class Tokenize:
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "tokenize.data")
+        # if self.__out_file:
+        #     self.__file = self.__out_file
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)

     #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+
+# import sys
+# def main(args=sys.argv):
+#     if len(args) < 1:
+#         print 'No file'
+#         return
+#     file = 'data_tokens.txt'
+#     if len(args) == 3:
+#         file = args[2]
+#     to = Tokenize(args[1], Exception, out_file = file)
+#     to.tokenize()
+
+# if __name__ == '__main__':
+#     sys.exit(main())
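Editor's note: the Tokenize hunks above move old-RTF paragraph handling (a lone backslash at end of line) out of the SIMPLE_RPL table and into a dedicated __par_exp regex that runs before the split. A minimal, self-contained sketch of that replace-then-split strategy follows; the mreplace helper here is a hypothetical stand-in for calibre's MReplace, while the regexes are copied from the diff.

    import re

    SIMPLE_RPL = {"}": "\\}"}   # escape specials up front, as the hunk's comment says

    def mreplace(text, table):
        # hypothetical stand-in for calibre's MReplace: one regex pass over all keys
        pat = re.compile("|".join(re.escape(k) for k in table))
        return pat.sub(lambda m: table[m.group(0)], text)

    par_exp = re.compile(r'\\\n+')   # old-style RTF: backslash followed by newline(s)
    splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")

    def split_tokens(rtf):
        rtf = mreplace(rtf, SIMPLE_RPL)
        # '\\\\par' in the template yields a literal backslash before 'par'
        rtf = par_exp.sub('\n\\\\par \n', rtf)
        return [t for t in splitexp.split(rtf) if t]

    print(split_tokens('hello\\b0 world\\\n\nbye'))
    # -> ['hello', '\\b0 ', 'world', '\n', '\\par ', '\n', 'bye']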
@ -106,7 +106,7 @@ class TXTInput(InputFormatPlugin):
             log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

         # Dehyphenate
-        dehyphenator = Dehyphenator()
+        dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
         txt = dehyphenator(txt,'txt', length)

         # We don't check for block because the processor assumes block.
@ -137,11 +137,6 @@ class TXTInput(InputFormatPlugin):
         setattr(options, 'format_scene_breaks', True)
         setattr(options, 'dehyphenate', True)

-        # Dehyphenate in cleanup mode for missed txt and markdown conversion
-        dehyphenator = Dehyphenator()
-        html = dehyphenator(html,'txt_cleanup', length)
-        html = dehyphenator(html,'html_cleanup', length)
-
         from calibre.customize.ui import plugin_for_input_format
         html_input = plugin_for_input_format('html')
         for opt in html_input.options:
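Editor's note: the TXT input change passes the conversion's verbosity and log through to the Dehyphenator and drops the redundant cleanup passes. As background, a toy dehyphenator (not calibre's, which also checks candidate joins against words seen elsewhere in the document) can be sketched as:

    import re

    def dehyphenate(text):
        # toy version: rejoin word fragments split across a line break by a hyphen
        return re.sub(r'(\w+)-\s*\n\s*(\w+)', r'\1\2', text)

    print(dehyphenate('an exam-\nple of hyphen-\nated text'))
    # -> 'an example of hyphenated text'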
@ -505,7 +505,7 @@ class FileDialog(QObject):
         self.selected_files = []
         if mode == QFileDialog.AnyFile:
             f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, ""))
-            if f and os.path.exists(f):
+            if f:
                 self.selected_files.append(f)
         elif mode == QFileDialog.ExistingFile:
             f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, ""))
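Editor's note: the os.path.exists(f) test was a bug for a save dialog, since the user is usually naming a file that does not exist yet, so valid selections were silently dropped. The guard reduces to:

    f = '/tmp/new-catalog.epub'   # hypothetical result of a save dialog
    selected_files = []
    if f:   # the old test, os.path.exists(f), rejects targets that do not exist yet
        selected_files.append(f)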
@ -28,7 +28,7 @@ class GenerateCatalogAction(InterfaceAction):

         if not ids:
             return error_dialog(self.gui, _('No books selected'),
-                    _('No books selected to generate catalog for'),
+                    _('No books selected for catalog generation'),
                     show=True)

         db = self.gui.library_view.model().db
@ -55,9 +55,9 @@ class GenerateCatalogAction(InterfaceAction):

     def catalog_generated(self, job):
         if job.result:
-            # Search terms nulled catalog results
-            return error_dialog(self.gui, _('No books found'),
-                    _("No books to catalog\nCheck job details"),
+            # Error during catalog generation
+            return error_dialog(self.gui, _('Catalog generation terminated'),
+                    job.result,
                     show=True)
         if job.failed:
             return self.gui.job_exception(job)
@ -94,7 +94,7 @@ class EditMetadataAction(InterfaceAction):
             get_social_metadata = config['get_social_metadata']
         else:
             get_social_metadata = set_social_metadata
-        from calibre.gui2.metadata import DoDownload
+        from calibre.gui2.metadata.bulk_download import DoDownload
         if set_social_metadata is not None and set_social_metadata:
             x = _('social metadata')
         else:
9
src/calibre/gui2/metadata/__init__.py
Normal file
@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
@ -730,7 +730,7 @@ class TagsModel(QAbstractItemModel): # {{{
             else:
                 collapse_model = 'partition'
                 collapse_template = tweaks['categories_collapsed_popularity_template']
-        collapse_letter = None
+        collapse_letter = collapse_letter_sk = None

         for i, r in enumerate(self.row_map):
             if self.hidden_categories and self.categories[i] in self.hidden_categories:
@ -782,8 +782,17 @@ class TagsModel(QAbstractItemModel): # {{{
                     ts = tag.sort
                     if not ts:
                         ts = ' '
-                    if upper(ts[0]) != collapse_letter:
+                    try:
+                        sk = sort_key(ts)[0]
+                    except:
+                        sk = ts[0]
+
+                    if sk != collapse_letter_sk:
                         collapse_letter = upper(ts[0])
+                        try:
+                            collapse_letter_sk = sort_key(collapse_letter)[0]
+                        except:
+                            collapse_letter_sk = collapse_letter
                         sub_cat = TagTreeItem(parent=category,
                                 data = collapse_letter,
                                 category_icon = category_node.icon,
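Editor's note: the point of this change is that partition letters are now compared by the first unit of a collation key rather than by the raw first character, so tags differing only in accents or case land under the same letter node. A standalone sketch of the idea; sort_key stands in for calibre.utils.icu.sort_key, and the bytes fallback is an assumption so the sketch runs outside calibre (it does no accent folding, which is exactly why ICU is preferred).

    from itertools import groupby

    try:
        from calibre.utils.icu import sort_key   # available inside calibre
    except ImportError:
        sort_key = lambda s: s.encode('utf-8')   # crude fallback for this sketch

    def bucket(ts):
        try:
            return sort_key(ts)[0]   # first unit of the collation key
        except Exception:
            return ts[0]             # plain first character as a last resort

    tags = sorted([u'Éclair', u'apple', u'Echo'], key=sort_key)
    groups = [(k, list(g)) for k, g in groupby(tags, key=bucket)]
    print(groups)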
@ -386,11 +386,13 @@ class LineEditECM(object):
         action_lower_case = case_menu.addAction(_('Lower Case'))
         action_swap_case = case_menu.addAction(_('Swap Case'))
         action_title_case = case_menu.addAction(_('Title Case'))
+        action_capitalize = case_menu.addAction(_('Capitalize'))

         self.connect(action_upper_case, SIGNAL('triggered()'), self.upper_case)
         self.connect(action_lower_case, SIGNAL('triggered()'), self.lower_case)
         self.connect(action_swap_case, SIGNAL('triggered()'), self.swap_case)
         self.connect(action_title_case, SIGNAL('triggered()'), self.title_case)
+        self.connect(action_capitalize, SIGNAL('triggered()'), self.capitalize)

         menu.addMenu(case_menu)
         menu.exec_(event.globalPos())
@ -408,6 +410,10 @@ class LineEditECM(object):
         from calibre.utils.titlecase import titlecase
         self.setText(titlecase(unicode(self.text())))

+    def capitalize(self):
+        from calibre.utils.icu import capitalize
+        self.setText(capitalize(unicode(self.text())))
+

 class EnLineEdit(LineEditECM, QLineEdit):

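Editor's note: the two hunks follow the widget's existing pattern, adding a QAction to the context menu's case submenu and wiring its old-style triggered() signal to a new handler. A minimal model of that pattern, assuming the PyQt4-era API used throughout this file:

    from PyQt4.QtCore import SIGNAL

    def add_case_action(widget, case_menu, label, handler):
        # same shape as the action_capitalize lines above
        action = case_menu.addAction(label)
        widget.connect(action, SIGNAL('triggered()'), handler)
        return action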
@ -1144,7 +1144,9 @@ class EPUB_MOBI(CatalogPlugin):
     def error(self):
         def fget(self):
             return self.__error
-        return property(fget=fget)
+        def fset(self, val):
+            self.__error = val
+        return property(fget=fget,fset=fset)

     @dynamic_property
     def generateForKindle(self):
         def fget(self):
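Editor's note: the error property goes from read-only to read/write so lower-level code can record a message for the GUI to display. Outside calibre's @dynamic_property decorator, the same change is the standard property pattern:

    class Catalog(object):
        def __init__(self):
            self.__error = None

        @property
        def error(self):
            return self.__error

        @error.setter          # the new fset in the diff
        def error(self, val):
            self.__error = val

    c = Catalog()
    c.error = 'catalog generation failed'   # now allowed; raised AttributeError before
    print(c.error)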
@ -1411,6 +1413,88 @@ class EPUB_MOBI(CatalogPlugin):
             except:
                 pass

+    def fetchBooksByAuthor(self):
+        '''
+        Generate a list of titles sorted by author from the database
+        return = Success
+        '''
+
+        self.updateProgressFullStep("Sorting database")
+
+        '''
+        # Sort titles case-insensitive, by author
+        self.booksByAuthor = sorted(self.booksByTitle,
+                         key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+        '''
+
+        self.booksByAuthor = list(self.booksByTitle)
+        self.booksByAuthor.sort(self.author_compare)
+
+        if False and self.verbose:
+            self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+            self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
+            for title in self.booksByAuthor:
+                self.opts.log.info((u" %-30s %-20s%5s " % \
+                                   (title['title'][:30],
+                                    title['series'][:20] if title['series'] else '',
+                                    title['series_index'],
+                                    )).encode('utf-8'))
+            raise SystemExit
+
+        # Build the unique_authors set from existing data
+        authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
+
+        # authors[] contains a list of all book authors, with multiple entries for multiple books by author
+        #   authors[]: (([0]:friendly  [1]:sort))
+        # unique_authors[]: (([0]:friendly  [1]:sort  [2]:book_count))
+        books_by_current_author = 0
+        current_author = authors[0]
+        multiple_authors = False
+        unique_authors = []
+        for (i,author) in enumerate(authors):
+            if author != current_author:
+                # Note that current_author and author are tuples: (friendly, sort)
+                multiple_authors = True
+
+            if author != current_author and i:
+                # Warn, exit if friendly matches previous, but sort doesn't
+                if author[0] == current_author[0]:
+                    error_msg = _('''
+\n*** Metadata error ***
+Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
+then rebuild the catalog.\n''').format(author[0])
+
+                    self.opts.log.warn(error_msg)
+                    self.error = error_msg
+                    return False
+
+                # New author, save the previous author/sort/count
+                unique_authors.append((current_author[0], icu_title(current_author[1]),
+                                       books_by_current_author))
+                current_author = author
+                books_by_current_author = 1
+            elif i==0 and len(authors) == 1:
+                # Allow for single-book lists
+                unique_authors.append((current_author[0], icu_title(current_author[1]),
+                                       books_by_current_author))
+            else:
+                books_by_current_author += 1
+        else:
+            # Add final author to list or single-author dataset
+            if (current_author == author and len(authors) > 1) or not multiple_authors:
+                unique_authors.append((current_author[0], icu_title(current_author[1]),
+                                       books_by_current_author))
+
+        if False and self.verbose:
+            self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
+            for author in unique_authors:
+                self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
+                                                           author[2])).encode('utf-8'))
+
+        self.authors = unique_authors
+        return True
+
     def fetchBooksByTitle(self):

         self.updateProgressFullStep("Fetching database")
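Editor's note: fetchBooksByAuthor (moved up from below, and changed to report errors through self.error instead of a log-only warning) is essentially a run-length pass over an author-sorted list. The same bookkeeping, sketched with itertools.groupby as a hypothetical helper rather than the plugin's own code:

    from itertools import groupby

    def unique_authors(authors):
        # authors: (friendly, sort) tuples, already ordered by author
        out = [(k[0], k[1], len(list(g))) for k, g in groupby(authors)]
        # two different sort values for one friendly name is the metadata
        # error the diff reports via self.error
        friendly = [a[0] for a in out]
        if len(friendly) != len(set(friendly)):
            raise ValueError("Inconsistent Author Sort values")
        return out

    print(unique_authors([('A. Adams', 'Adams, A.')] * 2 +
                         [('B. Brown', 'Brown, B.')]))
    # -> [('A. Adams', 'Adams, A.', 2), ('B. Brown', 'Brown, B.', 1)]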
@ -1562,90 +1646,9 @@ class EPUB_MOBI(CatalogPlugin):
                                       title['title_sort'][0:40])).decode('mac-roman'))
             return True
         else:
+            self.error = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.")
             return False

-    def fetchBooksByAuthor(self):
-        '''
-        Generate a list of titles sorted by author from the database
-        return = Success
-        '''
-
-        self.updateProgressFullStep("Sorting database")
-
-        '''
-        # Sort titles case-insensitive, by author
-        self.booksByAuthor = sorted(self.booksByTitle,
-                         key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
-        '''
-
-        self.booksByAuthor = list(self.booksByTitle)
-        self.booksByAuthor.sort(self.author_compare)
-
-        if False and self.verbose:
-            self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
-            self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
-            for title in self.booksByAuthor:
-                self.opts.log.info((u" %-30s %-20s%5s " % \
-                                   (title['title'][:30],
-                                    title['series'][:20] if title['series'] else '',
-                                    title['series_index'],
-                                    )).encode('utf-8'))
-            raise SystemExit
-
-        # Build the unique_authors set from existing data
-        authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
-
-        # authors[] contains a list of all book authors, with multiple entries for multiple books by author
-        #   authors[]: (([0]:friendly  [1]:sort))
-        # unique_authors[]: (([0]:friendly  [1]:sort  [2]:book_count))
-        books_by_current_author = 0
-        current_author = authors[0]
-        multiple_authors = False
-        unique_authors = []
-        for (i,author) in enumerate(authors):
-            if author != current_author:
-                # Note that current_author and author are tuples: (friendly, sort)
-                multiple_authors = True
-
-            if author != current_author and i:
-                # Warn, exit if friendly matches previous, but sort doesn't
-                if author[0] == current_author[0]:
-                    error_msg = _('''
-\n*** Metadata error ***
-Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.
-*** Terminating catalog generation ***\n''').format(author[0])
-
-                    self.opts.log.warn(error_msg)
-                    return False
-
-                # New author, save the previous author/sort/count
-                unique_authors.append((current_author[0], icu_title(current_author[1]),
-                                       books_by_current_author))
-                current_author = author
-                books_by_current_author = 1
-            elif i==0 and len(authors) == 1:
-                # Allow for single-book lists
-                unique_authors.append((current_author[0], icu_title(current_author[1]),
-                                       books_by_current_author))
-            else:
-                books_by_current_author += 1
-        else:
-            # Add final author to list or single-author dataset
-            if (current_author == author and len(authors) > 1) or not multiple_authors:
-                unique_authors.append((current_author[0], icu_title(current_author[1]),
-                                       books_by_current_author))
-
-        if False and self.verbose:
-            self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
-            for author in unique_authors:
-                self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
-                                                           author[2])).encode('utf-8'))
-
-        self.authors = unique_authors
-        return True
-
     def fetchBookmarks(self):
         '''
         Collect bookmarks for catalog entries
@ -5069,6 +5072,8 @@ then rebuild the catalog.
             abort_after_input_dump=False)
         plumber.merge_ui_recommendations(recommendations)
         plumber.run()
-        return 0
+        # returns to gui2.actions.catalog:catalog_generated()
+        return None
     else:
-        return 1
+        # returns to gui2.actions.catalog:catalog_generated()
+        return catalog.error
@ -693,8 +693,12 @@ def command_catalog(args, dbpath):
     }

     with plugin:
-        plugin.run(args[1], opts, get_db(dbpath, opts))
-    return 0
+        ret = plugin.run(args[1], opts, get_db(dbpath, opts))
+    if ret is None:
+        ret = 0
+    else:
+        ret = 1
+    return ret

 # end of GR additions

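Editor's note: together with the plugin change above, the CLI contract is now that run() returns None on success and an error message otherwise, which command_catalog folds back into a process exit code:

    def exit_code(ret):
        # None -> 0 (success); any error value (e.g. a message string) -> 1
        return 0 if ret is None else 1

    assert exit_code(None) == 0
    assert exit_code('No books found to catalog.') == 1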
@ -690,11 +690,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             mi = Metadata(None)

         aut_list = row[fm['au_map']]
-        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
+        if aut_list:
+            aut_list = [p.split(':::') for p in aut_list.split(':#:') if p]
+        else:
+            aut_list = []
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
-            aum.append(author)
+            aum.append(author.replace('|', ','))
             aus[author] = author_sort.replace('|', ',')
         mi.title = row[fm['title']]
         mi.authors = aum
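Editor's note: the au_map column packs authors as 'name:::sort' pairs joined by ':#:', with commas stored as '|'; the fix guards against an empty column and skips empty segments that would crash the tuple unpack. In isolation:

    def parse_au_map(raw):
        # raw is the packed au_map column; may be None or empty
        if raw:
            pairs = [p.split(':::') for p in raw.split(':#:') if p]
        else:
            pairs = []
        aum, aus = [], {}
        for author, author_sort in pairs:
            aum.append(author.replace('|', ','))
            aus[author] = author_sort.replace('|', ',')
        return aum, aus

    # a trailing separator would otherwise produce an empty pair and crash
    print(parse_au_map('Tolkien| J. R. R.:::Tolkien| J. R. R.:#:'))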
@ -437,6 +437,15 @@ My antivirus program claims |app| is a virus/trojan?

 Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.

+How do I backup |app|?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The most important thing to backup is the |app| library folder, that contains all your books and metadata. This is the folder you chose for your |app| library when you ran |app| for the first time. You can get the path to the library folder by clicking the |app| icon on the main toolbar. You must backup this complete folder with all its files and sub-folders.
+
+You can switch |app| to using a backed up library folder by simply clicking the |app| icon on the toolbar and choosing your backup library folder.
+
+If you want to backup the |app| configuration/plugins, you have to backup the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
+
 How do I use purchased EPUB books with |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
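Editor's note: a hedged illustration of the backup advice added above; the paths are hypothetical, and the library folder is whatever you chose on first run.

    import shutil

    # copy the whole library folder, sub-folders included
    # (shutil.copytree requires that the destination not exist yet)
    shutil.copytree('/home/user/Calibre Library', '/mnt/backup/Calibre Library')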
@ -77,7 +77,7 @@ class FormatterFunction(object):
                 exc_traceback)[-2:]).replace('\n', '')
             return _('Exception ' + info)

+all_builtin_functions = []
 class BuiltinFormatterFunction(FormatterFunction):
     def __init__(self):
         formatter_functions.register_builtin(self)
@ -88,6 +88,7 @@ class BuiltinFormatterFunction(FormatterFunction):
         except:
             lines = []
         self.program_text = ''.join(lines)
+        all_builtin_functions.append(self)

 class BuiltinStrcmp(BuiltinFormatterFunction):
     name = 'strcmp'
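Editor's note: the new module-level list is a simple construction-time registry, so every built-in formatter function records itself as it is instantiated and other code can enumerate them. Reduced to its essentials:

    all_builtin_functions = []

    class BuiltinFormatterFunction(object):
        def __init__(self):
            # record each built-in as it is constructed
            all_builtin_functions.append(self)

    class BuiltinStrcmp(BuiltinFormatterFunction):
        name = 'strcmp'

    BuiltinStrcmp()
    print(len(all_builtin_functions))   # -> 1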
@ -80,7 +80,7 @@ def icu_case_sensitive_strcmp(collator, a, b):

 def icu_capitalize(s):
     s = lower(s)
-    return s.replace(s[0], upper(s[0]), 1)
+    return s.replace(s[0], upper(s[0]), 1) if s else s

 load_icu()
 load_collator()
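Editor's note: the one-line fix guards the empty string, since s[0] raises IndexError on '', so icu_capitalize('') now returns '' instead of crashing. Behaviourally, as a plain-Python model without ICU's locale-aware lower/upper:

    def capitalize(s):
        s = s.lower()
        # replace the first occurrence of the first character, i.e. position 0
        return s.replace(s[0], s[0].upper(), 1) if s else s

    assert capitalize('') == ''
    assert capitalize('hELLO') == 'Hello'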