mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
py3: clean up some str() calls in ported code.
These calls are usually unnecessary, as they cast values we already know are (unicode) strings.
This commit is contained in:
parent
991c913bb2
commit
651fdaa129
@@ -5,6 +5,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from polyglot.builtins import native_string_type
|
||||||
|
|
||||||
|
|
||||||
class ConversionUserFeedBack(Exception):
|
class ConversionUserFeedBack(Exception):
|
||||||
|
|
||||||
@@ -25,4 +27,4 @@ class ConversionUserFeedBack(Exception):
|
|||||||
|
|
||||||
# Ensure exception uses fully qualified name as this is used to detect it in
|
# Ensure exception uses fully qualified name as this is used to detect it in
|
||||||
# the GUI.
|
# the GUI.
|
||||||
ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack')
|
ConversionUserFeedBack.__name__ = native_string_type('calibre.ebooks.conversion.ConversionUserFeedBack')
|
||||||
|
@@ -75,8 +75,8 @@ def smarten_punctuation(html, log=None):
|
|||||||
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
||||||
preprocessor = HeuristicProcessor(log=log)
|
preprocessor = HeuristicProcessor(log=log)
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
start = 'calibre-smartypants-'+str(uuid4())
|
start = 'calibre-smartypants-'+unicode_type(uuid4())
|
||||||
stop = 'calibre-smartypants-'+str(uuid4())
|
stop = 'calibre-smartypants-'+unicode_type(uuid4())
|
||||||
html = html.replace('<!--', start)
|
html = html.replace('<!--', start)
|
||||||
html = html.replace('-->', stop)
|
html = html.replace('-->', stop)
|
||||||
html = preprocessor.fix_nbsp_indents(html)
|
html = preprocessor.fix_nbsp_indents(html)
|
||||||
@@ -152,20 +152,20 @@ class DocAnalysis(object):
|
|||||||
maxLineLength=1900 # Discard larger than this to stay in range
|
maxLineLength=1900 # Discard larger than this to stay in range
|
||||||
buckets=20 # Each line is divided into a bucket based on length
|
buckets=20 # Each line is divided into a bucket based on length
|
||||||
|
|
||||||
# print "there are "+str(len(lines))+" lines"
|
# print("there are "+unicode_type(len(lines))+" lines")
|
||||||
# max = 0
|
# max = 0
|
||||||
# for line in self.lines:
|
# for line in self.lines:
|
||||||
# l = len(line)
|
# l = len(line)
|
||||||
# if l > max:
|
# if l > max:
|
||||||
# max = l
|
# max = l
|
||||||
# print "max line found is "+str(max)
|
# print("max line found is "+unicode_type(max))
|
||||||
# Build the line length histogram
|
# Build the line length histogram
|
||||||
hRaw = [0 for i in range(0,buckets)]
|
hRaw = [0 for i in range(0,buckets)]
|
||||||
for line in self.lines:
|
for line in self.lines:
|
||||||
l = len(line)
|
l = len(line)
|
||||||
if l > minLineLength and l < maxLineLength:
|
if l > minLineLength and l < maxLineLength:
|
||||||
l = int(l/100)
|
l = int(l/100)
|
||||||
# print "adding "+str(l)
|
# print("adding "+unicode_type(l))
|
||||||
hRaw[l]+=1
|
hRaw[l]+=1
|
||||||
|
|
||||||
# Normalize the histogram into percents
|
# Normalize the histogram into percents
|
||||||
@@ -174,8 +174,8 @@ class DocAnalysis(object):
|
|||||||
h = [float(count)/totalLines for count in hRaw]
|
h = [float(count)/totalLines for count in hRaw]
|
||||||
else:
|
else:
|
||||||
h = []
|
h = []
|
||||||
# print "\nhRaw histogram lengths are: "+str(hRaw)
|
# print("\nhRaw histogram lengths are: "+unicode_type(hRaw))
|
||||||
# print " percents are: "+str(h)+"\n"
|
# print(" percents are: "+unicode_type(h)+"\n")
|
||||||
|
|
||||||
# Find the biggest bucket
|
# Find the biggest bucket
|
||||||
maxValue = 0
|
maxValue = 0
|
||||||
@@ -184,10 +184,10 @@ class DocAnalysis(object):
|
|||||||
maxValue = h[i]
|
maxValue = h[i]
|
||||||
|
|
||||||
if maxValue < percent:
|
if maxValue < percent:
|
||||||
# print "Line lengths are too variable. Not unwrapping."
|
# print("Line lengths are too variable. Not unwrapping.")
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
# print str(maxValue)+" of the lines were in one bucket"
|
# print(unicode_type(maxValue)+" of the lines were in one bucket")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -232,7 +232,7 @@ class Dehyphenator(object):
|
|||||||
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
|
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
|
||||||
lookupword = self.removeprefix.sub('', lookupword)
|
lookupword = self.removeprefix.sub('', lookupword)
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
|
self.log("lookup word is: "+lookupword+", orig is: " + hyphenated)
|
||||||
try:
|
try:
|
||||||
searchresult = self.html.find(lookupword.lower())
|
searchresult = self.html.find(lookupword.lower())
|
||||||
except:
|
except:
|
||||||
@@ -240,33 +240,33 @@ class Dehyphenator(object):
|
|||||||
if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
|
if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
|
||||||
if self.html.find(lookupword) != -1 or searchresult != -1:
|
if self.html.find(lookupword) != -1 or searchresult != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated))
|
self.log(" Cleanup:returned dehyphenated word: " + dehyphenated)
|
||||||
return dehyphenated
|
return dehyphenated
|
||||||
elif self.html.find(hyphenated) != -1:
|
elif self.html.find(hyphenated) != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returned hyphenated word: " + str(hyphenated))
|
self.log(" Cleanup:returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
else:
|
else:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
|
self.log(" Cleanup:returning original text "+firsthalf+" + linefeed "+secondhalf)
|
||||||
return firsthalf+'\u2014'+wraptags+secondhalf
|
return firsthalf+'\u2014'+wraptags+secondhalf
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
|
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("too short, returned hyphenated word: " + str(hyphenated))
|
self.log("too short, returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
if len(firsthalf) <= 2 and len(secondhalf) <= 2:
|
if len(firsthalf) <= 2 and len(secondhalf) <= 2:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("too short, returned hyphenated word: " + str(hyphenated))
|
self.log("too short, returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
if self.html.find(lookupword) != -1 or searchresult != -1:
|
if self.html.find(lookupword) != -1 or searchresult != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" returned dehyphenated word: " + str(dehyphenated))
|
self.log(" returned dehyphenated word: " + dehyphenated)
|
||||||
return dehyphenated
|
return dehyphenated
|
||||||
else:
|
else:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" returned hyphenated word: " + str(hyphenated))
|
self.log(" returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
|
|
||||||
def __call__(self, html, format, length=1):
|
def __call__(self, html, format, length=1):
|
||||||
@@ -595,7 +595,7 @@ class HTMLPreProcessor(object):
|
|||||||
docanalysis = DocAnalysis('pdf', html)
|
docanalysis = DocAnalysis('pdf', html)
|
||||||
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
|
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
|
||||||
if length:
|
if length:
|
||||||
# print "The pdf line length returned is " + str(length)
|
# print("The pdf line length returned is " + unicode_type(length))
|
||||||
# unwrap em/en dashes
|
# unwrap em/en dashes
|
||||||
end_rules.append((re.compile(
|
end_rules.append((re.compile(
|
||||||
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
|
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user