py3: clean up some str() calls in ported code.

These str() calls are usually not needed, as they cast values we already
know are (unicode) strings.
This commit is contained in:
Eli Schwartz 2019-05-27 12:32:30 -04:00
parent 991c913bb2
commit 651fdaa129
No known key found for this signature in database
GPG Key ID: CEB167EFB5722BD6
2 changed files with 21 additions and 19 deletions

View File

@ -5,6 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from polyglot.builtins import native_string_type
class ConversionUserFeedBack(Exception): class ConversionUserFeedBack(Exception):
@ -25,4 +27,4 @@ class ConversionUserFeedBack(Exception):
# Ensure exception uses fully qualified name as this is used to detect it in # Ensure exception uses fully qualified name as this is used to detect it in
# the GUI. # the GUI.
ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack') ConversionUserFeedBack.__name__ = native_string_type('calibre.ebooks.conversion.ConversionUserFeedBack')

View File

@ -75,8 +75,8 @@ def smarten_punctuation(html, log=None):
from calibre.ebooks.conversion.utils import HeuristicProcessor from calibre.ebooks.conversion.utils import HeuristicProcessor
preprocessor = HeuristicProcessor(log=log) preprocessor = HeuristicProcessor(log=log)
from uuid import uuid4 from uuid import uuid4
start = 'calibre-smartypants-'+str(uuid4()) start = 'calibre-smartypants-'+unicode_type(uuid4())
stop = 'calibre-smartypants-'+str(uuid4()) stop = 'calibre-smartypants-'+unicode_type(uuid4())
html = html.replace('<!--', start) html = html.replace('<!--', start)
html = html.replace('-->', stop) html = html.replace('-->', stop)
html = preprocessor.fix_nbsp_indents(html) html = preprocessor.fix_nbsp_indents(html)
@ -152,20 +152,20 @@ class DocAnalysis(object):
maxLineLength=1900 # Discard larger than this to stay in range maxLineLength=1900 # Discard larger than this to stay in range
buckets=20 # Each line is divided into a bucket based on length buckets=20 # Each line is divided into a bucket based on length
# print "there are "+str(len(lines))+" lines" # print("there are "+unicode_type(len(lines))+" lines")
# max = 0 # max = 0
# for line in self.lines: # for line in self.lines:
# l = len(line) # l = len(line)
# if l > max: # if l > max:
# max = l # max = l
# print "max line found is "+str(max) # print("max line found is "+unicode_type(max))
# Build the line length histogram # Build the line length histogram
hRaw = [0 for i in range(0,buckets)] hRaw = [0 for i in range(0,buckets)]
for line in self.lines: for line in self.lines:
l = len(line) l = len(line)
if l > minLineLength and l < maxLineLength: if l > minLineLength and l < maxLineLength:
l = int(l/100) l = int(l/100)
# print "adding "+str(l) # print("adding "+unicode_type(l))
hRaw[l]+=1 hRaw[l]+=1
# Normalize the histogram into percents # Normalize the histogram into percents
@ -174,8 +174,8 @@ class DocAnalysis(object):
h = [float(count)/totalLines for count in hRaw] h = [float(count)/totalLines for count in hRaw]
else: else:
h = [] h = []
# print "\nhRaw histogram lengths are: "+str(hRaw) # print("\nhRaw histogram lengths are: "+unicode_type(hRaw))
# print " percents are: "+str(h)+"\n" # print(" percents are: "+unicode_type(h)+"\n")
# Find the biggest bucket # Find the biggest bucket
maxValue = 0 maxValue = 0
@ -184,10 +184,10 @@ class DocAnalysis(object):
maxValue = h[i] maxValue = h[i]
if maxValue < percent: if maxValue < percent:
# print "Line lengths are too variable. Not unwrapping." # print("Line lengths are too variable. Not unwrapping.")
return False return False
else: else:
# print str(maxValue)+" of the lines were in one bucket" # print(unicode_type(maxValue)+" of the lines were in one bucket")
return True return True
@ -232,7 +232,7 @@ class Dehyphenator(object):
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None: if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword) lookupword = self.removeprefix.sub('', lookupword)
if self.verbose > 2: if self.verbose > 2:
self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)) self.log("lookup word is: "+lookupword+", orig is: " + hyphenated)
try: try:
searchresult = self.html.find(lookupword.lower()) searchresult = self.html.find(lookupword.lower())
except: except:
@ -240,33 +240,33 @@ class Dehyphenator(object):
if self.format == 'html_cleanup' or self.format == 'txt_cleanup': if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated)) self.log(" Cleanup:returned dehyphenated word: " + dehyphenated)
return dehyphenated return dehyphenated
elif self.html.find(hyphenated) != -1: elif self.html.find(hyphenated) != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returned hyphenated word: " + str(hyphenated)) self.log(" Cleanup:returned hyphenated word: " + hyphenated)
return hyphenated return hyphenated
else: else:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)) self.log(" Cleanup:returning original text "+firsthalf+" + linefeed "+secondhalf)
return firsthalf+'\u2014'+wraptags+secondhalf return firsthalf+'\u2014'+wraptags+secondhalf
else: else:
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6: if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
if self.verbose > 2: if self.verbose > 2:
self.log("too short, returned hyphenated word: " + str(hyphenated)) self.log("too short, returned hyphenated word: " + hyphenated)
return hyphenated return hyphenated
if len(firsthalf) <= 2 and len(secondhalf) <= 2: if len(firsthalf) <= 2 and len(secondhalf) <= 2:
if self.verbose > 2: if self.verbose > 2:
self.log("too short, returned hyphenated word: " + str(hyphenated)) self.log("too short, returned hyphenated word: " + hyphenated)
return hyphenated return hyphenated
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" returned dehyphenated word: " + str(dehyphenated)) self.log(" returned dehyphenated word: " + dehyphenated)
return dehyphenated return dehyphenated
else: else:
if self.verbose > 2: if self.verbose > 2:
self.log(" returned hyphenated word: " + str(hyphenated)) self.log(" returned hyphenated word: " + hyphenated)
return hyphenated return hyphenated
def __call__(self, html, format, length=1): def __call__(self, html, format, length=1):
@ -595,7 +595,7 @@ class HTMLPreProcessor(object):
docanalysis = DocAnalysis('pdf', html) docanalysis = DocAnalysis('pdf', html)
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor')) length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
if length: if length:
# print "The pdf line length returned is " + str(length) # print("The pdf line length returned is " + unicode_type(length))
# unwrap em/en dashes # unwrap em/en dashes
end_rules.append((re.compile( end_rules.append((re.compile(
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: '')) r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))