mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: More unicode porting
This commit is contained in:
parent
06dc7dd15b
commit
4730fce41b
@ -1,3 +1,4 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
'''
|
||||
Created on 23 Sep 2010
|
||||
|
||||
@ -23,7 +24,7 @@ class _Parser(object):
|
||||
LEX_NUM = 4
|
||||
LEX_EOF = 5
|
||||
|
||||
LEX_CONSTANTS = frozenset([LEX_STR, LEX_NUM])
|
||||
LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))
|
||||
|
||||
def __init__(self, val, prog, funcs, parent):
|
||||
self.lex_pos = 0
|
||||
@ -205,7 +206,7 @@ class TemplateFormatter(string.Formatter):
|
||||
elif 'bcdoxXn'.find(typ) >= 0:
|
||||
try:
|
||||
val = int(val)
|
||||
except:
|
||||
except Exception:
|
||||
raise ValueError(
|
||||
_('format: type {0} requires an integer value, got {1}').format(typ, val))
|
||||
elif 'eEfFgGn%'.find(typ) >= 0:
|
||||
|
@ -18,12 +18,12 @@ from PyQt5.QtCore import QBuffer, QByteArray, Qt
|
||||
from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
|
||||
|
||||
from calibre import fit_image, force_unicode
|
||||
from calibre.constants import iswindows, plugins
|
||||
from calibre.constants import iswindows, plugins, ispy3
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.config_base import tweaks
|
||||
from calibre.utils.filenames import atomic_rename
|
||||
from calibre.utils.imghdr import what
|
||||
from polyglot.builtins import string_or_bytes
|
||||
from polyglot.builtins import string_or_bytes, unicode_type
|
||||
|
||||
# Utilities {{{
|
||||
imageops, imageops_err = plugins['imageops']
|
||||
@ -465,11 +465,11 @@ def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
|
||||
cmd[cmd.index(q)] = r
|
||||
if not as_filter:
|
||||
repl(True, iname), repl(False, oname)
|
||||
if iswindows:
|
||||
if iswindows and not ispy3:
|
||||
# subprocess in python 2 cannot handle unicode strings that are not
|
||||
# encodable in mbcs, so we fail here, where it is more explicit,
|
||||
# instead.
|
||||
cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
|
||||
cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
|
||||
if isinstance(cwd, type('')):
|
||||
cwd = cwd.encode('mbcs')
|
||||
stdin = subprocess.PIPE if as_filter else None
|
||||
@ -534,7 +534,7 @@ def encode_jpeg(file_path, quality=80):
|
||||
from calibre.utils.speedups import ReadOnlyFileBuffer
|
||||
quality = max(0, min(100, int(quality)))
|
||||
exe = get_exe_path('cjpeg')
|
||||
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
|
||||
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
|
||||
img = QImage()
|
||||
if not img.load(file_path):
|
||||
raise ValueError('%s is not a valid image file' % file_path)
|
||||
|
@ -16,7 +16,7 @@ from itertools import islice
|
||||
from calibre import detect_ncpus as cpu_count, as_unicode
|
||||
from calibre.constants import plugins, filesystem_encoding
|
||||
from calibre.utils.icu import primary_sort_key, primary_find, primary_collator
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type, range, zip, raw_input, filter, getcwd
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type, range, zip, raw_input, filter, getcwd, unicode_type
|
||||
from polyglot.queue import Queue
|
||||
|
||||
DEFAULT_LEVEL1 = '/'
|
||||
@ -294,12 +294,12 @@ def test(return_tests=False):
|
||||
|
||||
start = memory()
|
||||
for i in range(10):
|
||||
doit(str(i))
|
||||
doit(unicode_type(i))
|
||||
gc.collect()
|
||||
used10 = memory() - start
|
||||
start = memory()
|
||||
for i in range(100):
|
||||
doit(str(i))
|
||||
doit(unicode_type(i))
|
||||
gc.collect()
|
||||
used100 = memory() - start
|
||||
if used100 > 0 and used10 > 0:
|
||||
|
@ -1,4 +1,5 @@
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
try:
|
||||
from time import monotonic
|
||||
|
@ -16,7 +16,7 @@ from threading import Lock, local
|
||||
|
||||
from polyglot import socketserver
|
||||
from polyglot.http_server import HTTPServer, SimpleHTTPRequestHandler
|
||||
from polyglot.builtins import error_message, getcwd
|
||||
from polyglot.builtins import error_message, getcwd, unicode_type
|
||||
|
||||
# Compiler {{{
|
||||
|
||||
@ -107,9 +107,9 @@ class HTTPRequestHandler(SimpleHTTPRequestHandler): # {{{
|
||||
self.send_response(rtype)
|
||||
self.send_header("Accept-Ranges", "bytes")
|
||||
self.send_header("Content-Range", 'bytes ' +
|
||||
str(start_range) + '-' + str(end_range - 1) + '/' + str(size))
|
||||
self.send_header("Content-Type", str(mimetype))
|
||||
self.send_header("Content-Length", str(end_range - start_range))
|
||||
unicode_type(start_range) + '-' + unicode_type(end_range - 1) + '/' + unicode_type(size))
|
||||
self.send_header("Content-Type", unicode_type(mimetype))
|
||||
self.send_header("Content-Length", unicode_type(end_range - start_range))
|
||||
self.send_header("Last-Modified", self.date_time_string(int(mtime)))
|
||||
self.end_headers()
|
||||
return f, start_range, end_range
|
||||
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/python2
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>"
|
||||
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
|
||||
|
||||
@ -525,7 +527,7 @@ def smartyPants(text, attr='1'):
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def educateQuotes(str):
|
||||
def educateQuotes(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
@ -539,32 +541,32 @@ def educateQuotes(str):
|
||||
|
||||
# Special case if the very first character is a quote
|
||||
# followed by punctuation at a non-word-break. Close the quotes by brute force:
|
||||
str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", str)
|
||||
str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", str)
|
||||
text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", text)
|
||||
text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", text)
|
||||
|
||||
# Special case for double sets of quotes, e.g.:
|
||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||
str = re.sub(r""""'(?=\w)""", """“‘""", str)
|
||||
str = re.sub(r"""'"(?=\w)""", """‘“""", str)
|
||||
str = re.sub(r'''""(?=\w)''', """““""", str)
|
||||
str = re.sub(r"""''(?=\w)""", """‘‘""", str)
|
||||
str = re.sub(r'''\"\'''', """”’""", str)
|
||||
str = re.sub(r'''\'\"''', """’”""", str)
|
||||
str = re.sub(r'''""''', """””""", str)
|
||||
str = re.sub(r"""''""", """’’""", str)
|
||||
text = re.sub(r""""'(?=\w)""", """“‘""", text)
|
||||
text = re.sub(r"""'"(?=\w)""", """‘“""", text)
|
||||
text = re.sub(r'''""(?=\w)''', """““""", text)
|
||||
text = re.sub(r"""''(?=\w)""", """‘‘""", text)
|
||||
text = re.sub(r'''\"\'''', """”’""", text)
|
||||
text = re.sub(r'''\'\"''', """’”""", text)
|
||||
text = re.sub(r'''""''', """””""", text)
|
||||
text = re.sub(r"""''""", """’’""", text)
|
||||
|
||||
# Special case for decade abbreviations (the '80s --> ’80s):
|
||||
# See http://practicaltypography.com/apostrophes.html
|
||||
str = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1’""", str)
|
||||
text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1’""", text)
|
||||
# Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″
|
||||
str = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2′\3″', str)
|
||||
text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2′\3″', text)
|
||||
|
||||
# Special case for quotes inside of other entities, e.g.:
|
||||
# <p>A double quote--"within dashes"--would be nice.</p>
|
||||
str = re.sub(r"""(?<=\W)"(?=\w)""", r"""“""", str)
|
||||
str = re.sub(r"""(?<=\W)'(?=\w)""", r"""‘""", str)
|
||||
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""”""", str)
|
||||
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""’""", str)
|
||||
text = re.sub(r"""(?<=\W)"(?=\w)""", r"""“""", text)
|
||||
text = re.sub(r"""(?<=\W)'(?=\w)""", r"""‘""", text)
|
||||
text = re.sub(r"""(?<=\w)"(?=\W)""", r"""”""", text)
|
||||
text = re.sub(r"""(?<=\w)'(?=\W)""", r"""’""", text)
|
||||
|
||||
# The following are commented out as smartypants tokenizes text by
|
||||
# stripping out html tags. Therefore, there is no guarantee that the
|
||||
@ -572,12 +574,12 @@ def educateQuotes(str):
|
||||
# meaningful
|
||||
|
||||
# Special case for Quotes at end of line with a preceding space (may change just to end of line)
|
||||
# str = re.sub(r"""(?<=\s)"$""", r"""”""", str)
|
||||
# str = re.sub(r"""(?<=\s)'$""", r"""’""", str)
|
||||
# text = re.sub(r"""(?<=\s)"$""", r"""”""", text)
|
||||
# text = re.sub(r"""(?<=\s)'$""", r"""’""", text)
|
||||
|
||||
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
|
||||
# str = re.sub(r"""^"(?=\s)""", r"""“""", str)
|
||||
# str = re.sub(r"""^'(?=\s)""", r"""‘""", str)
|
||||
# text = re.sub(r"""^"(?=\s)""", r"""“""", text)
|
||||
# text = re.sub(r"""^'(?=\s)""", r"""‘""", text)
|
||||
|
||||
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
|
||||
dec_dashes = r"""–|—"""
|
||||
@ -595,24 +597,24 @@ def educateQuotes(str):
|
||||
' # the quote
|
||||
(?=\w) # followed by a word character
|
||||
""" % (dec_dashes,), re.VERBOSE)
|
||||
str = opening_single_quotes_regex.sub(r"""\1‘""", str)
|
||||
text = opening_single_quotes_regex.sub(r"""\1‘""", text)
|
||||
|
||||
closing_single_quotes_regex = re.compile(r"""
|
||||
(%s)
|
||||
'
|
||||
(?!\s | s\b | \d)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
str = closing_single_quotes_regex.sub(r"""\1’""", str)
|
||||
text = closing_single_quotes_regex.sub(r"""\1’""", text)
|
||||
|
||||
closing_single_quotes_regex = re.compile(r"""
|
||||
(%s)
|
||||
'
|
||||
(\s | s\b)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
str = closing_single_quotes_regex.sub(r"""\1’\2""", str)
|
||||
text = closing_single_quotes_regex.sub(r"""\1’\2""", text)
|
||||
|
||||
# Any remaining single quotes should be opening ones:
|
||||
str = re.sub(r"""'""", r"""‘""", str)
|
||||
text = re.sub(r"""'""", r"""‘""", text)
|
||||
|
||||
# Get most opening double quotes:
|
||||
opening_double_quotes_regex = re.compile(r"""
|
||||
@ -627,7 +629,7 @@ def educateQuotes(str):
|
||||
" # the quote
|
||||
(?=\w) # followed by a word character
|
||||
""" % (dec_dashes,), re.VERBOSE)
|
||||
str = opening_double_quotes_regex.sub(r"""\1“""", str)
|
||||
text = opening_double_quotes_regex.sub(r"""\1“""", text)
|
||||
|
||||
# Double closing quotes:
|
||||
closing_double_quotes_regex = re.compile(r"""
|
||||
@ -635,25 +637,25 @@ def educateQuotes(str):
|
||||
"
|
||||
(?=\s)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
str = closing_double_quotes_regex.sub(r"""”""", str)
|
||||
text = closing_double_quotes_regex.sub(r"""”""", text)
|
||||
|
||||
closing_double_quotes_regex = re.compile(r"""
|
||||
(%s) # character that indicates the quote should be closing
|
||||
"
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
str = closing_double_quotes_regex.sub(r"""\1”""", str)
|
||||
text = closing_double_quotes_regex.sub(r"""\1”""", text)
|
||||
|
||||
if str.endswith('-"'):
|
||||
if text.endswith('-"'):
|
||||
# A string that ends with -" is sometimes used for dialogue
|
||||
str = str[:-1] + '”'
|
||||
text = text[:-1] + '”'
|
||||
|
||||
# Any remaining quotes should be opening ones.
|
||||
str = re.sub(r'"', r"""“""", str)
|
||||
text = re.sub(r'"', r"""“""", text)
|
||||
|
||||
return str
|
||||
return text
|
||||
|
||||
|
||||
def educateBackticks(str):
|
||||
def educateBackticks(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with ``backticks'' -style double quotes
|
||||
@ -662,12 +664,12 @@ def educateBackticks(str):
|
||||
Example output: “Isn't this fun?”
|
||||
"""
|
||||
|
||||
str = re.sub(r"""``""", r"""“""", str)
|
||||
str = re.sub(r"""''""", r"""”""", str)
|
||||
return str
|
||||
text = re.sub(r"""``""", r"""“""", text)
|
||||
text = re.sub(r"""''""", r"""”""", text)
|
||||
return text
|
||||
|
||||
|
||||
def educateSingleBackticks(str):
|
||||
def educateSingleBackticks(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with `backticks' -style single quotes
|
||||
@ -677,12 +679,12 @@ def educateSingleBackticks(str):
|
||||
Example output: ‘Isn’t this fun?’
|
||||
"""
|
||||
|
||||
str = re.sub(r"""`""", r"""‘""", str)
|
||||
str = re.sub(r"""'""", r"""’""", str)
|
||||
return str
|
||||
text = re.sub(r"""`""", r"""‘""", text)
|
||||
text = re.sub(r"""'""", r"""’""", text)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashes(str):
|
||||
def educateDashes(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
@ -690,12 +692,12 @@ def educateDashes(str):
|
||||
an em-dash HTML entity.
|
||||
"""
|
||||
|
||||
str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards)
|
||||
str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards)
|
||||
return str
|
||||
text = re.sub(r"""---""", r"""–""", text) # en (yes, backwards)
|
||||
text = re.sub(r"""--""", r"""—""", text) # em (yes, backwards)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashesOldSchool(str):
|
||||
def educateDashesOldSchool(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
@ -704,12 +706,12 @@ def educateDashesOldSchool(str):
|
||||
an em-dash HTML entity.
|
||||
"""
|
||||
|
||||
str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards)
|
||||
str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards)
|
||||
return str
|
||||
text = re.sub(r"""---""", r"""—""", text) # em (yes, backwards)
|
||||
text = re.sub(r"""--""", r"""–""", text) # en (yes, backwards)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashesOldSchoolInverted(str):
|
||||
def educateDashesOldSchoolInverted(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
@ -724,12 +726,12 @@ def educateDashesOldSchoolInverted(str):
|
||||
the shortcut should be shorter to type. (Thanks to Aaron
|
||||
Swartz for the idea.)
|
||||
"""
|
||||
str = re.sub(r"""---""", r"""–""", str) # em
|
||||
str = re.sub(r"""--""", r"""—""", str) # en
|
||||
return str
|
||||
text = re.sub(r"""---""", r"""–""", text) # em
|
||||
text = re.sub(r"""--""", r"""—""", text) # en
|
||||
return text
|
||||
|
||||
|
||||
def educateEllipses(str):
|
||||
def educateEllipses(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with each instance of "..." translated to
|
||||
@ -739,12 +741,12 @@ def educateEllipses(str):
|
||||
Example output: Huh…?
|
||||
"""
|
||||
|
||||
str = re.sub(r"""\.\.\.""", r"""…""", str)
|
||||
str = re.sub(r"""\. \. \.""", r"""…""", str)
|
||||
return str
|
||||
text = re.sub(r"""\.\.\.""", r"""…""", text)
|
||||
text = re.sub(r"""\. \. \.""", r"""…""", text)
|
||||
return text
|
||||
|
||||
|
||||
def stupefyEntities(str):
|
||||
def stupefyEntities(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with each SmartyPants HTML entity translated to
|
||||
@ -754,21 +756,21 @@ def stupefyEntities(str):
|
||||
Example output: "Hello -- world."
|
||||
"""
|
||||
|
||||
str = re.sub(r"""–""", r"""-""", str) # en-dash
|
||||
str = re.sub(r"""—""", r"""--""", str) # em-dash
|
||||
text = re.sub(r"""–""", r"""-""", text) # en-dash
|
||||
text = re.sub(r"""—""", r"""--""", text) # em-dash
|
||||
|
||||
str = re.sub(r"""‘""", r"""'""", str) # open single quote
|
||||
str = re.sub(r"""’""", r"""'""", str) # close single quote
|
||||
text = re.sub(r"""‘""", r"""'""", text) # open single quote
|
||||
text = re.sub(r"""’""", r"""'""", text) # close single quote
|
||||
|
||||
str = re.sub(r"""“""", r'''"''', str) # open double quote
|
||||
str = re.sub(r"""”""", r'''"''', str) # close double quote
|
||||
text = re.sub(r"""“""", r'''"''', text) # open double quote
|
||||
text = re.sub(r"""”""", r'''"''', text) # close double quote
|
||||
|
||||
str = re.sub(r"""…""", r"""...""", str) # ellipsis
|
||||
text = re.sub(r"""…""", r"""...""", text) # ellipsis
|
||||
|
||||
return str
|
||||
return text
|
||||
|
||||
|
||||
def processEscapes(str):
|
||||
def processEscapes(text):
|
||||
r"""
|
||||
Parameter: String.
|
||||
Returns: The string, after processing the following backslash
|
||||
@ -784,17 +786,17 @@ def processEscapes(str):
|
||||
\- -
|
||||
\` `
|
||||
"""
|
||||
str = re.sub(r"""\\\\""", r"""\""", str)
|
||||
str = re.sub(r'''\\"''', r""""""", str)
|
||||
str = re.sub(r"""\\'""", r"""'""", str)
|
||||
str = re.sub(r"""\\\.""", r""".""", str)
|
||||
str = re.sub(r"""\\-""", r"""-""", str)
|
||||
str = re.sub(r"""\\`""", r"""`""", str)
|
||||
text = re.sub(r"""\\\\""", r"""\""", text)
|
||||
text = re.sub(r'''\\"''', r""""""", text)
|
||||
text = re.sub(r"""\\'""", r"""'""", text)
|
||||
text = re.sub(r"""\\\.""", r""".""", text)
|
||||
text = re.sub(r"""\\-""", r"""-""", text)
|
||||
text = re.sub(r"""\\`""", r"""`""", text)
|
||||
|
||||
return str
|
||||
return text
|
||||
|
||||
|
||||
def _tokenize(str):
|
||||
def _tokenize(html):
|
||||
"""
|
||||
Parameter: String containing HTML markup.
|
||||
Returns: Reference to an array of the tokens comprising the input
|
||||
@ -817,7 +819,7 @@ def _tokenize(str):
|
||||
# %s # nested tags """ % (nested_tags,)
|
||||
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
|
||||
|
||||
token_match = tag_soup.search(str)
|
||||
token_match = tag_soup.search(html)
|
||||
|
||||
previous_end = 0
|
||||
while token_match is not None:
|
||||
@ -827,10 +829,10 @@ def _tokenize(str):
|
||||
tokens.append(['tag', token_match.group(2)])
|
||||
|
||||
previous_end = token_match.end()
|
||||
token_match = tag_soup.search(str, token_match.end())
|
||||
token_match = tag_soup.search(html, token_match.end())
|
||||
|
||||
if previous_end < len(str):
|
||||
tokens.append(['text', str[previous_end:]])
|
||||
if previous_end < len(html):
|
||||
tokens.append(['text', html[previous_end:]])
|
||||
|
||||
return tokens
|
||||
|
||||
|
@ -1,4 +1,7 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__author__ = "stackoverflow community"
|
||||
__docformat__ = 'restructuredtext en'
|
||||
"""
|
||||
|
@ -1,5 +1,8 @@
|
||||
#!/usr/bin/python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
"""
|
||||
Get word, character, and Asian character counts
|
||||
|
||||
@ -44,7 +47,7 @@ def filter_jchars(c):
|
||||
|
||||
|
||||
def nonj_len(word):
|
||||
u"""Returns number of non-Asian words in {word}
|
||||
"""Returns number of non-Asian words in {word}
|
||||
- 日本語AアジアンB -> 2
|
||||
- hello -> 1
|
||||
@param word: A word, possibly containing Asian characters
|
||||
@ -56,7 +59,7 @@ def nonj_len(word):
|
||||
# -> ['spam', 'eggs']
|
||||
# The length of which is 2!
|
||||
chars = [filter_jchars(c) for c in word]
|
||||
return len(u''.join(chars).split())
|
||||
return len(''.join(chars).split())
|
||||
|
||||
|
||||
def get_wordcount(text):
|
||||
@ -66,8 +69,8 @@ def get_wordcount(text):
|
||||
"""
|
||||
|
||||
characters = len(text)
|
||||
chars_no_spaces = sum([not x.isspace() for x in text])
|
||||
asian_chars = sum([is_asian(x) for x in text])
|
||||
chars_no_spaces = sum(not x.isspace() for x in text)
|
||||
asian_chars = sum(is_asian(x) for x in text)
|
||||
non_asian_words = nonj_len(text)
|
||||
words = non_asian_words + asian_chars
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user