py3: More unicode porting

This commit is contained in:
Kovid Goyal 2019-05-29 17:55:26 +05:30
parent 06dc7dd15b
commit 4730fce41b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
8 changed files with 106 additions and 96 deletions

View File

@ -1,3 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals
''' '''
Created on 23 Sep 2010 Created on 23 Sep 2010
@ -23,7 +24,7 @@ class _Parser(object):
LEX_NUM = 4 LEX_NUM = 4
LEX_EOF = 5 LEX_EOF = 5
LEX_CONSTANTS = frozenset([LEX_STR, LEX_NUM]) LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))
def __init__(self, val, prog, funcs, parent): def __init__(self, val, prog, funcs, parent):
self.lex_pos = 0 self.lex_pos = 0
@ -205,7 +206,7 @@ class TemplateFormatter(string.Formatter):
elif 'bcdoxXn'.find(typ) >= 0: elif 'bcdoxXn'.find(typ) >= 0:
try: try:
val = int(val) val = int(val)
except: except Exception:
raise ValueError( raise ValueError(
_('format: type {0} requires an integer value, got {1}').format(typ, val)) _('format: type {0} requires an integer value, got {1}').format(typ, val))
elif 'eEfFgGn%'.find(typ) >= 0: elif 'eEfFgGn%'.find(typ) >= 0:

View File

@ -18,12 +18,12 @@ from PyQt5.QtCore import QBuffer, QByteArray, Qt
from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
from calibre import fit_image, force_unicode from calibre import fit_image, force_unicode
from calibre.constants import iswindows, plugins from calibre.constants import iswindows, plugins, ispy3
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from calibre.utils.filenames import atomic_rename from calibre.utils.filenames import atomic_rename
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from polyglot.builtins import string_or_bytes from polyglot.builtins import string_or_bytes, unicode_type
# Utilities {{{ # Utilities {{{
imageops, imageops_err = plugins['imageops'] imageops, imageops_err = plugins['imageops']
@ -465,11 +465,11 @@ def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
cmd[cmd.index(q)] = r cmd[cmd.index(q)] = r
if not as_filter: if not as_filter:
repl(True, iname), repl(False, oname) repl(True, iname), repl(False, oname)
if iswindows: if iswindows and not ispy3:
# subprocess in python 2 cannot handle unicode strings that are not # subprocess in python 2 cannot handle unicode strings that are not
# encodeable in mbcs, so we fail here, where it is more explicit, # encodeable in mbcs, so we fail here, where it is more explicit,
# instead. # instead.
cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd] cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
if isinstance(cwd, type('')): if isinstance(cwd, type('')):
cwd = cwd.encode('mbcs') cwd = cwd.encode('mbcs')
stdin = subprocess.PIPE if as_filter else None stdin = subprocess.PIPE if as_filter else None
@ -534,7 +534,7 @@ def encode_jpeg(file_path, quality=80):
from calibre.utils.speedups import ReadOnlyFileBuffer from calibre.utils.speedups import ReadOnlyFileBuffer
quality = max(0, min(100, int(quality))) quality = max(0, min(100, int(quality)))
exe = get_exe_path('cjpeg') exe = get_exe_path('cjpeg')
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)] cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
img = QImage() img = QImage()
if not img.load(file_path): if not img.load(file_path):
raise ValueError('%s is not a valid image file' % file_path) raise ValueError('%s is not a valid image file' % file_path)

View File

@ -16,7 +16,7 @@ from itertools import islice
from calibre import detect_ncpus as cpu_count, as_unicode from calibre import detect_ncpus as cpu_count, as_unicode
from calibre.constants import plugins, filesystem_encoding from calibre.constants import plugins, filesystem_encoding
from calibre.utils.icu import primary_sort_key, primary_find, primary_collator from calibre.utils.icu import primary_sort_key, primary_find, primary_collator
from polyglot.builtins import iteritems, itervalues, map, unicode_type, range, zip, raw_input, filter, getcwd from polyglot.builtins import iteritems, itervalues, map, unicode_type, range, zip, raw_input, filter, getcwd, unicode_type
from polyglot.queue import Queue from polyglot.queue import Queue
DEFAULT_LEVEL1 = '/' DEFAULT_LEVEL1 = '/'
@ -294,12 +294,12 @@ def test(return_tests=False):
start = memory() start = memory()
for i in range(10): for i in range(10):
doit(str(i)) doit(unicode_type(i))
gc.collect() gc.collect()
used10 = memory() - start used10 = memory() - start
start = memory() start = memory()
for i in range(100): for i in range(100):
doit(str(i)) doit(unicode_type(i))
gc.collect() gc.collect()
used100 = memory() - start used100 = memory() - start
if used100 > 0 and used10 > 0: if used100 > 0 and used10 > 0:

View File

@ -1,4 +1,5 @@
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
try: try:
from time import monotonic from time import monotonic

View File

@ -16,7 +16,7 @@ from threading import Lock, local
from polyglot import socketserver from polyglot import socketserver
from polyglot.http_server import HTTPServer, SimpleHTTPRequestHandler from polyglot.http_server import HTTPServer, SimpleHTTPRequestHandler
from polyglot.builtins import error_message, getcwd from polyglot.builtins import error_message, getcwd, unicode_type
# Compiler {{{ # Compiler {{{
@ -107,9 +107,9 @@ class HTTPRequestHandler(SimpleHTTPRequestHandler): # {{{
self.send_response(rtype) self.send_response(rtype)
self.send_header("Accept-Ranges", "bytes") self.send_header("Accept-Ranges", "bytes")
self.send_header("Content-Range", 'bytes ' + self.send_header("Content-Range", 'bytes ' +
str(start_range) + '-' + str(end_range - 1) + '/' + str(size)) unicode_type(start_range) + '-' + unicode_type(end_range - 1) + '/' + unicode_type(size))
self.send_header("Content-Type", str(mimetype)) self.send_header("Content-Type", unicode_type(mimetype))
self.send_header("Content-Length", str(end_range - start_range)) self.send_header("Content-Length", unicode_type(end_range - start_range))
self.send_header("Last-Modified", self.date_time_string(int(mtime))) self.send_header("Last-Modified", self.date_time_string(int(mtime)))
self.end_headers() self.end_headers()
return f, start_range, end_range return f, start_range, end_range

View File

@ -1,6 +1,8 @@
#!/usr/bin/python2 #!/usr/bin/python2
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>" __author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" __description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
@ -525,7 +527,7 @@ def smartyPants(text, attr='1'):
return "".join(result) return "".join(result)
def educateQuotes(str): def educateQuotes(text):
""" """
Parameter: String. Parameter: String.
@ -539,32 +541,32 @@ def educateQuotes(str):
# Special case if the very first character is a quote # Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force: # followed by punctuation at a non-word-break. Close the quotes by brute force:
str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", str) text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", text)
str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", str) text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", text)
# Special case for double sets of quotes, e.g.: # Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p> # <p>He said, "'Quoted' words in a larger quote."</p>
str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str) text = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", text)
str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str) text = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", text)
str = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", str) text = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", text)
str = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", str) text = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", text)
str = re.sub(r'''\"\'''', """&#8221;&#8217;""", str) text = re.sub(r'''\"\'''', """&#8221;&#8217;""", text)
str = re.sub(r'''\'\"''', """&#8217;&#8221;""", str) text = re.sub(r'''\'\"''', """&#8217;&#8221;""", text)
str = re.sub(r'''""''', """&#8221;&#8221;""", str) text = re.sub(r'''""''', """&#8221;&#8221;""", text)
str = re.sub(r"""''""", """&#8217;&#8217;""", str) text = re.sub(r"""''""", """&#8217;&#8217;""", text)
# Special case for decade abbreviations (the '80s --> 80s): # Special case for decade abbreviations (the '80s --> 80s):
# See http://practicaltypography.com/apostrophes.html # See http://practicaltypography.com/apostrophes.html
str = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1&#8217;""", str) text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1&#8217;""", text)
# Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″ # Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″
str = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2&#8242;\3&#8243;', str) text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2&#8242;\3&#8243;', text)
# Special case for Quotes at inside of other entities, e.g.: # Special case for Quotes at inside of other entities, e.g.:
# <p>A double quote--"within dashes"--would be nice.</p> # <p>A double quote--"within dashes"--would be nice.</p>
str = re.sub(r"""(?<=\W)"(?=\w)""", r"""&#8220;""", str) text = re.sub(r"""(?<=\W)"(?=\w)""", r"""&#8220;""", text)
str = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", str) text = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", text)
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str) text = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", text)
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str) text = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", text)
# The following are commented out as smartypants tokenizes text by # The following are commented out as smartypants tokenizes text by
# stripping out html tags. Therefore, there is no guarantee that the # stripping out html tags. Therefore, there is no guarantee that the
@ -572,12 +574,12 @@ def educateQuotes(str):
# meaningful # meaningful
# Special case for Quotes at end of line with a preceding space (may change just to end of line) # Special case for Quotes at end of line with a preceding space (may change just to end of line)
# str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str) # text = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", text)
# str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str) # text = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", text)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text: # Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
# str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str) # text = re.sub(r"""^"(?=\s)""", r"""&#8220;""", text)
# str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str) # text = re.sub(r"""^'(?=\s)""", r"""&#8216;""", text)
close_class = r"""[^\ \t\r\n\[\{\(\-]""" close_class = r"""[^\ \t\r\n\[\{\(\-]"""
dec_dashes = r"""&#8211;|&#8212;""" dec_dashes = r"""&#8211;|&#8212;"""
@ -595,24 +597,24 @@ def educateQuotes(str):
' # the quote ' # the quote
(?=\w) # followed by a word character (?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE) """ % (dec_dashes,), re.VERBOSE)
str = opening_single_quotes_regex.sub(r"""\1&#8216;""", str) text = opening_single_quotes_regex.sub(r"""\1&#8216;""", text)
closing_single_quotes_regex = re.compile(r""" closing_single_quotes_regex = re.compile(r"""
(%s) (%s)
' '
(?!\s | s\b | \d) (?!\s | s\b | \d)
""" % (close_class,), re.VERBOSE) """ % (close_class,), re.VERBOSE)
str = closing_single_quotes_regex.sub(r"""\1&#8217;""", str) text = closing_single_quotes_regex.sub(r"""\1&#8217;""", text)
closing_single_quotes_regex = re.compile(r""" closing_single_quotes_regex = re.compile(r"""
(%s) (%s)
' '
(\s | s\b) (\s | s\b)
""" % (close_class,), re.VERBOSE) """ % (close_class,), re.VERBOSE)
str = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", str) text = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", text)
# Any remaining single quotes should be opening ones: # Any remaining single quotes should be opening ones:
str = re.sub(r"""'""", r"""&#8216;""", str) text = re.sub(r"""'""", r"""&#8216;""", text)
# Get most opening double quotes: # Get most opening double quotes:
opening_double_quotes_regex = re.compile(r""" opening_double_quotes_regex = re.compile(r"""
@ -627,7 +629,7 @@ def educateQuotes(str):
" # the quote " # the quote
(?=\w) # followed by a word character (?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE) """ % (dec_dashes,), re.VERBOSE)
str = opening_double_quotes_regex.sub(r"""\1&#8220;""", str) text = opening_double_quotes_regex.sub(r"""\1&#8220;""", text)
# Double closing quotes: # Double closing quotes:
closing_double_quotes_regex = re.compile(r""" closing_double_quotes_regex = re.compile(r"""
@ -635,25 +637,25 @@ def educateQuotes(str):
" "
(?=\s) (?=\s)
""" % (close_class,), re.VERBOSE) """ % (close_class,), re.VERBOSE)
str = closing_double_quotes_regex.sub(r"""&#8221;""", str) text = closing_double_quotes_regex.sub(r"""&#8221;""", text)
closing_double_quotes_regex = re.compile(r""" closing_double_quotes_regex = re.compile(r"""
(%s) # character that indicates the quote should be closing (%s) # character that indicates the quote should be closing
" "
""" % (close_class,), re.VERBOSE) """ % (close_class,), re.VERBOSE)
str = closing_double_quotes_regex.sub(r"""\1&#8221;""", str) text = closing_double_quotes_regex.sub(r"""\1&#8221;""", text)
if str.endswith('-"'): if text.endswith('-"'):
# A string that ends with -" is sometimes used for dialogue # A string that ends with -" is sometimes used for dialogue
str = str[:-1] + '&#8221;' text = text[:-1] + '&#8221;'
# Any remaining quotes should be opening ones. # Any remaining quotes should be opening ones.
str = re.sub(r'"', r"""&#8220;""", str) text = re.sub(r'"', r"""&#8220;""", text)
return str return text
def educateBackticks(str): def educateBackticks(text):
""" """
Parameter: String. Parameter: String.
Returns: The string, with ``backticks'' -style double quotes Returns: The string, with ``backticks'' -style double quotes
@ -662,12 +664,12 @@ def educateBackticks(str):
Example output: &#8220;Isn't this fun?&#8221; Example output: &#8220;Isn't this fun?&#8221;
""" """
str = re.sub(r"""``""", r"""&#8220;""", str) text = re.sub(r"""``""", r"""&#8220;""", text)
str = re.sub(r"""''""", r"""&#8221;""", str) text = re.sub(r"""''""", r"""&#8221;""", text)
return str return text
def educateSingleBackticks(str): def educateSingleBackticks(text):
""" """
Parameter: String. Parameter: String.
Returns: The string, with `backticks' -style single quotes Returns: The string, with `backticks' -style single quotes
@ -677,12 +679,12 @@ def educateSingleBackticks(str):
Example output: &#8216;Isn&#8217;t this fun?&#8217; Example output: &#8216;Isn&#8217;t this fun?&#8217;
""" """
str = re.sub(r"""`""", r"""&#8216;""", str) text = re.sub(r"""`""", r"""&#8216;""", text)
str = re.sub(r"""'""", r"""&#8217;""", str) text = re.sub(r"""'""", r"""&#8217;""", text)
return str return text
def educateDashes(str): def educateDashes(text):
""" """
Parameter: String. Parameter: String.
@ -690,12 +692,12 @@ def educateDashes(str):
an em-dash HTML entity. an em-dash HTML entity.
""" """
str = re.sub(r"""---""", r"""&#8211;""", str) # en (yes, backwards) text = re.sub(r"""---""", r"""&#8211;""", text) # en (yes, backwards)
str = re.sub(r"""--""", r"""&#8212;""", str) # em (yes, backwards) text = re.sub(r"""--""", r"""&#8212;""", text) # em (yes, backwards)
return str return text
def educateDashesOldSchool(str): def educateDashesOldSchool(text):
""" """
Parameter: String. Parameter: String.
@ -704,12 +706,12 @@ def educateDashesOldSchool(str):
an em-dash HTML entity. an em-dash HTML entity.
""" """
str = re.sub(r"""---""", r"""&#8212;""", str) # em (yes, backwards) text = re.sub(r"""---""", r"""&#8212;""", text) # em (yes, backwards)
str = re.sub(r"""--""", r"""&#8211;""", str) # en (yes, backwards) text = re.sub(r"""--""", r"""&#8211;""", text) # en (yes, backwards)
return str return text
def educateDashesOldSchoolInverted(str): def educateDashesOldSchoolInverted(text):
""" """
Parameter: String. Parameter: String.
@ -724,12 +726,12 @@ def educateDashesOldSchoolInverted(str):
the shortcut should be shorter to type. (Thanks to Aaron the shortcut should be shorter to type. (Thanks to Aaron
Swartz for the idea.) Swartz for the idea.)
""" """
str = re.sub(r"""---""", r"""&#8211;""", str) # en text = re.sub(r"""---""", r"""&#8211;""", text) # en
str = re.sub(r"""--""", r"""&#8212;""", str) # em text = re.sub(r"""--""", r"""&#8212;""", text) # em
return str return text
def educateEllipses(str): def educateEllipses(text):
""" """
Parameter: String. Parameter: String.
Returns: The string, with each instance of "..." translated to Returns: The string, with each instance of "..." translated to
@ -739,12 +741,12 @@ def educateEllipses(str):
Example output: Huh&#8230;? Example output: Huh&#8230;?
""" """
str = re.sub(r"""\.\.\.""", r"""&#8230;""", str) text = re.sub(r"""\.\.\.""", r"""&#8230;""", text)
str = re.sub(r"""\. \. \.""", r"""&#8230;""", str) text = re.sub(r"""\. \. \.""", r"""&#8230;""", text)
return str return text
def stupefyEntities(str): def stupefyEntities(text):
""" """
Parameter: String. Parameter: String.
Returns: The string, with each SmartyPants HTML entity translated to Returns: The string, with each SmartyPants HTML entity translated to
@ -754,21 +756,21 @@ def stupefyEntities(str):
Example output: "Hello -- world." Example output: "Hello -- world."
""" """
str = re.sub(r"""&#8211;""", r"""-""", str) # en-dash text = re.sub(r"""&#8211;""", r"""-""", text) # en-dash
str = re.sub(r"""&#8212;""", r"""--""", str) # em-dash text = re.sub(r"""&#8212;""", r"""--""", text) # em-dash
str = re.sub(r"""&#8216;""", r"""'""", str) # open single quote text = re.sub(r"""&#8216;""", r"""'""", text) # open single quote
str = re.sub(r"""&#8217;""", r"""'""", str) # close single quote text = re.sub(r"""&#8217;""", r"""'""", text) # close single quote
str = re.sub(r"""&#8220;""", r'''"''', str) # open double quote text = re.sub(r"""&#8220;""", r'''"''', text) # open double quote
str = re.sub(r"""&#8221;""", r'''"''', str) # close double quote text = re.sub(r"""&#8221;""", r'''"''', text) # close double quote
str = re.sub(r"""&#8230;""", r"""...""", str) # ellipsis text = re.sub(r"""&#8230;""", r"""...""", text) # ellipsis
return str return text
def processEscapes(str): def processEscapes(text):
r""" r"""
Parameter: String. Parameter: String.
Returns: The string, after processing the following backslash Returns: The string, after processing the following backslash
@ -784,17 +786,17 @@ def processEscapes(str):
\- &#45; \- &#45;
\` &#96; \` &#96;
""" """
str = re.sub(r"""\\\\""", r"""&#92;""", str) text = re.sub(r"""\\\\""", r"""&#92;""", text)
str = re.sub(r'''\\"''', r"""&#34;""", str) text = re.sub(r'''\\"''', r"""&#34;""", text)
str = re.sub(r"""\\'""", r"""&#39;""", str) text = re.sub(r"""\\'""", r"""&#39;""", text)
str = re.sub(r"""\\\.""", r"""&#46;""", str) text = re.sub(r"""\\\.""", r"""&#46;""", text)
str = re.sub(r"""\\-""", r"""&#45;""", str) text = re.sub(r"""\\-""", r"""&#45;""", text)
str = re.sub(r"""\\`""", r"""&#96;""", str) text = re.sub(r"""\\`""", r"""&#96;""", text)
return str return text
def _tokenize(str): def _tokenize(html):
""" """
Parameter: String containing HTML markup. Parameter: String containing HTML markup.
Returns: Reference to an array of the tokens comprising the input Returns: Reference to an array of the tokens comprising the input
@ -817,7 +819,7 @@ def _tokenize(str):
# %s # nested tags """ % (nested_tags,) # %s # nested tags """ % (nested_tags,)
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
token_match = tag_soup.search(str) token_match = tag_soup.search(html)
previous_end = 0 previous_end = 0
while token_match is not None: while token_match is not None:
@ -827,10 +829,10 @@ def _tokenize(str):
tokens.append(['tag', token_match.group(2)]) tokens.append(['tag', token_match.group(2)])
previous_end = token_match.end() previous_end = token_match.end()
token_match = tag_soup.search(str, token_match.end()) token_match = tag_soup.search(html, token_match.end())
if previous_end < len(str): if previous_end < len(html):
tokens.append(['text', str[previous_end:]]) tokens.append(['text', html[previous_end:]])
return tokens return tokens

View File

@ -1,4 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__author__ = "stackoverflow community" __author__ = "stackoverflow community"
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
""" """

View File

@ -1,5 +1,8 @@
#!/usr/bin/python2 #!/usr/bin/python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
""" """
Get word, character, and Asian character counts Get word, character, and Asian character counts
@ -44,7 +47,7 @@ def filter_jchars(c):
def nonj_len(word): def nonj_len(word):
u"""Returns number of non-Asian words in {word} """Returns number of non-Asian words in {word}
- 日本語AアジアンB -> 2 - 日本語AアジアンB -> 2
- hello -> 1 - hello -> 1
@param word: A word, possibly containing Asian characters @param word: A word, possibly containing Asian characters
@ -56,7 +59,7 @@ def nonj_len(word):
# -> ['spam', 'eggs'] # -> ['spam', 'eggs']
# The length of which is 2! # The length of which is 2!
chars = [filter_jchars(c) for c in word] chars = [filter_jchars(c) for c in word]
return len(u''.join(chars).split()) return len(''.join(chars).split())
def get_wordcount(text): def get_wordcount(text):
@ -66,8 +69,8 @@ def get_wordcount(text):
""" """
characters = len(text) characters = len(text)
chars_no_spaces = sum([not x.isspace() for x in text]) chars_no_spaces = sum(not x.isspace() for x in text)
asian_chars = sum([is_asian(x) for x in text]) asian_chars = sum(is_asian(x) for x in text)
non_asian_words = nonj_len(text) non_asian_words = nonj_len(text)
words = non_asian_words + asian_chars words = non_asian_words + asian_chars