unicode_type (regex)

unicode_type(
str(
This commit is contained in:
un-pogaz 2025-11-16 18:27:08 +01:00
parent 2a9ea4550f
commit 072d8f2fec
8 changed files with 54 additions and 67 deletions

View File

@ -16,7 +16,7 @@ from functools import partial
from multiprocessing.pool import ThreadPool as Pool
from threading import Thread
from polyglot.builtins import as_bytes, unicode_type
from polyglot.builtins import as_bytes
Job = namedtuple('Job', 'cmd human_text cwd')
@ -32,7 +32,7 @@ def run_worker(job, decorate=True):
try:
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd)
except Exception as err:
return False, human_text, unicode_type(err)
return False, human_text, str(err)
stdout, stderr = p.communicate()
if stdout:
stdout = stdout.decode('utf-8')

View File

@ -813,19 +813,18 @@ class ISO639(Command): # {{{
m3to2 = {}
nm = {}
codes2, codes3 = set(), set()
unicode_type = str
for x in entries:
two = x.get('alpha_2')
if two:
two = unicode_type(two)
two = str(two)
threeb = x.get('alpha_3')
if threeb:
threeb = unicode_type(threeb)
threeb = str(threeb)
if threeb is None:
continue
name = x.get('inverted_name') or x.get('name')
if name:
name = unicode_type(name)
name = str(name)
if not name or name[0] in '!~=/\'"':
continue
@ -869,18 +868,17 @@ class ISO3166(ISO639): # {{{
codes = set()
three_map = {}
name_map = {}
unicode_type = str
for x in db['3166-1']:
two = x.get('alpha_2')
if two:
two = unicode_type(two)
two = str(two)
codes.add(two)
name_map[two] = x.get('common_name') or x.get('name')
if name_map[two]:
name_map[two] = unicode_type(name_map[two])
name_map[two] = str(name_map[two])
three = x.get('alpha_3')
if three:
three_map[unicode_type(three)] = two
three_map[str(three)] = two
x = {'names':name_map, 'codes':frozenset(codes), 'three_map':three_map}
from calibre.utils.serialize import msgpack_dumps
with open(dest, 'wb') as f:

View File

@ -19,8 +19,6 @@
import re
from polyglot.builtins import unicode_type
from .namespaces import (
ANIMNS,
CHARTNS,
@ -72,20 +70,20 @@ def cnv_color(attribute, arg, element):
''' A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
rr, gg and bb are 8-bit hexadecimal digits.
'''
return unicode_type(arg)
return str(arg)
def cnv_configtype(attribute, arg, element):
if unicode_type(arg) not in ('boolean', 'short', 'int', 'long',
if str(arg) not in ('boolean', 'short', 'int', 'long',
'double', 'string', 'datetime', 'base64Binary'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
def cnv_data_source_has_labels(attribute, arg, element):
if unicode_type(arg) not in ('none','row','column','both'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
if str(arg) not in ('none','row','column','both'):
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
# Understand different date formats
@ -94,30 +92,30 @@ def cnv_date(attribute, arg, element):
''' A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
'''
return unicode_type(arg)
return str(arg)
def cnv_dateTime(attribute, arg, element):
''' A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
'''
return unicode_type(arg)
return str(arg)
def cnv_double(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_duration(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_family(attribute, arg, element):
''' A style family '''
if unicode_type(arg) not in ('text', 'paragraph', 'section', 'ruby', 'table', 'table-column', 'table-row', 'table-cell',
if str(arg) not in ('text', 'paragraph', 'section', 'ruby', 'table', 'table-column', 'table-row', 'table-cell',
'graphic', 'presentation', 'drawing-page', 'chart'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
def __save_prefix(attribute, arg, element):
@ -126,7 +124,7 @@ def __save_prefix(attribute, arg, element):
return str(arg)
namespace = element.get_knownns(prefix)
if namespace is None:
# raise ValueError(f"'{unicode_type(prefix)}' is an unknown prefix")
# raise ValueError(f"'{str(prefix)}' is an unknown prefix")
return str(arg)
return str(arg)
@ -141,21 +139,21 @@ def cnv_formula(attribute, arg, element):
def cnv_ID(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_IDREF(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_integer(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_legend_position(attribute, arg, element):
if unicode_type(arg) not in ('start', 'end', 'top', 'bottom', 'top-start', 'bottom-start', 'top-end', 'bottom-end'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
if str(arg) not in ('start', 'end', 'top', 'bottom', 'top-start', 'bottom-start', 'top-end', 'bottom-end'):
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
@ -187,9 +185,9 @@ def cnv_lengthorpercent(attribute, arg, element):
def cnv_metavaluetype(attribute, arg, element):
if unicode_type(arg) not in ('float', 'date', 'time', 'boolean', 'string'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
if str(arg) not in ('float', 'date', 'time', 'boolean', 'string'):
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
def cnv_major_minor(attribute, arg, element):
@ -249,7 +247,7 @@ def cnv_NCNames(attribute, arg, element):
def cnv_nonNegativeInteger(attribute, arg, element):
return unicode_type(arg)
return str(arg)
pattern_percent = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)%')
@ -282,7 +280,7 @@ def cnv_points(attribute, arg, element):
def cnv_positiveInteger(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_string(attribute, arg, element):
@ -290,19 +288,19 @@ def cnv_string(attribute, arg, element):
def cnv_textnoteclass(attribute, arg, element):
if unicode_type(arg) not in ('footnote', 'endnote'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
if str(arg) not in ('footnote', 'endnote'):
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
# Understand different time formats
def cnv_time(attribute, arg, element):
return unicode_type(arg)
return str(arg)
def cnv_token(attribute, arg, element):
return unicode_type(arg)
return str(arg)
pattern_viewbox = re.compile(r'-?[0-9]+([ ]+-?[0-9]+){3}$')
@ -316,9 +314,9 @@ def cnv_viewbox(attribute, arg, element):
def cnv_xlinkshow(attribute, arg, element):
if unicode_type(arg) not in ('new', 'replace', 'embed'):
raise ValueError(f"'{unicode_type(arg)}' not allowed")
return unicode_type(arg)
if str(arg) not in ('new', 'replace', 'embed'):
raise ValueError(f"'{arg!s}' not allowed")
return str(arg)
attrconverters = {

View File

@ -21,8 +21,6 @@
import re
from polyglot.builtins import unicode_type
from .style import ListLevelProperties
from .text import ListLevelStyleBullet, ListLevelStyleNumber, ListStyle
@ -95,8 +93,8 @@ def styleFromList(styleName, specArray, spacing, showAllLevels):
else:
lls = ListLevelStyleBullet(level=(i+1),bulletchar=bullet[0])
llp = ListLevelProperties()
llp.setAttribute('spacebefore', unicode_type(cssLengthNum * (i+1)) + cssLengthUnits)
llp.setAttribute('minlabelwidth', unicode_type(cssLengthNum) + cssLengthUnits)
llp.setAttribute('spacebefore', str(cssLengthNum * (i+1)) + cssLengthUnits)
llp.setAttribute('minlabelwidth', str(cssLengthNum) + cssLengthUnits)
lls.addElement(llp)
listStyle.addElement(lls)
i += 1

View File

@ -25,8 +25,6 @@
import xml.dom
from xml.dom.minicompat import EmptyNodeList, defproperty
from polyglot.builtins import unicode_type
from . import grammar
from .attrconverters import AttrConverters
from .namespaces import nsdict
@ -85,7 +83,7 @@ def _nssplit(qualifiedName):
def _nsassign(namespace):
return nsdict.setdefault(namespace,'ns' + unicode_type(len(nsdict)))
return nsdict.setdefault(namespace,'ns' + str(len(nsdict)))
# Exceptions
@ -472,10 +470,10 @@ class Element(Node):
f.write('<'+self.tagName)
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
f.write('>')
def write_close_tag(self, level, f):
@ -486,10 +484,10 @@ class Element(Node):
f.write('<'+self.tagName)
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
if self.childNodes:
f.write('>')
for element in self.childNodes:

View File

@ -23,8 +23,6 @@
import xml.dom.minidom
import zipfile
from polyglot.builtins import unicode_type
from .elementtypes import empty_elements, inline_elements
from .namespaces import nsdict
@ -100,7 +98,7 @@ class TextProps:
def __unicode__(self):
return f'[italic={unicode_type(self.italic)}, bold=i{unicode_type(self.bold)}, fixed={unicode_type(self.fixed)}]'
return f'[italic={self.italic!s}, bold=i{self.bold!s}, fixed={self.fixed!s}]'
__str__ = __unicode__
@ -129,7 +127,7 @@ class ParagraphProps:
def __unicode__(self):
return f'[bq={unicode_type(self.blockquote)}, h={self.headingLevel}, code={unicode_type(self.code)}]'
return f'[bq={self.blockquote!s}, h={self.headingLevel}, code={self.code!s}]'
__str__ = __unicode__
@ -400,7 +398,7 @@ class ODF2MoinMoin:
buffer.append(' '*indent)
i += 1
if props.ordered:
number = unicode_type(i)
number = str(i)
number = ' ' + number + '. '
buffer.append(' 1. ')
else:

View File

@ -25,8 +25,6 @@ from xml.dom import Node
from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
from polyglot.builtins import unicode_type
from .namespaces import (
ANIMNS,
CHARTNS,
@ -1344,7 +1342,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.list_class_stack.append(number_class)
attrs = {}
if tag_name == 'ol' and self.list_number_map[number_class] != 1:
attrs = {'start': unicode_type(self.list_number_map[number_class])}
attrs = {'start': str(self.list_number_map[number_class])}
if self.generate_css:
attrs['class'] = list_class
self.opentag(str(tag_name), attrs)

View File

@ -27,7 +27,6 @@ import zipfile
from io import BytesIO
from xml.sax.xmlreader import InputSource
from polyglot.builtins import unicode_type
from polyglot.io import PolyglotBytesIO, PolyglotStringIO
from . import element, manifest, meta
@ -397,7 +396,7 @@ class OpenDocument:
if what_it_is == IS_FILENAME:
self._z.write(fileobj, arcname, zipfile.ZIP_STORED)
else:
zi = zipfile.ZipInfo(unicode_type(arcname), self._now)
zi = zipfile.ZipInfo(str(arcname), self._now)
zi.compress_type = zipfile.ZIP_STORED
zi.external_attr = UNIXPERMS
self._z.writestr(zi, fileobj)