Replace unicode_type( with str( (regex)

unicode_type( -> str(
2026-05-20 22:12:37 -04:00 · 2025-11-16 18:27:08 +01:00
parent 2a9ea4550f
commit 072d8f2fec
8 changed files with 54 additions and 67 deletions
@@ -16,7 +16,7 @@ from functools import partial
 from multiprocessing.pool import ThreadPool as Pool
 from threading import Thread

-from polyglot.builtins import as_bytes, unicode_type
+from polyglot.builtins import as_bytes

 Job = namedtuple('Job', 'cmd human_text cwd')

@@ -32,7 +32,7 @@ def run_worker(job, decorate=True):
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd)
    except Exception as err:
-        return False, human_text, unicode_type(err)
+        return False, human_text, str(err)
    stdout, stderr = p.communicate()
    if stdout:
        stdout = stdout.decode('utf-8')
@@ -813,19 +813,18 @@ class ISO639(Command):  # {{{
        m3to2 = {}
        nm = {}
        codes2, codes3 = set(), set()
-        unicode_type = str
        for x in entries:
            two = x.get('alpha_2')
            if two:
-                two = unicode_type(two)
+                two = str(two)
            threeb = x.get('alpha_3')
            if threeb:
-                threeb = unicode_type(threeb)
+                threeb = str(threeb)
            if threeb is None:
                continue
            name = x.get('inverted_name') or x.get('name')
            if name:
-                name = unicode_type(name)
+                name = str(name)
            if not name or name[0] in '!~=/\'"':
                continue

@@ -869,18 +868,17 @@ class ISO3166(ISO639):  # {{{
        codes = set()
        three_map = {}
        name_map = {}
-        unicode_type = str
        for x in db['3166-1']:
            two = x.get('alpha_2')
            if two:
-                two = unicode_type(two)
+                two = str(two)
            codes.add(two)
            name_map[two] = x.get('common_name') or x.get('name')
            if name_map[two]:
-                name_map[two] = unicode_type(name_map[two])
+                name_map[two] = str(name_map[two])
            three = x.get('alpha_3')
            if three:
-                three_map[unicode_type(three)] = two
+                three_map[str(three)] = two
        x = {'names':name_map, 'codes':frozenset(codes), 'three_map':three_map}
        from calibre.utils.serialize import msgpack_dumps
        with open(dest, 'wb') as f:
@@ -19,8 +19,6 @@

 import re

-from polyglot.builtins import unicode_type
-
 from .namespaces import (
    ANIMNS,
    CHARTNS,
@@ -72,20 +70,20 @@ def cnv_color(attribute, arg, element):
    ''' A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
        rr, gg and bb are 8-bit hexadecimal digits.
    '''
-    return unicode_type(arg)
+    return str(arg)


 def cnv_configtype(attribute, arg, element):
-    if unicode_type(arg) not in ('boolean', 'short', 'int', 'long',
+    if str(arg) not in ('boolean', 'short', 'int', 'long',
    'double', 'string', 'datetime', 'base64Binary'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 def cnv_data_source_has_labels(attribute, arg, element):
-    if unicode_type(arg) not in ('none','row','column','both'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+    if str(arg) not in ('none','row','column','both'):
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 # Understand different date formats
@@ -94,30 +92,30 @@ def cnv_date(attribute, arg, element):
    ''' A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
        value.
    '''
-    return unicode_type(arg)
+    return str(arg)


 def cnv_dateTime(attribute, arg, element):
    ''' A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
        value.
    '''
-    return unicode_type(arg)
+    return str(arg)


 def cnv_double(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_duration(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_family(attribute, arg, element):
    ''' A style family '''
-    if unicode_type(arg) not in ('text', 'paragraph', 'section', 'ruby', 'table', 'table-column', 'table-row', 'table-cell',
+    if str(arg) not in ('text', 'paragraph', 'section', 'ruby', 'table', 'table-column', 'table-row', 'table-cell',
      'graphic', 'presentation', 'drawing-page', 'chart'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 def __save_prefix(attribute, arg, element):
@@ -126,7 +124,7 @@ def __save_prefix(attribute, arg, element):
        return str(arg)
    namespace = element.get_knownns(prefix)
    if namespace is None:
-        # raise ValueError(f"'{unicode_type(prefix)}' is an unknown prefix")
+        # raise ValueError(f"'{str(prefix)}' is an unknown prefix")
        return str(arg)
    return str(arg)

@@ -141,21 +139,21 @@ def cnv_formula(attribute, arg, element):


 def cnv_ID(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_IDREF(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_integer(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_legend_position(attribute, arg, element):
-    if unicode_type(arg) not in ('start', 'end', 'top', 'bottom', 'top-start', 'bottom-start', 'top-end', 'bottom-end'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+    if str(arg) not in ('start', 'end', 'top', 'bottom', 'top-start', 'bottom-start', 'top-end', 'bottom-end'):
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
@@ -187,9 +185,9 @@ def cnv_lengthorpercent(attribute, arg, element):


 def cnv_metavaluetype(attribute, arg, element):
-    if unicode_type(arg) not in ('float', 'date', 'time', 'boolean', 'string'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+    if str(arg) not in ('float', 'date', 'time', 'boolean', 'string'):
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 def cnv_major_minor(attribute, arg, element):
@@ -249,7 +247,7 @@ def cnv_NCNames(attribute, arg, element):


 def cnv_nonNegativeInteger(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 pattern_percent = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)%')
@@ -282,7 +280,7 @@ def cnv_points(attribute, arg, element):


 def cnv_positiveInteger(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_string(attribute, arg, element):
@@ -290,19 +288,19 @@ def cnv_string(attribute, arg, element):


 def cnv_textnoteclass(attribute, arg, element):
-    if unicode_type(arg) not in ('footnote', 'endnote'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+    if str(arg) not in ('footnote', 'endnote'):
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 # Understand different time formats

 def cnv_time(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 def cnv_token(attribute, arg, element):
-    return unicode_type(arg)
+    return str(arg)


 pattern_viewbox = re.compile(r'-?[0-9]+([ ]+-?[0-9]+){3}$')
@@ -316,9 +314,9 @@ def cnv_viewbox(attribute, arg, element):


 def cnv_xlinkshow(attribute, arg, element):
-    if unicode_type(arg) not in ('new', 'replace', 'embed'):
-        raise ValueError(f"'{unicode_type(arg)}' not allowed")
-    return unicode_type(arg)
+    if str(arg) not in ('new', 'replace', 'embed'):
+        raise ValueError(f"'{arg!s}' not allowed")
+    return str(arg)


 attrconverters = {
@@ -21,8 +21,6 @@

 import re

-from polyglot.builtins import unicode_type
-
 from .style import ListLevelProperties
 from .text import ListLevelStyleBullet, ListLevelStyleNumber, ListStyle

@@ -95,8 +93,8 @@ def styleFromList(styleName, specArray, spacing, showAllLevels):
        else:
            lls = ListLevelStyleBullet(level=(i+1),bulletchar=bullet[0])
        llp = ListLevelProperties()
-        llp.setAttribute('spacebefore', unicode_type(cssLengthNum * (i+1)) + cssLengthUnits)
-        llp.setAttribute('minlabelwidth', unicode_type(cssLengthNum) + cssLengthUnits)
+        llp.setAttribute('spacebefore', str(cssLengthNum * (i+1)) + cssLengthUnits)
+        llp.setAttribute('minlabelwidth', str(cssLengthNum) + cssLengthUnits)
        lls.addElement(llp)
        listStyle.addElement(lls)
        i += 1
@@ -25,8 +25,6 @@
 import xml.dom
 from xml.dom.minicompat import EmptyNodeList, defproperty

-from polyglot.builtins import unicode_type
-
 from . import grammar
 from .attrconverters import AttrConverters
 from .namespaces import nsdict
@@ -85,7 +83,7 @@ def _nssplit(qualifiedName):


 def _nsassign(namespace):
-    return nsdict.setdefault(namespace,'ns' + unicode_type(len(nsdict)))
+    return nsdict.setdefault(namespace,'ns' + str(len(nsdict)))


 # Exceptions
@@ -472,10 +470,10 @@ class Element(Node):
        f.write('<'+self.tagName)
        if level == 0:
            for namespace, prefix in self.namespaces.items():
-                f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
+                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
        for qname in self.attributes.keys():
            prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
        f.write('>')

    def write_close_tag(self, level, f):
@@ -486,10 +484,10 @@ class Element(Node):
        f.write('<'+self.tagName)
        if level == 0:
            for namespace, prefix in self.namespaces.items():
-                f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
+                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
        for qname in self.attributes.keys():
            prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(str(self.attributes[qname]).encode('utf-8')))
        if self.childNodes:
            f.write('>')
            for element in self.childNodes:
@@ -23,8 +23,6 @@
 import xml.dom.minidom
 import zipfile

-from polyglot.builtins import unicode_type
-
 from .elementtypes import empty_elements, inline_elements
 from .namespaces import nsdict

@@ -100,7 +98,7 @@ class TextProps:

    def __unicode__(self):

-        return f'[italic={unicode_type(self.italic)}, bold=i{unicode_type(self.bold)}, fixed={unicode_type(self.fixed)}]'
+        return f'[italic={self.italic!s}, bold=i{self.bold!s}, fixed={self.fixed!s}]'
    __str__ = __unicode__


@@ -129,7 +127,7 @@ class ParagraphProps:

    def __unicode__(self):

-        return f'[bq={unicode_type(self.blockquote)}, h={self.headingLevel}, code={unicode_type(self.code)}]'
+        return f'[bq={self.blockquote!s}, h={self.headingLevel}, code={self.code!s}]'
    __str__ = __unicode__


@@ -400,7 +398,7 @@ class ODF2MoinMoin:
            buffer.append(' '*indent)
            i += 1
            if props.ordered:
-                number = unicode_type(i)
+                number = str(i)
                number = ' ' + number + '. '
                buffer.append(' 1. ')
            else:
@@ -25,8 +25,6 @@ from xml.dom import Node
 from xml.sax import handler
 from xml.sax.saxutils import escape, quoteattr

-from polyglot.builtins import unicode_type
-
 from .namespaces import (
    ANIMNS,
    CHARTNS,
@@ -1344,7 +1342,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
        self.list_class_stack.append(number_class)
        attrs = {}
        if tag_name == 'ol' and self.list_number_map[number_class] != 1:
-            attrs = {'start': unicode_type(self.list_number_map[number_class])}
+            attrs = {'start': str(self.list_number_map[number_class])}
        if self.generate_css:
            attrs['class'] = list_class
        self.opentag(str(tag_name), attrs)
@@ -27,7 +27,6 @@ import zipfile
 from io import BytesIO
 from xml.sax.xmlreader import InputSource

-from polyglot.builtins import unicode_type
 from polyglot.io import PolyglotBytesIO, PolyglotStringIO

 from . import element, manifest, meta
@@ -397,7 +396,7 @@ class OpenDocument:
            if what_it_is == IS_FILENAME:
                self._z.write(fileobj, arcname, zipfile.ZIP_STORED)
            else:
-                zi = zipfile.ZipInfo(unicode_type(arcname), self._now)
+                zi = zipfile.ZipInfo(str(arcname), self._now)
                zi.compress_type = zipfile.ZIP_STORED
                zi.external_attr = UNIXPERMS
                self._z.writestr(zi, fileobj)