Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

Some fixes to the last py3 merge

This commit is contained in:
parent 4c5e9a20a4
commit 749885778d
@@ -584,7 +584,10 @@ class HTMLPreProcessor(object):
             end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
             end_rules.append(
                 # Un wrap using punctuation
-                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), # noqa
+                (re.compile((
+                    u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]'
+                    u'|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?'
+                    u'\s*[\w\d$(])') % length, re.UNICODE), wrap_lines),
             )

         for rule in self.PREPROCESS + start_rules:
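The change above is purely presentational: the long unwrap regex is split into adjacent string literals, which Python concatenates at compile time, and the extra parentheses keep the single % interpolation applied to the whole pattern, so the compiled regex is unchanged. A minimal sketch of the same construct, with a made-up, much shorter pattern and length:

import re

length = 40  # hypothetical minimum line length before unwrapping
unwrap = re.compile((
    u'(?<=.{%i}[a-z,:)])'          # lookbehind: a long-enough line ending in a letter or punctuation
    u'\\s*(</p>\\s*<p>\\s*)+'      # one or more </p><p> breaks to collapse
    u'\\s*(?=[a-z])') % length,    # lookahead: the next fragment starts with a lowercase letter
    re.UNICODE)

sample = u'x' * 41 + u'</p>\n<p>continued'
print(unwrap.sub(u' ', sample))  # the paragraph break is replaced by a single space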
@@ -24,7 +24,7 @@ from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.mobi.utils import read_font_record
 from calibre.ebooks.oeb.parse_utils import parse_html
 from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
-from polyglot.builtins import range, zip, unicode_type, getcwd
+from polyglot.builtins import range, zip, unicode_type, getcwd, as_unicode
 from polyglot.urllib import urldefrag

 Part = namedtuple('Part',
@@ -403,7 +403,7 @@ class Mobi8Reader(object):
                     continue

             entry['href'] = href
-            entry['idtag'] = idtag.decode(self.header.codec)
+            entry['idtag'] = as_unicode(idtag, self.header.codec or 'utf-8')

         for e in remove:
             index_entries.remove(e)
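as_unicode() is the helper imported from polyglot.builtins in the hunk above; the point of the change is that the idtag is decoded through a helper that copes with a missing codec (hence the `or 'utf-8'`) instead of calling bytes.decode() directly. A rough stand-in for the behaviour the new line relies on, not the library's actual implementation:

def as_unicode_sketch(x, encoding='utf-8', errors='strict'):
    # decode bytes with the given encoding; return text unchanged
    if isinstance(x, bytes):
        return x.decode(encoding, errors)
    return x

codec = None  # e.g. a MOBI header that does not declare a codec
print(as_unicode_sketch(b'calibre_link-12', codec or 'utf-8'))  # made-up idtag bytes -> 'calibre_link-12'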
@@ -600,17 +600,18 @@ class DirContainer(object):
     def namelist(self):
         names = []
         base = self.rootdir
-        if isinstance(base, unicode_type):
-            base = base.encode(filesystem_encoding)
         for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
-                fname = fname.replace(b'\\', b'/')
-                if not isinstance(fname, unicode_type):
+                if isinstance(fname, bytes):
                     try:
                         fname = fname.decode(filesystem_encoding)
-                    except:
-                        continue
+                    except Exception:
+                        try:
+                            fname = fname.decode('utf-8')
+                        except Exception:
+                            continue
+                fname = fname.replace('\\', '/')
                 names.append(fname)
         return names

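Dropping the encode-to-bytes step matters because os.walk() mirrors the type of its argument on Python 3: a str base yields str names and a bytes base yields bytes names. Keeping base as text means every fname is already a str, the bytes replace(b'\\', b'/') becomes unnecessary, and decoding is only a fallback for the rare bytes name. A quick illustration of that type mirroring:

import os

for root, dirs, files in os.walk('.'):
    print(type(root))   # <class 'str'>: a str argument gives str paths
    break

for root, dirs, files in os.walk(b'.'):
    print(type(root))   # <class 'bytes'>: a bytes argument gives bytes paths
    break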
@@ -207,7 +207,7 @@ class PMLMLizer(object):
         text = re.sub('[ ]{2,}', ' ', text)

         # Condense excessive \c empty line sequences.
-        text = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)
+        text = re.sub(r'(\\c\s*\\c\s*){2,}', r'\\c \n\\c\n', text)

         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
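The fix above removes one doubled backslash: in a raw string, r'\\s' reaches the regex engine as an escaped backslash followed by a literal 's', while r'\s' is the whitespace class, so only the corrected pattern actually condenses runs of \c codes separated by newlines. A small check with made-up PML text:

import re

text = '\\c\n\\c\n\\c\n\\c\nbody'
old = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)
new = re.sub(r'(\\c\s*\\c\s*){2,}', r'\\c \n\\c\n', text)
print(old == text)  # True: the old pattern never matches plain whitespace
print(new)          # the four \c lines collapse to "\c \n\c\n" before "body"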
@@ -608,10 +608,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
         # when determining uniqueness for a style, ingorne these values, since
         # they don't tell us if the style is unique
         ignore_values = ['style-num', 'nest-level', 'in-table']
-        for k, v in self.__att_val_dict.items():
-            if k in ignore_values:
-                continue
-            my_string += '%s:%s' % (k, v)
+        for k in sorted(self.__att_val_dict):
+            if k not in ignore_values:
+                my_string += '%s:%s' % (k, self.__att_val_dict[k])
         if my_string in self.__style_num_strings:
             num = self.__style_num_strings.index(my_string)
             num += 1 # since indexing starts at zero, rather than 1
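Iterating sorted(self.__att_val_dict) instead of .items() makes the uniqueness string independent of dict insertion order, which is what keeps style numbering reproducible across runs and Python versions. A quick sanity check with made-up attributes, outside the rtf2xml parser state:

ignore_values = ['style-num', 'nest-level', 'in-table']

def uniqueness_string(att_val_dict):
    my_string = ''
    for k in sorted(att_val_dict):
        if k not in ignore_values:
            my_string += '%s:%s' % (k, att_val_dict[k])
    return my_string

a = {'bold': '1', 'font-size': '24', 'style-num': '3'}
b = {'style-num': '9', 'font-size': '24', 'bold': '1'}  # same style, different order and number
print(uniqueness_string(a) == uniqueness_string(b))  # True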
@@ -635,9 +634,10 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             style_string += ('<%s>%s' % ('tabs', the_value))
-        for k, v in self.__att_val_dict.items():
-            if k not in ['name', 'style-num', 'in-table'] + tabs_list:
-                style_string += ('<%s>%s' % (k, v))
+        exclude = frozenset(['name', 'style-num', 'in-table'] + tabs_list)
+        for k in sorted(self.__att_val_dict):
+            if k not in exclude:
+                style_string += ('<%s>%s' % (k, self.__att_val_dict[k]))
         style_string += '\n'
         self.__body_style_strings.append(style_string)

@@ -685,9 +685,10 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             self.__write_obj.write('<%s>%s' % ('tabs', the_value))
-        keys = sorted(self.__att_val_dict.keys())
+        keys = sorted(self.__att_val_dict)
+        exclude = frozenset(['name', 'style-num', 'in-table'] + tabs_list)
         for key in keys:
-            if key not in ['name', 'style-num', 'in-table'] + tabs_list:
+            if key not in exclude:
                 self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key]))
         self.__write_obj.write('\n')
         self.__write_obj.write(self.__start2_marker)
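Both of the last two hunks apply the same two ideas: sort the keys so the emitted style markup is deterministic, and hoist the exclusion list into a frozenset so it is built once and membership tests are cheap, instead of rebuilding a list on every loop iteration. A minimal illustration with a toy attribute dict rather than the real parser state:

att_val_dict = {'bold': 'true', 'name': 'Body', 'italics': 'false', 'style-num': '7'}
tabs_list = ['tabs-left', 'tabs-right']  # hypothetical tab attribute names

exclude = frozenset(['name', 'style-num', 'in-table'] + tabs_list)
style_string = ''
for k in sorted(att_val_dict):
    if k not in exclude:
        style_string += ('<%s>%s' % (k, att_val_dict[k]))
print(style_string)  # '<bold>true<italics>false', in sorted key order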