py3: More unicode porting

This commit is contained in:
Kovid Goyal 2019-06-14 18:39:58 +05:30
parent 59ddfa67e5
commit d782fa0ae4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 41 additions and 32 deletions

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -70,19 +71,19 @@ class RBMLizer(object):
def mlize_spine(self):
self.link_hrefs = {}
output = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
output = ['<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
output.append(self.get_cover_page())
output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
output.append('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
output.append(self.get_text())
output.append(u'</BODY></HTML>')
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
output.append('</BODY></HTML>')
output = ''.join(output).replace('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
output = self.clean_text(output)
return output
def get_cover_page(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = u''
output = ''
if 'cover' in self.oeb_book.guide:
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
@ -97,10 +98,10 @@ class RBMLizer(object):
return output
def get_toc(self):
toc = [u'']
toc = ['']
if self.opts.inline_toc:
self.log.debug('Generating table of contents...')
toc.append(u'<H1>%s</H1><UL>\n' % _('Table of Contents:'))
toc.append('<H1>%s</H1><UL>\n' % _('Table of Contents:'))
for item in self.oeb_book.toc:
if item.href in self.link_hrefs.keys():
toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
@ -113,7 +114,7 @@ class RBMLizer(object):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = [u'']
output = ['']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
@ -129,7 +130,7 @@ class RBMLizer(object):
if aid not in self.link_hrefs.keys():
self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
aid = self.link_hrefs[aid]
return u'<A NAME="%s"></A>' % aid
return '<A NAME="%s"></A>' % aid
def clean_text(self, text):
# Remove anchors that do not have links
@ -148,16 +149,16 @@ class RBMLizer(object):
if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return [u'']
return ['']
text = [u'']
text = ['']
style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
if hasattr(elem, 'tail') and elem.tail:
return [elem.tail]
return [u'']
return ['']
tag = barename(elem.tag)
tag_count = 0

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -13,7 +14,7 @@ from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import RocketBookError
from calibre.ebooks.metadata.rb import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
from polyglot.builtins import range
from polyglot.builtins import range, as_unicode
from polyglot.urllib import unquote
@ -74,7 +75,7 @@ class Reader(object):
if toc_item.flags in (1, 2):
return
output = u''
output = ''
self.stream.seek(toc_item.offset)
if toc_item.flags == 8:
@ -109,7 +110,7 @@ class Reader(object):
images = []
for item in self.toc:
iname = item.name.decode('utf-8')
iname = as_unicode(item.name)
if iname.lower().endswith('html'):
self.log.debug('HTML item %s found...' % iname)
html.append(iname)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, John Schember john@nachtimwald.com'
__docformat__ = 'restructuredtext en'
@ -6,4 +8,3 @@ __docformat__ = 'restructuredtext en'
'''
Used for txt output
'''

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '''2011, John Schember <john@nachtimwald.com>
@ -42,7 +43,7 @@ class MarkdownMLizer(OEB2HTML):
return txt
def mlize_spine(self, oeb_book):
output = [u'']
output = ['']
for item in oeb_book.spine:
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
self.rewrite_ids(item.data, item)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
@ -17,7 +19,7 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
from polyglot.builtins import iteritems, unicode_type, map, range, long_type
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
def clean_txt(txt):
@ -201,7 +203,7 @@ def separate_hard_scene_breaks(txt):
return '\n%s\n' % line
else:
return line
txt = re.sub(unicode_type(r'(?miu)^[ \t-=~\/_]+$'), lambda mo: sep_break(mo.group()), txt)
txt = re.sub(r'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
return txt
@ -242,7 +244,7 @@ def split_string_separator(txt, size):
size -= 2
txt = []
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
idx = part.rfind('.')
idx = part.rfind(b'.')
if idx == -1:
part += b'\n\n'
else:

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
@ -34,7 +35,7 @@ class TextileMLizer(OEB2HTML):
self.in_a_link = False
self.our_ids = []
self.images = {}
self.id_no_text = u''
self.id_no_text = ''
self.style_embed = []
self.remove_space_after_newline = False
self.base_hrefs = [item.href for item in oeb_book.spine]
@ -56,7 +57,7 @@ class TextileMLizer(OEB2HTML):
return txt
def mlize_spine(self, oeb_book):
output = [u'']
output = ['']
for item in oeb_book.spine:
self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
self.rewrite_ids(item.data, item)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -80,8 +80,8 @@ class TXTMLizer(object):
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(content.find(XHTML('body')), stylizer, item)
output += '\n\n\n\n\n\n'
output = u''.join(output)
output = u'\n'.join(l.rstrip() for l in output.splitlines())
output = ''.join(output)
output = '\n'.join(l.rstrip() for l in output.splitlines())
output = self.cleanup_text(output)
return output
@ -97,12 +97,12 @@ class TXTMLizer(object):
return text
def get_toc(self):
toc = [u'']
toc = ['']
if getattr(self.opts, 'inline_toc', None):
self.log.debug('Generating table of contents...')
toc.append(u'%s\n\n' % _(u'Table of Contents:'))
toc.append('%s\n\n' % _('Table of Contents:'))
for item in self.toc_titles:
toc.append(u'* %s\n\n' % item)
toc.append('* %s\n\n' % item)
return ''.join(toc)
def create_flat_toc(self, nodes):
@ -224,11 +224,11 @@ class TXTMLizer(object):
# Are we in a paragraph block?
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
if self.opts.remove_paragraph_spacing and not in_heading:
text.append(u'\t')
text.append('\t')
in_block = True
if tag in SPACE_TAGS:
text.append(u' ')
text.append(' ')
# Hard scene breaks.
if tag == 'hr':
@ -250,9 +250,9 @@ class TXTMLizer(object):
text += self.dump_text(item, stylizer, page)
if in_block:
text.append(u'\n\n')
text.append('\n\n')
if in_heading:
text.append(u'\n')
text.append('\n')
self.last_was_heading = True
else:
self.last_was_heading = False