mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: More unicode porting
This commit is contained in:
parent
59ddfa67e5
commit
d782fa0ae4
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
@ -70,19 +71,19 @@ class RBMLizer(object):
|
||||
|
||||
def mlize_spine(self):
|
||||
self.link_hrefs = {}
|
||||
output = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
|
||||
output = ['<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
|
||||
output.append(self.get_cover_page())
|
||||
output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
|
||||
output.append('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
|
||||
output.append(self.get_text())
|
||||
output.append(u'</BODY></HTML>')
|
||||
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
||||
output.append('</BODY></HTML>')
|
||||
output = ''.join(output).replace('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
||||
output = self.clean_text(output)
|
||||
return output
|
||||
|
||||
def get_cover_page(self):
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.base import XHTML
|
||||
output = u''
|
||||
output = ''
|
||||
if 'cover' in self.oeb_book.guide:
|
||||
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
|
||||
output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
|
||||
@ -97,10 +98,10 @@ class RBMLizer(object):
|
||||
return output
|
||||
|
||||
def get_toc(self):
|
||||
toc = [u'']
|
||||
toc = ['']
|
||||
if self.opts.inline_toc:
|
||||
self.log.debug('Generating table of contents...')
|
||||
toc.append(u'<H1>%s</H1><UL>\n' % _('Table of Contents:'))
|
||||
toc.append('<H1>%s</H1><UL>\n' % _('Table of Contents:'))
|
||||
for item in self.oeb_book.toc:
|
||||
if item.href in self.link_hrefs.keys():
|
||||
toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
|
||||
@ -113,7 +114,7 @@ class RBMLizer(object):
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.base import XHTML
|
||||
|
||||
output = [u'']
|
||||
output = ['']
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
@ -129,7 +130,7 @@ class RBMLizer(object):
|
||||
if aid not in self.link_hrefs.keys():
|
||||
self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
||||
aid = self.link_hrefs[aid]
|
||||
return u'<A NAME="%s"></A>' % aid
|
||||
return '<A NAME="%s"></A>' % aid
|
||||
|
||||
def clean_text(self, text):
|
||||
# Remove anchors that do not have links
|
||||
@ -148,16 +149,16 @@ class RBMLizer(object):
|
||||
if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \
|
||||
and elem.tail:
|
||||
return [elem.tail]
|
||||
return [u'']
|
||||
return ['']
|
||||
|
||||
text = [u'']
|
||||
text = ['']
|
||||
style = stylizer.style(elem)
|
||||
|
||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||
or style['visibility'] == 'hidden':
|
||||
if hasattr(elem, 'tail') and elem.tail:
|
||||
return [elem.tail]
|
||||
return [u'']
|
||||
return ['']
|
||||
|
||||
tag = barename(elem.tag)
|
||||
tag_count = 0
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
@ -13,7 +14,7 @@ from calibre.ebooks.rb import HEADER
|
||||
from calibre.ebooks.rb import RocketBookError
|
||||
from calibre.ebooks.metadata.rb import get_metadata
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from polyglot.builtins import range
|
||||
from polyglot.builtins import range, as_unicode
|
||||
from polyglot.urllib import unquote
|
||||
|
||||
|
||||
@ -74,7 +75,7 @@ class Reader(object):
|
||||
if toc_item.flags in (1, 2):
|
||||
return
|
||||
|
||||
output = u''
|
||||
output = ''
|
||||
self.stream.seek(toc_item.offset)
|
||||
|
||||
if toc_item.flags == 8:
|
||||
@ -109,7 +110,7 @@ class Reader(object):
|
||||
images = []
|
||||
|
||||
for item in self.toc:
|
||||
iname = item.name.decode('utf-8')
|
||||
iname = as_unicode(item.name)
|
||||
if iname.lower().endswith('html'):
|
||||
self.log.debug('HTML item %s found...' % iname)
|
||||
html.append(iname)
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python2
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, John Schember john@nachtimwald.com'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
@ -6,4 +8,3 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Used for txt output
|
||||
'''
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '''2011, John Schember <john@nachtimwald.com>
|
||||
@ -42,7 +43,7 @@ class MarkdownMLizer(OEB2HTML):
|
||||
return txt
|
||||
|
||||
def mlize_spine(self, oeb_book):
|
||||
output = [u'']
|
||||
output = ['']
|
||||
for item in oeb_book.spine:
|
||||
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
|
||||
self.rewrite_ids(item.data, item)
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
@ -1,4 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
@ -17,7 +19,7 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from polyglot.builtins import iteritems, unicode_type, map, range, long_type
|
||||
|
||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||
|
||||
|
||||
def clean_txt(txt):
|
||||
@ -201,7 +203,7 @@ def separate_hard_scene_breaks(txt):
|
||||
return '\n%s\n' % line
|
||||
else:
|
||||
return line
|
||||
txt = re.sub(unicode_type(r'(?miu)^[ \t-=~\/_]+$'), lambda mo: sep_break(mo.group()), txt)
|
||||
txt = re.sub(r'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
|
||||
return txt
|
||||
|
||||
|
||||
@ -242,7 +244,7 @@ def split_string_separator(txt, size):
|
||||
size -= 2
|
||||
txt = []
|
||||
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
|
||||
idx = part.rfind('.')
|
||||
idx = part.rfind(b'.')
|
||||
if idx == -1:
|
||||
part += b'\n\n'
|
||||
else:
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
|
||||
@ -34,7 +35,7 @@ class TextileMLizer(OEB2HTML):
|
||||
self.in_a_link = False
|
||||
self.our_ids = []
|
||||
self.images = {}
|
||||
self.id_no_text = u''
|
||||
self.id_no_text = ''
|
||||
self.style_embed = []
|
||||
self.remove_space_after_newline = False
|
||||
self.base_hrefs = [item.href for item in oeb_book.spine]
|
||||
@ -56,7 +57,7 @@ class TextileMLizer(OEB2HTML):
|
||||
return txt
|
||||
|
||||
def mlize_spine(self, oeb_book):
|
||||
output = [u'']
|
||||
output = ['']
|
||||
for item in oeb_book.spine:
|
||||
self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
|
||||
self.rewrite_ids(item.data, item)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
@ -80,8 +80,8 @@ class TXTMLizer(object):
|
||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
output += self.dump_text(content.find(XHTML('body')), stylizer, item)
|
||||
output += '\n\n\n\n\n\n'
|
||||
output = u''.join(output)
|
||||
output = u'\n'.join(l.rstrip() for l in output.splitlines())
|
||||
output = ''.join(output)
|
||||
output = '\n'.join(l.rstrip() for l in output.splitlines())
|
||||
output = self.cleanup_text(output)
|
||||
|
||||
return output
|
||||
@ -97,12 +97,12 @@ class TXTMLizer(object):
|
||||
return text
|
||||
|
||||
def get_toc(self):
|
||||
toc = [u'']
|
||||
toc = ['']
|
||||
if getattr(self.opts, 'inline_toc', None):
|
||||
self.log.debug('Generating table of contents...')
|
||||
toc.append(u'%s\n\n' % _(u'Table of Contents:'))
|
||||
toc.append('%s\n\n' % _('Table of Contents:'))
|
||||
for item in self.toc_titles:
|
||||
toc.append(u'* %s\n\n' % item)
|
||||
toc.append('* %s\n\n' % item)
|
||||
return ''.join(toc)
|
||||
|
||||
def create_flat_toc(self, nodes):
|
||||
@ -224,11 +224,11 @@ class TXTMLizer(object):
|
||||
# Are we in a paragraph block?
|
||||
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
|
||||
if self.opts.remove_paragraph_spacing and not in_heading:
|
||||
text.append(u'\t')
|
||||
text.append('\t')
|
||||
in_block = True
|
||||
|
||||
if tag in SPACE_TAGS:
|
||||
text.append(u' ')
|
||||
text.append(' ')
|
||||
|
||||
# Hard scene breaks.
|
||||
if tag == 'hr':
|
||||
@ -250,9 +250,9 @@ class TXTMLizer(object):
|
||||
text += self.dump_text(item, stylizer, page)
|
||||
|
||||
if in_block:
|
||||
text.append(u'\n\n')
|
||||
text.append('\n\n')
|
||||
if in_heading:
|
||||
text.append(u'\n')
|
||||
text.append('\n')
|
||||
self.last_was_heading = True
|
||||
else:
|
||||
self.last_was_heading = False
|
||||
|
Loading…
x
Reference in New Issue
Block a user