mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: More unicode porting
This commit is contained in:
parent
59ddfa67e5
commit
d782fa0ae4
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -70,19 +71,19 @@ class RBMLizer(object):
|
|||||||
|
|
||||||
def mlize_spine(self):
|
def mlize_spine(self):
|
||||||
self.link_hrefs = {}
|
self.link_hrefs = {}
|
||||||
output = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
|
output = ['<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
|
||||||
output.append(self.get_cover_page())
|
output.append(self.get_cover_page())
|
||||||
output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
|
output.append('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
|
||||||
output.append(self.get_text())
|
output.append(self.get_text())
|
||||||
output.append(u'</BODY></HTML>')
|
output.append('</BODY></HTML>')
|
||||||
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
output = ''.join(output).replace('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
||||||
output = self.clean_text(output)
|
output = self.clean_text(output)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def get_cover_page(self):
|
def get_cover_page(self):
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import XHTML
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
output = u''
|
output = ''
|
||||||
if 'cover' in self.oeb_book.guide:
|
if 'cover' in self.oeb_book.guide:
|
||||||
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
|
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
|
||||||
output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
|
output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
|
||||||
@ -97,10 +98,10 @@ class RBMLizer(object):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
def get_toc(self):
|
def get_toc(self):
|
||||||
toc = [u'']
|
toc = ['']
|
||||||
if self.opts.inline_toc:
|
if self.opts.inline_toc:
|
||||||
self.log.debug('Generating table of contents...')
|
self.log.debug('Generating table of contents...')
|
||||||
toc.append(u'<H1>%s</H1><UL>\n' % _('Table of Contents:'))
|
toc.append('<H1>%s</H1><UL>\n' % _('Table of Contents:'))
|
||||||
for item in self.oeb_book.toc:
|
for item in self.oeb_book.toc:
|
||||||
if item.href in self.link_hrefs.keys():
|
if item.href in self.link_hrefs.keys():
|
||||||
toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
|
toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
|
||||||
@ -113,7 +114,7 @@ class RBMLizer(object):
|
|||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import XHTML
|
from calibre.ebooks.oeb.base import XHTML
|
||||||
|
|
||||||
output = [u'']
|
output = ['']
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
@ -129,7 +130,7 @@ class RBMLizer(object):
|
|||||||
if aid not in self.link_hrefs.keys():
|
if aid not in self.link_hrefs.keys():
|
||||||
self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
||||||
aid = self.link_hrefs[aid]
|
aid = self.link_hrefs[aid]
|
||||||
return u'<A NAME="%s"></A>' % aid
|
return '<A NAME="%s"></A>' % aid
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
# Remove anchors that do not have links
|
# Remove anchors that do not have links
|
||||||
@ -148,16 +149,16 @@ class RBMLizer(object):
|
|||||||
if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \
|
if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \
|
||||||
and elem.tail:
|
and elem.tail:
|
||||||
return [elem.tail]
|
return [elem.tail]
|
||||||
return [u'']
|
return ['']
|
||||||
|
|
||||||
text = [u'']
|
text = ['']
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem)
|
||||||
|
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||||
or style['visibility'] == 'hidden':
|
or style['visibility'] == 'hidden':
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
if hasattr(elem, 'tail') and elem.tail:
|
||||||
return [elem.tail]
|
return [elem.tail]
|
||||||
return [u'']
|
return ['']
|
||||||
|
|
||||||
tag = barename(elem.tag)
|
tag = barename(elem.tag)
|
||||||
tag_count = 0
|
tag_count = 0
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -13,7 +14,7 @@ from calibre.ebooks.rb import HEADER
|
|||||||
from calibre.ebooks.rb import RocketBookError
|
from calibre.ebooks.rb import RocketBookError
|
||||||
from calibre.ebooks.metadata.rb import get_metadata
|
from calibre.ebooks.metadata.rb import get_metadata
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from polyglot.builtins import range
|
from polyglot.builtins import range, as_unicode
|
||||||
from polyglot.urllib import unquote
|
from polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
@ -74,7 +75,7 @@ class Reader(object):
|
|||||||
if toc_item.flags in (1, 2):
|
if toc_item.flags in (1, 2):
|
||||||
return
|
return
|
||||||
|
|
||||||
output = u''
|
output = ''
|
||||||
self.stream.seek(toc_item.offset)
|
self.stream.seek(toc_item.offset)
|
||||||
|
|
||||||
if toc_item.flags == 8:
|
if toc_item.flags == 8:
|
||||||
@ -109,7 +110,7 @@ class Reader(object):
|
|||||||
images = []
|
images = []
|
||||||
|
|
||||||
for item in self.toc:
|
for item in self.toc:
|
||||||
iname = item.name.decode('utf-8')
|
iname = as_unicode(item.name)
|
||||||
if iname.lower().endswith('html'):
|
if iname.lower().endswith('html'):
|
||||||
self.log.debug('HTML item %s found...' % iname)
|
self.log.debug('HTML item %s found...' % iname)
|
||||||
html.append(iname)
|
html.append(iname)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, John Schember john@nachtimwald.com'
|
__copyright__ = '2008, John Schember john@nachtimwald.com'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -6,4 +8,3 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Used for txt output
|
Used for txt output
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '''2011, John Schember <john@nachtimwald.com>
|
__copyright__ = '''2011, John Schember <john@nachtimwald.com>
|
||||||
@ -42,7 +43,7 @@ class MarkdownMLizer(OEB2HTML):
|
|||||||
return txt
|
return txt
|
||||||
|
|
||||||
def mlize_spine(self, oeb_book):
|
def mlize_spine(self, oeb_book):
|
||||||
output = [u'']
|
output = ['']
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
|
self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
|
||||||
self.rewrite_ids(item.data, item)
|
self.rewrite_ids(item.data, item)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -17,7 +19,7 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis
|
|||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from polyglot.builtins import iteritems, unicode_type, map, range, long_type
|
from polyglot.builtins import iteritems, unicode_type, map, range, long_type
|
||||||
|
|
||||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
|
|
||||||
def clean_txt(txt):
|
def clean_txt(txt):
|
||||||
@ -201,7 +203,7 @@ def separate_hard_scene_breaks(txt):
|
|||||||
return '\n%s\n' % line
|
return '\n%s\n' % line
|
||||||
else:
|
else:
|
||||||
return line
|
return line
|
||||||
txt = re.sub(unicode_type(r'(?miu)^[ \t-=~\/_]+$'), lambda mo: sep_break(mo.group()), txt)
|
txt = re.sub(r'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
@ -242,7 +244,7 @@ def split_string_separator(txt, size):
|
|||||||
size -= 2
|
size -= 2
|
||||||
txt = []
|
txt = []
|
||||||
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
|
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
|
||||||
idx = part.rfind('.')
|
idx = part.rfind(b'.')
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
part += b'\n\n'
|
part += b'\n\n'
|
||||||
else:
|
else:
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
|
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
|
||||||
@ -34,7 +35,7 @@ class TextileMLizer(OEB2HTML):
|
|||||||
self.in_a_link = False
|
self.in_a_link = False
|
||||||
self.our_ids = []
|
self.our_ids = []
|
||||||
self.images = {}
|
self.images = {}
|
||||||
self.id_no_text = u''
|
self.id_no_text = ''
|
||||||
self.style_embed = []
|
self.style_embed = []
|
||||||
self.remove_space_after_newline = False
|
self.remove_space_after_newline = False
|
||||||
self.base_hrefs = [item.href for item in oeb_book.spine]
|
self.base_hrefs = [item.href for item in oeb_book.spine]
|
||||||
@ -56,7 +57,7 @@ class TextileMLizer(OEB2HTML):
|
|||||||
return txt
|
return txt
|
||||||
|
|
||||||
def mlize_spine(self, oeb_book):
|
def mlize_spine(self, oeb_book):
|
||||||
output = [u'']
|
output = ['']
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
|
self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
|
||||||
self.rewrite_ids(item.data, item)
|
self.rewrite_ids(item.data, item)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import print_function
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -80,8 +80,8 @@ class TXTMLizer(object):
|
|||||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
output += self.dump_text(content.find(XHTML('body')), stylizer, item)
|
output += self.dump_text(content.find(XHTML('body')), stylizer, item)
|
||||||
output += '\n\n\n\n\n\n'
|
output += '\n\n\n\n\n\n'
|
||||||
output = u''.join(output)
|
output = ''.join(output)
|
||||||
output = u'\n'.join(l.rstrip() for l in output.splitlines())
|
output = '\n'.join(l.rstrip() for l in output.splitlines())
|
||||||
output = self.cleanup_text(output)
|
output = self.cleanup_text(output)
|
||||||
|
|
||||||
return output
|
return output
|
||||||
@ -97,12 +97,12 @@ class TXTMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def get_toc(self):
|
def get_toc(self):
|
||||||
toc = [u'']
|
toc = ['']
|
||||||
if getattr(self.opts, 'inline_toc', None):
|
if getattr(self.opts, 'inline_toc', None):
|
||||||
self.log.debug('Generating table of contents...')
|
self.log.debug('Generating table of contents...')
|
||||||
toc.append(u'%s\n\n' % _(u'Table of Contents:'))
|
toc.append('%s\n\n' % _('Table of Contents:'))
|
||||||
for item in self.toc_titles:
|
for item in self.toc_titles:
|
||||||
toc.append(u'* %s\n\n' % item)
|
toc.append('* %s\n\n' % item)
|
||||||
return ''.join(toc)
|
return ''.join(toc)
|
||||||
|
|
||||||
def create_flat_toc(self, nodes):
|
def create_flat_toc(self, nodes):
|
||||||
@ -224,11 +224,11 @@ class TXTMLizer(object):
|
|||||||
# Are we in a paragraph block?
|
# Are we in a paragraph block?
|
||||||
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
|
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
|
||||||
if self.opts.remove_paragraph_spacing and not in_heading:
|
if self.opts.remove_paragraph_spacing and not in_heading:
|
||||||
text.append(u'\t')
|
text.append('\t')
|
||||||
in_block = True
|
in_block = True
|
||||||
|
|
||||||
if tag in SPACE_TAGS:
|
if tag in SPACE_TAGS:
|
||||||
text.append(u' ')
|
text.append(' ')
|
||||||
|
|
||||||
# Hard scene breaks.
|
# Hard scene breaks.
|
||||||
if tag == 'hr':
|
if tag == 'hr':
|
||||||
@ -250,9 +250,9 @@ class TXTMLizer(object):
|
|||||||
text += self.dump_text(item, stylizer, page)
|
text += self.dump_text(item, stylizer, page)
|
||||||
|
|
||||||
if in_block:
|
if in_block:
|
||||||
text.append(u'\n\n')
|
text.append('\n\n')
|
||||||
if in_heading:
|
if in_heading:
|
||||||
text.append(u'\n')
|
text.append('\n')
|
||||||
self.last_was_heading = True
|
self.last_was_heading = True
|
||||||
else:
|
else:
|
||||||
self.last_was_heading = False
|
self.last_was_heading = False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user