mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Merge branch 'py3' of https://github.com/eli-schwartz/calibre
This commit is contained in:
commit
015ff1611b
@ -5,6 +5,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from polyglot.builtins import native_string_type
|
||||||
|
|
||||||
|
|
||||||
class ConversionUserFeedBack(Exception):
|
class ConversionUserFeedBack(Exception):
|
||||||
|
|
||||||
@ -25,4 +27,4 @@ class ConversionUserFeedBack(Exception):
|
|||||||
|
|
||||||
# Ensure exception uses fully qualified name as this is used to detect it in
|
# Ensure exception uses fully qualified name as this is used to detect it in
|
||||||
# the GUI.
|
# the GUI.
|
||||||
ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack')
|
ConversionUserFeedBack.__name__ = native_string_type('calibre.ebooks.conversion.ConversionUserFeedBack')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
''' CHM File decoding support '''
|
''' CHM File decoding support '''
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||||
@ -64,7 +66,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
metadata = Metadata(os.path.basename(chm_name))
|
metadata = Metadata(os.path.basename(chm_name))
|
||||||
encoding = self._chm_reader.get_encoding() or options.input_encoding or 'cp1252'
|
encoding = self._chm_reader.get_encoding() or options.input_encoding or 'cp1252'
|
||||||
self._chm_reader.CloseCHM()
|
self._chm_reader.CloseCHM()
|
||||||
# print tdir, mainpath
|
# print((tdir, mainpath))
|
||||||
# from calibre import ipython
|
# from calibre import ipython
|
||||||
# ipython()
|
# ipython()
|
||||||
|
|
||||||
@ -117,10 +119,10 @@ class CHMInput(InputFormatPlugin):
|
|||||||
strip_encoding_pats=True, resolve_entities=True)[0]
|
strip_encoding_pats=True, resolve_entities=True)[0]
|
||||||
hhcroot = html.fromstring(hhcdata)
|
hhcroot = html.fromstring(hhcdata)
|
||||||
toc = self._process_nodes(hhcroot)
|
toc = self._process_nodes(hhcroot)
|
||||||
# print "============================="
|
# print("=============================")
|
||||||
# print "Printing hhcroot"
|
# print("Printing hhcroot")
|
||||||
# print etree.tostring(hhcroot, pretty_print=True)
|
# print(etree.tostring(hhcroot, pretty_print=True))
|
||||||
# print "============================="
|
# print("=============================")
|
||||||
log.debug('Found %d section nodes' % toc.count())
|
log.debug('Found %d section nodes' % toc.count())
|
||||||
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
||||||
base = os.path.dirname(os.path.abspath(htmlpath))
|
base = os.path.dirname(os.path.abspath(htmlpath))
|
||||||
@ -183,7 +185,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]')
|
p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]')
|
||||||
parent = p[0] if p else None
|
parent = p[0] if p else None
|
||||||
toc = ancestor_map.get(parent, toc)
|
toc = ancestor_map.get(parent, toc)
|
||||||
title = href = u''
|
title = href = ''
|
||||||
for param in node.xpath('./param'):
|
for param in node.xpath('./param'):
|
||||||
if match_string(param.attrib['name'], 'name'):
|
if match_string(param.attrib['name'], 'name'):
|
||||||
title = param.attrib['value']
|
title = param.attrib['value']
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -7,7 +8,7 @@ import os, re, posixpath
|
|||||||
from itertools import cycle
|
from itertools import cycle
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from polyglot.builtins import unicode_type, as_bytes, getcwd
|
from polyglot.builtins import as_bytes, getcwd
|
||||||
|
|
||||||
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
||||||
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
||||||
@ -44,7 +45,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
import uuid, hashlib
|
import uuid, hashlib
|
||||||
idpf_key = opf.raw_unique_identifier
|
idpf_key = opf.raw_unique_identifier
|
||||||
if idpf_key:
|
if idpf_key:
|
||||||
idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key)
|
idpf_key = re.sub('[\u0020\u0009\u000d\u000a]', '', idpf_key)
|
||||||
idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
|
idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
|
||||||
key = None
|
key = None
|
||||||
for item in opf.identifier_iter():
|
for item in opf.identifier_iter():
|
||||||
@ -269,7 +270,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
|
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
|
||||||
opf = self.find_opf()
|
opf = self.find_opf()
|
||||||
if opf is None:
|
if opf is None:
|
||||||
for f in walk(u'.'):
|
for f in walk('.'):
|
||||||
if f.lower().endswith('.opf') and '__MACOSX' not in f and \
|
if f.lower().endswith('.opf') and '__MACOSX' not in f and \
|
||||||
not os.path.basename(f).startswith('.'):
|
not os.path.basename(f).startswith('.'):
|
||||||
opf = os.path.abspath(f)
|
opf = os.path.abspath(f)
|
||||||
@ -369,7 +370,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
href = text = None
|
href = text = None
|
||||||
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
||||||
text = etree.tostring(
|
text = etree.tostring(
|
||||||
x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(
|
x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(
|
||||||
x.xpath('descendant-or-self::*/@title')).strip()
|
x.xpath('descendant-or-self::*/@title')).strip()
|
||||||
href = x.get('href')
|
href = x.get('href')
|
||||||
if href:
|
if href:
|
||||||
|
@ -8,7 +8,7 @@ import os, re
|
|||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre import guess_type
|
from calibre import guess_type
|
||||||
from polyglot.builtins import iteritems, unicode_type, getcwd
|
from polyglot.builtins import iteritems, getcwd
|
||||||
|
|
||||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||||
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
||||||
@ -71,7 +71,7 @@ class FB2Input(InputFormatPlugin):
|
|||||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||||
css = ''
|
css = ''
|
||||||
for s in stylesheets:
|
for s in stylesheets:
|
||||||
css += etree.tostring(s, encoding=unicode_type, method='text',
|
css += etree.tostring(s, encoding='unicode', method='text',
|
||||||
with_tail=False) + '\n\n'
|
with_tail=False) + '\n\n'
|
||||||
if css:
|
if css:
|
||||||
import css_parser, logging
|
import css_parser, logging
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -67,80 +68,80 @@ class FB2Output(OutputFormatPlugin):
|
|||||||
# Children's
|
# Children's
|
||||||
'child_tale', # Fairy Tales
|
'child_tale', # Fairy Tales
|
||||||
'child_verse', # Verses
|
'child_verse', # Verses
|
||||||
'child_prose', # Prose
|
'child_prose', # Prose
|
||||||
'child_sf', # Science Fiction
|
'child_sf', # Science Fiction
|
||||||
'child_det', # Detectives & Thrillers
|
'child_det', # Detectives & Thrillers
|
||||||
'child_adv', # Adventures
|
'child_adv', # Adventures
|
||||||
'child_education', # Educational
|
'child_education', # Educational
|
||||||
'children', # Other
|
'children', # Other
|
||||||
# Poetry & Dramaturgy
|
# Poetry & Dramaturgy
|
||||||
'poetry', # Poetry
|
'poetry', # Poetry
|
||||||
'dramaturgy', # Dramaturgy
|
'dramaturgy', # Dramaturgy
|
||||||
# Antique literature
|
# Antique literature
|
||||||
'antique_ant', # Antique
|
'antique_ant', # Antique
|
||||||
'antique_european', # European
|
'antique_european', # European
|
||||||
'antique_russian', # Old russian
|
'antique_russian', # Old russian
|
||||||
'antique_east', # Old east
|
'antique_east', # Old east
|
||||||
'antique_myths', # Myths. Legends. Epos
|
'antique_myths', # Myths. Legends. Epos
|
||||||
'antique', # Other
|
'antique', # Other
|
||||||
# Scientific#educational
|
# Scientific#educational
|
||||||
'sci_history', # History
|
'sci_history', # History
|
||||||
'sci_psychology', # Psychology
|
'sci_psychology', # Psychology
|
||||||
'sci_culture', # Cultural science
|
'sci_culture', # Cultural science
|
||||||
'sci_religion', # Religious studies
|
'sci_religion', # Religious studies
|
||||||
'sci_philosophy', # Philosophy
|
'sci_philosophy', # Philosophy
|
||||||
'sci_politics', # Politics
|
'sci_politics', # Politics
|
||||||
'sci_business', # Business literature
|
'sci_business', # Business literature
|
||||||
'sci_juris', # Jurisprudence
|
'sci_juris', # Jurisprudence
|
||||||
'sci_linguistic', # Linguistics
|
'sci_linguistic', # Linguistics
|
||||||
'sci_medicine', # Medicine
|
'sci_medicine', # Medicine
|
||||||
'sci_phys', # Physics
|
'sci_phys', # Physics
|
||||||
'sci_math', # Mathematics
|
'sci_math', # Mathematics
|
||||||
'sci_chem', # Chemistry
|
'sci_chem', # Chemistry
|
||||||
'sci_biology', # Biology
|
'sci_biology', # Biology
|
||||||
'sci_tech', # Technical
|
'sci_tech', # Technical
|
||||||
'science', # Other
|
'science', # Other
|
||||||
# Computers & Internet
|
# Computers & Internet
|
||||||
'comp_www', # Internet
|
'comp_www', # Internet
|
||||||
'comp_programming', # Programming
|
'comp_programming', # Programming
|
||||||
'comp_hard', # Hardware
|
'comp_hard', # Hardware
|
||||||
'comp_soft', # Software
|
'comp_soft', # Software
|
||||||
'comp_db', # Databases
|
'comp_db', # Databases
|
||||||
'comp_osnet', # OS & Networking
|
'comp_osnet', # OS & Networking
|
||||||
'computers', # Other
|
'computers', # Other
|
||||||
# Reference
|
# Reference
|
||||||
'ref_encyc', # Encyclopedias
|
'ref_encyc', # Encyclopedias
|
||||||
'ref_dict', # Dictionaries
|
'ref_dict', # Dictionaries
|
||||||
'ref_ref', # Reference
|
'ref_ref', # Reference
|
||||||
'ref_guide', # Guidebooks
|
'ref_guide', # Guidebooks
|
||||||
'reference', # Other
|
'reference', # Other
|
||||||
# Nonfiction
|
# Nonfiction
|
||||||
'nonf_biography', # Biography & Memoirs
|
'nonf_biography', # Biography & Memoirs
|
||||||
'nonf_publicism', # Publicism
|
'nonf_publicism', # Publicism
|
||||||
'nonf_criticism', # Criticism
|
'nonf_criticism', # Criticism
|
||||||
'design', # Art & design
|
'design', # Art & design
|
||||||
'nonfiction', # Other
|
'nonfiction', # Other
|
||||||
# Religion & Inspiration
|
# Religion & Inspiration
|
||||||
'religion_rel', # Religion
|
'religion_rel', # Religion
|
||||||
'religion_esoterics', # Esoterics
|
'religion_esoterics', # Esoterics
|
||||||
'religion_self', # Self#improvement
|
'religion_self', # Self#improvement
|
||||||
'religion', # Other
|
'religion', # Other
|
||||||
# Humor
|
# Humor
|
||||||
'humor_anecdote', # Anecdote (funny stories)
|
'humor_anecdote', # Anecdote (funny stories)
|
||||||
'humor_prose', # Prose
|
'humor_prose', # Prose
|
||||||
'humor_verse', # Verses
|
'humor_verse', # Verses
|
||||||
'humor', # Other
|
'humor', # Other
|
||||||
# Home & Family
|
# Home & Family
|
||||||
'home_cooking', # Cooking
|
'home_cooking', # Cooking
|
||||||
'home_pets', # Pets
|
'home_pets', # Pets
|
||||||
'home_crafts', # Hobbies & Crafts
|
'home_crafts', # Hobbies & Crafts
|
||||||
'home_entertain', # Entertaining
|
'home_entertain', # Entertaining
|
||||||
'home_health', # Health
|
'home_health', # Health
|
||||||
'home_garden', # Garden
|
'home_garden', # Garden
|
||||||
'home_diy', # Do it yourself
|
'home_diy', # Do it yourself
|
||||||
'home_sport', # Sports
|
'home_sport', # Sports
|
||||||
'home_sex', # Erotica & sex
|
'home_sex', # Erotica & sex
|
||||||
'home', # Other
|
'home', # Other
|
||||||
]
|
]
|
||||||
ui_data = {
|
ui_data = {
|
||||||
'sectionize': {
|
'sectionize': {
|
||||||
|
@ -100,7 +100,7 @@ class HTMLZOutput(OutputFormatPlugin):
|
|||||||
for item in oeb_book.manifest:
|
for item in oeb_book.manifest:
|
||||||
if item.media_type in OEB_IMAGES and item.href in images:
|
if item.media_type in OEB_IMAGES and item.href in images:
|
||||||
if item.media_type == SVG_MIME:
|
if item.media_type == SVG_MIME:
|
||||||
data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
|
data = etree.tostring(item.data, encoding='unicode')
|
||||||
else:
|
else:
|
||||||
data = item.data
|
data = item.data
|
||||||
fname = os.path.join(tdir, u'images', images[item.href])
|
fname = os.path.join(tdir, u'images', images[item.href])
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
@ -85,4 +85,4 @@ class LRFInput(InputFormatPlugin):
|
|||||||
with open('content.opf', 'wb') as f:
|
with open('content.opf', 'wb') as f:
|
||||||
f.write(result)
|
f.write(result)
|
||||||
styles.write()
|
styles.write()
|
||||||
return os.path.abspath(u'content.opf')
|
return os.path.abspath('content.opf')
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
@ -188,7 +188,7 @@ class LRFOutput(OutputFormatPlugin):
|
|||||||
self.flatten_toc()
|
self.flatten_toc()
|
||||||
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
with TemporaryDirectory(u'_lrf_output') as tdir:
|
with TemporaryDirectory('_lrf_output') as tdir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
oeb_output = plugin_for_output_format('oeb')
|
oeb_output = plugin_for_output_format('oeb')
|
||||||
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
|
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -29,13 +30,13 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
mr = MobiReader(stream, log, options.input_encoding,
|
mr = MobiReader(stream, log, options.input_encoding,
|
||||||
options.debug_pipeline)
|
options.debug_pipeline)
|
||||||
if mr.kf8_type is None:
|
if mr.kf8_type is None:
|
||||||
mr.extract_content(u'.', parse_cache)
|
mr.extract_content('.', parse_cache)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
mr = MobiReader(stream, log, options.input_encoding,
|
mr = MobiReader(stream, log, options.input_encoding,
|
||||||
options.debug_pipeline, try_extra_data_fix=True)
|
options.debug_pipeline, try_extra_data_fix=True)
|
||||||
if mr.kf8_type is None:
|
if mr.kf8_type is None:
|
||||||
mr.extract_content(u'.', parse_cache)
|
mr.extract_content('.', parse_cache)
|
||||||
|
|
||||||
if mr.kf8_type is not None:
|
if mr.kf8_type is not None:
|
||||||
log('Found KF8 MOBI of type %r'%mr.kf8_type)
|
log('Found KF8 MOBI of type %r'%mr.kf8_type)
|
||||||
@ -52,7 +53,8 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
if raw:
|
if raw:
|
||||||
if isinstance(raw, unicode_type):
|
if isinstance(raw, unicode_type):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
open(u'debug-raw.html', 'wb').write(raw)
|
with open('debug-raw.html', 'wb') as f:
|
||||||
|
f.write(raw)
|
||||||
from calibre.ebooks.oeb.base import close_self_closing_tags
|
from calibre.ebooks.oeb.base import close_self_closing_tags
|
||||||
for f, root in parse_cache.items():
|
for f, root in parse_cache.items():
|
||||||
raw = html.tostring(root, encoding='utf-8', method='xml',
|
raw = html.tostring(root, encoding='utf-8', method='xml',
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -38,7 +39,7 @@ class PDFInput(InputFormatPlugin):
|
|||||||
with open(u'index.xml', 'rb') as f:
|
with open(u'index.xml', 'rb') as f:
|
||||||
xml = clean_ascii_chars(f.read())
|
xml = clean_ascii_chars(f.read())
|
||||||
PDFDocument(xml, self.opts, self.log)
|
PDFDocument(xml, self.opts, self.log)
|
||||||
return os.path.join(getcwd(), u'metadata.opf')
|
return os.path.join(getcwd(), 'metadata.opf')
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
@ -57,7 +58,7 @@ class PDFInput(InputFormatPlugin):
|
|||||||
mi = get_metadata(stream, 'pdf')
|
mi = get_metadata(stream, 'pdf')
|
||||||
opf = OPFCreator(getcwd(), mi)
|
opf = OPFCreator(getcwd(), mi)
|
||||||
|
|
||||||
manifest = [(u'index.html', None)]
|
manifest = [('index.html', None)]
|
||||||
|
|
||||||
images = os.listdir(getcwd())
|
images = os.listdir(getcwd())
|
||||||
images.remove('index.html')
|
images.remove('index.html')
|
||||||
@ -66,16 +67,16 @@ class PDFInput(InputFormatPlugin):
|
|||||||
log.debug('Generating manifest...')
|
log.debug('Generating manifest...')
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
|
|
||||||
opf.create_spine([u'index.html'])
|
opf.create_spine(['index.html'])
|
||||||
log.debug('Rendering manifest...')
|
log.debug('Rendering manifest...')
|
||||||
with open(u'metadata.opf', 'wb') as opffile:
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
opf.render(opffile)
|
opf.render(opffile)
|
||||||
if os.path.exists(u'toc.ncx'):
|
if os.path.exists('toc.ncx'):
|
||||||
ncxid = opf.manifest.id_for_path('toc.ncx')
|
ncxid = opf.manifest.id_for_path('toc.ncx')
|
||||||
if ncxid:
|
if ncxid:
|
||||||
with open(u'metadata.opf', 'r+b') as f:
|
with open('metadata.opf', 'r+b') as f:
|
||||||
raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
|
raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
|
|
||||||
return os.path.join(getcwd(), u'metadata.opf')
|
return os.path.join(getcwd(), 'metadata.opf')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
@ -30,7 +31,7 @@ class PDFMetadata(object): # {{{
|
|||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
self.title = _(u'Unknown')
|
self.title = _(u'Unknown')
|
||||||
self.author = _(u'Unknown')
|
self.author = _(u'Unknown')
|
||||||
self.tags = u''
|
self.tags = ''
|
||||||
self.mi = mi
|
self.mi = mi
|
||||||
|
|
||||||
if mi is not None:
|
if mi is not None:
|
||||||
@ -39,7 +40,7 @@ class PDFMetadata(object): # {{{
|
|||||||
if mi.authors:
|
if mi.authors:
|
||||||
self.author = authors_to_string(mi.authors)
|
self.author = authors_to_string(mi.authors)
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
self.tags = u', '.join(mi.tags)
|
self.tags = ', '.join(mi.tags)
|
||||||
|
|
||||||
self.title = force_unicode(self.title)
|
self.title = force_unicode(self.title)
|
||||||
self.author = force_unicode(self.author)
|
self.author = force_unicode(self.author)
|
||||||
@ -242,16 +243,16 @@ class PDFOutput(OutputFormatPlugin):
|
|||||||
elif iswindows and rule.type == rule.STYLE_RULE:
|
elif iswindows and rule.type == rule.STYLE_RULE:
|
||||||
from tinycss.fonts3 import parse_font_family, serialize_font_family
|
from tinycss.fonts3 import parse_font_family, serialize_font_family
|
||||||
s = rule.style
|
s = rule.style
|
||||||
f = s.getProperty(u'font-family')
|
f = s.getProperty('font-family')
|
||||||
if f is not None:
|
if f is not None:
|
||||||
font_families = parse_font_family(css_text(f.propertyValue))
|
font_families = parse_font_family(css_text(f.propertyValue))
|
||||||
ff = [x for x in font_families if x.lower() != u'courier']
|
ff = [x for x in font_families if x.lower() != 'courier']
|
||||||
if len(ff) != len(font_families):
|
if len(ff) != len(font_families):
|
||||||
if 'courier' not in self.filtered_font_warnings:
|
if 'courier' not in self.filtered_font_warnings:
|
||||||
# See https://bugs.launchpad.net/bugs/1665835
|
# See https://bugs.launchpad.net/bugs/1665835
|
||||||
self.filtered_font_warnings.add(u'courier')
|
self.filtered_font_warnings.add('courier')
|
||||||
self.log.warn(u'Removing courier font family as it does not render on windows')
|
self.log.warn('Removing courier font family as it does not render on windows')
|
||||||
f.propertyValue.cssText = serialize_font_family(ff or [u'monospace'])
|
f.propertyValue.cssText = serialize_font_family(ff or ['monospace'])
|
||||||
|
|
||||||
def convert_text(self, oeb_book):
|
def convert_text(self, oeb_book):
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
@ -73,9 +74,9 @@ class PMLInput(InputFormatPlugin):
|
|||||||
imgs = glob.glob(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img', '*.png'))
|
imgs = glob.glob(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img', '*.png'))
|
||||||
# No images in Dropbook location try generic images directory
|
# No images in Dropbook location try generic images directory
|
||||||
if not imgs:
|
if not imgs:
|
||||||
imgs = glob.glob(os.path.join(os.path.join(tdir, u'images'), u'*.png'))
|
imgs = glob.glob(os.path.join(os.path.join(tdir, 'images'), '*.png'))
|
||||||
if imgs:
|
if imgs:
|
||||||
os.makedirs(os.path.join(getcwd(), u'images'))
|
os.makedirs(os.path.join(getcwd(), 'images'))
|
||||||
for img in imgs:
|
for img in imgs:
|
||||||
pimg_name = os.path.basename(img)
|
pimg_name = os.path.basename(img)
|
||||||
pimg_path = os.path.join(getcwd(), 'images', pimg_name)
|
pimg_path = os.path.join(getcwd(), 'images', pimg_name)
|
||||||
@ -99,11 +100,11 @@ class PMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
if file_ext == 'pmlz':
|
if file_ext == 'pmlz':
|
||||||
log.debug('De-compressing content to temporary directory...')
|
log.debug('De-compressing content to temporary directory...')
|
||||||
with TemporaryDirectory(u'_unpmlz') as tdir:
|
with TemporaryDirectory('_unpmlz') as tdir:
|
||||||
zf = ZipFile(stream)
|
zf = ZipFile(stream)
|
||||||
zf.extractall(tdir)
|
zf.extractall(tdir)
|
||||||
|
|
||||||
pmls = glob.glob(os.path.join(tdir, u'*.pml'))
|
pmls = glob.glob(os.path.join(tdir, '*.pml'))
|
||||||
for pml in pmls:
|
for pml in pmls:
|
||||||
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
|
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
|
||||||
html_path = os.path.join(getcwd(), html_name)
|
html_path = os.path.join(getcwd(), html_name)
|
||||||
@ -114,8 +115,8 @@ class PMLInput(InputFormatPlugin):
|
|||||||
toc += ttoc
|
toc += ttoc
|
||||||
images = self.get_images(stream, tdir, True)
|
images = self.get_images(stream, tdir, True)
|
||||||
else:
|
else:
|
||||||
toc = self.process_pml(stream, u'index.html')
|
toc = self.process_pml(stream, 'index.html')
|
||||||
pages.append(u'index.html')
|
pages.append('index.html')
|
||||||
|
|
||||||
if hasattr(stream, 'name'):
|
if hasattr(stream, 'name'):
|
||||||
images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))
|
images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))
|
||||||
@ -131,14 +132,14 @@ class PMLInput(InputFormatPlugin):
|
|||||||
log.debug('Reading metadata from input file...')
|
log.debug('Reading metadata from input file...')
|
||||||
mi = get_metadata(stream, 'pml')
|
mi = get_metadata(stream, 'pml')
|
||||||
if 'images/cover.png' in images:
|
if 'images/cover.png' in images:
|
||||||
mi.cover = u'images/cover.png'
|
mi.cover = 'images/cover.png'
|
||||||
opf = OPFCreator(getcwd(), mi)
|
opf = OPFCreator(getcwd(), mi)
|
||||||
log.debug('Generating manifest...')
|
log.debug('Generating manifest...')
|
||||||
opf.create_manifest(manifest_items)
|
opf.create_manifest(manifest_items)
|
||||||
opf.create_spine(pages)
|
opf.create_spine(pages)
|
||||||
opf.set_toc(toc)
|
opf.set_toc(toc)
|
||||||
with open(u'metadata.opf', 'wb') as opffile:
|
with open('metadata.opf', 'wb') as opffile:
|
||||||
with open(u'toc.ncx', 'wb') as tocfile:
|
with open('toc.ncx', 'wb') as tocfile:
|
||||||
opf.render(opffile, tocfile, u'toc.ncx')
|
opf.render(opffile, tocfile, 'toc.ncx')
|
||||||
|
|
||||||
return os.path.join(getcwd(), u'metadata.opf')
|
return os.path.join(getcwd(), 'metadata.opf')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
@ -65,7 +65,8 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
zf = ZipFile(recipe_or_file, 'r')
|
zf = ZipFile(recipe_or_file, 'r')
|
||||||
zf.extractall()
|
zf.extractall()
|
||||||
zf.close()
|
zf.close()
|
||||||
self.recipe_source = open(u'download.recipe', 'rb').read()
|
with open('download.recipe', 'rb') as f:
|
||||||
|
self.recipe_source = f.read()
|
||||||
recipe = compile_recipe(self.recipe_source)
|
recipe = compile_recipe(self.recipe_source)
|
||||||
recipe.needs_subscription = False
|
recipe.needs_subscription = False
|
||||||
self.recipe_object = recipe(opts, log, self.report_progress)
|
self.recipe_object = recipe(opts, log, self.report_progress)
|
||||||
@ -87,7 +88,8 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
self.recipe_source = self.recipe_source.encode('utf-8')
|
self.recipe_source = self.recipe_source.encode('utf-8')
|
||||||
recipe = compile_recipe(self.recipe_source)
|
recipe = compile_recipe(self.recipe_source)
|
||||||
elif os.access(recipe_or_file, os.R_OK):
|
elif os.access(recipe_or_file, os.R_OK):
|
||||||
self.recipe_source = open(recipe_or_file, 'rb').read()
|
with open(recipe_or_file, 'rb') as f:
|
||||||
|
self.recipe_source = f.read()
|
||||||
recipe = compile_recipe(self.recipe_source)
|
recipe = compile_recipe(self.recipe_source)
|
||||||
log('Using custom recipe')
|
log('Using custom recipe')
|
||||||
else:
|
else:
|
||||||
@ -140,11 +142,11 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
for key, val in self.recipe_object.conversion_options.items():
|
for key, val in self.recipe_object.conversion_options.items():
|
||||||
setattr(opts, key, val)
|
setattr(opts, key, val)
|
||||||
|
|
||||||
for f in os.listdir(u'.'):
|
for f in os.listdir('.'):
|
||||||
if f.endswith('.opf'):
|
if f.endswith('.opf'):
|
||||||
return os.path.abspath(f)
|
return os.path.abspath(f)
|
||||||
|
|
||||||
for f in walk(u'.'):
|
for f in walk('.'):
|
||||||
if f.endswith('.opf'):
|
if f.endswith('.opf'):
|
||||||
return os.path.abspath(f)
|
return os.path.abspath(f)
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
@ -20,7 +22,7 @@ from calibre import (extract, walk, isbytestring, filesystem_encoding,
|
|||||||
from calibre.constants import __version__
|
from calibre.constants import __version__
|
||||||
from polyglot.builtins import unicode_type, string_or_bytes, map
|
from polyglot.builtins import unicode_type, string_or_bytes, map
|
||||||
|
|
||||||
DEBUG_README=u'''
|
DEBUG_README=b'''
|
||||||
This debug directory contains snapshots of the e-book as it passes through the
|
This debug directory contains snapshots of the e-book as it passes through the
|
||||||
various stages of conversion. The stages are:
|
various stages of conversion. The stages are:
|
||||||
|
|
||||||
@ -848,7 +850,7 @@ OptionRecommendation(name='search_replace',
|
|||||||
rec = self.get_option_by_name(name)
|
rec = self.get_option_by_name(name)
|
||||||
help = getattr(rec, 'help', None)
|
help = getattr(rec, 'help', None)
|
||||||
if help is not None:
|
if help is not None:
|
||||||
return help.replace('%default', str(rec.recommended_value))
|
return help.replace('%default', unicode_type(rec.recommended_value))
|
||||||
|
|
||||||
def get_all_help(self):
|
def get_all_help(self):
|
||||||
ans = {}
|
ans = {}
|
||||||
@ -1061,8 +1063,8 @@ OptionRecommendation(name='search_replace',
|
|||||||
self.opts.debug_pipeline = os.path.abspath(self.opts.debug_pipeline)
|
self.opts.debug_pipeline = os.path.abspath(self.opts.debug_pipeline)
|
||||||
if not os.path.exists(self.opts.debug_pipeline):
|
if not os.path.exists(self.opts.debug_pipeline):
|
||||||
os.makedirs(self.opts.debug_pipeline)
|
os.makedirs(self.opts.debug_pipeline)
|
||||||
open(os.path.join(self.opts.debug_pipeline, 'README.txt'),
|
with open(os.path.join(self.opts.debug_pipeline, 'README.txt'), 'wb') as f:
|
||||||
'wb').write(DEBUG_README.encode('utf-8'))
|
f.write(DEBUG_README)
|
||||||
for x in ('input', 'parsed', 'structure', 'processed'):
|
for x in ('input', 'parsed', 'structure', 'processed'):
|
||||||
x = os.path.join(self.opts.debug_pipeline, x)
|
x = os.path.join(self.opts.debug_pipeline, x)
|
||||||
if os.path.exists(x):
|
if os.path.exists(x):
|
||||||
|
@ -75,8 +75,8 @@ def smarten_punctuation(html, log=None):
|
|||||||
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
||||||
preprocessor = HeuristicProcessor(log=log)
|
preprocessor = HeuristicProcessor(log=log)
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
start = 'calibre-smartypants-'+str(uuid4())
|
start = 'calibre-smartypants-'+unicode_type(uuid4())
|
||||||
stop = 'calibre-smartypants-'+str(uuid4())
|
stop = 'calibre-smartypants-'+unicode_type(uuid4())
|
||||||
html = html.replace('<!--', start)
|
html = html.replace('<!--', start)
|
||||||
html = html.replace('-->', stop)
|
html = html.replace('-->', stop)
|
||||||
html = preprocessor.fix_nbsp_indents(html)
|
html = preprocessor.fix_nbsp_indents(html)
|
||||||
@ -152,20 +152,20 @@ class DocAnalysis(object):
|
|||||||
maxLineLength=1900 # Discard larger than this to stay in range
|
maxLineLength=1900 # Discard larger than this to stay in range
|
||||||
buckets=20 # Each line is divided into a bucket based on length
|
buckets=20 # Each line is divided into a bucket based on length
|
||||||
|
|
||||||
# print "there are "+str(len(lines))+" lines"
|
# print("there are "+unicode_type(len(lines))+" lines")
|
||||||
# max = 0
|
# max = 0
|
||||||
# for line in self.lines:
|
# for line in self.lines:
|
||||||
# l = len(line)
|
# l = len(line)
|
||||||
# if l > max:
|
# if l > max:
|
||||||
# max = l
|
# max = l
|
||||||
# print "max line found is "+str(max)
|
# print("max line found is "+unicode_type(max))
|
||||||
# Build the line length histogram
|
# Build the line length histogram
|
||||||
hRaw = [0 for i in range(0,buckets)]
|
hRaw = [0 for i in range(0,buckets)]
|
||||||
for line in self.lines:
|
for line in self.lines:
|
||||||
l = len(line)
|
l = len(line)
|
||||||
if l > minLineLength and l < maxLineLength:
|
if l > minLineLength and l < maxLineLength:
|
||||||
l = int(l/100)
|
l = int(l // 100)
|
||||||
# print "adding "+str(l)
|
# print("adding "+unicode_type(l))
|
||||||
hRaw[l]+=1
|
hRaw[l]+=1
|
||||||
|
|
||||||
# Normalize the histogram into percents
|
# Normalize the histogram into percents
|
||||||
@ -174,8 +174,8 @@ class DocAnalysis(object):
|
|||||||
h = [float(count)/totalLines for count in hRaw]
|
h = [float(count)/totalLines for count in hRaw]
|
||||||
else:
|
else:
|
||||||
h = []
|
h = []
|
||||||
# print "\nhRaw histogram lengths are: "+str(hRaw)
|
# print("\nhRaw histogram lengths are: "+unicode_type(hRaw))
|
||||||
# print " percents are: "+str(h)+"\n"
|
# print(" percents are: "+unicode_type(h)+"\n")
|
||||||
|
|
||||||
# Find the biggest bucket
|
# Find the biggest bucket
|
||||||
maxValue = 0
|
maxValue = 0
|
||||||
@ -184,10 +184,10 @@ class DocAnalysis(object):
|
|||||||
maxValue = h[i]
|
maxValue = h[i]
|
||||||
|
|
||||||
if maxValue < percent:
|
if maxValue < percent:
|
||||||
# print "Line lengths are too variable. Not unwrapping."
|
# print("Line lengths are too variable. Not unwrapping.")
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
# print str(maxValue)+" of the lines were in one bucket"
|
# print(unicode_type(maxValue)+" of the lines were in one bucket")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@ -232,7 +232,7 @@ class Dehyphenator(object):
|
|||||||
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
|
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
|
||||||
lookupword = self.removeprefix.sub('', lookupword)
|
lookupword = self.removeprefix.sub('', lookupword)
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
|
self.log("lookup word is: "+lookupword+", orig is: " + hyphenated)
|
||||||
try:
|
try:
|
||||||
searchresult = self.html.find(lookupword.lower())
|
searchresult = self.html.find(lookupword.lower())
|
||||||
except:
|
except:
|
||||||
@ -240,33 +240,33 @@ class Dehyphenator(object):
|
|||||||
if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
|
if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
|
||||||
if self.html.find(lookupword) != -1 or searchresult != -1:
|
if self.html.find(lookupword) != -1 or searchresult != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated))
|
self.log(" Cleanup:returned dehyphenated word: " + dehyphenated)
|
||||||
return dehyphenated
|
return dehyphenated
|
||||||
elif self.html.find(hyphenated) != -1:
|
elif self.html.find(hyphenated) != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returned hyphenated word: " + str(hyphenated))
|
self.log(" Cleanup:returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
else:
|
else:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
|
self.log(" Cleanup:returning original text "+firsthalf+" + linefeed "+secondhalf)
|
||||||
return firsthalf+'\u2014'+wraptags+secondhalf
|
return firsthalf+'\u2014'+wraptags+secondhalf
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
|
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("too short, returned hyphenated word: " + str(hyphenated))
|
self.log("too short, returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
if len(firsthalf) <= 2 and len(secondhalf) <= 2:
|
if len(firsthalf) <= 2 and len(secondhalf) <= 2:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log("too short, returned hyphenated word: " + str(hyphenated))
|
self.log("too short, returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
if self.html.find(lookupword) != -1 or searchresult != -1:
|
if self.html.find(lookupword) != -1 or searchresult != -1:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" returned dehyphenated word: " + str(dehyphenated))
|
self.log(" returned dehyphenated word: " + dehyphenated)
|
||||||
return dehyphenated
|
return dehyphenated
|
||||||
else:
|
else:
|
||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log(" returned hyphenated word: " + str(hyphenated))
|
self.log(" returned hyphenated word: " + hyphenated)
|
||||||
return hyphenated
|
return hyphenated
|
||||||
|
|
||||||
def __call__(self, html, format, length=1):
|
def __call__(self, html, format, length=1):
|
||||||
@ -595,7 +595,7 @@ class HTMLPreProcessor(object):
|
|||||||
docanalysis = DocAnalysis('pdf', html)
|
docanalysis = DocAnalysis('pdf', html)
|
||||||
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
|
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
|
||||||
if length:
|
if length:
|
||||||
# print "The pdf line length returned is " + str(length)
|
# print("The pdf line length returned is " + unicode_type(length))
|
||||||
# unwrap em/en dashes
|
# unwrap em/en dashes
|
||||||
end_rules.append((re.compile(
|
end_rules.append((re.compile(
|
||||||
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
|
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
|
||||||
|
@ -19,7 +19,6 @@ from calibre.utils.localization import canonicalize_lang
|
|||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||||
from polyglot.builtins import unicode_type
|
|
||||||
|
|
||||||
|
|
||||||
def fromstring(raw, parser=RECOVER_PARSER):
|
def fromstring(raw, parser=RECOVER_PARSER):
|
||||||
@ -56,7 +55,7 @@ def read_doc_props(raw, mi, XPath):
|
|||||||
|
|
||||||
desc = XPath('//dc:description')(root)
|
desc = XPath('//dc:description')(root)
|
||||||
if desc:
|
if desc:
|
||||||
raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
|
raw = etree.tostring(desc[0], method='text', encoding='unicode')
|
||||||
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
|
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
|
||||||
mi.comments = raw.strip()
|
mi.comments = raw.strip()
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ from operator import itemgetter
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.utils.icu import partition_by_first_letter, sort_key
|
from calibre.utils.icu import partition_by_first_letter, sort_key
|
||||||
from polyglot.builtins import iteritems, unicode_type, filter
|
from polyglot.builtins import iteritems, filter
|
||||||
|
|
||||||
|
|
||||||
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
|
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
|
||||||
@ -246,7 +246,7 @@ def polish_index_markup(index, blocks):
|
|||||||
a = block.xpath('descendant::a[1]')
|
a = block.xpath('descendant::a[1]')
|
||||||
text = ''
|
text = ''
|
||||||
if a:
|
if a:
|
||||||
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
|
text = etree.tostring(a[0], method='text', with_tail=False, encoding='unicode').strip()
|
||||||
if ':' in text:
|
if ':' in text:
|
||||||
path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':'))))
|
path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':'))))
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
|
@ -12,7 +12,7 @@ from lxml.etree import tostring
|
|||||||
|
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
||||||
from polyglot.builtins import iteritems, unicode_type, range
|
from polyglot.builtins import iteritems, range
|
||||||
|
|
||||||
|
|
||||||
def from_headings(body, log, namespace):
|
def from_headings(body, log, namespace):
|
||||||
@ -93,7 +93,7 @@ def link_to_txt(a, styles, object_map):
|
|||||||
if rs.css.get('display', None) == 'none':
|
if rs.css.get('display', None) == 'none':
|
||||||
a.remove(child)
|
a.remove(child)
|
||||||
|
|
||||||
return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
|
return tostring(a, method='text', with_tail=False, encoding='unicode').strip()
|
||||||
|
|
||||||
|
|
||||||
def from_toc(docx, link_map, styles, object_map, log, namespace):
|
def from_toc(docx, link_map, styles, object_map, log, namespace):
|
||||||
|
@ -14,7 +14,7 @@ from lxml import etree
|
|||||||
from calibre.ebooks import parse_css_length
|
from calibre.ebooks import parse_css_length
|
||||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||||
from calibre.utils.localization import lang_as_iso639_1
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
from polyglot.builtins import iteritems, unicode_type, filter
|
from polyglot.builtins import iteritems, filter
|
||||||
from tinycss.css21 import CSS21Parser
|
from tinycss.css21 import CSS21Parser
|
||||||
|
|
||||||
css_parser = CSS21Parser()
|
css_parser = CSS21Parser()
|
||||||
@ -46,7 +46,7 @@ def bmap(x):
|
|||||||
|
|
||||||
|
|
||||||
def is_dropcaps(html_tag, tag_style):
|
def is_dropcaps(html_tag, tag_style):
|
||||||
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)) < 5 and tag_style['float'] == 'left'
|
||||||
|
|
||||||
|
|
||||||
class CombinedStyle(object):
|
class CombinedStyle(object):
|
||||||
|
@ -65,7 +65,7 @@ class FB2MLizer(object):
|
|||||||
output = self.clean_text(u''.join(output))
|
output = self.clean_text(u''.join(output))
|
||||||
|
|
||||||
if self.opts.pretty_print:
|
if self.opts.pretty_print:
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
|
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding='unicode', pretty_print=True)
|
||||||
else:
|
else:
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ NAMESPACES = {
|
|||||||
'xlink' : 'http://www.w3.org/1999/xlink'
|
'xlink' : 'http://www.w3.org/1999/xlink'
|
||||||
}
|
}
|
||||||
|
|
||||||
tostring = partial(etree.tostring, method='text', encoding=unicode_type)
|
tostring = partial(etree.tostring, method='text', encoding='unicode')
|
||||||
|
|
||||||
|
|
||||||
def XLINK(tag):
|
def XLINK(tag):
|
||||||
@ -448,7 +448,7 @@ def ensure_namespace(doc):
|
|||||||
break
|
break
|
||||||
if bare_tags:
|
if bare_tags:
|
||||||
import re
|
import re
|
||||||
raw = etree.tostring(doc, encoding=unicode_type)
|
raw = etree.tostring(doc, encoding='unicode')
|
||||||
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
|
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
|
||||||
doc = etree.fromstring(raw)
|
doc = etree.fromstring(raw)
|
||||||
return doc
|
return doc
|
||||||
|
@ -893,7 +893,7 @@ class OPF(object): # {{{
|
|||||||
ans = None
|
ans = None
|
||||||
for match in self.pubdate_path(self.metadata):
|
for match in self.pubdate_path(self.metadata):
|
||||||
try:
|
try:
|
||||||
val = parse_date(etree.tostring(match, encoding=unicode_type,
|
val = parse_date(etree.tostring(match, encoding='unicode',
|
||||||
method='text', with_tail=False).strip())
|
method='text', with_tail=False).strip())
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
@ -906,7 +906,7 @@ class OPF(object): # {{{
|
|||||||
least_val = least_elem = None
|
least_val = least_elem = None
|
||||||
for match in self.pubdate_path(self.metadata):
|
for match in self.pubdate_path(self.metadata):
|
||||||
try:
|
try:
|
||||||
cval = parse_date(etree.tostring(match, encoding=unicode_type,
|
cval = parse_date(etree.tostring(match, encoding='unicode',
|
||||||
method='text', with_tail=False).strip())
|
method='text', with_tail=False).strip())
|
||||||
except:
|
except:
|
||||||
match.getparent().remove(match)
|
match.getparent().remove(match)
|
||||||
@ -964,7 +964,7 @@ class OPF(object): # {{{
|
|||||||
for attr, val in iteritems(x.attrib):
|
for attr, val in iteritems(x.attrib):
|
||||||
if attr.endswith('scheme'):
|
if attr.endswith('scheme'):
|
||||||
typ = icu_lower(val)
|
typ = icu_lower(val)
|
||||||
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
val = etree.tostring(x, with_tail=False, encoding='unicode',
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
if val and typ not in ('calibre', 'uuid'):
|
if val and typ not in ('calibre', 'uuid'):
|
||||||
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
|
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
|
||||||
@ -973,7 +973,7 @@ class OPF(object): # {{{
|
|||||||
found_scheme = True
|
found_scheme = True
|
||||||
break
|
break
|
||||||
if not found_scheme:
|
if not found_scheme:
|
||||||
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
val = etree.tostring(x, with_tail=False, encoding='unicode',
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
if val.lower().startswith('urn:isbn:'):
|
if val.lower().startswith('urn:isbn:'):
|
||||||
val = check_isbn(val.split(':')[-1])
|
val = check_isbn(val.split(':')[-1])
|
||||||
|
@ -210,7 +210,7 @@ class TOC(list):
|
|||||||
text = u''
|
text = u''
|
||||||
for txt in txt_path(nl):
|
for txt in txt_path(nl):
|
||||||
text += etree.tostring(txt, method='text',
|
text += etree.tostring(txt, method='text',
|
||||||
encoding=unicode_type, with_tail=False)
|
encoding='unicode', with_tail=False)
|
||||||
content = content_path(np)
|
content = content_path(np)
|
||||||
if content and text:
|
if content and text:
|
||||||
content = content[0]
|
content = content[0]
|
||||||
|
@ -235,7 +235,7 @@ class KF8Writer(object):
|
|||||||
root = self.data(item)
|
root = self.data(item)
|
||||||
|
|
||||||
for svg in XPath('//svg:svg')(root):
|
for svg in XPath('//svg:svg')(root):
|
||||||
raw = etree.tostring(svg, encoding=unicode_type, with_tail=False)
|
raw = etree.tostring(svg, encoding='unicode', with_tail=False)
|
||||||
idx = len(self.flows)
|
idx = len(self.flows)
|
||||||
self.flows.append(raw)
|
self.flows.append(raw)
|
||||||
p = svg.getparent()
|
p = svg.getparent()
|
||||||
|
@ -400,7 +400,7 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True):
|
|||||||
|
|
||||||
|
|
||||||
def xml2text(elem, pretty_print=False):
|
def xml2text(elem, pretty_print=False):
|
||||||
return etree.tostring(elem, method='text', encoding=unicode_type, with_tail=False, pretty_print=pretty_print)
|
return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print)
|
||||||
|
|
||||||
|
|
||||||
def escape_cdata(root):
|
def escape_cdata(root):
|
||||||
|
@ -112,7 +112,7 @@ def _html4_parse(data):
|
|||||||
for elem in data.iter(tag=etree.Comment):
|
for elem in data.iter(tag=etree.Comment):
|
||||||
if elem.text:
|
if elem.text:
|
||||||
elem.text = elem.text.strip('-')
|
elem.text = elem.text.strip('-')
|
||||||
data = etree.tostring(data, encoding=unicode_type)
|
data = etree.tostring(data, encoding='unicode')
|
||||||
|
|
||||||
# Setting huge_tree=True causes crashes in windows with large files
|
# Setting huge_tree=True causes crashes in windows with large files
|
||||||
parser = etree.XMLParser(no_network=True)
|
parser = etree.XMLParser(no_network=True)
|
||||||
@ -273,7 +273,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
|||||||
if not namespace(data.tag):
|
if not namespace(data.tag):
|
||||||
log.warn('Forcing', filename, 'into XHTML namespace')
|
log.warn('Forcing', filename, 'into XHTML namespace')
|
||||||
data.attrib['xmlns'] = XHTML_NS
|
data.attrib['xmlns'] = XHTML_NS
|
||||||
data = etree.tostring(data, encoding=unicode_type)
|
data = etree.tostring(data, encoding='unicode')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = etree.fromstring(data, parser=parser)
|
data = etree.fromstring(data, parser=parser)
|
||||||
|
@ -120,7 +120,7 @@ def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize
|
|||||||
if before:
|
if before:
|
||||||
ans.append(before)
|
ans.append(before)
|
||||||
if for_pseudo is not None:
|
if for_pseudo is not None:
|
||||||
ans.append(tostring(elem, method='text', encoding=unicode_type, with_tail=False))
|
ans.append(tostring(elem, method='text', encoding='unicode', with_tail=False))
|
||||||
else:
|
else:
|
||||||
if elem.text:
|
if elem.text:
|
||||||
ans.append(elem.text)
|
ans.append(elem.text)
|
||||||
|
@ -142,7 +142,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
|
|||||||
text = ''
|
text = ''
|
||||||
for txt in child_xpath(nl, 'text'):
|
for txt in child_xpath(nl, 'text'):
|
||||||
text += etree.tostring(txt, method='text',
|
text += etree.tostring(txt, method='text',
|
||||||
encoding=unicode_type, with_tail=False)
|
encoding='unicode', with_tail=False)
|
||||||
content = child_xpath(navpoint, 'content')
|
content = child_xpath(navpoint, 'content')
|
||||||
if content:
|
if content:
|
||||||
content = content[0]
|
content = content[0]
|
||||||
@ -190,7 +190,7 @@ def parse_ncx(container, ncx_name):
|
|||||||
def add_from_li(container, li, parent, nav_name):
|
def add_from_li(container, li, parent, nav_name):
|
||||||
dest = frag = text = None
|
dest = frag = text = None
|
||||||
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
||||||
text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
||||||
href = x.get('href')
|
href = x.get('href')
|
||||||
if href:
|
if href:
|
||||||
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
|
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
|
||||||
@ -225,7 +225,7 @@ def parse_nav(container, nav_name):
|
|||||||
if ol is not None:
|
if ol is not None:
|
||||||
process_nav_node(container, ol, toc_root, nav_name)
|
process_nav_node(container, ol, toc_root, nav_name)
|
||||||
for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
|
for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
|
||||||
text = etree.tostring(h, method='text', encoding=unicode_type, with_tail=False) or h.get('title')
|
text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title')
|
||||||
if text:
|
if text:
|
||||||
toc_root.toc_title = text
|
toc_root.toc_title = text
|
||||||
break
|
break
|
||||||
@ -323,7 +323,7 @@ def get_nav_landmarks(container):
|
|||||||
for a in li.iterdescendants(XHTML('a')):
|
for a in li.iterdescendants(XHTML('a')):
|
||||||
href, rtype = a.get('href'), a.get(et)
|
href, rtype = a.get('href'), a.get(et)
|
||||||
if href:
|
if href:
|
||||||
title = etree.tostring(a, method='text', encoding=unicode_type, with_tail=False).strip()
|
title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip()
|
||||||
href, frag = href.partition('#')[::2]
|
href, frag = href.partition('#')[::2]
|
||||||
name = container.href_to_name(href, nav)
|
name = container.href_to_name(href, nav)
|
||||||
if container.has_name(name):
|
if container.has_name(name):
|
||||||
|
@ -428,7 +428,7 @@ class OEBReader(object):
|
|||||||
'descendant::calibre:meta[@name = "description"]')
|
'descendant::calibre:meta[@name = "description"]')
|
||||||
if descriptionElement:
|
if descriptionElement:
|
||||||
description = etree.tostring(descriptionElement[0],
|
description = etree.tostring(descriptionElement[0],
|
||||||
method='text', encoding=unicode_type).strip()
|
method='text', encoding='unicode').strip()
|
||||||
if not description:
|
if not description:
|
||||||
description = None
|
description = None
|
||||||
else:
|
else:
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os, re
|
import os, re
|
||||||
from calibre.utils.date import isoformat, now
|
from calibre.utils.date import isoformat, now
|
||||||
from calibre import guess_type
|
from calibre import guess_type
|
||||||
from polyglot.builtins import iteritems, unicode_type, filter
|
from polyglot.builtins import iteritems, filter
|
||||||
filter
|
filter
|
||||||
|
|
||||||
|
|
||||||
@ -206,7 +206,7 @@ class MergeMetadata(object):
|
|||||||
for item in affected_items:
|
for item in affected_items:
|
||||||
body = XPath('//h:body')(item.data)
|
body = XPath('//h:body')(item.data)
|
||||||
if body:
|
if body:
|
||||||
text = etree.tostring(body[0], method='text', encoding=unicode_type)
|
text = etree.tostring(body[0], method='text', encoding='unicode')
|
||||||
else:
|
else:
|
||||||
text = ''
|
text = ''
|
||||||
text = re.sub(r'\s+', '', text)
|
text = re.sub(r'\s+', '', text)
|
||||||
|
@ -20,7 +20,7 @@ from calibre.ebooks.epub import rules
|
|||||||
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
||||||
urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize)
|
urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize)
|
||||||
from calibre.ebooks.oeb.polish.split import do_split
|
from calibre.ebooks.oeb.polish.split import do_split
|
||||||
from polyglot.builtins import iteritems, unicode_type, range, map
|
from polyglot.builtins import iteritems, range, map
|
||||||
from css_selectors import Select, SelectorError
|
from css_selectors import Select, SelectorError
|
||||||
|
|
||||||
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
||||||
@ -295,7 +295,7 @@ class FlowSplitter(object):
|
|||||||
if body is None:
|
if body is None:
|
||||||
return False
|
return False
|
||||||
txt = re.sub(u'\\s+|\\xa0', '',
|
txt = re.sub(u'\\s+|\\xa0', '',
|
||||||
etree.tostring(body, method='text', encoding=unicode_type))
|
etree.tostring(body, method='text', encoding='unicode'))
|
||||||
if len(txt) > 1:
|
if len(txt) > 1:
|
||||||
return False
|
return False
|
||||||
for img in root.xpath('//h:img', namespaces=NAMESPACES):
|
for img in root.xpath('//h:img', namespaces=NAMESPACES):
|
||||||
|
@ -11,7 +11,7 @@ from itertools import count
|
|||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from polyglot.builtins import unicode_type, range, map
|
from polyglot.builtins import range, map
|
||||||
|
|
||||||
|
|
||||||
class Font(object):
|
class Font(object):
|
||||||
@ -76,10 +76,10 @@ class Text(Element):
|
|||||||
|
|
||||||
text.tail = ''
|
text.tail = ''
|
||||||
self.text_as_string = etree.tostring(text, method='text',
|
self.text_as_string = etree.tostring(text, method='text',
|
||||||
encoding=unicode_type)
|
encoding='unicode')
|
||||||
self.raw = text.text if text.text else u''
|
self.raw = text.text if text.text else u''
|
||||||
for x in text.iterchildren():
|
for x in text.iterchildren():
|
||||||
self.raw += etree.tostring(x, method='xml', encoding=unicode_type)
|
self.raw += etree.tostring(x, method='xml', encoding='unicode')
|
||||||
self.average_character_width = self.width/len(self.text_as_string)
|
self.average_character_width = self.width/len(self.text_as_string)
|
||||||
|
|
||||||
def coalesce(self, other, page_number):
|
def coalesce(self, other, page_number):
|
||||||
|
@ -135,7 +135,7 @@ class PMLMLizer(object):
|
|||||||
text = [u'']
|
text = [u'']
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to PML markup...' % item.href)
|
self.log.debug('Converting %s to PML markup...' % item.href)
|
||||||
content = unicode_type(etree.tostring(item.data, encoding=unicode_type))
|
content = etree.tostring(item.data, encoding='unicode')
|
||||||
content = self.prepare_text(content)
|
content = self.prepare_text(content)
|
||||||
content = etree.fromstring(content)
|
content = etree.fromstring(content)
|
||||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
|
@ -120,7 +120,7 @@ class RTFMLizer(object):
|
|||||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||||
# Removing comments is needed as comments with -- inside them can
|
# Removing comments is needed as comments with -- inside them can
|
||||||
# cause fromstring() to fail
|
# cause fromstring() to fail
|
||||||
content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode_type), flags=re.DOTALL)
|
content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
|
||||||
content = self.remove_newlines(content)
|
content = self.remove_newlines(content)
|
||||||
content = self.remove_tabs(content)
|
content = self.remove_tabs(content)
|
||||||
content = etree.fromstring(content)
|
content = etree.fromstring(content)
|
||||||
|
@ -85,7 +85,7 @@ class SNBMLizer(object):
|
|||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
output = [u'']
|
output = [u'']
|
||||||
stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
content = unicode_type(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode_type))
|
content = etree.tostring(self.item.data.find(XHTML('body')), encoding='unicode')
|
||||||
# content = self.remove_newlines(content)
|
# content = self.remove_newlines(content)
|
||||||
trees = {}
|
trees = {}
|
||||||
for subitem, subtitle in self.subitems:
|
for subitem, subtitle in self.subitems:
|
||||||
|
@ -12,7 +12,7 @@ Transform OEB content into plain text
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from polyglot.builtins import unicode_type, string_or_bytes
|
from polyglot.builtins import string_or_bytes
|
||||||
|
|
||||||
|
|
||||||
BLOCK_TAGS = [
|
BLOCK_TAGS = [
|
||||||
@ -74,7 +74,7 @@ class TXTMLizer(object):
|
|||||||
for x in item.data.iterdescendants(etree.Comment):
|
for x in item.data.iterdescendants(etree.Comment):
|
||||||
if x.text and '--' in x.text:
|
if x.text and '--' in x.text:
|
||||||
x.text = x.text.replace('--', '__')
|
x.text = x.text.replace('--', '__')
|
||||||
content = unicode_type(etree.tostring(item.data, encoding=unicode_type))
|
content = etree.tostring(item.data, encoding='unicode')
|
||||||
content = self.remove_newlines(content)
|
content = self.remove_newlines(content)
|
||||||
content = etree.fromstring(content)
|
content = etree.fromstring(content)
|
||||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
|
@ -360,7 +360,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{
|
|||||||
for body in root.xpath('//body'):
|
for body in root.xpath('//body'):
|
||||||
if body.text:
|
if body.text:
|
||||||
elems.append(body.text)
|
elems.append(body.text)
|
||||||
elems += [html.tostring(x, encoding=unicode_type) for x in body if
|
elems += [html.tostring(x, encoding='unicode') for x in body if
|
||||||
x.tag not in ('script', 'style')]
|
x.tag not in ('script', 'style')]
|
||||||
|
|
||||||
if len(elems) > 1:
|
if len(elems) > 1:
|
||||||
|
@ -840,7 +840,7 @@ class PluginUpdaterDialog(SizePersistedDialog):
|
|||||||
continue
|
continue
|
||||||
if heading_node.text_content().lower().find('version history') != -1:
|
if heading_node.text_content().lower().find('version history') != -1:
|
||||||
div_node = spoiler_node.xpath('div')[0]
|
div_node = spoiler_node.xpath('div')[0]
|
||||||
text = html.tostring(div_node, method='html', encoding=unicode_type)
|
text = html.tostring(div_node, method='html', encoding='unicode')
|
||||||
return re.sub(r'<div\s.*?>', '<div>', text)
|
return re.sub(r'<div\s.*?>', '<div>', text)
|
||||||
except:
|
except:
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
|
@ -65,7 +65,7 @@ def beautify_text(raw, syntax):
|
|||||||
else:
|
else:
|
||||||
root = parse(raw, line_numbers=False)
|
root = parse(raw, line_numbers=False)
|
||||||
pretty_html_tree(None, root)
|
pretty_html_tree(None, root)
|
||||||
return etree.tostring(root, encoding=unicode_type)
|
return etree.tostring(root, encoding='unicode')
|
||||||
|
|
||||||
|
|
||||||
class LineNumberMap(dict): # {{{
|
class LineNumberMap(dict): # {{{
|
||||||
|
@ -16,7 +16,7 @@ from calibre.gui2.tweak_book import tprefs, editors, current_container
|
|||||||
from calibre.gui2.tweak_book.search import get_search_regex, InvalidRegex, initialize_search_request
|
from calibre.gui2.tweak_book.search import get_search_regex, InvalidRegex, initialize_search_request
|
||||||
from calibre.gui2.tweak_book.widgets import BusyCursor
|
from calibre.gui2.tweak_book.widgets import BusyCursor
|
||||||
from calibre.gui2.widgets2 import HistoryComboBox
|
from calibre.gui2.widgets2 import HistoryComboBox
|
||||||
from polyglot.builtins import iteritems, unicode_type, error_message
|
from polyglot.builtins import iteritems, error_message
|
||||||
|
|
||||||
# UI {{{
|
# UI {{{
|
||||||
|
|
||||||
@ -179,7 +179,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
|
|||||||
else:
|
else:
|
||||||
root = current_container().parsed(fname)
|
root = current_container().parsed(fname)
|
||||||
if hasattr(root, 'xpath'):
|
if hasattr(root, 'xpath'):
|
||||||
raw = tostring(root, method='text', encoding=unicode_type, with_tail=True)
|
raw = tostring(root, method='text', encoding='unicode', with_tail=True)
|
||||||
else:
|
else:
|
||||||
raw = current_container().raw_data(fname)
|
raw = current_container().raw_data(fname)
|
||||||
if pat.search(raw) is not None:
|
if pat.search(raw) is not None:
|
||||||
|
@ -10,7 +10,7 @@ from collections import defaultdict
|
|||||||
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.utils.icu import numeric_sort_key
|
from calibre.utils.icu import numeric_sort_key
|
||||||
from polyglot.builtins import iteritems, unicode_type, string_or_bytes
|
from polyglot.builtins import iteritems, string_or_bytes
|
||||||
from polyglot.plistlib import loads
|
from polyglot.plistlib import loads
|
||||||
|
|
||||||
application_locations = ('/Applications', '~/Applications', '~/Desktop')
|
application_locations = ('/Applications', '~/Applications', '~/Desktop')
|
||||||
@ -30,8 +30,8 @@ def generate_public_uti_map():
|
|||||||
for table in tables:
|
for table in tables:
|
||||||
for tr in table.xpath('descendant::tr')[1:]:
|
for tr in table.xpath('descendant::tr')[1:]:
|
||||||
td = tr.xpath('descendant::td')
|
td = tr.xpath('descendant::td')
|
||||||
identifier = etree.tostring(td[0], method='text', encoding=unicode_type).strip()
|
identifier = etree.tostring(td[0], method='text', encoding='unicode').strip()
|
||||||
tags = etree.tostring(td[2], method='text', encoding=unicode_type).strip()
|
tags = etree.tostring(td[2], method='text', encoding='unicode').strip()
|
||||||
identifier = identifier.split()[0].replace('\u200b', '')
|
identifier = identifier.split()[0].replace('\u200b', '')
|
||||||
exts = [x.strip()[1:].lower() for x in tags.split(',') if x.strip().startswith('.')]
|
exts = [x.strip()[1:].lower() for x in tags.split(',') if x.strip().startswith('.')]
|
||||||
for ext in exts:
|
for ext in exts:
|
||||||
|
@ -43,7 +43,7 @@ class Article(object):
|
|||||||
if summary and '<' in summary:
|
if summary and '<' in summary:
|
||||||
try:
|
try:
|
||||||
s = html.fragment_fromstring(summary, create_parent=True)
|
s = html.fragment_fromstring(summary, create_parent=True)
|
||||||
summary = html.tostring(s, method='text', encoding=unicode_type)
|
summary = html.tostring(s, method='text', encoding='unicode')
|
||||||
except:
|
except:
|
||||||
print('Failed to process article summary, deleting:')
|
print('Failed to process article summary, deleting:')
|
||||||
print(summary.encode('utf-8'))
|
print(summary.encode('utf-8'))
|
||||||
|
@ -743,7 +743,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
heading.text = extracted_title
|
heading.text = extracted_title
|
||||||
body.insert(0, heading)
|
body.insert(0, heading)
|
||||||
|
|
||||||
raw_html = tostring(root, encoding=unicode_type)
|
raw_html = tostring(root, encoding='unicode')
|
||||||
|
|
||||||
return raw_html
|
return raw_html
|
||||||
|
|
||||||
@ -1667,7 +1667,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
return tag
|
return tag
|
||||||
if callable(getattr(tag, 'xpath', None)) and not hasattr(tag, 'contents'): # a lxml tag
|
if callable(getattr(tag, 'xpath', None)) and not hasattr(tag, 'contents'): # a lxml tag
|
||||||
from lxml.etree import tostring
|
from lxml.etree import tostring
|
||||||
ans = tostring(tag, method='text', encoding=unicode_type, with_tail=False)
|
ans = tostring(tag, method='text', encoding='unicode', with_tail=False)
|
||||||
else:
|
else:
|
||||||
strings = []
|
strings = []
|
||||||
for item in tag.contents:
|
for item in tag.contents:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user