mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
use entity_to_unicode properly
This commit is contained in:
parent
4ac041cace
commit
bebf905648
@ -9,8 +9,10 @@ Transform OEB content into FB2 markup
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
@ -75,7 +77,13 @@ class FB2MLizer(object):
|
|||||||
return images
|
return images
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
return text.replace('&', '')
|
for entity in set(re.findall('&.+?;', text)):
|
||||||
|
mo = re.search('(%s)' % entity[1:-1], text)
|
||||||
|
text = text.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
|
text = text.replace('&', '')
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, tag_stack=[]):
|
def dump_text(self, elem, stylizer, tag_stack=[]):
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
|
@ -88,7 +88,7 @@ class PMLMLizer(object):
|
|||||||
|
|
||||||
def add_page_anchor(self, href):
|
def add_page_anchor(self, href):
|
||||||
href = os.path.splitext(os.path.basename(href))[0]
|
href = os.path.splitext(os.path.basename(href))[0]
|
||||||
return '\\Q="%s"' % href
|
return u'\\Q="%s"' % href
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
# Remove excess spaces at beginning and end of lines
|
# Remove excess spaces at beginning and end of lines
|
||||||
@ -110,7 +110,8 @@ class PMLMLizer(object):
|
|||||||
text = text.replace('\\Q="%s"' % unused, '')
|
text = text.replace('\\Q="%s"' % unused, '')
|
||||||
|
|
||||||
for entity in set(re.findall('&.+?;', text)):
|
for entity in set(re.findall('&.+?;', text)):
|
||||||
text = text.replace(entity, entity_to_unicode(entity[1:-1]))
|
mo = re.search('(%s)' % entity[1:-1], text)
|
||||||
|
text = text.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
'''
|
|
||||||
Write content to TXT.
|
|
||||||
'''
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, re, sys
|
'''
|
||||||
|
Write content to TXT.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
@ -83,6 +85,11 @@ class TxtWriter(object):
|
|||||||
for symbol in HTML_SYMBOLS:
|
for symbol in HTML_SYMBOLS:
|
||||||
for code in HTML_SYMBOLS[symbol]:
|
for code in HTML_SYMBOLS[symbol]:
|
||||||
content = content.replace(code, symbol)
|
content = content.replace(code, symbol)
|
||||||
|
|
||||||
|
for entity in set(re.findall('&.+?;', content)):
|
||||||
|
mo = re.search('(%s)' % entity[1:-1], content)
|
||||||
|
content = content.replace(entity, entity_to_unicode(mo))
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def cleanup_text(self, text):
|
def cleanup_text(self, text):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user