mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: When editing HTML files that have charset encoding declarations, automatically change the declared encoding (if any) to UTF-8 on save, since the editor always saves files in the UTF-8 encoding.
This prevents a mismatch between the declared encoding and the actual encoding if the HTML file was originally in an encoding other than UTF-8.
This commit is contained in:
parent
78be882aff
commit
01d2a6df56
@ -19,15 +19,31 @@ ENCODING_PATS = [
|
||||
]
|
||||
# Matches an entity-style reference: '&', then a lazily matched run of one or
# more non-whitespace characters (captured as group 1), then ';'.
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
def strip_encoding_declarations(raw, limit=50*1024):
    """Delete encoding declarations found near the start of ``raw``.

    Only the leading ``raw[:limit]`` slice is searched. Every pattern in
    ENCODING_PATS is applied to that slice in turn and each match is replaced
    with the empty string; the rest of ``raw`` is passed through untouched.

    :param raw: The markup to clean.
    :param limit: Length of the leading region that is scanned. Defaults to
        50*1024, the value that used to be hard-coded.
    :return: ``raw`` with every matched declaration removed.
    """
    # Slice once, up front: the scanned region is fixed by the original input
    # and does not need to be recomputed for each pattern.
    prefix = raw[:limit]
    suffix = raw[limit:]
    for pat in ENCODING_PATS:
        prefix = pat.sub('', prefix)
    raw = prefix + suffix
    return raw
|
||||
|
||||
def replace_encoding_declarations(raw, enc='utf-8', limit=50*1024):
    """Rewrite the encoding named in declarations near the start of ``raw``.

    Only the leading ``raw[:limit]`` slice is searched. For every match of a
    pattern in ENCODING_PATS, the text captured by group 1 (the declared
    encoding name) is compared case-insensitively with ``enc``; when it
    differs, that captured text is replaced by ``enc`` and the rest of the
    match is left as it was.

    :param raw: The markup to process.
    :param enc: The encoding name to write into the declarations.
    :param limit: Length of the leading region that is scanned.
    :return: A ``(raw, changed)`` tuple, where ``changed`` is True if at least
        one declaration named a different encoding and was rewritten.
    """
    prefix = raw[:limit]
    suffix = raw[limit:]
    # Mutable cell so the nested callback can flag a change without having to
    # rebind a name in the enclosing scope.
    changed = [False]

    def sub(m):
        ans = m.group()
        if m.group(1).lower() != enc.lower():
            changed[0] = True
            # Offsets of group 1 inside the matched text, both measured from
            # the start of the match. Measuring the end from the end of the
            # match instead gives 0 when the group is the last thing matched,
            # and ans[0:] would then re-append the entire match.
            base = m.start(0)
            start, end = m.start(1) - base, m.end(1) - base
            ans = ans[:start] + enc + ans[end:]
        return ans

    for pat in ENCODING_PATS:
        prefix = pat.sub(sub, prefix)
    raw = prefix + suffix
    return raw, changed[0]
|
||||
|
||||
def substitute_entites(raw):
    """Return ``raw`` with every ENTITY_PATTERN match substituted.

    Each match is handed to calibre's ``xml_entity_to_unicode``, whose return
    value takes the place of the matched text.
    """
    from calibre import xml_entity_to_unicode as entity_to_text
    converted = ENTITY_PATTERN.sub(entity_to_text, raw)
    return converted
|
||||
|
@ -15,6 +15,7 @@ from PyQt4.Qt import (
|
||||
|
||||
from calibre import prints
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.ebooks.chardet import replace_encoding_declarations
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2.tweak_book import actions, current_container, tprefs, dictionaries, editor_toolbar_actions
|
||||
from calibre.gui2.tweak_book.editor import SPELL_PROPERTY
|
||||
@ -136,6 +137,9 @@ class Editor(QMainWindow):
|
||||
def data(self):
|
||||
def fget(self):
|
||||
ans = self.get_raw_data()
|
||||
ans, changed = replace_encoding_declarations(ans, enc='utf-8', limit=4*1024)
|
||||
if changed:
|
||||
self.data = ans
|
||||
return ans.encode('utf-8')
|
||||
def fset(self, val):
|
||||
self.editor.load_text(val, syntax=self.syntax)
|
||||
|
Loading…
x
Reference in New Issue
Block a user