From 7cdb090057139c77e28161943ef5df5c505666d9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 1 Feb 2014 12:12:03 +0530 Subject: [PATCH] Diff tool: When detecting encoding of text files, look for an encoding declaration at the top in the format used by vim/emacs etc. --- src/calibre/gui2/tweak_book/diff/main.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/tweak_book/diff/main.py b/src/calibre/gui2/tweak_book/diff/main.py index 75a61bb5af..65f3db3180 100644 --- a/src/calibre/gui2/tweak_book/diff/main.py +++ b/src/calibre/gui2/tweak_book/diff/main.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2014, Kovid Goyal ' -import sys, os +import sys, os, re from functools import partial from PyQt4.Qt import ( @@ -93,6 +93,7 @@ def changed_files(list_of_names1, list_of_names2, get_data1, get_data2): added_names.add(name) return cache, changed_names, renamed_names, removed_names, added_names + def get_decoded_raw(name): from calibre.ebooks.chardet import xml_to_unicode, force_encoding with open(name, 'rb') as f: @@ -107,7 +108,11 @@ def get_decoded_raw(name): if syntax in {'html', 'xml'}: raw = xml_to_unicode(raw, verbose=True)[0] else: - enc = force_encoding(raw, verbose=True) + m = re.search(r"coding[:=]\s*([-\w.]+)", raw[:1024], flags=re.I) + if m is not None: + enc = m.group(1) + else: + enc = force_encoding(raw, verbose=True) try: raw = raw.decode(enc) except ValueError: