From 78e28cbe9e8fa5d67acf0126cc38ddf9ddcb5d3a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 4 Sep 2014 15:45:24 +0530
Subject: [PATCH] When detecting the encoding of HTML documents, if the
 document contains multiple charset declarations, prefer the HTML 5 syntax to
 the HTML 4 syntax. Fixes #1364961 [Unicode Conversion on Amazon after Release
 2.x](https://bugs.launchpad.net/calibre/+bug/1364961)

---
 src/calibre/ebooks/chardet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/chardet.py b/src/calibre/ebooks/chardet.py
index 61aefafdac..a550b4265d 100644
--- a/src/calibre/ebooks/chardet.py
+++ b/src/calibre/ebooks/chardet.py
@@ -12,10 +12,10 @@ import re, codecs
 ENCODING_PATS = [
     # XML declaration
     re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
-    # HTML 4 Pragma directive
-    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-_a-z0-9]+)[^'"]*?['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
     # HTML 5 charset
     re.compile(r'''<meta\s+charset=['"]([-_a-z0-9]+)['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
+    # HTML 4 Pragma directive
+    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-_a-z0-9]+)[^'"]*?['"][^<>]*>(?:\s*</meta>){0,1}''', re.IGNORECASE),
 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')