From 0fda25f369df54519004e106762eff8d6dadebba Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 1 Aug 2022 07:56:45 +0530
Subject: [PATCH] Use simple HTML markup for comments formatting from Google
Books
---
src/calibre/ebooks/metadata/sources/google.py | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 3be9c5ba00..11d7cf9b25 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -15,7 +15,7 @@ try:
except ImportError:
from Queue import Empty, Queue
-from calibre import as_unicode, replace_entities
+from calibre import as_unicode, replace_entities, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import authors_to_string, check_isbn
from calibre.ebooks.metadata.book.base import Metadata
@@ -36,7 +36,13 @@ def pretty_google_books_comments(raw):
# Paragraphs in the comments are removed but whatever software googl uses
# to do this does not insert a space so we often find the pattern
# word.Capital in the comments which can be used to find paragraph markers.
- raw = re.sub(r'([a-z)"”])\.([A-Z("“])', '\\1.\n\n\\2', raw)
+ parts = []
+ for x in re.split(r'([a-z)"”])(\.)([A-Z("“])', raw):
+ if x == '.':
+ parts.append('.</p>\n\n<p>')
+ else:
+ parts.append(prepare_string_for_xml(x))
+ raw = '<p>' + ''.join(parts) + '</p>'
return raw
@@ -196,7 +202,7 @@ def to_metadata(browser, log, entry_, timeout, running_a_test=False): # {{{
class GoogleBooks(Source):
name = 'Google'
- version = (1, 0, 8)
+ version = (1, 1, 0)
minimum_calibre_version = (2, 80, 0)
description = _('Downloads metadata and covers from Google Books')
@@ -492,7 +498,7 @@ class GoogleBooks(Source):
result_queue.put(ans)
return
except Exception:
- self.log.exception('Failed to get metadata for Google identifier:', identifiers['google'])
+ log.exception('Failed to get metadata for Google identifier:', identifiers['google'])
del identifiers['google']
query = self.create_query(