From d17c59f6957c7e13de8b7fb4e95b742fbe932539 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 23 Feb 2025 11:00:16 +0530
Subject: [PATCH] kepubify: Add the useless JS script that Kobo includes in
 KEPUB it generates

This script is obviously better injected by firmware and I assume
modern firmware does that but Kobo's KEPUBification process has not been
updated to leave it out. So mirror it on the off chance that they
actually think this is the right way to do this.
---
 resources/templates/kobo.js                   | 226 ++++++++++++++++++
 src/calibre/ebooks/oeb/polish/container.py    |  14 +-
 src/calibre/ebooks/oeb/polish/kepubify.py     |  90 +++++--
 .../ebooks/oeb/polish/tests/kepubify.py       |  12 +-
 4 files changed, 307 insertions(+), 35 deletions(-)
 create mode 100644 resources/templates/kobo.js

diff --git a/resources/templates/kobo.js b/resources/templates/kobo.js
new file mode 100644
index 0000000000..594a4dfcce
--- /dev/null
+++ b/resources/templates/kobo.js
@@ -0,0 +1,226 @@
+var gPosition = 0;
+var gProgress = 0;
+var gCurrentPage = 0;
+var gPageCount = 0;
+var gClientHeight = null;
+
+function getPosition()
+{
+	return gPosition;
+}
+
+function getProgress()
+{
+	return gProgress;
+}
+
+function getPageCount()
+{
+	return gPageCount;
+}
+
+function getCurrentPage()
+{
+	return gCurrentPage;
+}
+
+function turnOnNightMode(nightModeOn) {
+	var body = document.getElementsByTagName('body')[0].style;
+	var aTags = document.getElementsByTagName('a');
+
+	var textColor;
+	var bgColor;
+
+	if (nightModeOn > 0) {
+		textColor = "#FFFFFF !important";
+		bgColor = "#000000 !important";
+	} else {
+		textColor = "#000000 !important";
+		bgColor = "#FFFFFF !important";
+	}
+
+	for (i = 0; i < aTags.length; i++) {
+		aTags[i].style.color = textColor;
+	}
+
+	body.color = textColor;
+	body.backgroundColor = bgColor;
+
+	window.device.turnOnNightModeDone();
+}
+
+function setupBookColumns()
+{
+	var body = document.getElementsByTagName('body')[0].style;
+	body.marginLeft = '0px !important';
+	body.marginRight = '0px !important';
+	body.marginTop = '0px !important';
+	body.marginBottom = '0px !important';
+	body.paddingTop = '0px !important';
+	body.paddingBottom = '0px !important';
+	body.webkitNbspMode = 'space';
+
+    var bc = document.getElementById('book-columns').style;
+    bc.width = (window.innerWidth * 2) + 'px !important';
+    bc.height = window.innerHeight  + 'px !important';
+    bc.marginTop = '0px !important';
+    bc.webkitColumnWidth = window.innerWidth + 'px !important';
+    bc.webkitColumnGap = '0px !important';
+	bc.overflow = 'none';
+	bc.paddingTop = '0px !important';
+	bc.paddingBottom = '0px !important';
+	gCurrentPage = 1;
+	gProgress = gPosition = 0;
+
+	var bi = document.getElementById('book-inner').style;
+	bi.marginLeft = '10px';
+	bi.marginRight = '10px';
+	bi.padding = '0';
+
+	window.device.print ("bc.height = "+ bc.height);
+	window.device.print ("window.innerHeight ="+  window.innerHeight);
+
+	gPageCount = document.body.scrollWidth / window.innerWidth;
+
+	if (gClientHeight < window.innerHeight) {
+		gPageCount = 1;
+	}
+}
+
+function paginate(tagId)
+{
+	// Get the height of the page. We do this only once. In setupBookColumns we compare this
+	// value to the height of the window and then decide whether to force the page count to one.
+	if (gClientHeight == undefined) {
+		gClientHeight = document.getElementById('book-columns').clientHeight;
+	}
+
+	setupBookColumns();
+	//window.scrollTo(0, window.innerHeight);
+
+	window.device.reportPageCount(gPageCount);
+	var tagIdPageNumber = 0;
+	if (tagId.length > 0) {
+		tagIdPageNumber = estimatePageNumberForAnchor (tagId);
+	}
+	window.device.finishedPagination(tagId, tagIdPageNumber);
+}
+
+function repaginate(tagId) {
+	window.device.print ("repaginating, gPageCount:" + gPageCount);
+	paginate(tagId);
+}
+
+function paginateAndMaintainProgress()
+{
+	var savedProgress = gProgress;
+	setupBookColumns();
+	goProgress(savedProgress);
+}
+
+function updateBookmark()
+{
+	gProgress = (gCurrentPage - 1.0) / gPageCount;
+	var anchorName = estimateFirstAnchorForPageNumber(gCurrentPage - 1);
+	window.device.finishedUpdateBookmark(anchorName);
+}
+
+function goBack()
+{
+	if (gCurrentPage > 1)
+	{
+		--gCurrentPage;
+		gPosition -= window.innerWidth;
+		window.scrollTo(gPosition, 0);
+		window.device.pageChanged();
+	} else {
+		window.device.previousChapter();
+	}
+}
+
+function goForward()
+{
+	if (gCurrentPage < gPageCount)
+	{
+		++gCurrentPage;
+		gPosition += window.innerWidth;
+		window.scrollTo(gPosition, 0);
+		window.device.pageChanged();
+	} else {
+		window.device.nextChapter();
+	}
+}
+
+function goPage(pageNumber, callPageReadyWhenDone)
+{
+	if (pageNumber > 0 && pageNumber <= gPageCount)
+	{
+		gCurrentPage = pageNumber;
+		gPosition = (gCurrentPage - 1) * window.innerWidth;
+		window.scrollTo(gPosition, 0);
+		if (callPageReadyWhenDone > 0) {
+			window.device.pageReady();
+		} else {
+			window.device.pageChanged();
+		}
+	}
+}
+
+function goProgress(progress)
+{
+	progress += 0.0001;
+
+	var progressPerPage = 1.0 / gPageCount;
+	var newPage = 0;
+
+	for (var page = 0; page < gPageCount; page++) {
+		var low = page * progressPerPage;
+		var high = low + progressPerPage;
+		if (progress >= low && progress < high) {
+			newPage = page;
+			break;
+		}
+	}
+
+	gCurrentPage = newPage + 1;
+	gPosition = (gCurrentPage - 1) * window.innerWidth;
+	window.scrollTo(gPosition, 0);
+	updateProgress();
+}
+
+/* BOOKMARKING CODE */
+
+/**
+ * Estimate the first anchor for the specified page number. This is used on the broken WebKit
+ * where we do not know for sure if the specific anchor actually is on the page.
+ */
+
+
+function estimateFirstAnchorForPageNumber(page)
+{
+	var spans = document.getElementsByTagName('span');
+	var lastKoboSpanId = "";
+	for (var i = 0; i < spans.length; i++) {
+		if (spans[i].id.substr(0, 5) == "kobo.") {
+			lastKoboSpanId = spans[i].id;
+			if (spans[i].offsetTop >= (page * window.innerHeight)) {
+				return spans[i].id;
+			}
+		}
+	}
+	return lastKoboSpanId;
+}
+
+/**
+ * Estimate the page number for the specified anchor. This is used on the broken WebKit where we
+ * do not know for sure how things are columnized. The page number returned is zero based.
+ */
+
+function estimatePageNumberForAnchor(spanId)
+{
+	var span = document.getElementById(spanId);
+	if (span) {
+		return Math.floor(span.offsetTop / window.innerHeight);
+	}
+	return 0;
+}
diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index 6edd901211..d36476ff21 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -312,14 +312,14 @@ class Container(ContainerBase):  # {{{
         clone_dir(self.root, dest_dir)
         return self.data_for_clone(dest_dir)
 
-    def add_name_to_manifest(self, name, process_manifest_item=None):
+    def add_name_to_manifest(self, name, process_manifest_item=None, suggested_id=''):
         ' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
         all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
         c = 0
-        item_id = 'id'
+        item_id = suggested_id = suggested_id or 'id'
         while item_id in all_ids:
             c += 1
-            item_id = f'id{c}'
+            item_id = f'{suggested_id}-{c}'
         manifest = self.opf_xpath('//opf:manifest')[0]
         href = self.name_to_href(name, self.opf_name)
         item = manifest.makeelement(OPF('item'),
@@ -347,7 +347,11 @@ class Container(ContainerBase):  # {{{
             name = f'{base}-{c}.{ext}'
         return name
 
-    def add_file(self, name, data=b'', media_type=None, spine_index=None, modify_name_if_needed=False, process_manifest_item=None):
+    def add_file(
+            self, name, data=b'', media_type=None, spine_index=None,
+            modify_name_if_needed=False, process_manifest_item=None,
+            suggested_id='',
+        ):
         ''' Add a file to this container. Entries for the file are
         automatically created in the OPF manifest and spine
         (if the file is a text document) '''
@@ -374,7 +378,7 @@ class Container(ContainerBase):  # {{{
         self.mime_map[name] = mt
         if self.ok_to_be_unmanifested(name):
             return name
-        item_id = self.add_name_to_manifest(name, process_manifest_item=process_manifest_item)
+        item_id = self.add_name_to_manifest(name, process_manifest_item=process_manifest_item, suggested_id=suggested_id)
         if mt in OEB_DOCS:
             manifest = self.opf_xpath('//opf:manifest')[0]
             spine = self.opf_xpath('//opf:spine')[0]
diff --git a/src/calibre/ebooks/oeb/polish/kepubify.py b/src/calibre/ebooks/oeb/polish/kepubify.py
index c2de0086e7..74ae1754b8 100644
--- a/src/calibre/ebooks/oeb/polish/kepubify.py
+++ b/src/calibre/ebooks/oeb/polish/kepubify.py
@@ -34,8 +34,11 @@ from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing
 from calibre.spell.break_iterator import sentence_positions
 from calibre.srv.render_book import Profiler, calculate_number_of_workers
 from calibre.utils.localization import canonicalize_lang, get_lang
+from calibre.utils.short_uuid import uuid4
 
-KOBO_CSS_CLASS = 'kobostylehacks'
+KOBO_CSS_ID = 'kobostylehacks'
+KOBO_JS_NAME = 'kobo.js'
+KOBO_CSS_NAME = 'kobo.css'
 OUTER_DIV_ID = 'book-columns'
 INNER_DIV_ID = 'book-inner'
 KOBO_SPAN_CLASS = 'koboSpan'
@@ -72,12 +75,18 @@ def outer_html(node):
     return etree.tostring(node, encoding='unicode', with_tail=False)
 
 
-def add_style(root, opts: Options, cls=KOBO_CSS_CLASS) -> bool:
+@lru_cache(2)
+def kobo_js() -> bytes:
+    return P('templates/kobo.js', data=True)
+
+
+def add_style_and_script(root, kobo_js_href: str, opts: Options) -> bool:
 
     def add(parent):
-        e = parent.makeelement(XHTML('style'), type='text/css')
+        e = parent.makeelement(XHTML('style'), type='text/css', id=KOBO_CSS_ID)
         e.text = opts.extra_css
-        e.set('class', cls)
+        insert_self_closing(parent, e)
+        e = parent.makeelement(XHTML('script'), type='text/javascript', src=kobo_js_href)
         insert_self_closing(parent, e)
 
     if heads := XPath('./h:head')(root):
@@ -89,9 +98,20 @@ def add_style(root, opts: Options, cls=KOBO_CSS_CLASS) -> bool:
     return False
 
 
-def remove_kobo_styles(root):
-    for x in XPath(f'//h:style[@type="text/css" and @class="{KOBO_CSS_CLASS}"]')(root):
-        extract(x)
+def is_href_to_fname(href: str | None, fname: str) -> bool:
+    return bool(href) and href.rpartition('/')[-1] == fname  # True only if href's last path segment is fname; None/'' give False
+
+
+def remove_kobo_styles_and_scripts(root):
+    for style in XPath('//h:style')(root):
+        if style.get('id') == KOBO_CSS_ID:
+            extract(style)
+    for link in XPath('//h:link')(root):
+        if link.get('rel') == 'stylesheet' and link.get('type') == 'text/css' and is_href_to_fname(link.get('href'), KOBO_CSS_NAME):
+            extract(link)
+    for script in XPath('//h:script')(root):
+        if script.get('type') == 'text/javascript' and is_href_to_fname(script.get('src'), KOBO_JS_NAME):
+            extract(script)
 
 
 def wrap_body_contents(body):
@@ -219,16 +239,16 @@ def remove_kobo_spans(body: etree.Element) -> bool:
     return found
 
 
-def add_kobo_markup_to_html(root, opts, metadata_lang):
+def add_kobo_markup_to_html(root: etree.Element, kobo_js_href: str, opts: Options, metadata_lang: str) -> None:
     root_lang = canonicalize_lang(lang_for_elem(root, canonicalize_lang(metadata_lang or get_lang())) or 'en')
-    add_style(root, opts)
+    add_style_and_script(root, kobo_js_href, opts)
     for body in XPath('./h:body')(root):
         inner = wrap_body_contents(body)
         add_kobo_spans(inner, lang_for_elem(body, root_lang))
 
 
 def remove_kobo_markup_from_html(root):
-    remove_kobo_styles(root)
+    remove_kobo_styles_and_scripts(root)
     for body in XPath('./h:body')(root):
         unwrap_body_contents(body)
         remove_kobo_spans(body)
@@ -293,7 +313,7 @@ def process_stylesheet(css: str, opts: Options) -> str:
     return sheet.cssText if changed else css
 
 
-def kepubify_parsed_html(root, opts: Options, metadata_lang: str = 'en'):
+def kepubify_parsed_html(root: etree.Element, kobo_js_href: str, opts: Options, metadata_lang: str = 'en'):
     remove_kobo_markup_from_html(root)
     if not opts.for_removal:
         merge_multiple_html_heads_and_bodies(root)
@@ -302,19 +322,19 @@ def kepubify_parsed_html(root, opts: Options, metadata_lang: str = 'en'):
             if (style.get('type') or 'text/css') == 'text/css' and style.text:
                 style.text = process_stylesheet(style.text, opts)
     if not opts.for_removal:
-        add_kobo_markup_to_html(root, opts, metadata_lang)
+        add_kobo_markup_to_html(root, kobo_js_href, opts, metadata_lang)
 
 
-def kepubify_html_data(raw: str | bytes, opts: Options = Options(), metadata_lang: str = 'en'):
+def kepubify_html_data(raw: str | bytes, kobo_js_href: str = KOBO_JS_NAME, opts: Options = Options(), metadata_lang: str = 'en'):
     root = parse(raw)
-    kepubify_parsed_html(root, opts, metadata_lang)
+    kepubify_parsed_html(root, kobo_js_href, opts, metadata_lang)
     return root
 
 
-def kepubify_html_path(path: str, metadata_lang: str = 'en', opts: Options = Options()):
+def kepubify_html_path(path: str, kobo_js_href: str = KOBO_JS_NAME, metadata_lang: str = 'en', opts: Options = Options()):
     with open(path, 'r+b') as f:
         raw = f.read()
-        root = kepubify_html_data(raw, opts, metadata_lang)
+        root = kepubify_html_data(raw, kobo_js_href, opts, metadata_lang)
         raw = serialize_html(root)
         f.seek(0)
         f.truncate()
@@ -348,7 +368,7 @@ def add_dummy_title_page(container: Container, cover_image_name: str, mi) -> Non
             div {{ padding:0pt; margin: 0pt }}
             img {{ padding:0pt; margin: 0pt }}
         </style>
-        <style type="text/css" class="{KOBO_CSS_CLASS}">
+        <style type="text/css" id="{KOBO_CSS_ID}">
         {KOBO_CSS}
         </style>
     </head>
@@ -408,40 +428,58 @@ def process_stylesheet_path(path: str, opts: Options) -> None:
                 f.write(ncss)
 
 
-def process_path(path: str, metadata_lang: str, opts: Options, media_type: str) -> None:
+def process_path(path: str, kobo_js_href: str, metadata_lang: str, opts: Options, media_type: str) -> None:
     if media_type in OEB_DOCS:
-        kepubify_html_path(path, metadata_lang, opts)
+        kepubify_html_path(path, kobo_js_href, metadata_lang, opts)
     elif media_type in OEB_STYLES:
         process_stylesheet_path(path, opts)
 
 
-def do_work_in_parallel(container: Container, opts: Options, metadata_lang: str, max_workers: int) -> None:
+def do_work_in_parallel(container: Container, kobo_js_name: str, opts: Options, metadata_lang: str, max_workers: int) -> None:
     names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS or mt in OEB_STYLES)
     num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
     paths = tuple(map(container.name_to_abspath, names_that_need_work))
     if num_workers < 2:
         for name in names_that_need_work:
-            process_path(container.name_to_abspath(name), metadata_lang, opts, container.mime_map[name])
+            process_path(container.name_to_abspath(name), container.name_to_href(kobo_js_name, name), metadata_lang, opts, container.mime_map[name])
     else:
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             futures = tuple(executor.submit(
-                process_path, container.name_to_abspath(name), metadata_lang, opts, container.mime_map[name])
-                            for name in names_that_need_work)
+                process_path, container.name_to_abspath(name), container.name_to_href(kobo_js_name, name),
+                metadata_lang, opts, container.mime_map[name]) for name in names_that_need_work)
             for future in futures:
                 future.result()
 
 
+def remove_kobo_files(container):
+    for name, mt in tuple(container.mime_map.items()):
+        fname = name.rpartition('/')[-1]
+        if mt == 'application/javascript' and fname == KOBO_JS_NAME:
+            container.remove_item(name)
+        elif mt == 'text/css' and fname == KOBO_CSS_NAME:
+            container.remove_item(name)
+
+
 def unkepubify_container(container: Container, max_workers: int = 0) -> None:
     remove_dummy_cover_image(container)
     remove_dummy_title_page(container)
+    remove_kobo_files(container)
     opts = Options(for_removal=True)
     metadata_lang = container.mi.language
-    do_work_in_parallel(container, opts, metadata_lang, max_workers)
+    do_work_in_parallel(container, KOBO_JS_NAME, opts, metadata_lang, max_workers)
+
+
+def uniqify_name(container: Container, fname: str) -> str:
+    q = fname  # prefer the bare name; only relocate it when it collides with an existing entry
+    while container.has_name_case_insensitive(q) or container.manifest_has_name(q):
+        q = f'{uuid4()}/{fname}'  # interpolate fname (not the literal text) so the basename stays recognisable
+    return q
 
 
 def kepubify_container(container: Container, opts: Options, max_workers: int = 0) -> None:
     remove_dummy_title_page(container)
     remove_dummy_cover_image(container)
+    remove_kobo_files(container)
     metadata_lang = container.mi.language
     cover_image_name = find_cover_image(container) or find_cover_image3(container)
     mi = container.mi
@@ -452,7 +490,9 @@ def kepubify_container(container: Container, opts: Options, max_workers: int = 0
     container.apply_unique_properties(cover_image_name, 'cover-image')
     if not find_cover_page(container) and not first_spine_item_is_probably_title_page(container):
         add_dummy_title_page(container, cover_image_name, mi)
-    do_work_in_parallel(container, opts, metadata_lang, max_workers)
+    kobo_js_name = uniqify_name(container, KOBO_JS_NAME)
+    kobo_js_name = container.add_file(kobo_js_name, kobo_js(), media_type='application/javascript', suggested_id='js-kobo.js')
+    do_work_in_parallel(container, kobo_js_name, opts, metadata_lang, max_workers)
 
 
 def kepubify_path(path, outpath='', max_workers=0, allow_overwrite=False, opts: Options = Options()):
diff --git a/src/calibre/ebooks/oeb/polish/tests/kepubify.py b/src/calibre/ebooks/oeb/polish/tests/kepubify.py
index 66e59ed84e..7e0692f66d 100644
--- a/src/calibre/ebooks/oeb/polish/tests/kepubify.py
+++ b/src/calibre/ebooks/oeb/polish/tests/kepubify.py
@@ -8,6 +8,7 @@ from calibre.ebooks.oeb.polish.kepubify import (
     CSS_COMMENT_COOKIE,
     DUMMY_COVER_IMAGE_NAME,
     DUMMY_TITLE_PAGE_NAME,
+    KOBO_JS_NAME,
     Options,
     kepubify_html_data,
     kepubify_parsed_html,
@@ -51,9 +52,10 @@ class KepubifyTests(BaseTest):
                 b(has_cover, epub_version)
 
     def test_kepubify_html(self):
-        prefix = '''<?xml version='1.0' encoding='utf-8'?>
-<html xmlns="http://www.w3.org/1999/xhtml"><head><style type="text/css" class="kobostylehacks">\
-div#book-inner { margin-top: 0; margin-bottom: 0; }</style></head><body><div id="book-columns"><div id="book-inner">'''
+        prefix = f'''<?xml version='1.0' encoding='utf-8'?>
+<html xmlns="http://www.w3.org/1999/xhtml"><head><style type="text/css" id="kobostylehacks">\
+div#book-inner {{ margin-top: 0; margin-bottom: 0; }}</style><script type="text/javascript" src="{KOBO_JS_NAME}"/></head>\
+<body><div id="book-columns"><div id="book-inner">'''
         suffix =  '</div></div></body></html>'
         for src, expected in {
             # basics
@@ -105,12 +107,12 @@ div#book-inner { margin-top: 0; margin-bottom: 0; }</style></head><body><div id=
             '<span class="koboSpan" id="kobo.1.1">Some</span></div>'
         }.items():
             opts = Options()._replace(remove_widows_and_orphans=True, remove_at_page_rules=True)
-            root = kepubify_html_data(src, opts)
+            root = kepubify_html_data(src, KOBO_JS_NAME, opts)
             actual = serialize_html(root).decode('utf-8')
             actual = actual[len(prefix):-len(suffix)]
             self.assertEqual(expected, actual)
             expected = serialize_html(parse(src)).decode('utf-8')
             opts = opts._replace(for_removal=True)
-            kepubify_parsed_html(root, opts)
+            kepubify_parsed_html(root, KOBO_JS_NAME, opts)
             actual = serialize_html(root).decode('utf-8')
             self.assertEqual(expected, actual)