Strip empty <style>, <title> and <script> tags from EPUB. Fix #1660 (epub to LRF minor problems)

2025-07-09 03:04:10 -04:00 · 2009-01-22 13:41:40 -08:00 · 2009-01-22 13:41:40 -08:00 · 7717679733
commit 7717679733
parent 741e13c0e2
2 changed files with 9 additions and 1 deletion
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -166,6 +166,14 @@ class HTMLProcessor(Processor, Rationalizer):
                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                    continue
                tag.getparent().remove(tag)
+                
+        
+        for tag in self.root.xpath('//title|//style'):
+            if not tag.text:
+                tag.getparent().remove(tag)
+        for tag in self.root.xpath('//script'):
+            if not tag.text and not tag.get('src', False):
+                tag.getparent().remove(tag)
    
    def save(self):
        for meta in list(self.root.xpath('//meta')):
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -1720,7 +1720,7 @@ class HTMLConverter(object, LoggingInterface):
                self.previous_text = '\n'
            elif tagname in ['hr', 'tr']: # tr needed for nested tables
                self.end_current_block()
-                if tagname == 'hr':
+                if tagname == 'hr' and not tag_css.get('width', '').strip().startswith('0'):
                    self.current_page.RuledLine(linelength=int(self.current_page.pageStyle.attrs['textwidth']))
                self.previous_text = '\n'
                self.process_children(tag, tag_css, tag_pseudo_css)