mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	remove br from top of page in chm conversion
This commit is contained in:
		
							parent
							
								
									c91f022385
								
							
						
					
					
						commit
						ac8ccceef8
					
				@ -11,7 +11,7 @@ from mimetypes import guess_type as guess_mimetype
 | 
				
			|||||||
from htmlentitydefs import name2codepoint
 | 
					from htmlentitydefs import name2codepoint
 | 
				
			||||||
from pprint import PrettyPrinter
 | 
					from pprint import PrettyPrinter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from BeautifulSoup import BeautifulSoup
 | 
					from BeautifulSoup import BeautifulSoup, NavigableString
 | 
				
			||||||
from lxml import html, etree
 | 
					from lxml import html, etree
 | 
				
			||||||
from pychm.chm import CHMFile
 | 
					from pychm.chm import CHMFile
 | 
				
			||||||
from pychm.chmlib import (
 | 
					from pychm.chmlib import (
 | 
				
			||||||
@ -35,6 +35,17 @@ def match_string(s1, s2_already_lowered):
 | 
				
			|||||||
            return True
 | 
					            return True
 | 
				
			||||||
    return False
 | 
					    return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_all_prev_empty(tag):
					    """Return True if no text node at or before *tag* has visible content.

					    Starting at *tag*, follows ``previousSibling`` links until one is
					    ``None``. A node whose class is exactly ``NavigableString`` and for
					    which ``check_empty`` is false makes the result ``False``. Every
					    other kind of node is skipped without being inspected.

					    :param tag: node to start from, or ``None`` (which yields ``True``).
					    :return: ``False`` as soon as a non-blank ``NavigableString`` is
					             found, ``True`` if the chain ends without finding one.
					    """
					    # Walk the chain with a loop instead of recursing once per sibling,
					    # so a long run of sibling nodes cannot hit the recursion limit.
					    node = tag
					    while node is not None:
					        # Exact class comparison is kept on purpose: subclasses of
					        # NavigableString are not treated as text by this check.
					        if node.__class__ == NavigableString and not check_empty(node):
					            return False
					        node = node.previousSibling
					    return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_empty(s, rex=re.compile(r'\S')):
					    """Tell whether *s* has no match for *rex*.

					    With the default pattern this is true when *s* contains no
					    non-whitespace character (including when it is an empty string).

					    :param s: string to examine.
					    :param rex: compiled pattern to search for; defaults to ``\\S``.
					    :return: ``True`` if ``rex.search(s)`` finds nothing, else ``False``.
					    """
					    hit = rex.search(s)
					    return hit is None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def option_parser():
 | 
					def option_parser():
 | 
				
			||||||
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
 | 
					    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
 | 
				
			||||||
    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
 | 
					    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
 | 
				
			||||||
@ -161,6 +172,12 @@ class CHMReader(CHMFile):
 | 
				
			|||||||
        # for some very odd reason each page's content appears to be in a table
 | 
					        # for some very odd reason each page's content appears to be in a table
 | 
				
			||||||
        # too. and this table has sub-tables for random asides... grr.
 | 
					        # too. and this table has sub-tables for random asides... grr.
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					        # remove the <br> at the top of the page, if one is still present once the nav bars have been removed
 | 
				
			||||||
 | 
					        br = html('br')
 | 
				
			||||||
 | 
					        if br:
 | 
				
			||||||
 | 
					            if check_all_prev_empty(br[0].previousSibling):
 | 
				
			||||||
 | 
					                br[0].extract()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # some images seem to be broken in some chm's :/
 | 
					        # some images seem to be broken in some chm's :/
 | 
				
			||||||
        for img in html('img'):
 | 
					        for img in html('img'):
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user