mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Fix BD preprocessing bugs.
This commit is contained in:
parent
c28e7b7c34
commit
eeab59df29
@ -14,6 +14,7 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
import tempfile
|
||||||
"""
|
"""
|
||||||
Code to convert HTML ebooks into LRF ebooks.
|
Code to convert HTML ebooks into LRF ebooks.
|
||||||
|
|
||||||
@ -96,6 +97,9 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
# Fix Book Designer markup
|
# Fix Book Designer markup
|
||||||
BOOK_DESIGNER = [
|
BOOK_DESIGNER = [
|
||||||
|
# HR
|
||||||
|
(re.compile('<hr>', re.IGNORECASE),
|
||||||
|
lambda match : '<span style="page-break-after:always"> </span>'),
|
||||||
# Create header tags
|
# Create header tags
|
||||||
(re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
||||||
@ -107,10 +111,7 @@ class HTMLConverter(object):
|
|||||||
lambda match : '<h3>%s</h3>'%(match.group(1),)),
|
lambda match : '<h3>%s</h3>'%(match.group(1),)),
|
||||||
# Blank lines
|
# Blank lines
|
||||||
(re.compile('<div.*?>( ){4}</div>', re.IGNORECASE),
|
(re.compile('<div.*?>( ){4}</div>', re.IGNORECASE),
|
||||||
lambda match : '<p></p>'),
|
lambda match : '<p></p>'),
|
||||||
# HR
|
|
||||||
(re.compile('<hr>', re.IGNORECASE),
|
|
||||||
lambda match : '<span style="page-break-after:always"> </span>'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def __hasattr__(self, attr):
|
def __hasattr__(self, attr):
|
||||||
@ -243,6 +244,12 @@ class HTMLConverter(object):
|
|||||||
a = soup.find(id='BookAuthor')
|
a = soup.find(id='BookAuthor')
|
||||||
if a:
|
if a:
|
||||||
self.book.set_author(self.get_text(a))
|
self.book.set_author(self.get_text(a))
|
||||||
|
if self.verbose:
|
||||||
|
tdir = tempfile.gettempdir()
|
||||||
|
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
|
||||||
|
dump.write(str(soup))
|
||||||
|
self.logger.info('Written preprocessed HTML to '+dump.name)
|
||||||
|
dump.close()
|
||||||
self.logger.info('\tConverting to BBeB...')
|
self.logger.info('\tConverting to BBeB...')
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
self.current_page = None
|
self.current_page = None
|
||||||
@ -326,7 +333,9 @@ class HTMLConverter(object):
|
|||||||
if parent_css:
|
if parent_css:
|
||||||
merge_parent_css(prop, parent_css)
|
merge_parent_css(prop, parent_css)
|
||||||
if tag.has_key("align"):
|
if tag.has_key("align"):
|
||||||
prop["text-align"] = tag["align"]
|
al = tag['align'].lower()
|
||||||
|
if al in ('left', 'right', 'center', 'justify'):
|
||||||
|
prop["text-align"] = al
|
||||||
if self.css.has_key(tagname):
|
if self.css.has_key(tagname):
|
||||||
prop.update(self.css[tagname])
|
prop.update(self.css[tagname])
|
||||||
if self.pseudo_css.has_key(tagname):
|
if self.pseudo_css.has_key(tagname):
|
||||||
@ -574,9 +583,9 @@ class HTMLConverter(object):
|
|||||||
print ptag, type(ptag)
|
print ptag, type(ptag)
|
||||||
|
|
||||||
def get_alignment(self, css):
|
def get_alignment(self, css):
|
||||||
|
val = css['text-align'].lower() if css.has_key('text-align') else None
|
||||||
align = 'head'
|
align = 'head'
|
||||||
if css.has_key('text-align'):
|
if val is not None:
|
||||||
val = css['text-align'].lower()
|
|
||||||
if val in ["right", "foot"]:
|
if val in ["right", "foot"]:
|
||||||
align = "foot"
|
align = "foot"
|
||||||
elif val == "center":
|
elif val == "center":
|
||||||
@ -857,7 +866,7 @@ class HTMLConverter(object):
|
|||||||
ImageBlock(self.images[path], xsize=width,
|
ImageBlock(self.images[path], xsize=width,
|
||||||
ysize=height, x1=width, y1=height,
|
ysize=height, x1=width, y1=height,
|
||||||
blockwidth=width, blockheight=height),
|
blockwidth=width, blockheight=height),
|
||||||
left, 0)
|
left, 0)
|
||||||
|
|
||||||
def process_page_breaks(self, tag, tagname, tag_css):
|
def process_page_breaks(self, tag, tagname, tag_css):
|
||||||
if 'page-break-before' in tag_css.keys():
|
if 'page-break-before' in tag_css.keys():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user