Fix bug 2676: Divide-by-zero error when checking line length of PDF input.

This commit is contained in:
John Schember 2009-06-20 10:11:19 -04:00
parent 05d27a0225
commit 65dfbd5cb4

View File

@@ -1,12 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, functools import functools
import re
from calibre import entity_to_unicode from calibre import entity_to_unicode
@@ -52,7 +52,12 @@ def line_length(raw, percent):
for line in lines: for line in lines:
if len(line) > 0: if len(line) > 0:
lengths.append(len(line)) lengths.append(len(line))
if not lengths:
return 0
total = sum(lengths) total = sum(lengths)
print total
avg = total / len(lengths) avg = total / len(lengths)
max_line = avg * 2 max_line = avg * 2
@@ -178,10 +183,13 @@ class HTMLPreProcessor(object):
elif self.is_book_designer(html): elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html): elif self.is_pdftohtml(html):
line_length_rules = [ length = line_length(html, .3)
# Un wrap using punctuation line_length_rules = []
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines), if length:
] line_length_rules = [
# Un wrap using punctuation
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
]
rules = self.PDFTOHTML + line_length_rules rules = self.PDFTOHTML + line_length_rules
else: else: