mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
6e1fc23c47
commit
feb58a8846
@ -192,6 +192,16 @@ class Region(object):
|
|||||||
self.average_line_separation = sum([x.average_line_separation for x in
|
self.average_line_separation = sum([x.average_line_separation for x in
|
||||||
self.columns])/float(len(self.columns))
|
self.columns])/float(len(self.columns))
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
for x in self.columns:
|
||||||
|
yield x
|
||||||
|
|
||||||
|
def detect_paragraphs(self):
|
||||||
|
first = True
|
||||||
|
for col in self:
|
||||||
|
col.detect_paragraphs(self.average_line_separation, first)
|
||||||
|
first = False
|
||||||
|
|
||||||
|
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
|
||||||
@ -203,6 +213,8 @@ class Page(object):
|
|||||||
# for them to be considered to be part of the same text fragment
|
# for them to be considered to be part of the same text fragment
|
||||||
LINE_FACTOR = 0.4
|
LINE_FACTOR = 0.4
|
||||||
|
|
||||||
|
# Multiplies the average line height when determining row height
|
||||||
|
# of a particular element to detect columns.
|
||||||
YFUZZ = 1.5
|
YFUZZ = 1.5
|
||||||
|
|
||||||
|
|
||||||
@ -305,7 +317,7 @@ class Page(object):
|
|||||||
|
|
||||||
def find_elements_in_row_of(self, x):
|
def find_elements_in_row_of(self, x):
|
||||||
interval = Interval(x.top,
|
interval = Interval(x.top,
|
||||||
x.top + self.YFUZZ*(1+self.average_text_height))
|
x.top + self.YFUZZ*(self.average_text_height))
|
||||||
h_interval = Interval(x.left, x.right)
|
h_interval = Interval(x.left, x.right)
|
||||||
for y in self.elements[x.idx:x.idx+15]:
|
for y in self.elements[x.idx:x.idx+15]:
|
||||||
if y is not x:
|
if y is not x:
|
||||||
@ -320,6 +332,7 @@ class Page(object):
|
|||||||
'Locate paragraph boundaries in each column'
|
'Locate paragraph boundaries in each column'
|
||||||
for region in self.regions:
|
for region in self.regions:
|
||||||
region.collect_stats()
|
region.collect_stats()
|
||||||
|
region.detect_paragraphs()
|
||||||
|
|
||||||
|
|
||||||
class PDFDocument(object):
|
class PDFDocument(object):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user