This commit is contained in:
Kovid Goyal 2010-02-09 20:00:16 -07:00
parent ba2c202ae1
commit 1b30ddb0d6
2 changed files with 25 additions and 6 deletions

View File

@ -303,6 +303,22 @@ class Region(object):
for x in self.columns: for x in self.columns:
yield x yield x
def absorb_regions(self, regions, at):
    """
    Absorb every region in ``regions`` into this region.

    :param regions: Iterable of regions, expected in top-to-bottom page
                    order.
    :param at: ``'bottom'`` to add the absorbed content after the existing
               content; any other value adds it before the existing
               content.
    """
    ordered = list(regions)
    if at != 'bottom':
        # Each absorbed region is placed in front of everything already
        # present, so process the last region first to keep the regions
        # in their original relative order.
        ordered.reverse()
    for region in ordered:
        self.absorb_region(region, at)
def absorb_region(self, region, at):
    """
    Merge the elements of ``region`` into this region, column by column.

    Columns are paired positionally: the i-th column of ``region`` is
    merged into the i-th column of this region.

    :param region: Object with a ``columns`` sequence whose elements are
                   to be absorbed.
    :param at: ``'bottom'`` appends the elements to the end of each
               destination column; any other value inserts them at the
               start, keeping their original relative order.
    """
    if len(region.columns) != len(self.columns):
        # Regions with a different number of columns are not merged yet.
        return
    at_bottom = at == 'bottom'
    for src, dest in zip(region.columns, self.columns):
        if at_bottom:
            for elem in src:
                dest.append(elem)
        else:
            # Walk the source backwards so that repeated insertion at
            # index 0 leaves the elements in their original order.
            # list() lets this work for columns that are only iterable.
            for elem in reversed(list(src)):
                dest.insert(0, elem)
def linearize(self): def linearize(self):
self.elements = [] self.elements = []
for x in self.columns: for x in self.columns:
@ -444,7 +460,7 @@ class Page(object):
for i, region in enumerate(self.regions): for i, region in enumerate(self.regions):
if region.is_small: if region.is_small:
found = True found = True
regions = [] regions = [region]
for j in range(i+1, len(self.regions)): for j in range(i+1, len(self.regions)):
if self.regions[j].is_small: if self.regions[j].is_small:
regions.append(self.regions[j]) regions.append(self.regions[j])
@ -452,8 +468,10 @@ class Page(object):
break break
prev_region = None if i == 0 else i-1 prev_region = None if i == 0 else i-1
next_region = j if self.regions[j] not in regions else None next_region = j if self.regions[j] not in regions else None
absorb_at = 'bottom'
if prev_region is None and next_region is not None: if prev_region is None and next_region is not None:
absorb_into = next_region absorb_into = next_region
absorb_at = 'top'
elif next_region is None and prev_region is not None: elif next_region is None and prev_region is not None:
absorb_into = prev_region absorb_into = prev_region
elif prev_region is None and next_region is None: elif prev_region is None and next_region is None:
@ -471,8 +489,9 @@ class Page(object):
or abs(avg_column_count - len(prev_region.columns)) \ or abs(avg_column_count - len(prev_region.columns)) \
> abs(avg_column_count - len(next_region.columns)): > abs(avg_column_count - len(next_region.columns)):
absorb_into = next_region absorb_into = next_region
absorb_at = 'top'
if absorb_into is not None: if absorb_into is not None:
absorb_into.absorb_region(regions) absorb_into.absorb_regions(regions, absorb_at)
absorbed.update(regions) absorbed.update(regions)
i = j i = j
for region in absorbed: for region in absorbed:

View File

@ -182,10 +182,10 @@ class PML_HTMLizer(object):
return pml return pml
def strip_pml(self, pml): def strip_pml(self, pml):
pml = re.sub(r'\\C\d=".+*"', '', pml) pml = re.sub(r'\\C\d=".*"', '', pml)
pml = re.sub(r'\\Fn=".+*"', '', pml) pml = re.sub(r'\\Fn=".*"', '', pml)
pml = re.sub(r'\\Sd=".+*"', '', pml) pml = re.sub(r'\\Sd=".*"', '', pml)
pml = re.sub(r'\\.=".+*"', '', pml) pml = re.sub(r'\\.=".*"', '', pml)
pml = re.sub(r'\\X\d', '', pml) pml = re.sub(r'\\X\d', '', pml)
pml = re.sub(r'\\S[pbd]', '', pml) pml = re.sub(r'\\S[pbd]', '', pml)
pml = re.sub(r'\\Fn', '', pml) pml = re.sub(r'\\Fn', '', pml)