From 1b30ddb0d68dcc7c86e0bd16886ae6631d2106b0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 9 Feb 2010 20:00:16 -0700 Subject: [PATCH] ... --- src/calibre/ebooks/pdf/reflow.py | 23 +++++++++++++++++++++-- src/calibre/ebooks/pml/pmlconverter.py | 8 ++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 552af1590f..a904ec3d8b 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -303,6 +303,22 @@ class Region(object): for x in self.columns: yield x + def absorb_regions(self, regions, at): + for region in regions: + self.absorb_region(region, at) + + def absorb_region(self, region, at): + src_iter = (lambda x:x) if at == 'bottom' else reversed + if len(region.columns) == len(self.columns): + for src, dest in zip(region.columns, self.columns): + for elem in src_iter(src): + if at == 'bottom': + dest.append(elem) + else: + dest.insert(0, elem) + else: + pass + def linearize(self): self.elements = [] for x in self.columns: @@ -444,7 +460,7 @@ class Page(object): for i, region in enumerate(self.regions): if region.is_small: found = True - regions = [] + regions = [region] for j in range(i+1, len(self.regions)): if self.regions[j].is_small: regions.append(self.regions[j]) @@ -452,8 +468,10 @@ class Page(object): break prev_region = None if i == 0 else i-1 next_region = j if self.regions[j] not in regions else None + absorb_at = 'bottom' if prev_region is None and next_region is not None: absorb_into = next_region + absorb_at = 'top' elif next_region is None and prev_region is not None: absorb_into = prev_region elif prev_region is None and next_region is None: @@ -471,8 +489,9 @@ class Page(object): or abs(avg_column_count - len(prev_region.columns)) \ > abs(avg_column_count - len(next_region.columns)): absorb_into = next_region + absorb_at = 'top' if absorb_into is not None: - absorb_into.absorb_region(regions) + 
absorb_into.absorb_regions(regions, absorb_at) absorbed.update(regions) i = j for region in absorbed: diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 4c74502bd1..ed955879f8 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -182,10 +182,10 @@ class PML_HTMLizer(object): return pml def strip_pml(self, pml): - pml = re.sub(r'\\C\d=".+*"', '', pml) - pml = re.sub(r'\\Fn=".+*"', '', pml) - pml = re.sub(r'\\Sd=".+*"', '', pml) - pml = re.sub(r'\\.=".+*"', '', pml) + pml = re.sub(r'\\C\d=".*"', '', pml) + pml = re.sub(r'\\Fn=".*"', '', pml) + pml = re.sub(r'\\Sd=".*"', '', pml) + pml = re.sub(r'\\.=".*"', '', pml) pml = re.sub(r'\\X\d', '', pml) pml = re.sub(r'\\S[pbd]', '', pml) pml = re.sub(r'\\Fn', '', pml)