mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
...
This commit is contained in:
parent
093404e208
commit
b632639ff7
@ -23,6 +23,7 @@ class Text(object):
|
||||
self.font_map = font_map
|
||||
self.top, self.left, self.width, self.height = map(float, map(text.get,
|
||||
('top', 'left', 'width', 'height')))
|
||||
self.bottom = self.top + self.height
|
||||
self.font = self.font_map[text.get('font')]
|
||||
self.font_size = self.font.size
|
||||
self.color = self.font.color
|
||||
@ -31,6 +32,18 @@ class Text(object):
|
||||
self.text_as_string = etree.tostring(text, method='text',
|
||||
encoding=unicode)
|
||||
|
||||
class FontSizeStats(dict):
    """Mapping of font size -> fraction of all characters at that size.

    Built from a plain mapping of ``font size -> character count``. Besides
    the normalized fractions stored as dict items, two attributes are set:

    ``most_common_size``
        The size with the highest character count, or ``-1`` when *stats*
        is empty.
    ``chars_at_most_common_size``
        The raw character count of that size (``0`` when *stats* is empty).
    """

    def __init__(self, stats):
        dict.__init__(self)
        total = float(sum(stats.values()))
        self.most_common_size, self.chars_at_most_common_size = -1, 0

        for sz, chars in stats.items():
            # ``>=`` means that on a tie the size seen last during iteration
            # is the one reported as most common.
            if chars >= self.chars_at_most_common_size:
                self.most_common_size, self.chars_at_most_common_size = sz, chars
            # Every count can be zero (e.g. only empty text runs); report a
            # fraction of 0.0 instead of raising ZeroDivisionError.
            self[sz] = chars / total if total else 0.0
|
||||
|
||||
|
||||
class Page(object):
|
||||
|
||||
def __init__(self, page, font_map, opts, log):
|
||||
@ -46,6 +59,15 @@ class Page(object):
|
||||
for text in page.xpath('descendant::text'):
|
||||
self.texts.append(Text(text, self.font_map, self.opts, self.log))
|
||||
|
||||
self.font_size_stats = {}
|
||||
for t in self.texts:
|
||||
if t.font_size not in self.font_size_stats:
|
||||
self.font_size_stats[t.font_size] = 0
|
||||
self.font_size_stats[t.font_size] += len(t.text_as_string)
|
||||
|
||||
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
||||
|
||||
|
||||
|
||||
class PDFDocument(object):
|
||||
|
||||
@ -69,6 +91,17 @@ class PDFDocument(object):
|
||||
self.page_map[page.id] = page
|
||||
self.pages.append(page)
|
||||
|
||||
self.collect_font_statistics()
|
||||
|
||||
def collect_font_statistics(self):
    """Combine the per-page font size statistics into document-wide ones.

    Sums, per font size, the values found in ``font_size_stats`` of every
    page in ``self.pages`` and stores the result on ``self.font_size_stats``
    as a ``FontSizeStats`` instance.

    NOTE(review): each page's ``font_size_stats`` appears to already be a
    ``FontSizeStats`` (normalized fractions, not raw character counts), so
    every page would carry equal weight here -- confirm this is intended.
    """
    totals = {}
    for page in self.pages:
        # Iterate (size, value) pairs. Iterating the mapping itself yields
        # only the keys, and unpacking a numeric key raises TypeError.
        for sz, chars in page.font_size_stats.items():
            totals[sz] = totals.get(sz, 0) + chars

    self.font_size_stats = FontSizeStats(totals)
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user