mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Integrated masthead code with KG revisions
This commit is contained in:
commit
be6fbbab96
@ -53,6 +53,8 @@ class Economist(BasicNewsRecipe):
|
|||||||
self.feed_dict.items()])
|
self.feed_dict.items()])
|
||||||
|
|
||||||
def eco_sort_sections(self, feeds):
|
def eco_sort_sections(self, feeds):
|
||||||
|
if not feeds:
|
||||||
|
raise ValueError('No new articles found')
|
||||||
order = {
|
order = {
|
||||||
'The World This Week': 1,
|
'The World This Week': 1,
|
||||||
'Leaders': 2,
|
'Leaders': 2,
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
@ -8,6 +11,7 @@ class JASN(BasicNewsRecipe):
|
|||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Krittika Goyal'
|
||||||
oldest_article = 31 #days
|
oldest_article = 31 #days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
delay = 5
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
|
|
||||||
INDEX = 'http://jasn.asnjournals.org/current.shtml'
|
INDEX = 'http://jasn.asnjournals.org/current.shtml'
|
||||||
@ -102,9 +106,17 @@ class JASN(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://jasn.asnjournals.org'+url
|
url = 'http://jasn.asnjournals.org'+url
|
||||||
|
img = isoup = None
|
||||||
|
try:
|
||||||
isoup = self.index_to_soup(url)
|
isoup = self.index_to_soup(url)
|
||||||
img = isoup.find('img', src=lambda x: x and
|
except:
|
||||||
x.startswith('/content/'))
|
time.sleep(5)
|
||||||
|
try:
|
||||||
|
isoup = self.index_to_soup(url)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
img = isoup.find('img', src=lambda x: x and x.startswith('/content/'))
|
||||||
|
|
||||||
if img is not None:
|
if img is not None:
|
||||||
img.extract()
|
img.extract()
|
||||||
table = a.findParent('table')
|
table = a.findParent('table')
|
||||||
|
@ -79,13 +79,30 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
.authorId {text-align: left; \
|
.authorId {text-align: left; \
|
||||||
font-style: italic;}\n '
|
font-style: italic;}\n '
|
||||||
|
|
||||||
def get_cover_url(self):
    '''
    Return the URL of today's front page scan, or None if it cannot be
    opened.

    The URL contains the local date as zero padded year/month/day and is
    probed with a browser before it is returned.
    '''
    cover = ('http://graphics8.nytimes.com/images/' +
             time.strftime('%Y/%m/%d', time.localtime()) +
             '/nytfrontpage/scan.jpg')
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Only real errors mean "no cover"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit
        self.log("\nCover unavailable")
        cover = None
    return cover
|
||||||
|
|
||||||
|
def get_masthead_url(self):
    '''
    Return the URL of the masthead logo, or None if it cannot be opened.
    '''
    masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Only real errors mean "no masthead"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit
        self.log("\nMasthead unavailable")
        masthead = None
    return masthead
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
nytimes.com
|
nytimes.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import string, re, time
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
@ -31,7 +31,8 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
|
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
|
||||||
dict(id=['footer', 'toolsRight', 'articleInline',
|
dict(id=['footer', 'toolsRight', 'articleInline',
|
||||||
'navigation', 'archive', 'side_search', 'blog_sidebar',
|
'navigation', 'archive', 'side_search', 'blog_sidebar',
|
||||||
'side_tool', 'side_index',
|
'side_tool', 'side_index', 'login', 'businessSearchBar',
|
||||||
|
'adxLeaderboard',
|
||||||
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
||||||
dict(name=['script', 'noscript', 'style'])]
|
dict(name=['script', 'noscript', 'style'])]
|
||||||
encoding = decode
|
encoding = decode
|
||||||
@ -51,11 +52,39 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
#open('/t/log.html', 'wb').write(raw)
|
#open('/t/log.html', 'wb').write(raw)
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
def get_masthead_url(self):
    '''
    Return the URL of the masthead logo, or None if it cannot be opened.
    '''
    masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Only real errors mean "no masthead"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit
        self.log("\nMasthead unavailable")
        masthead = None
    return masthead
|
||||||
|
|
||||||
|
|
||||||
|
def get_cover_url(self):
    '''
    Return the URL of today's front page scan, or None if it cannot be
    opened.

    The URL contains the local date as zero padded year/month/day and is
    probed with a browser before it is returned.
    '''
    cover = ('http://graphics8.nytimes.com/images/' +
             time.strftime('%Y/%m/%d', time.localtime()) +
             '/nytfrontpage/scan.jpg')
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Only real errors mean "no cover"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit
        self.log("\nCover unavailable")
        cover = None
    return cover
|
||||||
|
|
||||||
def short_title(self):
    '''Return the abbreviated title of this recipe.'''
    return 'NY Times'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
self.encoding = 'cp1252'
|
||||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||||
|
self.encoding = decode
|
||||||
|
|
||||||
def feed_title(div):
|
def feed_title(div):
|
||||||
return ''.join(div.findAll(text=True, recursive=False)).strip()
|
return ''.join(div.findAll(text=True, recursive=False)).strip()
|
||||||
|
@ -71,7 +71,7 @@ int do_mount(const char *dev, const char *mp) {
|
|||||||
#ifdef __NetBSD__
|
#ifdef __NetBSD__
|
||||||
execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
|
execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
|
||||||
#else
|
#else
|
||||||
execlp("mount", "mount", "-t", "vfat", "-o", options, dev, mp, NULL);
|
execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
|
||||||
#endif
|
#endif
|
||||||
errsv = errno;
|
errsv = errno;
|
||||||
fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
|
fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
|
||||||
|
@ -26,6 +26,11 @@ class LITInput(InputFormatPlugin):
|
|||||||
for item in oeb.spine:
|
for item in oeb.spine:
|
||||||
root = item.data
|
root = item.data
|
||||||
if not hasattr(root, 'xpath'): continue
|
if not hasattr(root, 'xpath'): continue
|
||||||
|
for bad in ('metadata', 'guide'):
|
||||||
|
metadata = XPath('//h:'+bad)(root)
|
||||||
|
if metadata:
|
||||||
|
for x in metadata:
|
||||||
|
x.getparent().remove(x)
|
||||||
body = XPath('//h:body')(root)
|
body = XPath('//h:body')(root)
|
||||||
if body:
|
if body:
|
||||||
body = body[0]
|
body = body[0]
|
||||||
|
@ -909,6 +909,12 @@ class Manifest(object):
|
|||||||
'content': '%s; charset=utf-8' % XHTML_NS})
|
'content': '%s; charset=utf-8' % XHTML_NS})
|
||||||
# Ensure has a <body/>
|
# Ensure has a <body/>
|
||||||
if not xpath(data, '/h:html/h:body'):
|
if not xpath(data, '/h:html/h:body'):
|
||||||
|
body = xpath(data, '//h:body')
|
||||||
|
if body:
|
||||||
|
body = body[0]
|
||||||
|
body.getparent().remove(body)
|
||||||
|
data.append(body)
|
||||||
|
else:
|
||||||
self.oeb.logger.warn(
|
self.oeb.logger.warn(
|
||||||
'File %r missing <body/> element' % self.href)
|
'File %r missing <body/> element' % self.href)
|
||||||
etree.SubElement(data, XHTML('body'))
|
etree.SubElement(data, XHTML('body'))
|
||||||
|
@ -43,6 +43,10 @@ class Image(Element):
|
|||||||
self.bottom = self.top + self.height
|
self.bottom = self.top + self.height
|
||||||
self.right = self.left + self.width
|
self.right = self.left + self.width
|
||||||
|
|
||||||
|
def to_html(self):
    '''
    Render this image as an <img/> tag. Width and height are truncated to
    whole pixels.
    '''
    size = (int(self.width), int(self.height))
    return '<img src="%s" width="%dpx" height="%dpx"/>' % ((self.src,) + size)
|
||||||
|
|
||||||
|
|
||||||
class Text(Element):
|
class Text(Element):
|
||||||
|
|
||||||
@ -66,8 +70,6 @@ class Text(Element):
|
|||||||
self.raw = text.text if text.text else u''
|
self.raw = text.text if text.text else u''
|
||||||
for x in text.iterchildren():
|
for x in text.iterchildren():
|
||||||
self.raw += etree.tostring(x, method='xml', encoding=unicode)
|
self.raw += etree.tostring(x, method='xml', encoding=unicode)
|
||||||
if x.tail:
|
|
||||||
self.raw += x.tail
|
|
||||||
self.average_character_width = self.width/len(self.text_as_string)
|
self.average_character_width = self.width/len(self.text_as_string)
|
||||||
|
|
||||||
def coalesce(self, other, page_number):
|
def coalesce(self, other, page_number):
|
||||||
@ -86,6 +88,9 @@ class Text(Element):
|
|||||||
self.average_character_width = (self.average_character_width +
|
self.average_character_width = (self.average_character_width +
|
||||||
other.average_character_width)/2.0
|
other.average_character_width)/2.0
|
||||||
|
|
||||||
|
def to_html(self):
    '''Return the raw markup collected for this text element.'''
    return self.raw
|
||||||
|
|
||||||
class FontSizeStats(dict):
|
class FontSizeStats(dict):
|
||||||
|
|
||||||
def __init__(self, stats):
|
def __init__(self, stats):
|
||||||
@ -108,6 +113,11 @@ class Interval(object):
|
|||||||
right = min(self.right, other.right)
|
right = min(self.right, other.right)
|
||||||
return Interval(left, right)
|
return Interval(left, right)
|
||||||
|
|
||||||
|
def centered_in(self, parent):
    '''
    True if this interval is horizontally centered in `parent`, i.e. the
    distances between the two left edges and the two right edges differ by
    less than 3.
    '''
    skew = abs(abs(self.left - parent.left) - abs(self.right - parent.right))
    return skew < 3
|
||||||
|
|
||||||
def __nonzero__(self):
|
def __nonzero__(self):
|
||||||
return self.width > 0
|
return self.width > 0
|
||||||
|
|
||||||
@ -146,6 +156,9 @@ class Column(object):
|
|||||||
for x in self.elements:
|
for x in self.elements:
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def __len__(self):
    '''Number of elements currently held by this column.'''
    return len(self.elements)
|
||||||
|
|
||||||
def contains(self, elem):
|
def contains(self, elem):
|
||||||
return elem.left > self.left - self.HFUZZ*self.width and \
|
return elem.left > self.left - self.HFUZZ*self.width and \
|
||||||
elem.right < self.right + self.HFUZZ*self.width
|
elem.right < self.right + self.HFUZZ*self.width
|
||||||
@ -160,9 +173,10 @@ class Column(object):
|
|||||||
elem.indent_fraction = left_margin/self.width
|
elem.indent_fraction = left_margin/self.width
|
||||||
elem.width_fraction = elem.width/self.width
|
elem.width_fraction = elem.width/self.width
|
||||||
if i == 0:
|
if i == 0:
|
||||||
elem.top_gap = None
|
elem.top_gap_ratio = None
|
||||||
else:
|
else:
|
||||||
elem.top_gap = self.elements[i-1].bottom - elem.top
|
elem.top_gap_ratio = (self.elements[i-1].bottom -
|
||||||
|
elem.top)/self.average_line_separation
|
||||||
|
|
||||||
def previous_element(self, idx):
|
def previous_element(self, idx):
|
||||||
if idx == 0:
|
if idx == 0:
|
||||||
@ -173,12 +187,42 @@ class Column(object):
|
|||||||
class Box(list):
    '''
    A list of items that is rendered inside a single HTML tag.

    Items are either ints, rendered as named page anchors, or objects that
    provide a to_html() method.
    '''

    def __init__(self, type='p'):
        # Name of the HTML tag that wraps the rendered items
        self.tag = type

    def to_html(self):
        '''Return the rendered box as a list of HTML fragments.'''
        parts = ['<%s>' % self.tag]
        for item in self:
            if isinstance(item, int):
                parts.append('<a name="page_%d"/>' % item)
                continue
            parts.append(item.to_html() + ' ')
        parts.append('</%s>' % self.tag)
        return parts
|
||||||
|
|
||||||
|
class ImageBox(Box):
    '''
    A Box that renders an image, followed by the items appended to the box,
    inside a centered <div>.
    '''

    def __init__(self, img):
        Box.__init__(self)
        self.img = img

    def to_html(self):
        '''Return the rendered box as a list of HTML fragments.'''
        parts = ['<div style="text-align:center">', self.img.to_html()]
        if len(self) > 0:
            # Items go on their own line below the image
            parts.append('<br/>')
            for item in self:
                if isinstance(item, int):
                    parts.append('<a name="page_%d"/>' % item)
                else:
                    parts.append(item.to_html() + ' ')
        parts.append('</div>')
        return parts
|
||||||
|
|
||||||
|
|
||||||
class Region(object):
|
class Region(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, opts, log):
|
||||||
|
self.opts, self.log = opts, log
|
||||||
self.columns = []
|
self.columns = []
|
||||||
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
||||||
|
|
||||||
@ -211,6 +255,40 @@ class Region(object):
|
|||||||
def is_empty(self):
    '''True when this region holds no columns.'''
    # Plain method, not a property: it has to be called to get the answer
    return not self.columns
|
||||||
|
|
||||||
|
@property
def is_small(self):
    '''
    True if the tallest column of this region holds at most two elements.
    A region without columns counts as small.
    '''
    max_lines = 0
    for c in self.columns:
        max_lines = max(max_lines, len(c))
    # "Small" means few lines: testing for > 2 would report exactly the
    # large regions as small
    return max_lines <= 2
|
||||||
|
|
||||||
|
def absorb(self, singleton):
    '''
    Add every element of every column of `singleton` to the column of this
    region that overlaps it most horizontally.

    :param singleton: A region whose columns are iterated element by
                      element.
    '''

    def most_suitable_column(elem):
        # Column with the widest horizontal overlap with elem. If nothing
        # overlaps, warn and fall back to the first column.
        mc, mw = None, 0
        e = Interval(elem.left, elem.right)
        for c in self.columns:
            w = Interval(c.left, c.right).intersection(e).width
            if w > mw:
                mc, mw = c, w
        if mc is None:
            self.log.warn('No suitable column for singleton',
                    elem.to_html())
            mc = self.columns[0]
        return mc

    for c in singleton.columns:
        for elem in c:
            col = most_suitable_column(elem)
            if self.opts.verbose > 3:
                idx = self.columns.index(col)
                self.log.debug(u'Absorbing singleton %s into column'%elem.to_html(),
                        idx)
            col.add(elem)
|
||||||
|
|
||||||
|
|
||||||
def collect_stats(self):
|
def collect_stats(self):
|
||||||
for column in self.columns:
|
for column in self.columns:
|
||||||
column.collect_stats()
|
column.collect_stats()
|
||||||
@ -225,9 +303,30 @@ class Region(object):
|
|||||||
self.elements = []
|
self.elements = []
|
||||||
for x in self.columns:
|
for x in self.columns:
|
||||||
self.elements.extend(x)
|
self.elements.extend(x)
|
||||||
|
self.boxes = [Box()]
|
||||||
# Find block quotes
|
for i, elem in enumerate(self.elements):
|
||||||
indented = [i for (i, x) in enumerate(self.elements) if x.indent_fraction >= 0.2]
|
if isinstance(elem, Image):
|
||||||
|
self.boxes.append(ImageBox(elem))
|
||||||
|
img = Interval(elem.left, elem.right)
|
||||||
|
for j in range(i+1, len(self.elements)):
|
||||||
|
t = self.elements[j]
|
||||||
|
if not isinstance(t, Text):
|
||||||
|
break
|
||||||
|
ti = Interval(t.left, t.right)
|
||||||
|
if not ti.centered_in(img):
|
||||||
|
break
|
||||||
|
self.boxes[-1].append(t)
|
||||||
|
self.boxes.append(Box())
|
||||||
|
else:
|
||||||
|
is_indented = False
|
||||||
|
if i+1 < len(self.elements):
|
||||||
|
indent_diff = elem.indent_fraction - \
|
||||||
|
self.elements[i+1].indent_fraction
|
||||||
|
if indent_diff > 0.05:
|
||||||
|
is_indented = True
|
||||||
|
if elem.top_gap_ratio > 1.2 or is_indented:
|
||||||
|
self.boxes.append(Box())
|
||||||
|
self.boxes[-1].append(elem)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -313,7 +412,7 @@ class Page(object):
|
|||||||
return
|
return
|
||||||
for i, x in enumerate(self.elements):
|
for i, x in enumerate(self.elements):
|
||||||
x.idx = i
|
x.idx = i
|
||||||
current_region = Region()
|
current_region = Region(self.opts, self.log)
|
||||||
processed = set([])
|
processed = set([])
|
||||||
for x in self.elements:
|
for x in self.elements:
|
||||||
if x in processed: continue
|
if x in processed: continue
|
||||||
@ -322,12 +421,42 @@ class Page(object):
|
|||||||
processed.update(elems)
|
processed.update(elems)
|
||||||
if not current_region.contains(columns):
|
if not current_region.contains(columns):
|
||||||
self.regions.append(current_region)
|
self.regions.append(current_region)
|
||||||
current_region = Region()
|
current_region = Region(self.opts, self.log)
|
||||||
current_region.add(columns)
|
current_region.add(columns)
|
||||||
if not current_region.is_empty:
|
if not current_region.is_empty:
|
||||||
self.regions.append(current_region)
|
self.regions.append(current_region)
|
||||||
|
|
||||||
|
self.coalesce_regions()
|
||||||
|
|
||||||
|
def coalesce_regions(self):
    '''
    Locate runs of contiguous small regions together with the regions that
    border them.

    Planned, but not implemented yet:
      * absorb each run into a neighboring region (prefer the one with
        number of cols closer to the avg number of cols in the set, if
        equal use large region)
      * merge contiguous regions that can contain each other

    self.regions is left unchanged.
    '''
    total = len(self.regions)
    start = 0
    # Single forward scan: every region is visited once, so the scan always
    # terminates, also when the last region of the page is small
    while start < total:
        if not self.regions[start].is_small:
            start += 1
            continue
        # Extend the run over all directly following small regions
        end = start + 1
        while end < total and self.regions[end].is_small:
            end += 1
        run = self.regions[start:end]
        # Indices of the regions bordering the run, None at the page edges
        prev_idx = None if start == 0 else start - 1
        next_idx = end if end < total else None
        # TODO: absorb `run` into self.regions[prev_idx] or
        # self.regions[next_idx] once the selection rule is implemented
        run, prev_idx, next_idx
        start = end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def sort_into_columns(self, elem, neighbors):
|
def sort_into_columns(self, elem, neighbors):
|
||||||
|
neighbors.add(elem)
|
||||||
|
neighbors = sorted(neighbors, cmp=lambda x,y:cmp(x.left, y.left))
|
||||||
|
if self.opts.verbose > 3:
|
||||||
|
self.log.debug('Neighbors:', [x.to_html() for x in neighbors])
|
||||||
columns = [Column()]
|
columns = [Column()]
|
||||||
columns[0].add(elem)
|
columns[0].add(elem)
|
||||||
for x in neighbors:
|
for x in neighbors:
|
||||||
@ -393,6 +522,9 @@ class PDFDocument(object):
|
|||||||
page.first_pass()
|
page.first_pass()
|
||||||
page.second_pass()
|
page.second_pass()
|
||||||
|
|
||||||
|
self.linearize()
|
||||||
|
self.render()
|
||||||
|
|
||||||
def collect_font_statistics(self):
|
def collect_font_statistics(self):
|
||||||
self.font_size_stats = {}
|
self.font_size_stats = {}
|
||||||
for p in self.pages:
|
for p in self.pages:
|
||||||
@ -404,5 +536,43 @@ class PDFDocument(object):
|
|||||||
|
|
||||||
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
||||||
|
|
||||||
|
def linearize(self):
    '''
    Flatten the boxes of all regions of all pages into self.elements.

    The first box of a region is merged into the previously emitted box
    when the previous region has the same number of columns and the
    previous box has no `img` attribute. The page number is inserted once
    per page, in front of the first box content taken from that page.
    '''
    self.elements = []
    last_region = last_block = None
    for page in self.pages:
        page_number_inserted = False
        for region in page.regions:
            merge_first_block = (
                last_region is not None and
                len(last_region.columns) == len(region.columns) and
                not hasattr(last_block, 'img'))
            for block in region.boxes:
                if merge_first_block:
                    merge_first_block = False
                    if not page_number_inserted:
                        last_block.append(page.number)
                        page_number_inserted = True
                    last_block.extend(block)
                    # last_block is kept: `block` itself is not emitted, so
                    # it must not become the target of a later merge
                else:
                    if not page_number_inserted:
                        block.insert(0, page.number)
                        page_number_inserted = True
                    self.elements.append(block)
                    last_block = block
            last_region = region
|
||||||
|
|
||||||
|
|
||||||
|
def render(self):
    '''
    Write the linearized elements as an XHTML document to index.html in
    the current working directory, encoded as UTF-8.
    '''
    html = ['<?xml version="1.0" encoding="UTF-8"?>',
            '<html xmlns="http://www.w3.org/1999/xhtml">', '<head>',
            '<title>PDF Reflow conversion</title>', '</head>', '<body>',
            '<div>']
    for elem in self.elements:
        html.extend(elem.to_html())
    # Close the wrapper <div> as well, otherwise the output is not well
    # formed
    html += ['</div>', '</body>', '</html>']
    with open('index.html', 'wb') as f:
        f.write((u'\n'.join(html)).encode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -195,9 +195,9 @@ class RTFInput(InputFormatPlugin):
|
|||||||
fname = self.preprocess(stream.name)
|
fname = self.preprocess(stream.name)
|
||||||
try:
|
try:
|
||||||
xml = self.generate_xml(fname)
|
xml = self.generate_xml(fname)
|
||||||
except RtfInvalidCodeException:
|
except RtfInvalidCodeException, e:
|
||||||
raise ValueError(_('This RTF file has a feature calibre does not '
|
raise ValueError(_('This RTF file has a feature calibre does not '
|
||||||
'support. Convert it to HTML first and then try it.'))
|
'support. Convert it to HTML first and then try it.\n%s')%e)
|
||||||
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
||||||
if d:
|
if d:
|
||||||
imap = {}
|
imap = {}
|
||||||
|
@ -37,7 +37,7 @@ class SaveTemplate(QWidget, Ui_Form):
|
|||||||
tmpl = preprocess_template(self.opt_template.text())
|
tmpl = preprocess_template(self.opt_template.text())
|
||||||
fa = {}
|
fa = {}
|
||||||
for x in FORMAT_ARG_DESCS.keys():
|
for x in FORMAT_ARG_DESCS.keys():
|
||||||
fa[x]=''
|
fa[x]='random long string'
|
||||||
try:
|
try:
|
||||||
tmpl.format(**fa)
|
tmpl.format(**fa)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
|
@ -122,6 +122,21 @@ class ImageMagick(object):
|
|||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
finalize()
|
finalize()
|
||||||
|
|
||||||
|
def remove_transparency(wand, background_color='white'):
    '''
    Converts transparent pixels to the specified background color.
    Returns a new magick wand with the opaque image.

    The image of `wand` is composited over a new canvas of the same size
    that is filled with `background_color`.
    '''
    nw = NewMagickWand()
    pw = NewPixelWand()
    if nw < 0 or pw < 0:
        raise RuntimeError('Out of memory')
    try:
        PixelSetColor(pw, background_color)
        MagickNewImage(nw, MagickGetImageWidth(wand), MagickGetImageHeight(wand),
                pw)
        MagickCompositeImage(nw, wand, OverCompositeOp, 0, 0)
    finally:
        # The pixel wand is only needed to fill the canvas; release it even
        # if one of the calls above raises
        DestroyPixelWand(pw)
    return nw
|
||||||
|
|
||||||
class MetricType(ctypes.c_int): pass
|
class MetricType(ctypes.c_int): pass
|
||||||
UndefinedMetric = MetricType(0)
|
UndefinedMetric = MetricType(0)
|
||||||
@ -730,6 +745,32 @@ class MagickStatusType(ctypes.c_void_p): pass
|
|||||||
class MagickInfo(ctypes.c_void_p): pass
|
class MagickInfo(ctypes.c_void_p): pass
|
||||||
class MagickWand(ctypes.c_void_p): pass
|
class MagickWand(ctypes.c_void_p): pass
|
||||||
|
|
||||||
|
# Each binding below is only exported when setting up its prototype
# succeeds, so a failure simply leaves the name undefined.

# NewPixelWand
try:
    _magick.NewPixelWand.restype = PixelWand
except:
    pass
else:
    NewPixelWand = _magick.NewPixelWand

# MagickSetImageOpacity
try:
    _magick.MagickSetImageOpacity.argtypes = (MagickWand, ctypes.c_double)
    # restype belongs on the function object, not on the library handle
    _magick.MagickSetImageOpacity.restype = MagickBooleanType
except:
    pass
else:
    MagickSetImageOpacity = _magick.MagickSetImageOpacity

# MagickMergeImageLayers
try:
    _magick.MagickMergeImageLayers.argtypes = (MagickWand, ImageLayerMethod)
    _magick.MagickMergeImageLayers.restype = MagickWand
except:
    pass
else:
    MagickMergeImageLayers = _magick.MagickMergeImageLayers
|
||||||
|
|
||||||
# MagickSetLastIterator
|
# MagickSetLastIterator
|
||||||
try:
|
try:
|
||||||
_magick.MagickSetLastIterator.restype = None
|
_magick.MagickSetLastIterator.restype = None
|
||||||
|
@ -272,6 +272,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
}
|
}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
#: By default, calibre will use a default image for the masthead (Kindle only).
|
||||||
|
#: Override this in your recipe to provide a url to use as a masthead.
|
||||||
|
masthead_url = None
|
||||||
|
|
||||||
#: Set to a non empty string to disable this recipe
|
#: Set to a non empty string to disable this recipe
|
||||||
#: The string will be used as the disabled message
|
#: The string will be used as the disabled message
|
||||||
recipe_disabled = None
|
recipe_disabled = None
|
||||||
@ -434,7 +438,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if not isinstance(_raw, unicode) and self.encoding:
|
if not isinstance(_raw, unicode) and self.encoding:
|
||||||
_raw = _raw.decode(self.encoding, 'replace')
|
_raw = _raw.decode(self.encoding, 'replace')
|
||||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||||
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
|
||||||
|
massage.append((re.compile(r'&(\S+?);'), lambda match:
|
||||||
|
entity_to_unicode(match, encoding=enc)))
|
||||||
return BeautifulSoup(_raw, markupMassage=massage)
|
return BeautifulSoup(_raw, markupMassage=massage)
|
||||||
|
|
||||||
|
|
||||||
@ -749,8 +755,12 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
self.report_progress(0, _('Trying to download cover...'))
|
self.report_progress(0, _('Trying to download cover...'))
|
||||||
self.download_cover()
|
self.download_cover()
|
||||||
self.report_progress(0, _('Trying to download masthead...'))
|
self.report_progress(0, _('Generating masthead...'))
|
||||||
|
if self.get_masthead_url():
|
||||||
self.download_masthead()
|
self.download_masthead()
|
||||||
|
else:
|
||||||
|
mpath = os.path.join(self.output_dir, 'mastheadImage.jpg')
|
||||||
|
self.default_masthead_image(mpath)
|
||||||
|
|
||||||
if self.test:
|
if self.test:
|
||||||
feeds = feeds[:2]
|
feeds = feeds[:2]
|
||||||
@ -868,6 +878,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
self.log.exception('Failed to download cover')
|
self.log.exception('Failed to download cover')
|
||||||
self.cover_path = None
|
self.cover_path = None
|
||||||
|
|
||||||
|
'''
|
||||||
def convert_image(self, name):
|
def convert_image(self, name):
|
||||||
image_ext = name.rpartition('.')[2].lower()
|
image_ext = name.rpartition('.')[2].lower()
|
||||||
if image_ext in ['jpg','jpeg']:
|
if image_ext in ['jpg','jpeg']:
|
||||||
@ -884,9 +895,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
p.MagickWriteImage(img, name)
|
p.MagickWriteImage(img, name)
|
||||||
p.DestroyMagickWand(img)
|
p.DestroyMagickWand(img)
|
||||||
return name
|
return name
|
||||||
|
'''
|
||||||
|
|
||||||
def _download_masthead(self):
|
def _download_masthead(self):
|
||||||
self.masthead_path = None
|
|
||||||
try:
|
try:
|
||||||
mu = self.get_masthead_url()
|
mu = self.get_masthead_url()
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
@ -899,6 +910,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
ext = ''
|
ext = ''
|
||||||
ext = ext.lower() if ext else 'jpg'
|
ext = ext.lower() if ext else 'jpg'
|
||||||
mpath = os.path.join(self.output_dir, 'mastheadImage.'+ext)
|
mpath = os.path.join(self.output_dir, 'mastheadImage.'+ext)
|
||||||
|
outfile = mpath.rpartition('.')[0] + '.jpg'
|
||||||
if os.access(mu, os.R_OK):
|
if os.access(mu, os.R_OK):
|
||||||
with open(mpath, 'wb') as mfile:
|
with open(mpath, 'wb') as mfile:
|
||||||
mfile.write(open(mu, 'rb').read())
|
mfile.write(open(mu, 'rb').read())
|
||||||
@ -906,7 +918,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
self.report_progress(1, _('Downloading masthead from %s')%mu)
|
self.report_progress(1, _('Downloading masthead from %s')%mu)
|
||||||
with nested(open(mpath, 'wb'), closing(self.browser.open(mu))) as (mfile, r):
|
with nested(open(mpath, 'wb'), closing(self.browser.open(mu))) as (mfile, r):
|
||||||
mfile.write(r.read())
|
mfile.write(r.read())
|
||||||
self.masthead_path = self.convert_image(mpath)
|
self.masthead_path = self.prepare_masthead_image(mpath,outfile)
|
||||||
|
|
||||||
|
|
||||||
def download_masthead(self):
|
def download_masthead(self):
|
||||||
@ -914,7 +926,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
self._download_masthead()
|
self._download_masthead()
|
||||||
except:
|
except:
|
||||||
self.log.exception('Failed to download masthead')
|
self.log.exception('Failed to download masthead')
|
||||||
self.masthead_path = None
|
|
||||||
|
|
||||||
def default_cover(self, cover_file):
|
def default_cover(self, cover_file):
|
||||||
'''
|
'''
|
||||||
@ -979,6 +991,71 @@ class BasicNewsRecipe(Recipe):
|
|||||||
cover_file.flush()
|
cover_file.flush()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_masthead_title(self):
    '''
    Return the recipe title. Override in subclass to use something other
    than the recipe title.
    '''
    return self.title
|
||||||
|
|
||||||
|
def default_masthead_image(self, out_path):
    '''
    Draw the masthead title in black, bold serif type, centered on a white
    600x100 canvas and save the result as a JPEG file at `out_path`.
    '''
    try:
        from PIL import Image, ImageDraw, ImageFont
        Image, ImageDraw, ImageFont
    except ImportError:
        import Image, ImageDraw, ImageFont

    img = Image.new('RGB', (600, 100), 'white')
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(P('fonts/liberation/LiberationSerif-Bold.ttf'), 48)
    text = self.get_masthead_title().encode('utf-8')
    width, height = draw.textsize(text, font=font)
    # Center the text; text larger than the canvas is anchored at the
    # top/left edge instead of getting a negative offset
    left = max(int((600 - width)/2.), 0)
    top = max(int((100 - height)/2.), 0)
    draw.text((left, top), text, fill=(0,0,0), font=font)
    # Close the output file deterministically instead of leaking the handle
    with open(out_path, 'wb') as f:
        img.save(f, 'JPEG')
|
||||||
|
|
||||||
|
def prepare_masthead_image(self, path_to_image, out_path):
    '''
    Flatten the image at `path_to_image` onto a white background, resize it
    if fit_image() reports that it does not fit into 600x100, center it on
    a white 600x100 frame and write the frame to `out_path`.

    :return: `out_path`
    :raises IOError: if the source image cannot be read
    :raises RuntimeError: if an ImageMagick operation fails
    '''
    import calibre.utils.PythonMagickWand as pw
    from ctypes import byref
    from calibre import fit_image

    with pw.ImageMagick():
        img = pw.NewMagickWand()
        img2 = pw.NewMagickWand()
        frame = pw.NewMagickWand()
        p = pw.NewPixelWand()
        if img < 0 or img2 < 0 or p < 0 or frame < 0:
            raise RuntimeError('Out of memory')
        try:
            if not pw.MagickReadImage(img, path_to_image):
                severity = pw.ExceptionType(0)
                msg = pw.MagickGetException(img, byref(severity))
                raise IOError('Failed to read image from: %s: %s'
                        %(path_to_image, msg))
            pw.PixelSetColor(p, 'white')
            width, height = pw.MagickGetImageWidth(img),pw.MagickGetImageHeight(img)
            scaled, nwidth, nheight = fit_image(width, height, 600, 100)
            # img2: white canvas of the original size, used to flatten img
            if not pw.MagickNewImage(img2, width, height, p):
                raise RuntimeError('Out of memory')
            if not pw.MagickNewImage(frame, 600, 100, p):
                raise RuntimeError('Out of memory')
            if not pw.MagickCompositeImage(img2, img, pw.OverCompositeOp, 0, 0):
                raise RuntimeError('Out of memory')
            if scaled:
                if not pw.MagickResizeImage(img2, nwidth, nheight, pw.LanczosFilter,
                        0.5):
                    raise RuntimeError('Out of memory')
            # Offsets that center the (possibly resized) image in the frame
            left = int((600 - nwidth)/2.0)
            top = int((100 - nheight)/2.0)
            if not pw.MagickCompositeImage(frame, img2, pw.OverCompositeOp,
                    left, top):
                raise RuntimeError('Out of memory')
            if not pw.MagickWriteImage(frame, out_path):
                raise RuntimeError('Failed to save image to %s'%out_path)
        finally:
            # Release the wands on the error paths as well
            pw.DestroyPixelWand(p)
            for x in (img, img2, frame):
                pw.DestroyMagickWand(x)

    return out_path
|
||||||
|
|
||||||
def create_opf(self, feeds, dir=None):
|
def create_opf(self, feeds, dir=None):
|
||||||
if dir is None:
|
if dir is None:
|
||||||
@ -1020,7 +1097,6 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
# Get masthead
|
# Get masthead
|
||||||
mpath = getattr(self, 'masthead_path', None)
|
mpath = getattr(self, 'masthead_path', None)
|
||||||
print "\ncreate_opf(): masthead: %s\n" % mpath
|
|
||||||
if mpath is not None and os.access(mpath, os.R_OK):
|
if mpath is not None and os.access(mpath, os.R_OK):
|
||||||
manifest.append(mpath)
|
manifest.append(mpath)
|
||||||
opf.manifest = mpath
|
opf.manifest = mpath
|
||||||
@ -1032,7 +1108,6 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if mani.path.endswith('mastheadImage.jpg'):
|
if mani.path.endswith('mastheadImage.jpg'):
|
||||||
mani.id = 'masthead-image'
|
mani.id = 'masthead-image'
|
||||||
|
|
||||||
|
|
||||||
entries = ['index.html']
|
entries = ['index.html']
|
||||||
toc = TOC(base_path=dir)
|
toc = TOC(base_path=dir)
|
||||||
self.play_order_counter = 0
|
self.play_order_counter = 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user