Remove grayscaling of images from various old recipes

This commit is contained in:
Kovid Goyal 2016-05-12 07:45:03 +05:30
parent 3285acd4d9
commit fc12ac73d8
14 changed files with 12 additions and 344 deletions

View File

@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
import datetime, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
now = datetime.datetime.now()
@ -115,15 +114,3 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
meta_idx = meta.parent.contents.index(meta)
meta.parent.insert(meta_idx + 1, tag)
# Starson17 'Convert Images to Grayscale'
def postprocess_html(self, soup, first):
# process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -153,18 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)

View File

@ -1,7 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1316245412(BasicNewsRecipe):
#from calibre.utils.magick import Image, PixelWand
title = u'Cicero Online'
description = u'Magazin f\xfcr politische Kultur (RSS Version)'
publisher = 'Ringier Publishing GmbH'

View File

@ -153,18 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)

View File

@ -49,7 +49,6 @@ class CanWestPaper(BasicNewsRecipe):
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
# title = u'Vancouver Province'
# url_prefix = 'http://www.theprovince.com'
@ -120,7 +119,8 @@ class CanWestPaper(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':re.compile('story')})]
remove_tags = [{'class':'comments'},{'class':'comment-intro'},{'class':'storytab'},
dict(name='div', attrs={'class':'section_title'}),dict(name='div', attrs={'class':'sharebar'}),dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'section_title'}),dict(name='div', attrs={'class':'sharebar'}),dict(
name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
@ -130,7 +130,6 @@ class CanWestPaper(BasicNewsRecipe):
dict(name='div', attrs={'id':re.compile('flyer')}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
@ -153,18 +152,6 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -211,16 +198,15 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
# delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
@ -245,14 +231,11 @@ class CanWestPaper(BasicNewsRecipe):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
articles = {}
ans = []
def handle_article(adiv,key):
if adiv.name=='h1' or adiv.name=='h3':
h1tag = adiv
@ -301,7 +284,7 @@ class CanWestPaper(BasicNewsRecipe):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
print("Section: "+key+' NOT FOUND');
print("Section: "+key+' NOT FOUND')
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
@ -309,9 +292,9 @@ class CanWestPaper(BasicNewsRecipe):
if footer is not None:
footer.extract()
for wdiv in mainsoup.findAll(attrs={'class':['genericfeature']}):
wdiv.extract()
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'class':['headline','featurecontent']}):
handle_article(wdiv,key)
handle_article(wdiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)

View File

@ -22,7 +22,6 @@ microwavejournal.com
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class MWJournal(BasicNewsRecipe):
@ -127,15 +126,3 @@ class MWJournal(BasicNewsRecipe):
feeds.append((section_title, articles))
return feeds
def postprocess_html(self, soup, first):
if self.Convert_Grayscale:
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -4,11 +4,7 @@ __copyright__ = '2010, Hans Donner <hans.donner at pobox.com>'
www.standardmedia.co.ke
'''
import os
from calibre import strftime, __appname__, __version__
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.constants import preferred_encoding
from calibre.utils.magick import Image
class NationKeRecipe(BasicNewsRecipe):
@ -22,8 +18,7 @@ class NationKeRecipe(BasicNewsRecipe):
publisher = 'nation.co.ke'
category = 'news, politics, Kenia'
cover_img_url = 'http://www.nation.co.ke/image/view/-/465228/medRes/33884/-/maxh/85/-/12e8pptz/-/Sunday_Logo.gif'
masthead_url = cover_img_url
masthead_url = 'http://www.nation.co.ke/image/view/-/465228/medRes/33884/-/maxh/85/-/12e8pptz/-/Sunday_Logo.gif'
max_articles_per_feed = 200
oldest_article = 2
@ -52,7 +47,6 @@ class NationKeRecipe(BasicNewsRecipe):
, 'language' : language
}
def print_version(self, url):
from calibre.ebooks.BeautifulSoup import BeautifulSoup
soup = BeautifulSoup(self.browser.open(url).read())
@ -65,92 +59,3 @@ class NationKeRecipe(BasicNewsRecipe):
def preprocess_html(self, soup):
return self.adeify_images(soup)
def get_cover_img_url(self):
return getattr(self, 'cover_img_url', None)
def _download_cover_img(self):
# hack to reuse download_cover
old_cu = None
try:
old_cu = self.get_cover_ur()
except:
pass
new_cu = self.get_cover_img_url()
self.cover_url = new_cu
self._download_cover()
outfile = os.path.join(self.output_dir, 'cover_img.jpg')
self.prepare_masthead_image(self.cover_path, outfile)
self.cover_url = old_cu
self.cover_img_path = outfile
def download_cover_img(self):
try:
self._download_cover_img()
self.report_progress(1, _('Downloaded cover to %s') % self.cover_img_path)
except:
self.log.exception('Failed to download cover img')
self.cover_img_path = None
def prepare_cover_image(self, path_to_image, out_path):
img = Image()
img.open(path_to_image)
img.save(out_path)
def default_cover(self, cover_file):
'''
Create a generic cover for recipes that have a special cover img
'''
try:
try:
from PIL import Image, ImageDraw, ImageFont
Image, ImageDraw, ImageFont
except ImportError:
import Image, ImageDraw, ImageFont
font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
title = self.title if isinstance(self.title, unicode) else \
self.title.decode(preferred_encoding, 'replace')
date = strftime(self.timefmt)
app = '['+__appname__ +' '+__version__+']'
COVER_WIDTH, COVER_HEIGHT = 590, 750
img = Image.new('RGB', (COVER_WIDTH, COVER_HEIGHT), 'white')
draw = ImageDraw.Draw(img)
# Title
font = ImageFont.truetype(font_path, 44)
width, height = draw.textsize(title, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
top = 15
draw.text((left, top), title, fill=(0,0,0), font=font)
bottom = top + height
# Date
font = ImageFont.truetype(font_path, 32)
width, height = draw.textsize(date, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
draw.text((left, bottom+15), date, fill=(0,0,0), font=font)
# Vanity
font = ImageFont.truetype(font_path, 28)
width, height = draw.textsize(app, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
top = COVER_HEIGHT - height - 15
draw.text((left, top), app, fill=(0,0,0), font=font)
# Logo
logo_file = I('library.png')
self.download_cover_img()
if getattr(self, 'cover_img_path', None) is not None:
logo_file = self.cover_img_path
self.report_progress(1, _('using cover img from %s') % logo_file)
logo = Image.open(logo_file, 'r')
width, height = logo.size
left = max(int((COVER_WIDTH - width)/2.), 0)
top = max(int((COVER_HEIGHT - height)/2.), 0)
img.paste(logo, (left, top))
img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
img.convert('RGB').save(cover_file, 'JPEG')
cover_file.flush()
except Exception, e:
self.log.exception('Failed to generate default cover ', e)
return False
return True

View File

@ -26,7 +26,6 @@ newscientist.com
'''
import re
from calibre.utils.magick import Image
from calibre.web.feeds.news import BasicNewsRecipe
class NewScientist(BasicNewsRecipe):
@ -131,16 +130,3 @@ class NewScientist(BasicNewsRecipe):
self.conversion_options.update({'series_index':nr})
return cover_url
# Converts images to Gray Scale
def postprocess_html(self, soup, first):
if self.Convert_Grayscale:
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -153,18 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)

View File

@ -4,7 +4,6 @@ __license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class sportowefakty(BasicNewsRecipe):
title = u'SportoweFakty'
@ -20,10 +19,10 @@ class sportowefakty(BasicNewsRecipe):
no_stylesheets=True
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs = {'class' : 'box-article'})]
keep_only_tags = [dict(attrs={'class' : 'box-article'})]
remove_tags =[]
remove_tags.append(dict(attrs = {'class' : re.compile(r'^newsStream')}))
remove_tags.append(dict(attrs = {'target' : '_blank'}))
remove_tags.append(dict(attrs={'class' : re.compile(r'^newsStream')}))
remove_tags.append(dict(attrs={'target' : '_blank'}))
feeds = [
(u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'),
@ -58,13 +57,3 @@ class sportowefakty(BasicNewsRecipe):
alink.replaceWith(tstr)
return soup
def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -4,12 +4,7 @@ __copyright__ = '2010, Hans Donner <hans.donner at pobox.com>'
www.standardmedia.co.ke
'''
import os
from calibre import strftime, __appname__, __version__
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.constants import preferred_encoding
from calibre.utils.magick import Image
class StandardMediaKeRecipe(BasicNewsRecipe):
@ -23,8 +18,7 @@ class StandardMediaKeRecipe(BasicNewsRecipe):
publisher = 'standardmedia.co.ke'
category = 'news, politics, Kenia'
cover_img_url = 'http://www.standardmedia.co.ke/images/easLogoOther.gif'
masthead_url = cover_img_url
masthead_url = 'http://www.standardmedia.co.ke/images/easLogoOther.gif'
max_articles_per_feed = 200
oldest_article = 3
@ -58,92 +52,3 @@ class StandardMediaKeRecipe(BasicNewsRecipe):
def preprocess_html(self, soup):
return self.adeify_images(soup)
def get_cover_img_url(self):
return getattr(self, 'cover_img_url', None)
def _download_cover_img(self):
# hack to reuse download_cover
old_cu = None
try:
old_cu = self.get_cover_ur()
except:
pass
new_cu = self.get_cover_img_url()
self.cover_url = new_cu
self._download_cover()
outfile = os.path.join(self.output_dir, 'cover_img.jpg')
self.prepare_masthead_image(self.cover_path, outfile)
self.cover_url = old_cu
self.cover_img_path = outfile
def download_cover_img(self):
try:
self._download_cover_img()
self.report_progress(1, _('Downloaded cover to %s') % self.cover_img_path)
except:
self.log.exception('Failed to download cover img')
self.cover_img_path = None
def prepare_cover_image(self, path_to_image, out_path):
img = Image()
img.open(path_to_image)
img.save(out_path)
def default_cover(self, cover_file):
'''
Create a generic cover for recipes that have a special cover img
'''
try:
try:
from PIL import Image, ImageDraw, ImageFont
Image, ImageDraw, ImageFont
except ImportError:
import Image, ImageDraw, ImageFont
font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
title = self.title if isinstance(self.title, unicode) else \
self.title.decode(preferred_encoding, 'replace')
date = strftime(self.timefmt)
app = '['+__appname__ +' '+__version__+']'
COVER_WIDTH, COVER_HEIGHT = 590, 750
img = Image.new('RGB', (COVER_WIDTH, COVER_HEIGHT), 'white')
draw = ImageDraw.Draw(img)
# Title
font = ImageFont.truetype(font_path, 44)
width, height = draw.textsize(title, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
top = 15
draw.text((left, top), title, fill=(0,0,0), font=font)
bottom = top + height
# Date
font = ImageFont.truetype(font_path, 32)
width, height = draw.textsize(date, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
draw.text((left, bottom+15), date, fill=(0,0,0), font=font)
# Vanity
font = ImageFont.truetype(font_path, 28)
width, height = draw.textsize(app, font=font)
left = max(int((COVER_WIDTH - width)/2.), 0)
top = COVER_HEIGHT - height - 15
draw.text((left, top), app, fill=(0,0,0), font=font)
# Logo
logo_file = I('library.png')
self.download_cover_img()
if getattr(self, 'cover_img_path', None) is not None:
logo_file = self.cover_img_path
self.report_progress(1, _('using cover img from %s') % logo_file)
logo = Image.open(logo_file, 'r')
width, height = logo.size
left = max(int((COVER_WIDTH - width)/2.), 0)
top = max(int((COVER_HEIGHT - height)/2.), 0)
img.paste(logo, (left, top))
img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE)
img.convert('RGB').save(cover_file, 'JPEG')
cover_file.flush()
except Exception, e:
self.log.exception('Failed to generate default cover ', e)
return False
return True

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class tvn24(BasicNewsRecipe):
title = u'TVN24'
oldest_article = 7
@ -37,14 +36,3 @@ class tvn24(BasicNewsRecipe):
alink.replaceWith(tstr)
return soup
def postprocess_html(self, soup, first):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -154,18 +154,6 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)

View File

@ -61,7 +61,6 @@ class TimesColonist(BasicNewsRecipe):
masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
@ -112,18 +111,6 @@ class TimesColonist(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)