#!/usr/bin/env python
##
# Title: Microwave Journal
# Contact: Kiavash (use Mobile Read)
##
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright: Kiavash
##
# Written: Jan 2012
# Last Edited: Feb 2012
##
# Feb 2012: New Recipe compatible with the MWJournal 2.0 website
# Module metadata for this recipe.
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = 'Kiavash'
# Author name, spelled the same way as in the file header and __copyright__.
__author__ = 'Kiavash'
'''
microwavejournal.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MWJournal(BasicNewsRecipe):
    """Recipe that downloads the current issue of Microwave Journal.

    The issue index page (``INDEX``) is scraped for the issue title, the
    cover image and the per-section article lists; every article is then
    fetched through its print-friendly URL (see ``print_version``).
    """

    title = u'Microwave Journal'
    description = u'Microwave Journal Monthly Magazine'
    publisher = 'Horizon House'
    publication_type = 'magazine'
    INDEX = 'http://www.microwavejournal.com/publications/'
    language = 'en'
    timeout = 30

    # Prefix used to turn site-relative links into absolute URLs.
    _SITE_ROOT = 'http://www.microwavejournal.com'

    Convert_Grayscale = False  # Convert images to gray scale or not

    # Only the article record itself is kept from each downloaded page.
    keep_only_tags = [dict(name='div', attrs={'class': 'record'})]
    no_stylesheets = True
    remove_javascript = True

    remove_tags = [
        dict(name='font', attrs={'class': 'footer'}),  # remove fonts
    ]

    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                         'valign', 'vspace', 'hspace', 'alt', 'width', 'height']

    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    # Written as adjacent literals so the value does not depend on how the
    # continuation lines happen to be indented.
    extra_css = (
        'body { font-family: verdana, helvetica, sans-serif; } '
        '.introduction, .first { font-weight: bold; } '
        '.cross-head { font-weight: bold; font-size: 125%; } '
        '.cap, .caption { display: block; font-size: 80%; font-style: italic; } '
        '.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } '
        '.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, '
        '.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; '
        'font-size: 80%; font-style: italic; margin: 1px auto; } '
        '.story-date, .published { font-size: 80%; } '
        'table { width: 100%; } '
        'td img { display: block; margin: 5px auto; } '
        'ul { padding-top: 10px; } '
        'ol { padding-top: 10px; } '
        'li { padding-top: 5px; padding-bottom: 5px; } '
        'h1 { font-size: 175%; font-weight: bold; } '
        'h2 { font-size: 150%; font-weight: bold; } '
        'h3 { font-size: 125%; font-weight: bold; } '
        'h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    )

    # Remove the line breaks, href links and float left/right and picture
    # width/height.
    # NOTE(review): the four tag patterns below (<br .../>, <a ...>, </a>)
    # were lost from the file and have been reconstructed from the comment
    # above -- confirm them against the upstream recipe.
    preprocess_regexps = [
        (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'<a.*?>'), lambda h1: ''),
        (re.compile(r'</a>'), lambda h2: ''),
        (re.compile(r'float:.*?'), lambda h3: ''),
        (re.compile(r'width:.*?px'), lambda h4: ''),
        (re.compile(r'height:.*?px'), lambda h5: ''),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for the article at *url*."""
        return url.replace('/articles/', '/articles/print/')

    def _absolute_url(self, url):
        """Prefix site-relative links (starting with '/') with the site root."""
        if url.startswith('/'):
            return self._SITE_ROOT + url
        return url

    def parse_index(self):
        """Scrape the issue index page and return the feeds of the issue.

        Sets ``self.timefmt`` to the issue title and ``self.cover_url`` to
        the cover image when they are present on the page.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with the keys ``title``, ``url``, ``description``
        and ``date``. Sections without any usable article are omitted.
        """
        soup = self.index_to_soup(self.INDEX)

        issue_box = soup.find(
            'div', attrs={'class': 'box1 article publications-show'})
        if issue_box is None:
            # Keep going: the article list may still be parseable.
            self.log('Could not find the current issue box on', self.INDEX)
        else:
            issue_title = self.tag_to_string(issue_box.find('h2'))
            self.log('Found Current Issue:', issue_title)
            self.timefmt = ' [%s]' % issue_title

            cover = issue_box.find('img', src=True)
            if cover is not None:
                self.cover_url = self._absolute_url(cover['src'])
                self.log('Found Cover image:', self.cover_url)

        feeds = []
        seen_titles = set()  # Used to drop duplicate articles across sections

        sections = soup.find('div', attrs={'class': 'box2 publication'})
        if sections is None:
            self.log('Could not find the article sections on', self.INDEX)
            return feeds

        for section in sections.findAll('div', attrs={'class': 'records'}):
            section_title = self.tag_to_string(section.find('h3'))
            self.log('Found section:', section_title)

            articles = []
            for post in section.findAll('div', attrs={'class': 'record'}):
                title = self.tag_to_string(post.find('h2'))

                # Let's get rid of the useless Puzzler!
                if 'The MWJ Puzzler' in title:
                    continue
                if title in seen_titles:
                    continue
                seen_titles.add(title)

                link = post.find('a', href=True)
                if link is None:
                    # Nothing to download for a record without a link.
                    continue
                url = self._absolute_url(link['href'])
                self.log('\tFound article:', title, 'at', url)

                # The abstract (and its paragraph) is optional.
                desc = None
                abstract = post.find('div', attrs={'class': 'abstract'})
                summary = abstract.find('p') if abstract is not None else None
                if summary is not None:
                    desc = self.tag_to_string(summary)
                    self.log('\t\t', desc)

                articles.append({'title': title, 'url': url, 'description': desc,
                                 'date': self.timefmt})

            if articles:
                feeds.append((section_title, articles))

        return feeds