mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
New recipe for The Economist that does not require a subscription
This commit is contained in:
parent
152738b691
commit
c1c030a386
68
resources/recipes/economist_free.recipe
Normal file
68
resources/recipes/economist_free.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
class Economist(BasicNewsRecipe):
    """Recipe that builds The Economist print edition without a subscription.

    The article list comes from the public "full print edition" feed. Each
    article page is fetched once while building the index in order to find
    the section it belongs to and to derive its printer-friendly URL.
    """

    title = 'The Economist (free)'
    language = 'en'

    __author__ = "Kovid Goyal"
    description = ('Global news and current affairs from a European perspective.'
                   ' Much slower than the subscription based version.')

    # Maximum article age, in days.
    oldest_article = 6.5
    cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
    remove_tags = [dict(name=['script', 'noscript', 'title'])]
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')

    def parse_index(self):
        """Build the index as a list of ``(section title, [article dict])``.

        Articles whose page cannot be processed are logged and skipped, as
        are articles older than ``oldest_article`` days. Sections are
        returned in the order in which they first appear in the feed.
        """
        # Imported once here rather than on every loop iteration.
        from calibre.web.feeds import Article
        from calibre.web.feeds.feedparser import parse

        raw = self.index_to_soup(
                'http://feeds.feedburner.com/economist/full_print_edition',
                raw=True)
        entries = parse(raw).entries

        feeds = {}
        feed_order = []  # section titles in order of first appearance
        max_age = 24*3600*self.oldest_article  # seconds

        for i, item in enumerate(entries):
            # NOTE(review): falls back to "now" when the entry has no
            # 'timestamp' key -- confirm the feed parser provides it,
            # otherwise the age check below can never trigger.
            published = time.gmtime(item.get('timestamp', time.time()))
            title = item.get('title', _('Untitled article'))
            link = item.get('link', None)
            description = item.get('description', '')
            author = item.get('author', '')

            try:
                feedtitle, link = self.process_eco_feed_article(link)
                self.log('Found print version for article:', title)
            except Exception:
                # Best effort: one broken article must not abort the whole
                # download, but do not swallow KeyboardInterrupt/SystemExit.
                self.log.exception('Failed to process article:', title)
                continue

            a = Article(i, title, link, author, description, published, '')
            delta = datetime.utcnow() - a.utctime
            if delta.days*24*3600 + delta.seconds > max_age:
                self.log.debug('Skipping article %s (%s) from feed %s as it is too old.'%(
                    title, a.localtime.strftime('%a, %d %b, %Y %H:%M'), feedtitle))
                continue

            article = dict(title=a.title, description=a.text_summary,
                    date=time.strftime(self.timefmt, a.date), author=a.author,
                    url=a.url)
            if feedtitle not in feeds:
                feeds[feedtitle] = []
                feed_order.append(feedtitle)
            feeds[feedtitle].append(article)

        return [(t, feeds[t]) for t in feed_order]

    def process_eco_feed_article(self, url):
        """Return ``(section title, printer-friendly URL)`` for an article.

        Opens ``url`` with the recipe's browser, takes the browser's current
        URL and replaces ``displaystory`` with ``PrinterFriendly``. The
        section title is the text of the first element whose class is
        ``article-section``; ``'Miscellaneous'`` is used when there is no
        such element or its text is empty.
        """
        ret = self.browser.open(url)
        raw = ret.read()
        url = self.browser.geturl().replace('displaystory', 'PrinterFriendly').strip()
        root = html.fromstring(raw)
        matches = root.xpath('//*[@class = "article-section"]')
        feedtitle = 'Miscellaneous'
        if matches:
            text = html.tostring(matches[0], method='text',
                    encoding=unicode)
            # Strip surrounding whitespace so that the same section does not
            # end up as several differently padded feed titles.
            feedtitle = text.strip() or feedtitle
        return feedtitle, url
|
||||||
|
|
||||||
|
|
83
src/calibre/utils/libwmf.c
Normal file
83
src/calibre/utils/libwmf.c
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
#include <libwmf/api.h>
|
||||||
|
#include <libwmf/svg.h>
|
||||||
|
|
||||||
|
#define False 0
|
||||||
|
#define True 1
|
||||||
|
typedef int bool;
|
||||||
|
|
||||||
|
/*
 * Create a libwmf API handle that uses the SVG device function.
 *
 * API: out-parameter receiving the new handle. The caller should set *API
 *      to NULL before calling, so that a failure which never wrote the
 *      handle is not mistaken for a live one.
 *
 * Returns True on success. On failure any handle that was created is
 * destroyed, *API is reset to 0 and False is returned.
 */
bool create_api(wmfAPI** API) {
    /* Zero-initialise so libwmf never reads indeterminate option fields. */
    wmfAPI_Options options = {0};
    wmf_error_t error;
    unsigned long flags;

    flags = WMF_OPT_FUNCTION | WMF_OPT_IGNORE_NONFATAL;
    options.function = wmf_svg_function;

    error = wmf_api_create(API, flags, &options);
    if (error != wmf_E_None) {
        /* Only destroy a handle that exists; creation may have failed
         * before one was allocated. */
        if (*API) {
            wmf_api_destroy(*API);
            *API = 0;
        }
        return False;
    }
    return True;
}
|
||||||
|
|
||||||
|
/*
 * Open the WMF file at `path` on the given API handle.
 *
 * Returns True on success. On failure the API handle is destroyed and
 * False is returned, so the caller must not use or destroy it afterwards.
 */
bool load_image(wmfAPI *API, const char *path) {
    if (wmf_file_open(API, path) == wmf_E_None)
        return True;

    /* Open failed: tear the handle down before reporting the failure. */
    wmf_api_destroy(API);
    return False;
}
|
||||||
|
|
||||||
|
/*
 * Scan the currently opened image and store its bounding box in `bbox`.
 *
 * Returns True on success. On failure the API handle is destroyed and
 * False is returned, so the caller must not use or destroy it afterwards.
 */
bool scan_image(wmfAPI *API, wmfD_Rect *bbox) {
    const wmf_error_t status = wmf_scan(API, 0, bbox);

    if (status == wmf_E_None)
        return True;

    /* Scan failed: tear the handle down before reporting the failure. */
    wmf_api_destroy(API);
    return False;
}
|
||||||
|
|
||||||
|
/*
 * Compute the extent of a bounding box.
 *
 * bbox:   box whose TL and BR corners are read.
 * width:  receives BR.x - TL.x.
 * height: receives BR.y - TL.y.
 */
void get_image_size(wmfD_Rect *bbox, float *width, float *height) {
    const wmfD_Coord *top_left = &bbox->TL;
    const wmfD_Coord *bottom_right = &bbox->BR;

    *width = bottom_right->x - top_left->x;
    *height = bottom_right->y - top_left->y;
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
wmfAPI *API = NULL;
|
||||||
|
wmfD_Rect bbox;
|
||||||
|
wmf_svg_t *ddata;
|
||||||
|
float width, height;
|
||||||
|
|
||||||
|
if (argc != 2) {
|
||||||
|
fprintf(stderr, "Usage: wmf file\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!create_api(&API)) {
|
||||||
|
fprintf(stderr, "Failed to create WMF API\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
ddata = WMF_SVG_GetData(API);
|
||||||
|
|
||||||
|
if (!load_image(API, argv[1])) {
|
||||||
|
fprintf(stderr, "Failed to load image: %s\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!scan_image(API, &bbox)) {
|
||||||
|
fprintf(stderr, "Failed to scan image: %s\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
wmf_file_close(API);
|
||||||
|
get_image_size(&bbox, &width, &height);
|
||||||
|
printf("Image size: %f x %f\n", width, height);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user