From c1c030a3864c2b4d66006f3668c40dee957f0d2f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 29 Nov 2009 14:34:42 -0700
Subject: [PATCH] New recipe for The Economist that does not require a subscription

---
 resources/recipes/economist_free.recipe | 101 ++++++++++++++++++++++
 src/calibre/utils/libwmf.c              | 114 +++++++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 resources/recipes/economist_free.recipe
 create mode 100644 src/calibre/utils/libwmf.c

diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe
new file mode 100644
index 0000000000..14689a95d8
--- /dev/null
+++ b/resources/recipes/economist_free.recipe
@@ -0,0 +1,101 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import time
+from datetime import datetime
+from lxml import html
+
+class Economist(BasicNewsRecipe):
+    '''
+    Build an issue of The Economist from the free print-edition feed.
+
+    Every feed entry is opened individually to find its section name and
+    printer friendly URL, which makes this recipe slower than the
+    subscription based one.
+    '''
+
+    title = 'The Economist (free)'
+    language = 'en'
+
+    __author__ = "Kovid Goyal"
+    description = ('Global news and current affairs from a European perspective.'
+            ' Much slower than the subscription based version.')
+
+    # Maximum article age, in days
+    oldest_article = 6.5
+    cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
+    remove_tags = [dict(name=['script', 'noscript', 'title'])]
+    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
+
+    def parse_index(self):
+        '''
+        Fetch the print edition feed and group its articles by section.
+
+        Returns a list of (section title, list of article dicts) tuples,
+        with sections in the order they are first seen in the feed.
+        Entries that cannot be processed or that are older than
+        oldest_article days are skipped.
+        '''
+        from calibre.web.feeds import Article
+        from calibre.web.feeds.feedparser import parse
+        raw = self.index_to_soup(
+                'http://feeds.feedburner.com/economist/full_print_edition',
+                raw=True)
+        entries = parse(raw).entries
+        feeds = {}
+        # Section titles in first-seen order; a plain dict would hand the
+        # sections back in arbitrary order
+        sections = []
+        for i, item in enumerate(entries):
+            published = time.gmtime(item.get('timestamp', time.time()))
+            title = item.get('title', _('Untitled article'))
+            link = item.get('link', None)
+            description = item.get('description', '')
+            author = item.get('author', '')
+
+            try:
+                feedtitle, link = self.process_eco_feed_article(link)
+                self.log('Found print version for article:', title)
+            except Exception:
+                # Best effort: a single broken entry must not abort the
+                # download, but Ctrl-C and interpreter exit still propagate
+                self.log.exception('Failed to process article:', title)
+                continue
+
+            a = Article(i, title, link, author, description, published, '')
+            delta = datetime.utcnow() - a.utctime
+            if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article:
+                self.log.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, a.localtime.strftime('%a, %d %b, %Y %H:%M'), feedtitle))
+                continue
+
+            article = dict(title=a.title, description=a.text_summary,
+                date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url)
+            if feedtitle not in feeds:
+                feeds[feedtitle] = []
+                sections.append(feedtitle)
+            feeds[feedtitle].append(article)
+        return [(t, feeds[t]) for t in sections]
+
+    def process_eco_feed_article(self, url):
+        '''
+        Open the article at url and return (section title, print URL).
+
+        The print URL is the URL reported by the browser after the request,
+        with 'displaystory' replaced by 'PrinterFriendly'. The section title
+        is the text of the first element with class "article-section", or
+        'Miscellaneous' if there is no such element or it has no text.
+        '''
+        ret = self.browser.open(url)
+        raw = ret.read()
+        url = self.browser.geturl().replace('displaystory', 'PrinterFriendly').strip()
+        root = html.fromstring(raw)
+        matches = root.xpath('//*[@class = "article-section"]')
+        feedtitle = 'Miscellaneous'
+        if matches:
+            # Strip surrounding whitespace so that the same section always
+            # maps to the same feed
+            text = html.tostring(matches[0], method='text',
+                    encoding=unicode).strip()
+            if text:
+                feedtitle = text
+        return feedtitle, url
+
+
diff --git a/src/calibre/utils/libwmf.c b/src/calibre/utils/libwmf.c
new file mode 100644
index 0000000000..7ec0ef533d
--- /dev/null
+++ b/src/calibre/utils/libwmf.c
@@ -0,0 +1,114 @@
+#include <stdio.h>
+
+/* NOTE(review): the two header names below were lost from this patch and
+ * are inferred from the libwmf symbols used in this file - confirm. */
+#include <libwmf/api.h>
+#include <libwmf/svg.h>
+
+#define False 0
+#define True 1
+typedef int bool;
+
+/*
+ * Create a libwmf API handle with wmf_svg_function as its device function.
+ *
+ * On success *API holds the new handle and True is returned. On failure
+ * any partially created handle is destroyed, *API is set to NULL and
+ * False is returned.
+ */
+bool create_api(wmfAPI** API) {
+    wmfAPI_Options options;
+    wmf_error_t error;
+    unsigned long flags;
+
+    flags = WMF_OPT_FUNCTION;
+    flags |= WMF_OPT_IGNORE_NONFATAL;
+
+    options.function = wmf_svg_function;
+    error = wmf_api_create (API, flags, &options);
+    if (error != wmf_E_None) {
+        /* The handle may never have been allocated, so check it first */
+        if (*API) wmf_api_destroy (*API);
+        *API = NULL;
+        return False;
+    }
+    return True;
+}
+
+/*
+ * Open the file at path with wmf_file_open.
+ *
+ * Returns True on success. On failure API is destroyed and must not be
+ * used again; False is returned.
+ */
+bool load_image(wmfAPI *API, const char *path) {
+    wmf_error_t error;
+
+    error = wmf_file_open(API, path);
+    if (error != wmf_E_None) {
+        wmf_api_destroy (API);
+        return False;
+    }
+    return True;
+}
+
+/*
+ * Run wmf_scan on the opened image, passing bbox to receive the result.
+ *
+ * Returns True on success. On failure API is destroyed and must not be
+ * used again; False is returned.
+ */
+bool scan_image(wmfAPI *API, wmfD_Rect *bbox) {
+    wmf_error_t error;
+
+    error = wmf_scan (API, 0, bbox);
+    if (error != wmf_E_None) {
+        wmf_api_destroy (API);
+        return False;
+    }
+    return True;
+}
+
+/* Compute the width and height of bbox from its corner coordinates. */
+void get_image_size(wmfD_Rect *bbox, float *width, float *height) {
+    *width = bbox->BR.x - bbox->TL.x;
+    *height = bbox->BR.y - bbox->TL.y;
+}
+
+/*
+ * Print the size of the WMF file named on the command line.
+ *
+ * Returns 0 on success and 1 on a usage error or if the image could not
+ * be processed.
+ */
+int main(int argc, char **argv) {
+    wmfAPI *API = NULL;
+    wmfD_Rect bbox;
+    float width, height;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: wmf file\n");
+        return 1;
+    }
+    if (!create_api(&API)) {
+        fprintf(stderr, "Failed to create WMF API\n");
+        return 1;
+    }
+
+    /* load_image and scan_image destroy API themselves on failure */
+    if (!load_image(API, argv[1])) {
+        fprintf(stderr, "Failed to load image: %s\n", argv[1]);
+        return 1;
+    }
+    if (!scan_image(API, &bbox)) {
+        fprintf(stderr, "Failed to scan image: %s\n", argv[1]);
+        return 1;
+    }
+
+    wmf_file_close(API);
+    wmf_api_destroy(API);
+    get_image_size(&bbox, &width, &height);
+    printf("Image size: %f x %f\n", width, height);
+
+    return 0;
+}