New recipe for The Economist that does not require a subscription

2025-06-23 15:30:45 -04:00 · 2009-11-29 14:34:42 -07:00 · 2009-11-29 14:34:42 -07:00 · c1c030a386
commit c1c030a386
parent 152738b691
2 changed files with 151 additions and 0 deletions
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@ -0,0 +1,68 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import time
+from datetime import datetime
+from lxml import html
+
+class Economist(BasicNewsRecipe):
+
+    title = 'The Economist (free)'
+    language = 'en'
+
+    __author__ = "Kovid Goyal"
+    description = ('Global news and current affairs from a European perspective.'
+            ' Much slower than the subscription based version.')
+
+    oldest_article = 6.5
+    cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
+    remove_tags = [dict(name=['script', 'noscript', 'title'])]
+    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
+
+    def parse_index(self):
+        from calibre.web.feeds.feedparser import parse
+        raw = self.index_to_soup(
+                'http://feeds.feedburner.com/economist/full_print_edition',
+                raw=True)
+        entries = parse(raw).entries
+        feeds = {}
+        for i, item in enumerate(entries):
+            from calibre.web.feeds import Article
+            published   = time.gmtime(item.get('timestamp', time.time()))
+            title       = item.get('title', _('Untitled article'))
+            link        = item.get('link', None)
+            description = item.get('description', '')
+            author      = item.get('author', '')
+
+            try:
+                feedtitle, link = self.process_eco_feed_article(link)
+                self.log('Found print version for article:', title)
+            except:
+                self.log.exception('Failed to process article:', title)
+                continue
+
+            a = Article(i, title, link, author, description, published, '')
+            delta = datetime.utcnow() - a.utctime
+            if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article:
+                self.log.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, a.localtime.strftime('%a, %d %b, %Y %H:%M'), title))
+                continue
+
+
+            article = dict(title=a.title, description=a.text_summary,
+                date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url)
+            if feedtitle not in feeds:
+                feeds[feedtitle] = []
+            feeds[feedtitle].append(article)
+        return [(t, a) for t, a in feeds.items()]
+
+    def process_eco_feed_article(self, url):
+        ret = self.browser.open(url)
+        raw = ret.read()
+        url = self.browser.geturl().replace('displaystory', 'PrinterFriendly').strip()
+        root = html.fromstring(raw)
+        matches = root.xpath('//*[@class = "article-section"]')
+        feedtitle = 'Miscellaneous'
+        if matches:
+            feedtitle = html.tostring(matches[0], method='text',
+                    encoding=unicode)
+        return feedtitle, url
+
+
--- a/src/calibre/utils/libwmf.c
+++ b/src/calibre/utils/libwmf.c
@ -0,0 +1,83 @@
+#include <libwmf/api.h>
+#include <libwmf/svg.h>
+
+/* Minimal boolean type for this file: bool is an int, False is 0 and True is 1. */
+#define False 0
+#define True 1
+typedef int bool;
+
+/*
+ * Create a libwmf API handle configured with wmf_svg_function as its
+ * device function and with non-fatal errors ignored.
+ *
+ * API: out parameter that receives the handle from wmf_api_create().
+ *
+ * Returns True on success. On failure returns False; a partially created
+ * handle, if any, is destroyed and *API is reset to NULL.
+ */
+bool create_api(wmfAPI** API) {
+    wmfAPI_Options options;
+    wmf_error_t error;
+    unsigned long flags;
+
+    flags = WMF_OPT_FUNCTION;
+    flags |= WMF_OPT_IGNORE_NONFATAL;
+
+    options.function = wmf_svg_function;
+    error = wmf_api_create (API, flags, &options);
+    if (error != wmf_E_None) {
+        /* Creation can fail before a handle exists: only destroy a real one. */
+        if (*API) {
+            wmf_api_destroy (*API);
+            *API = NULL;
+        }
+        return False;
+    }
+    return True;
+}
+
+/*
+ * Open the WMF file at path with wmf_file_open().
+ *
+ * Returns True on success. On failure the API handle is destroyed and
+ * False is returned, so the caller must not use API afterwards.
+ */
+bool load_image(wmfAPI *API, const char *path) {
+    if (wmf_file_open(API, path) == wmf_E_None)
+        return True;
+
+    wmf_api_destroy (API);
+    return False;
+}
+
+/*
+ * Run wmf_scan() on the loaded image, storing its bounding box in bbox.
+ *
+ * Returns True on success. On failure the API handle is destroyed and
+ * False is returned, so the caller must not use API afterwards.
+ */
+bool scan_image(wmfAPI *API, wmfD_Rect *bbox) {
+    if (wmf_scan (API, 0, bbox) == wmf_E_None)
+        return True;
+
+    wmf_api_destroy (API);
+    return False;
+}
+
+/*
+ * Compute the extent of a bounding box.
+ *
+ * bbox:   rectangle given by its TL and BR corners.
+ * width:  receives BR.x - TL.x.
+ * height: receives BR.y - TL.y.
+ */
+void get_image_size(wmfD_Rect *bbox, float *width, float *height) {
+    const wmfD_Coord *top_left = &bbox->TL;
+    const wmfD_Coord *bottom_right = &bbox->BR;
+
+    *width = bottom_right->x - top_left->x;
+    *height = bottom_right->y - top_left->y;
+}
+
+/*
+ * Print the size of the WMF file named by the single command line argument.
+ *
+ * Returns 0 on success and 1 on wrong usage or when creating the API,
+ * loading the file or scanning it fails.
+ */
+int main(int argc, char **argv) {
+    wmfAPI *API = NULL;
+    wmfD_Rect bbox;
+    float width, height;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: wmf file\n");
+        return 1;
+    }
+    if (!create_api(&API)) {
+        fprintf(stderr, "Failed to create WMF API\n");
+        return 1;
+    }
+
+    /* load_image() and scan_image() destroy API themselves when they fail. */
+    if (!load_image(API, argv[1])) {
+        fprintf(stderr, "Failed to load image: %s\n", argv[1]);
+        return 1;
+    }
+    if (!scan_image(API, &bbox)) {
+        fprintf(stderr, "Failed to scan image: %s\n", argv[1]);
+        return 1;
+    }
+
+    wmf_file_close(API);
+    get_image_size(&bbox, &width, &height);
+    printf("Image size: %f x %f\n", width, height);
+
+    /* Release the API handle on the success path as well. */
+    wmf_api_destroy(API);
+    return 0;
+}