mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-02 18:47:01 -05:00 
			
		
		
		
	New recipe for The Economist that does not require a subscription
This commit is contained in:
		
							parent
							
								
									152738b691
								
							
						
					
					
						commit
						c1c030a386
					
				
							
								
								
									
										68
									
								
								resources/recipes/economist_free.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								resources/recipes/economist_free.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,68 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from lxml import html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Economist(BasicNewsRecipe):
					    """Recipe for The Economist print edition that needs no subscription.

					    The article list comes from the public "full print edition" feed. Every
					    entry is fetched once to find its section name and to derive the URL of
					    its printer friendly page, which is why this recipe is slow.
					    """

					    title = 'The Economist (free)'
					    language = 'en'

					    __author__ = "Kovid Goyal"
					    description = ('Global news and current affairs from a European perspective.'
					            ' Much slower than the subscription based version.')

					    # Maximum article age, in days, accepted by parse_index().
					    oldest_article = 6.5
					    cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
					    remove_tags = [dict(name=['script', 'noscript', 'title'])]
					    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')

					    def parse_index(self):
					        """Build the list of sections and their articles from the feed.

					        Returns a list of ``(section title, articles)`` tuples in the order
					        in which the sections are first seen in the feed. Each article is a
					        dict with the keys ``title``, ``description``, ``date``, ``author``
					        and ``url``.

					        Entries that have no link, that cannot be fetched, or that are older
					        than ``oldest_article`` days are skipped.
					        """
					        # Imported once here instead of on every loop iteration.
					        from calibre.web.feeds import Article
					        from calibre.web.feeds.feedparser import parse

					        raw = self.index_to_soup(
					                'http://feeds.feedburner.com/economist/full_print_edition',
					                raw=True)
					        entries = parse(raw).entries

					        feeds = {}
					        # Section titles in first-seen order, so the result does not
					        # depend on dict ordering.
					        section_order = []

					        for i, item in enumerate(entries):
					            # NOTE(review): if feed entries carry no 'timestamp' key, every
					            # article is stamped with the current time and the age check
					            # below never triggers -- confirm against the feed parser.
					            published   = time.gmtime(item.get('timestamp', time.time()))
					            title       = item.get('title', _('Untitled article'))
					            link        = item.get('link', None)
					            description = item.get('description', '')
					            author      = item.get('author', '')

					            if not link:
					                # Nothing to download; skip without a traceback.
					                self.log('Skipping article without a link:', title)
					                continue

					            try:
					                feedtitle, link = self.process_eco_feed_article(link)
					            except Exception:
					                # Best effort: one broken article must not abort the whole
					                # download, but KeyboardInterrupt/SystemExit still propagate.
					                self.log.exception('Failed to process article:', title)
					                continue
					            self.log('Found print version for article:', title)

					            a = Article(i, title, link, author, description, published, '')
					            delta = datetime.utcnow() - a.utctime
					            if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article:
					                # Report the section name here, not the article title twice.
					                self.log.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, a.localtime.strftime('%a, %d %b, %Y %H:%M'), feedtitle))
					                continue

					            article = dict(title=a.title, description=a.text_summary,
					                date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url)
					            if feedtitle not in feeds:
					                feeds[feedtitle] = []
					                section_order.append(feedtitle)
					            feeds[feedtitle].append(article)

					        return [(t, feeds[t]) for t in section_order]

					    def process_eco_feed_article(self, url):
					        """Fetch an article and return ``(section title, print URL)``.

					        The article at ``url`` is downloaded with ``self.browser``. The print
					        URL is the browser's final URL with ``displaystory`` replaced by
					        ``PrinterFriendly``. The section title is the text of the first
					        element whose class is ``article-section``; it falls back to
					        ``'Miscellaneous'`` when there is no such element or its text is
					        empty.

					        Errors raised while opening or parsing the page propagate to the
					        caller.
					        """
					        ret = self.browser.open(url)
					        raw = ret.read()
					        url = self.browser.geturl().replace('displaystory', 'PrinterFriendly').strip()
					        root = html.fromstring(raw)
					        matches = root.xpath('//*[@class = "article-section"]')
					        feedtitle = 'Miscellaneous'
					        if matches:
					            # Strip surrounding whitespace so the same section is not split
					            # into several feeds that differ only in whitespace.
					            section = html.tostring(matches[0], method='text',
					                    encoding=unicode).strip()
					            if section:
					                feedtitle = section
					        return feedtitle, url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										83
									
								
								src/calibre/utils/libwmf.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								src/calibre/utils/libwmf.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,83 @@
 | 
				
			|||||||
 | 
					#include <libwmf/api.h>
 | 
				
			||||||
 | 
					#include <libwmf/svg.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define False 0
 | 
				
			||||||
 | 
					#define True 1
 | 
				
			||||||
 | 
					typedef int bool;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Create a libwmf API handle that uses the SVG device functions.
					 *
					 * API: out parameter that receives the handle. On failure it is left
					 *      holding NULL whenever this function had to destroy a handle.
					 *
					 * Returns True on success, False if API is NULL or creation failed.
					 */
					bool create_api(wmfAPI** API) {
					    wmfAPI_Options options;
					    wmf_error_t error;
					    unsigned long flags;

					    if (API == NULL) {
					        return False;
					    }

					    flags = WMF_OPT_FUNCTION;
					    flags |= WMF_OPT_IGNORE_NONFATAL;

					    options.function = wmf_svg_function;
					    error = wmf_api_create (API, flags, &options);
					    if (error != wmf_E_None) {
					        /* Creation may fail before a handle exists; only destroy a real one. */
					        if (*API != NULL) {
					            wmf_api_destroy (*API);
					            *API = NULL;
					        }
					        return False;
					    }
					    return True;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Open the WMF file at path on the given API handle.
					 *
					 * Returns True on success. On failure the handle is destroyed and False
					 * is returned, so the caller must not use API afterwards.
					 */
					bool load_image(wmfAPI *API, const char *path) {
					    if (wmf_file_open(API, path) == wmf_E_None) {
					        return True;
					    }

					    wmf_api_destroy (API);
					    return False;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Scan the image loaded on API and store its bounding box in bbox.
					 *
					 * Returns True on success. On failure the handle is destroyed and False
					 * is returned, so the caller must not use API afterwards.
					 */
					bool scan_image(wmfAPI *API, wmfD_Rect *bbox) {
					    if (wmf_scan (API, 0, bbox) == wmf_E_None) {
					        return True;
					    }

					    wmf_api_destroy (API);
					    return False;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Compute the extent of a bounding box.
					 *
					 * Writes BR.x - TL.x to *width and BR.y - TL.y to *height.
					 */
					void get_image_size(wmfD_Rect *bbox, float *width, float *height) {
					    float extent;

					    /* Horizontal extent first, then vertical, in the original order. */
					    extent = bbox->BR.x - bbox->TL.x;
					    *width = extent;

					    extent = bbox->BR.y - bbox->TL.y;
					    *height = extent;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char **argv) {
 | 
				
			||||||
 | 
					    wmfAPI *API = NULL;
 | 
				
			||||||
 | 
					    wmfD_Rect bbox;
 | 
				
			||||||
 | 
					    wmf_svg_t *ddata;
 | 
				
			||||||
 | 
					    float width, height;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (argc != 2) {
 | 
				
			||||||
 | 
					        fprintf(stderr, "Usage: wmf file\n");
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (!create_api(&API)) {
 | 
				
			||||||
 | 
					        fprintf(stderr, "Failed to create WMF API\n");
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ddata = WMF_SVG_GetData(API);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!load_image(API, argv[1])) {
 | 
				
			||||||
 | 
					        fprintf(stderr, "Failed to load image: %s\n", argv[1]);
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (!scan_image(API, &bbox)) {
 | 
				
			||||||
 | 
					        fprintf(stderr, "Failed to scan image: %s\n", argv[1]);
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    wmf_file_close(API);
 | 
				
			||||||
 | 
					    get_image_size(&bbox, &width, &height);
 | 
				
			||||||
 | 
					    printf("Image size: %f x %f\n", width, height);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user