From 65a3b30a8387a36649e1143ad43e5e9a3a292dbe Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 18 Apr 2009 01:36:04 -0700 Subject: [PATCH] IGN:Updated diepresse recipe and fix pdf metadata reader holding files open --- src/calibre/__init__.py | 17 +++++++ src/calibre/ebooks/metadata/pdf.py | 27 ++++++----- src/calibre/gui2/images/news/diepresse.png | Bin 0 -> 637 bytes .../web/feeds/recipes/recipe_diepresse.py | 44 +++++++++++++++--- 4 files changed, 70 insertions(+), 18 deletions(-) create mode 100644 src/calibre/gui2/images/news/diepresse.png diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index cb53dff24b..a0dc41009a 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -244,6 +244,23 @@ class CurrentDir(object): os.chdir(self.cwd) +class FileWrapper(object): + ''' + Used primarily with pyPdf to ensure the stream is properly closed. + ''' + + def __init__(self, stream): + for x in ('read', 'seek', 'tell'): + setattr(self, x, getattr(stream, x)) + + def __exit__(self, *args): + for x in ('read', 'seek', 'tell'): + setattr(self, x, None) + + def __enter__(self): + return self + + def detect_ncpus(): """Detects the number of effective CPUs in the system""" try: diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 54d52f0b58..769f169984 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -1,3 +1,4 @@ +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' @@ -5,6 +6,7 @@ __copyright__ = '2008, Kovid Goyal ' import sys, os, cStringIO from threading import Thread +from calibre import FileWrapper from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser from pyPdf import PdfFileReader, PdfFileWriter @@ -13,18 +15,19 @@ def get_metadata(stream): mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) try: - info = PdfFileReader(stream).getDocumentInfo() - if info.title: - mi.title = info.title - if info.author: - src = info.author.split('&') - authors = [] - for au in src: - authors += au.split(',') - mi.authors = authors - mi.author = info.author - if info.subject: - mi.category = info.subject + with FileWrapper(stream) as stream: + info = PdfFileReader(stream).getDocumentInfo() + if info.title: + mi.title = info.title + if info.author: + src = info.author.split('&') + authors = [] + for au in src: + authors += au.split(',') + mi.authors = authors + mi.author = info.author + if info.subject: + mi.category = info.subject except Exception, err: msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') diff --git a/src/calibre/gui2/images/news/diepresse.png b/src/calibre/gui2/images/news/diepresse.png new file mode 100644 index 0000000000000000000000000000000000000000..41bbdcbf1ba5f05c892306267919047b70244676 GIT binary patch literal 637 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#Np%x;TbdoSr+`I%A5XNb7!&nj=m*Z;zYRFG$%S)n5gf}R95-+tR1@n?5M0zMWG#Bo$&p)GNJL)apaP4a<54Gp-6BnUIcKDt z+xa>A%)9lk*C|x5PTcYNZt=M@JZtlpWIelO(|qO3&B+&L?T+S+`aWMb{IG1)=F+|C zPE)mu_+%Wn6|y8QBnOo;iZGV4eu08XCl`pkDUY%ON z_9x=E=ZzEV7&qPAo2~vhcyrCaT}`+C{f^>TJ$J+F31ZXj4gP;I4RQ*SRCxG;S>uwI z^YO)pJ!X64`RfU6H-5!<;+U65>Y={eXwQ>34!x2olP$V#s>Zq0SLNf&pS}!HlZAWa z<+bnK{`rS@g{Xz zH88g_Fo@udvV>{K%}>cpt3=aa7Gh{&WnyV%Xado&O@4tWPy>UftDnm{r-UW|_kaci literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/recipe_diepresse.py b/src/calibre/web/feeds/recipes/recipe_diepresse.py index c806575356..362a08fb3a 100644 --- a/src/calibre/web/feeds/recipes/recipe_diepresse.py +++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py @@ -1,18 +1,42 @@ -import re +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2009, Gerhard Aigner ' + +''' http://www.diepresse.at - Austrian Newspaper ''' + +import re from calibre.web.feeds.news import BasicNewsRecipe class DiePresseRecipe(BasicNewsRecipe): - title = u'diePresse' + title = u'diePresse' + __author__ = 'Gerhard Aigner' + description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' + publisher ='DiePresse.com' + category = 'news, politics, nachrichten, Austria' + use_embedded_content = False + remove_empty_feeds = True + lang = 'de-AT' + no_stylesheets = True + encoding = 'ISO-8859-1' + language = _('German') + recursions = 0 oldest_article = 1 max_articles_per_feed = 100 - recursions = 0 - language = _('German') - __author__ = 'Gerhard Aigner' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [ (re.compile(r'Textversion', re.DOTALL), lambda match: ''), ] + remove_tags = [dict(name='hr'), dict(name='br'), dict(name='small'), @@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe): dict(name='h1', attrs={'class':'titel'}), dict(name='a', attrs={'class':'print'}), dict(name='div', attrs={'class':'hline'})] + feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), (u'Europa', u'http://diepresse.com/rss/EU'), @@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe): (u'Kultur', u'http://diepresse.com/rss/Kultur'), (u'Leben', u'http://diepresse.com/rss/Leben'), (u'Tech', u'http://diepresse.com/rss/Tech'), - (u'Science', u'http://diepresse.com/rss/Science'), + (u'Wissenschaft', u'http://diepresse.com/rss/Science'), (u'Bildung', u'http://diepresse.com/rss/Bildung'), (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), (u'Recht', u'http://diepresse.com/rss/Recht'), @@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe): def print_version(self, url): return url.replace('home','text/home') + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '' + soup.head.insert(0,mtag) + return soup \ No newline at end of file