From 65a3b30a8387a36649e1143ad43e5e9a3a292dbe Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 18 Apr 2009 01:36:04 -0700
Subject: [PATCH] IGN:Updated diepresse recipe and fix pdf metadata reader
 holding files open

---
 src/calibre/__init__.py                       |  17 +++++++
 src/calibre/ebooks/metadata/pdf.py            |  27 ++++++-----
 src/calibre/gui2/images/news/diepresse.png    | Bin 0 -> 637 bytes
 .../web/feeds/recipes/recipe_diepresse.py     |  44 +++++++++++++++---
 4 files changed, 70 insertions(+), 18 deletions(-)
 create mode 100644 src/calibre/gui2/images/news/diepresse.png
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index cb53dff24b..a0dc41009a 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -244,6 +244,23 @@ class CurrentDir(object):
         os.chdir(self.cwd)
 
 
+class FileWrapper(object):
+    '''
+    Proxy exposing only a stream's read/seek/tell (primarily for pyPdf).
+    Leaving the ``with`` block drops those references; nothing is closed.
+    '''
+
+    def __init__(self, stream):
+        self.read, self.seek, self.tell = stream.read, stream.seek, stream.tell
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        # Reset to None so this wrapper no longer references the stream.
+        self.read = self.seek = self.tell = None
+
+
 def detect_ncpus():
     """Detects the number of effective CPUs in the system"""
     try:
diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py
index 54d52f0b58..769f169984 100644
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
@@ -5,6 +6,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import sys, os, cStringIO
 from threading import Thread
 
+from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
 from pyPdf import PdfFileReader, PdfFileWriter
 
@@ -13,18 +15,19 @@ def get_metadata(stream):
     mi = MetaInformation(_('Unknown'), [_('Unknown')])
     stream.seek(0)
     try:
-        info = PdfFileReader(stream).getDocumentInfo()
-        if info.title:
-            mi.title = info.title
-        if info.author:
-            src = info.author.split('&')
-            authors = []
-            for au in src:
-                authors += au.split(',')
-            mi.authors = authors
-            mi.author = info.author
-        if info.subject:
-            mi.category = info.subject
+        with FileWrapper(stream) as stream:
+            info = PdfFileReader(stream).getDocumentInfo()
+            if info.title:
+                mi.title = info.title
+            if info.author:
+                src = info.author.split('&')
+                authors = []
+                for au in src:
+                    authors += au.split(',')
+                mi.authors = authors
+                mi.author = info.author
+            if info.subject:
+                mi.category = info.subject
     except Exception, err:
         msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
         print >>sys.stderr, msg.encode('utf8')
diff --git a/src/calibre/gui2/images/news/diepresse.png b/src/calibre/gui2/images/news/diepresse.png
new file mode 100644
index 0000000000000000000000000000000000000000..41bbdcbf1ba5f05c892306267919047b70244676
GIT binary patch
literal 637
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#Np%x;TbdoSr+`I%A5XNb7!&nj=m*Z;z<m
z>YRFG$%S)n5gf}R95-+tR1@n?5M0zMWG#Bo$&p)GNJL)apaP4a<54Gp-6BnUIcKDt
z+xa>A%)9lk*C|x5PTcYNZt=M@JZtlpWIelO(|qO3&B+&L?T+S+`aWMb{IG1)=F+|C
zPE)mu_+%Wn6|y8Q<cPaBk2PV({4Ly*dELIhdwFa3<_$*<b(^0!=J)mAw7Q0Fb2+Ah
z*9Vn)SDyCqiGN!1SY!G*(~UyY{_&m=blI`@+=+v)UP*nI(=GgPz)nYT_q+AFAt50X
zJ2{jVF%%v1yOEZXVx4ba=V<oJ`Cz^SOWA~CwHT>BnOo;iZGV4eu08XCl`pkDUY%ON
z_9x=E=ZzEV7&qPAo2~vhcyrCaT}`+C{f^>TJ$J+F31ZXj4gP;I4RQ*SRCxG;S>uwI
z^YO)pJ!X64`RfU6H-5!<;+U65>Y={eXwQ>34!x2olP$V#s>Zq0SLNf&pS}!HlZAWa
z<+bnK{`rS@g{Xz<q*F^SZd5Cpm@|`Ox-{41;&)Z%rVsv3zPJB_z3G;H6IUHye~-81
z$B~VP|1dc=)&|)vOUMI8pK6I~L`iZ{YGO&MZVHfKFfuT()HN{CHM9sZFtjo@wlX!>
zH88g_Fo@udvV>{K%}>cpt3=aa7Gh{&WnyV%Xado&O@4tWPy>UftDnm{r-UW|_kaci

literal 0
HcmV?d00001

diff --git a/src/calibre/web/feeds/recipes/recipe_diepresse.py b/src/calibre/web/feeds/recipes/recipe_diepresse.py
index c806575356..362a08fb3a 100644
--- a/src/calibre/web/feeds/recipes/recipe_diepresse.py
+++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py
@@ -1,18 +1,42 @@
-import re
+# -*- coding: utf-8 -*-
 
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
+
+''' http://www.diepresse.at - Austrian Newspaper '''
+
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class DiePresseRecipe(BasicNewsRecipe):
-    title          = u'diePresse'
+    title = u'diePresse'
+    __author__ = 'Gerhard Aigner'
+    description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' 
+    publisher ='DiePresse.com'
+    category = 'news, politics, nachrichten, Austria'
+    use_embedded_content = False
+    remove_empty_feeds = True
+    lang = 'de-AT'
+    no_stylesheets = True
+    encoding = 'ISO-8859-1'
+    language = _('German')
+    recursions = 0
     oldest_article = 1
     max_articles_per_feed = 100
-    recursions = 0
-    language = _('German')
-    __author__ = 'Gerhard Aigner'
+  
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
 
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+  
     preprocess_regexps = [
 	(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
     ]
+    
     remove_tags = [dict(name='hr'),
 	dict(name='br'),
 	dict(name='small'),
@@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	dict(name='h1', attrs={'class':'titel'}),
 	dict(name='a', attrs={'class':'print'}),
 	dict(name='div', attrs={'class':'hline'})]
+	
     feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
 	(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
 	(u'Europa', u'http://diepresse.com/rss/EU'),
@@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	(u'Kultur', u'http://diepresse.com/rss/Kultur'),
 	(u'Leben', u'http://diepresse.com/rss/Leben'),
 	(u'Tech', u'http://diepresse.com/rss/Tech'),
-	(u'Science', u'http://diepresse.com/rss/Science'),
+	(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
 	(u'Bildung', u'http://diepresse.com/rss/Bildung'),
 	(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
 	(u'Recht', u'http://diepresse.com/rss/Recht'),
@@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe):
 
     def print_version(self, url):
         return url.replace('home','text/home')
+
+    def preprocess_html(self, soup):  # tag the parsed page, then return it
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)  # index 0: the meta tag becomes first in <head>
+        return soup
\ No newline at end of file