From 4e1f851a445737575725e0c3cd7b0f34d0bb9fcb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 16 Apr 2009 14:39:17 -0700
Subject: [PATCH 1/3] Add a timeout to the PDF metadata writer as it hangs on
 some PDF files

---
 src/calibre/ebooks/metadata/pdf.py | 48 +++++++++++++++++++-------
 src/pyPdf/generic.py               |  4 +--
 src/pyPdf/pdf.py                   | 54 +++++++++++++++---------------
 3 files changed, 64 insertions(+), 42 deletions(-)
diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py
index 80cdc82070..54d52f0b58 100644
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -2,7 +2,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
 
-import sys, os, StringIO
+import sys, os, cStringIO
+from threading import Thread
 
 from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
 from pyPdf import PdfFileReader, PdfFileWriter
@@ -29,25 +30,62 @@ def get_metadata(stream):
         print >>sys.stderr, msg.encode('utf8')
     return mi
 
+class MetadataWriter(Thread):
+    '''
+    Daemon thread that serializes a PdfFileWriter into a buffer, so that the
+    caller can give up on PDF files that make the writer hang.
+
+    After the thread has finished, succeeded is True only if write() ran to
+    completion. It stays False when the write was aborted (PdfFileWriter
+    raises RuntimeError once its killed flag is set) or failed in any other
+    way.
+    '''
+
+    def __init__(self, out_pdf, buf):
+        self.out_pdf = out_pdf
+        self.buf = buf
+        self.succeeded = False
+        Thread.__init__(self)
+        self.daemon = True
+
+    def run(self):
+        try:
+            self.out_pdf.write(self.buf)
+        except RuntimeError:
+            # Aborted via out_pdf.killed, succeeded stays False
+            pass
+        else:
+            self.succeeded = True
+
 def set_metadata(stream, mi):
     stream.seek(0)
-    
+
     # Use a StringIO object for the pdf because we will want to over
     # write it later and if we are working on the stream directly it
     # could cause some issues.
-    raw = StringIO.StringIO(stream.read())
+    raw = cStringIO.StringIO(stream.read())
     orig_pdf = PdfFileReader(raw)
-    
+
     title = mi.title if mi.title else orig_pdf.documentInfo.title
     author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
-    
+
     out_pdf = PdfFileWriter(title=title, author=author)
+    out_str = cStringIO.StringIO()
+    writer = MetadataWriter(out_pdf, out_str)
     for page in orig_pdf.pages:
         out_pdf.addPage(page)
-        
-    out_str = StringIO.StringIO()
-    out_pdf.write(out_str)
-    
+
+    writer.start()
+    writer.join(10) # Wait 10 secs for writing to complete
+    # Tell a writer that is still running to abort and wait for the thread
+    # to exit. killed is always True from here on, so success has to be
+    # read from the thread instead.
+    out_pdf.killed = True
+    writer.join()
+    if not writer.succeeded:
+        print 'Failed to set metadata: took too long'
+        return
+
     stream.seek(0)
     stream.truncate()
     out_str.seek(0)
@@ -59,7 +81,7 @@ def option_parser():
     p.remove_option('--category')
     p.remove_option('--comment')
     return p
-            
+
 def main(args=sys.argv):
     #p = option_parser()
     #opts, args = p.parse_args(args)
@@ -67,14 +89,14 @@ def main(args=sys.argv):
         print >>sys.stderr, _('Usage: pdf-meta file.pdf')
         print >>sys.stderr, _('No filename specified.')
         return 1
-    
+
     stream = open(os.path.abspath(os.path.expanduser(args[1])), 'r+b')
     #mi = MetaInformation(opts.title, opts.authors)
     #if mi.title or mi.authors:
     #    set_metadata(stream, mi)
     print unicode(get_metadata(stream)).encode('utf-8')
-    
+
     return 0
 
 if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/src/pyPdf/generic.py b/src/pyPdf/generic.py
index fb75ef3b3f..5447ef5fbc 100644
--- a/src/pyPdf/generic.py
+++ b/src/pyPdf/generic.py
@@ -299,7 +299,7 @@ def readStringFromStream(stream):
             elif tok == "t":
                 tok = "\t"
             elif tok == "b":
-                tok == "\b"
+                tok = "\b"
             elif tok == "f":
                 tok = "\f"
             elif tok == "(":
@@ -673,7 +673,7 @@ class RectangleObject(ArrayObject):
 
     def getUpperLeft_x(self):
         return self.getLowerLeft_x()
-    
+
     def getUpperLeft_y(self):
         return self.getUpperRight_y()
 
diff --git a/src/pyPdf/pdf.py b/src/pyPdf/pdf.py
index 362879a39a..710d128ad0 100644
--- a/src/pyPdf/pdf.py
+++ b/src/pyPdf/pdf.py
@@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak"
 __author_email__ = "biziqe@mathieu.fenniak.net"
 
 import struct
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
+from cStringIO import StringIO
 
-import filters
-import utils
-import warnings
-from generic import *
+from generic import DictionaryObject, NameObject, NumberObject, \
+createStringObject, ArrayObject, ByteStringObject, StreamObject, \
+IndirectObject, utils, readObject, TextStringObject, BooleanObject, \
+RectangleObject, DecodedStreamObject
 from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
 
 
@@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
 # class (typically {@link #PdfFileReader PdfFileReader}).
 class PdfFileWriter(object):
     def __init__(self,title=u"Unknown",author=u"Unknown"):
+        self.killed = False
         self._header = "%PDF-1.3"
         self._objects = []  # array of indirect objects
 
@@ -162,7 +160,7 @@ class PdfFileWriter(object):
     # @param stream An object to write the file to.  The object must support
     # the write method, and the tell method, similar to a file object.
     def write(self, stream):
-        import struct, md5
+        import md5
 
         externalReferenceMap = {}
         self.stack = []
@@ -209,11 +207,13 @@ class PdfFileWriter(object):
         if hasattr(self, "_encrypt"):
             trailer[NameObject("/Encrypt")] = self._encrypt
         trailer.writeToStream(stream, None)
-        
+
         # eof
         stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
 
     def _sweepIndirectReferences(self, externMap, data):
+        if self.killed:
+            raise RuntimeError('Writer killed')
         if isinstance(data, DictionaryObject):
             for key, value in data.items():
                 origvalue = value
@@ -356,8 +356,8 @@ class PdfFileReader(object):
         return self.flattenedPages[pageNumber]
 
     ##
-    # Read-only property that accesses the 
-    # {@link #PdfFileReader.getNamedDestinations 
+    # Read-only property that accesses the
+    # {@link #PdfFileReader.getNamedDestinations
     # getNamedDestinations} function.
     # <p>
     # Stability: Added in v1.10, will exist for all future v1.x releases.
@@ -374,7 +374,7 @@ class PdfFileReader(object):
         if retval == None:
             retval = {}
             catalog = self.trailer["/Root"]
-            
+
             # get the name tree
             if catalog.has_key("/Dests"):
                 tree = catalog["/Dests"]
@@ -382,7 +382,7 @@ class PdfFileReader(object):
                 names = catalog['/Names']
                 if names.has_key("/Dests"):
                     tree = names['/Dests']
-        
+
         if tree == None:
             return retval
 
@@ -420,17 +420,17 @@ class PdfFileReader(object):
         if outlines == None:
             outlines = []
             catalog = self.trailer["/Root"]
-            
+
             # get the outline dictionary and named destinations
             if catalog.has_key("/Outlines"):
                 lines = catalog["/Outlines"]
                 if lines.has_key("/First"):
                     node = lines["/First"]
             self._namedDests = self.getNamedDestinations()
-            
+
         if node == None:
           return outlines
-          
+
         # see if there are any more outlines
         while 1:
             outline = self._buildOutline(node)
@@ -454,10 +454,10 @@ class PdfFileReader(object):
         page, typ = array[0:2]
         array = array[2:]
         return Destination(title, page, typ, *array)
-          
+
     def _buildOutline(self, node):
         dest, title, outline = None, None, None
-        
+
         if node.has_key("/A") and node.has_key("/Title"):
             # Action, section 8.5 (only type GoTo supported)
             title  = node["/Title"]
@@ -951,7 +951,7 @@ class PageObject(DictionaryObject):
 
     def _pushPopGS(contents, pdf):
         # adds a graphics state "push" and "pop" to the beginning and end
-        # of a content stream.  This isolates it from changes such as 
+        # of a content stream.  This isolates it from changes such as
         # transformation matricies.
         stream = ContentStream(contents, pdf)
         stream.operations.insert(0, [[], "q"])
@@ -1291,7 +1291,7 @@ class Destination(DictionaryObject):
         self[NameObject("/Title")] = title
         self[NameObject("/Page")] = page
         self[NameObject("/Type")] = typ
-        
+
         # from table 8.2 of the PDF 1.6 reference.
         if typ == "/XYZ":
             (self[NameObject("/Left")], self[NameObject("/Top")],
@@ -1307,7 +1307,7 @@ class Destination(DictionaryObject):
             pass
         else:
             raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
-          
+
     ##
     # Read-only property accessing the destination title.
     # @return A string.
@@ -1474,25 +1474,25 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
     # described in Algorithm 3.2.
     key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
     # 2. Initialize the MD5 hash function and pass the 32-byte padding string
-    # shown in step 1 of Algorithm 3.2 as input to this function. 
+    # shown in step 1 of Algorithm 3.2 as input to this function.
     import md5
     m = md5.new()
     m.update(_encryption_padding)
     # 3. Pass the first element of the file's file identifier array (the value
     # of the ID entry in the document's trailer dictionary; see Table 3.13 on
     # page 73) to the hash function and finish the hash.  (See implementation
-    # note 25 in Appendix H.) 
+    # note 25 in Appendix H.)
     m.update(id1_entry)
     md5_hash = m.digest()
     # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
-    # function with the encryption key from step 1. 
+    # function with the encryption key from step 1.
     val = utils.RC4_encrypt(key, md5_hash)
     # 5. Do the following 19 times: Take the output from the previous
     # invocation of the RC4 function and pass it as input to a new invocation
     # of the function; use an encryption key generated by taking each byte of
     # the original encryption key (obtained in step 2) and performing an XOR
     # operation between that byte and the single-byte value of the iteration
-    # counter (from 1 to 19). 
+    # counter (from 1 to 19).
     for i in range(1, 20):
         new_key = ''
         for l in range(len(key)):
@@ -1500,7 +1500,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
         val = utils.RC4_encrypt(new_key, val)
     # 6. Append 16 bytes of arbitrary padding to the output from the final
     # invocation of the RC4 function and store the 32-byte result as the value
-    # of the U entry in the encryption dictionary. 
+    # of the U entry in the encryption dictionary.
     # (implementator note: I don't know what "arbitrary padding" is supposed to
     # mean, so I have used null bytes.  This seems to match a few other
     # people's implementations)

From d6022d93fc7b6fd7a62da922c00a6ce7c1b4f05a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 16 Apr 2009 21:37:36 -0700
Subject: [PATCH 2/3] New recipe for the Seattle Times by Darko Miletic

---
 .../gui2/images/news/seattle_times.png        | Bin 0 -> 746 bytes
 src/calibre/web/feeds/recipes/__init__.py     |   1 +
 .../web/feeds/recipes/recipe_seattle_times.py |  50 ++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 src/calibre/gui2/images/news/seattle_times.png
 create mode 100644 src/calibre/web/feeds/recipes/recipe_seattle_times.py

diff --git a/src/calibre/gui2/images/news/seattle_times.png b/src/calibre/gui2/images/news/seattle_times.png
new file mode 100644
index 0000000000000000000000000000000000000000..b885684992a4b40df7a7eeabbe39de2caff2dda9
GIT binary patch
literal 746
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?^}TT^vI!PA{Eom?4}f(KdgJ@A=n`Z8ugY
z@qGz0UgDcP^}znAL9Y_Fcw0*x^jn&JU~MZOA8Vn|s}~su{bE;@8ElD_;S=@pIXPj;
z_B)*w;_Rx&jHg<}6mJVy`hwfu{y+Eq-}~+#|KG}BG<kFGKgN7+f9Hi~p5$29EbHIA
z@pMUhwSVZ3&0m{$#V-B&S335<#z!Ge%2O}=)4Cx0PSvtVv|%3K!V^NLe`c2WvL4tX
z<8wPzLF(iEZO$u`0$1J8*yN(8t+B82EjPoH_i^dDmk(@-5qqA$C|hgUTc!osf0qau
zE<5JVm|<c&E$mvo`UO+biG7Q+|E^gh77-QczT~mQ(ORJLjo&UQOc$Lotvch;g^VhO
ze>^T$d<Ps%4i(kSiU|u?bwlU*a?Ma(mns3*X&RxrUZUC>d@N?$$_;hZ6_y`!uiR3=
z9vf`f^?Sy)!}BkcZdtz7S2-X&MRP~L)jW*_CxnWPlQlLSX;#<oTY541?+e4Jx{GTT
zIq*B^Y0nVwWKeUN)H^qJid|=8M$68bhVN`^+A=MlY-&1p!CLLE_@nT5>h0~3`g;~%
z{L<9_<oi3I&#t%WF)f(eZKT*{kUQOf{{9bF|2|N!*tePcK-|%!+Zx5c+a7#;wYp*V
zl9fw@q^d=`br<_Aeko{`qc~;eoZR{HG7Hsp=4SC*S!7h!tl9g$Jz{&d%odwl1*?iL
zd|Gka)SmIreXIXWAM|bn-8p#T3oxyymbgZgBp0P7mZa*Y00{;o0|QH40~1|Cvk)Ug
zD`QhD6Cl^z%D{jlD|t6eLvDUbW?Cht24gEjpi(1<hPT0!Er1#rJYD@<);T3K0RX+C
BHE{p{

literal 0
HcmV?d00001

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 9e2ef1969d..405fd265a7 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -40,6 +40,7 @@ recipe_modules = ['recipe_' + r for r in (
            'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
            'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
            'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
+           'seattle_times',
           )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_seattle_times.py b/src/calibre/web/feeds/recipes/recipe_seattle_times.py
new file mode 100644
index 0000000000..695a82b5b4
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_seattle_times.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+seattletimes.nwsource.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SeattleTimes(BasicNewsRecipe):
+    '''
+    News recipe for The Seattle Times. Uses a single RSS feed; print_version()
+    maps an article URL to the site's PrintStory.pl page.
+    '''
+    title                 = 'The Seattle Times'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Seattle and USA'
+    publisher             = 'The Seattle Times'
+    category              = 'news, politics, USA'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    language              = _('English')
+
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+    feeds              = [(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')]
+
+    remove_tags        = [
+                             dict(name=['object','link','script'])
+                            ,dict(name='p', attrs={'class':'permission'})
+                         ]
+
+    def print_version(self, url):
+        '''
+        Return the PrintStory.pl URL for the article at url.
+
+        The document_id is the last path component of the part of url that
+        precedes its final underscore (empty if url has no underscore).
+        '''
+        start_url, sep, rest_url = url.rpartition('_')
+        rurl, rsep, article_id = start_url.rpartition('/')
+        return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
+
+    def preprocess_html(self, soup):
+        '''
+        Insert the Content-Language meta markup at the start of soup.head and
+        delete the style attribute of every element that has one. Returns soup.
+        '''
+        mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+

From 0cb7a49d112d5194301497f0f78e61b42975bd3b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 17 Apr 2009 11:37:52 -0700
Subject: [PATCH 3/3] IGN:Updated Der standard and fix series box being too
 long initially on OS X

---
 src/calibre/gui2/dialogs/metadata_single.py   |   8 +---
 src/calibre/gui2/images/news/der_standard.png | Bin 0 -> 509 bytes
 .../web/feeds/recipes/recipe_der_standard.py  |  44 ++++++++++++++----
 3 files changed, 36 insertions(+), 16 deletions(-)
 create mode 100644 src/calibre/gui2/images/news/der_standard.png

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index c48c7c3640..4a74c87097 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -319,6 +319,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.cover_changed = True
 
     def initialize_series(self):
+        self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
         all_series = self.db.all_series()
         all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
         series_id = self.db.series_id(self.row)
@@ -335,13 +336,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             self.series.setCurrentIndex(idx)
             self.enable_series_index()
 
-        pl = self.series.parentWidget().layout()
-        for i in range(pl.count()):
-            l =  pl.itemAt(i).layout()
-            if l:
-                l.invalidate()
-                l.activate()
-
     def initialize_series_and_publisher(self):
         self.initialize_series()
         all_publishers = self.db.all_publishers()
diff --git a/src/calibre/gui2/images/news/der_standard.png b/src/calibre/gui2/images/news/der_standard.png
new file mode 100644
index 0000000000000000000000000000000000000000..4d750fe5a8583839440916aae93cdb7bb2222f1e
GIT binary patch
literal 509
zcmeAS@N?(olHy`uVBq!ia0vp^1|ZDA1|-9oezpTCwj^(N7l!{JxM1({$v_d#0*}aI
zAngIhZYQ(tK!Rljj_E)ete@fByPj|c21Yec7srr{dv9l4<U3@*;oAN2f4!vqvn^LQ
zIV}|9d45ty_HkSC+ZH7j!Hwmz3=UDJn{M~DSMfId5-NJP<{U>Zm$Q`4`|5=^vZrxz
z8eZ>ND&RhK+v$Bp6<_+@J*DRcDYB|rZ9L|o>9|dKiu2-Nt&}5M{Px72__J|->a%in
zec>6$k{K?>Jl!o|q@`K5WZRPn^?qT2hqea-)aN<Pd2GV9o_k@*C%Z<)iA-PaA3b?B
zC|5)5wMZsY>>{>`vzsirXJ!U>eC00bvTFR#$>6;u(RStv{wqZ<?yaqs|6v>d@sV~&
zt=pP(&A$^D8Lavo@}h_-`k&l&p}2+5ykq7XM$MIR_EqTddnkH}VfRTeDy)@S+IJz5
zDaN#J5?AEmi^obC@*b-{6s=-NzvQ02-DlP|VDPDyxJHyD7o{ear0S*s2?iqr14~^4
z6J0~|5CcmqQ$s6LQ(XgdD+7b?)=w>A8glbfGSe#2G?;}LT3DHwS(#WsG@J;T-vQLX
N;OXk;vd$@?2>=Kjy)^&;

literal 0
HcmV?d00001

diff --git a/src/calibre/web/feeds/recipes/recipe_der_standard.py b/src/calibre/web/feeds/recipes/recipe_der_standard.py
index eec4c4e74d..c053d74cfb 100644
--- a/src/calibre/web/feeds/recipes/recipe_der_standard.py
+++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py
@@ -1,14 +1,37 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class DerStandardRecipe(BasicNewsRecipe):
-    title          = u'derStandard'
-    __author__  = 'Gerhard Aigner'
-
+    title = u'derStandard'
+    __author__ = 'Gerhard Aigner'
+    description = u'Nachrichten aus Österreich' 
+    publisher ='derStandard.at'
+    category = 'news, politics, nachrichten, Austria'
+    use_embedded_content = False
+    remove_empty_feeds = True
+    lang = 'de-AT'
+    no_stylesheets = True
+    encoding = 'utf-8'
+    language = _('German')
+    recursions = 0
     oldest_article = 1
     max_articles_per_feed = 100
+    
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    
     feeds          = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
         (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
         (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
@@ -20,17 +43,13 @@ class DerStandardRecipe(BasicNewsRecipe):
         (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
         (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
         (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
-
-    encoding = 'utf-8'
-    language = _('German')
-    recursions = 0
     remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
         dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
     preprocess_regexps = [
-        (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
+        (re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
         (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
     ]
-
+    
     def print_version(self, url):
         return url.replace('?id=', 'txt/?id=')
 
@@ -40,3 +59,15 @@ class DerStandardRecipe(BasicNewsRecipe):
         if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
             return None
         return article.link
+
+    def preprocess_html(self, soup):
+        '''
+        Set the xml:lang and lang attributes of soup.html to self.lang and
+        insert a Content-Type meta string declaring self.encoding at the
+        start of soup.head. Returns soup.
+        '''
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
+        soup.head.insert(0,mtag)
+        return soup  
\ No newline at end of file