Fix #3783 (Die Zeit - Recipe doesn't include full articles)

This commit is contained in:
Kovid Goyal 2009-10-18 11:58:03 -06:00
parent ab4401a4fe
commit 1a45dc51a5
2 changed files with 33 additions and 9 deletions

View File

@ -15,7 +15,7 @@ class ZeitDe(BasicNewsRecipe):
language = 'de' language = 'de'
lang = 'de_DE' lang = 'de_DE'
__author__ = 'Martin Pitt and Suajta Raman' __author__ = 'Martin Pitt and Sujata Raman'
use_embedded_content = False use_embedded_content = False
max_articles_per_feed = 40 max_articles_per_feed = 40
remove_empty_feeds = True remove_empty_feeds = True
@ -41,7 +41,8 @@ class ZeitDe(BasicNewsRecipe):
.article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small} .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
.headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small} .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
''' '''
filter_regexps = [r'ad.de.doubleclick.net/'] #filter_regexps = [r'ad.de.doubleclick.net/']
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':["article"]}) , dict(name='div', attrs={'class':["article"]}) ,
] ]
@ -51,15 +52,32 @@ class ZeitDe(BasicNewsRecipe):
dict(name='div', attrs={'id':["place_5","place_4"]}) dict(name='div', attrs={'id':["place_5","place_4"]})
] ]
def get_article_url(self, article): def get_article_url(self, article):
ans = article.get('guid',None)
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="Auf einer Seite lesen")
if x is not None:
a = x.parent
if a and a.has_key('href'):
ans = a['href']
self.log('Found full story link', ans)
except:
pass
if 'video' in ans or 'quiz' in ans :
url = article.get('guid', None) ans = None
return ans
if 'video' in url or 'quiz' in url :
url = None
return url
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
@ -69,6 +87,7 @@ class ZeitDe(BasicNewsRecipe):
return soup return soup
#def print_version(self,url): #def print_version(self,url):
# return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '') # return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')

View File

@ -141,6 +141,11 @@ Now you should be able to access your books on your iPhone by opening Stanza and
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
How do I use |app| with my Android phone?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
First install the WordPlayer e-book reading app from the Android Marketplace onto your phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single Android device out there, so if you would like to have support for your device added, follow the instructions above for getting your device supported in |app|.
I get the error message "Failed to start content server: Port 8080 not free on '0.0.0.0'"? I get the error message "Failed to start content server: Port 8080 not free on '0.0.0.0'"?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~