mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix the Wall Street Journal profile
This commit is contained in:
parent
f823c2b988
commit
dbc4bc0e5b
@ -7,6 +7,7 @@
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
@ -15,9 +16,10 @@ class WallStreetJournal(DefaultProfile):
|
|||||||
title = 'Wall Street Journal'
|
title = 'Wall Street Journal'
|
||||||
max_recursions = 2
|
max_recursions = 2
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
|
no_stylesheets = False
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
timefmt = ' [%a, %b %d, %Y]'
|
timefmt = ' [%a, %b %d, %Y]'
|
||||||
html2lrf_options = ['--ignore-tables', '--base-font-size=5']
|
html2lrf_options = ['--ignore-tables']
|
||||||
|
|
||||||
## Don't grab articles more than 7 days old
|
## Don't grab articles more than 7 days old
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -27,9 +29,6 @@ class WallStreetJournal(DefaultProfile):
|
|||||||
## Remove anything before the body of the article.
|
## Remove anything before the body of the article.
|
||||||
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
||||||
|
|
||||||
## Remove any insets from the body of the article.
|
|
||||||
(r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),
|
|
||||||
|
|
||||||
## Remove anything after the end of the article.
|
## Remove anything after the end of the article.
|
||||||
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
||||||
]
|
]
|
||||||
@ -46,12 +45,13 @@ class WallStreetJournal(DefaultProfile):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/article/', '/article_print/')
|
article = urlparse(url).path.rpartition('/')[-1]
|
||||||
|
return 'http://online.wsj.com/article_print/'+article
|
||||||
|
|
||||||
## Comment out the feeds you don't want retrieved.
|
## Comment out the feeds you don't want retrieved.
|
||||||
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them or use spaces to put them in the order you desire
|
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them or use spaces to put them in the order you desire
|
||||||
def get_feeds(self):
|
def get_feeds(self):
|
||||||
return [
|
return [
|
||||||
#('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
|
#('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
|
||||||
#('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
|
#('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
|
||||||
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
#('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user