mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Sync to trunk.
This commit is contained in:
commit
a0d1670e6f
BIN
resources/images/news/ad.png
Normal file
BIN
resources/images/news/ad.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 569 B |
BIN
resources/images/news/digitaljournal.png
Normal file
BIN
resources/images/news/digitaljournal.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 253 B |
BIN
resources/images/news/kitsapun.png
Normal file
BIN
resources/images/news/kitsapun.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.3 KiB |
BIN
resources/images/news/ledevoir.png
Normal file
BIN
resources/images/news/ledevoir.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 531 B |
86
resources/recipes/ad.recipe
Normal file
86
resources/recipes/ad.recipe
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ADRecipe(BasicNewsRecipe):
    """News recipe for the Dutch newspaper AD (ad.nl).

    The recipe reads the section RSS feeds listed in ``feeds``, rewrites each
    article URL to its print-page form (``print_version``) and thins out runs
    of ``<br>`` tags in the fetched HTML (``preprocess_html``).
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'AD'
    publisher = u'de Persgroep Publishing Nederland NV'
    category = u'News, Sports, the Netherlands'
    description = u'News and Sports from the Netherlands'

    oldest_article = 1.2
    max_articles_per_feed = 100
    use_embedded_content = False

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # Parts of the article page that are kept.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'art_box2'}),
        dict(name='p', attrs={'class': 'gen_footnote3'}),
    ]

    # Layout-only elements that are dropped.
    remove_tags = [
        dict(name='div', attrs={'class': 'gen_clear'}),
        dict(name='div', attrs={'class': re.compile(r'gen_spacer.*')}),
    ]

    remove_attributes = ['style']

    # feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
    feeds = [
        (u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'),
        (u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'),
        (u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'),
        (u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'),
        (u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'),
        (u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'),
        (u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'),
        (u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'),
        (u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'),
        (u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'),
        (u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'),
        (u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'),
        (u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'),
        (u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'),
        (u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'),
        (u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'),
        (u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'),
        (u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'),
        (u'Consument', u'http://www.ad.nl/consument/rss.xml'),
        (u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'),
        (u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'),
        (u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'),
        (u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'),
        (u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'),
        (u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'),
    ]

    extra_css = (
        '\n'
        'body {font-family: verdana, arial, helvetica, geneva, sans-serif;}\n'
        'div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}\n'
        '.gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}\n'
    )

    # The output language follows the recipe's own `language` ('nl'); it was
    # previously hard-coded to 'en', contradicting the declaration above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        ``url`` is split on '/' and the result is rebuilt from fixed positions
        of that split, with a literal ``print`` segment inserted.

        Raises IndexError when ``url`` has fewer than 14 '/'-separated parts.
        """
        parts = url.split('/')
        segments = [parts[2], parts[3], parts[4], parts[5], parts[10],
                    parts[7], 'print', parts[8], parts[9], parts[13]]
        return 'http://' + '/'.join(segments)

    def preprocess_html(self, soup):
        """Remove every <br> whose neighbouring sibling tags are both <br>.

        The soup is modified in place and returned.
        """
        for br in soup.findAll('br'):
            preceding = br.findPreviousSibling(True)
            if hasattr(preceding, 'name') and preceding.name == 'br':
                # Named `following` rather than `next` so the builtin is not shadowed.
                following = br.findNextSibling(True)
                if hasattr(following, 'name') and following.name == 'br':
                    br.extract()

        return soup
|
@ -1,7 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spectator.org
|
spectator.org
|
||||||
'''
|
'''
|
||||||
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class TheAmericanSpectator(BasicNewsRecipe):
|
class TheAmericanSpectator(BasicNewsRecipe):
|
||||||
title = 'The American Spectator'
|
title = 'The American Spectator'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
description = 'News from USA'
|
description = 'News from USA'
|
||||||
|
category = 'news, politics, USA, world'
|
||||||
|
publisher = 'The American Spectator'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = 'en'
|
||||||
INDEX = 'http://spectator.org'
|
INDEX = 'http://spectator.org'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comments' : description
|
||||||
, '--category' , 'news, politics, USA'
|
,'tags' : category
|
||||||
, '--publisher' , title
|
,'language' : language
|
||||||
]
|
,'publisher' : publisher
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'post inner'})
|
dict(name='div', attrs={'class':'post inner'})
|
||||||
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='object')
|
dict(name='object')
|
||||||
,dict(name='div', attrs={'class':'col3' })
|
,dict(name='div', attrs={'class':['col3','post-options','social']})
|
||||||
,dict(name='div', attrs={'class':'post-options' })
|
,dict(name='p' , attrs={'class':['letter-editor','meta']})
|
||||||
,dict(name='p' , attrs={'class':'letter-editor'})
|
|
||||||
,dict(name='div', attrs={'class':'social' })
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
|
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '/print'
|
return url + '/print'
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
return article.get('guid', None)
|
||||||
|
|
||||||
|
60
resources/recipes/bbc_fast.recipe
Normal file
60
resources/recipes/bbc_fast.recipe
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
news.bbc.co.uk
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class BBC(BasicNewsRecipe):
    """Recipe for the BBC News world-edition RSS feeds, using print pages."""

    title = 'BBC News (fast)'
    __author__ = 'Darko Miletic'
    description = 'News from UK. A much faster version that does not download pictures'
    publisher = 'BBC'
    category = 'news, UK, world'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    # delay = 1   (left disabled, as in the original recipe)

    extra_css = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    remove_tags_before = dict(name='div', attrs={'class': 'headline'})
    remove_tags_after = dict(name='div', attrs={'class': 'footer'})
    remove_tags = [
        dict(name=['object', 'link', 'script', 'iframe']),
        dict(name='div', attrs={'class': 'footer'}),
    ]

    feeds = [
        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
    ]

    def print_version(self, url):
        """Return the newsvote print-page URL for ``url``.

        Everything after the first 'http://' in ``url`` is appended to the
        print-tool prefix; if 'http://' does not occur, nothing is appended.
        """
        remainder = url.partition('http://')[2]
        return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + remainder

    def get_article_url(self, article):
        """Return the feed entry's 'guid' value, or None when it has none."""
        return article.get('guid', None)
|
||||||
|
|
121
resources/recipes/calgary_herald.recipe
Normal file
121
resources/recipes/calgary_herald.recipe
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe that builds its article list from a paper's "today's paper" page.

    This copy is configured for the Calgary Herald. The commented-out
    title/url_prefix/description triples below are alternative configurations
    for other papers; enable exactly one triple.
    """

    # Active configuration: Calgary Herald
    title = u'Calgary Herald'
    url_prefix = 'http://www.calgaryherald.com'
    description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = (
        '\n'
        '.timestamp { font-size:xx-small; display: block; }\n'
        '#storyheader { font-size: medium; }\n'
        '#storyheader h1 { font-size: x-large; }\n'
        '#storyheader h2 { font-size: large; font-style: italic; }\n'
        '.byline { font-size:xx-small; }\n'
        '#photocaption { font-size: small; font-style: italic }\n'
        '#photocredit { font-size: xx-small; }'
    )
    keep_only_tags = [
        dict(name='div', attrs={'id': 'storyheader'}),
        dict(name='div', attrs={'id': 'storycontent'}),
    ]
    remove_tags = [
        {'class': 'comments'},
        dict(name='div', attrs={'class': 'navbar'}),
        dict(name='div', attrs={'class': 'morelinks'}),
        dict(name='div', attrs={'class': 'viewmore'}),
        dict(name='li', attrs={'class': 'email'}),
        dict(name='div', attrs={'class': 'story_tool_hr'}),
        dict(name='div', attrs={'class': 'clear'}),
        dict(name='div', attrs={'class': 'story_tool'}),
        dict(name='div', attrs={'class': 'copyright'}),
        dict(name='div', attrs={'class': 'rule_grey_solid'}),
        dict(name='li', attrs={'class': 'print'}),
        dict(name='li', attrs={'class': 'share'}),
        dict(name='ul', attrs={'class': 'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Strip empty ``id`` attributes from <div> tags and return the soup."""
        # Delete empty id attributes -- they screw up the TOC for unknown reasons.
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of ``(section_title, articles)`` tuples in the order the
        sections first appear on the page. Each article is a dict with the keys
        title, url, date, description, author and content. Sections that end up
        with no articles are left out. Articles that appear before any section
        heading are filed under 'News'.
        """
        soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Walk section headings (class "section_title02") and article teasers
        # (class "featurecontent") in the order findAll returns them.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains a section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A heading that repeats must not be listed twice, otherwise
                # the same section would be emitted as two identical feeds.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix + '/news/todays-paper/' + atag['href']
            title = self.tag_to_string(atag, False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag, False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag, False)
            # `setdefault` replaces dict.has_key(), which Python 3 removed.
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))

        return [(section, articles[section]) for section in ans if section in articles]
|
15
resources/recipes/cjr.recipe
Normal file
15
resources/recipes/cjr.recipe
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CJR(BasicNewsRecipe):
    """Recipe for the Columbia Journalism Review news feed."""

    title = u'Columbia Journalism Review'
    __author__ = u'Xanthan Gum'
    description = 'News about journalism.'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'News Stories', u'http://www.cjr.org/index.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with the all-pages print query string appended."""
        query = '?page=all&print=true'
        return url + query
|
52
resources/recipes/digitaljournal.recipe
Normal file
52
resources/recipes/digitaljournal.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
digitaljournal.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class DigitalJournal(BasicNewsRecipe):
    """Recipe for the digitaljournal.com RSS feeds, using print pages."""

    title = 'Digital Journal'
    __author__ = 'Darko Miletic'
    description = 'A Global Citizen Journalism News Network'
    category = 'news, politics, USA, world'
    publisher = 'Digital Journal'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'class': ['article', 'body']})]
    remove_tags = [dict(name=['object', 'table'])]

    feeds = [
        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
        (u'Business', u'http://digitaljournal.com/rss/?feed=top_news&depname=Business'),
        (u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment'),
        (u'Environment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment'),
        (u'Food', u'http://digitaljournal.com/rss/?feed=top_news&depname=Food'),
        (u'Health', u'http://digitaljournal.com/rss/?feed=top_news&depname=Health'),
        (u'Internet', u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet'),
        (u'Politics', u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics'),
        (u'Religion', u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion'),
        (u'Science', u'http://digitaljournal.com/rss/?feed=top_news&depname=Science'),
        (u'Sports', u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports'),
        (u'Technology', u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology'),
        (u'World', u'http://digitaljournal.com/rss/?feed=top_news&depname=World'),
        (u'Arts', u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts'),
    ]

    def print_version(self, url):
        """Return ``url`` with 'print/' inserted after 'digitaljournal.com/'."""
        site = 'digitaljournal.com/'
        return url.replace(site, site + 'print/')
|
||||||
|
|
126
resources/recipes/edmonton_journal.recipe
Normal file
126
resources/recipes/edmonton_journal.recipe
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe that builds its article list from a paper's "today's paper" page.

    This copy is configured for the Edmonton Journal. The commented-out
    title/url_prefix/description triples below are alternative configurations
    for other papers; enable exactly one triple.
    """

    # Active configuration: Edmonton Journal
    title = u'Edmonton Journal'
    url_prefix = 'http://www.edmontonjournal.com'
    description = u'News from Edmonton, AB'

    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = (
        '\n'
        '.timestamp { font-size:xx-small; display: block; }\n'
        '#storyheader { font-size: medium; }\n'
        '#storyheader h1 { font-size: x-large; }\n'
        '#storyheader h2 { font-size: large; font-style: italic; }\n'
        '.byline { font-size:xx-small; }\n'
        '#photocaption { font-size: small; font-style: italic }\n'
        '#photocredit { font-size: xx-small; }'
    )
    keep_only_tags = [
        dict(name='div', attrs={'id': 'storyheader'}),
        dict(name='div', attrs={'id': 'storycontent'}),
    ]
    remove_tags = [
        {'class': 'comments'},
        dict(name='div', attrs={'class': 'navbar'}),
        dict(name='div', attrs={'class': 'morelinks'}),
        dict(name='div', attrs={'class': 'viewmore'}),
        dict(name='li', attrs={'class': 'email'}),
        dict(name='div', attrs={'class': 'story_tool_hr'}),
        dict(name='div', attrs={'class': 'clear'}),
        dict(name='div', attrs={'class': 'story_tool'}),
        dict(name='div', attrs={'class': 'copyright'}),
        dict(name='div', attrs={'class': 'rule_grey_solid'}),
        dict(name='li', attrs={'class': 'print'}),
        dict(name='li', attrs={'class': 'share'}),
        dict(name='ul', attrs={'class': 'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Strip empty ``id`` attributes from <div> tags and return the soup."""
        # Delete empty id attributes -- they screw up the TOC for unknown reasons.
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of ``(section_title, articles)`` tuples in the order the
        sections first appear on the page. Each article is a dict with the keys
        title, url, date, description, author and content. Sections that end up
        with no articles are left out. Articles that appear before any section
        heading are filed under 'News'.
        """
        soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Walk section headings (class "section_title02") and article teasers
        # (class "featurecontent") in the order findAll returns them.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains a section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A heading that repeats must not be listed twice, otherwise
                # the same section would be emitted as two identical feeds.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix + '/news/todays-paper/' + atag['href']
            title = self.tag_to_string(atag, False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag, False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag, False)
            # `setdefault` replaces dict.has_key(), which Python 3 removed.
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))

        return [(section, articles[section]) for section in ans if section in articles]
|
@ -9,27 +9,33 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
class FTDe(BasicNewsRecipe):
|
class FTDe(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'FTD'
|
title = 'FTD'
|
||||||
description = 'Financial Times Deutschland'
|
description = 'Financial Times Deutschland'
|
||||||
__author__ = 'Oliver Niesner'
|
__author__ = 'Oliver Niesner'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
language = 'de'
|
language = _('German')
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_tags = [dict(id='navi_top'),
|
remove_tags = [dict(id='navi_top'),
|
||||||
dict(id='topbanner'),
|
dict(id='topbanner'),
|
||||||
dict(id='seitenkopf'),
|
dict(id='seitenkopf'),
|
||||||
dict(id='BoxA-0-0-0'),
|
dict(id='BoxA-0-0-0'),
|
||||||
|
#dict(id='BoxA-2-0-0'),
|
||||||
dict(id='footer'),
|
dict(id='footer'),
|
||||||
dict(id='rating_open'),
|
dict(id='rating_open'),
|
||||||
dict(id='ADS_Top'),
|
dict(id='ADS_Top'),
|
||||||
dict(id='spinner'),
|
dict(id='spinner'),
|
||||||
dict(id='ftd-contentad'),
|
dict(id='ftd-contentad'),
|
||||||
|
dict(id='ftd-promo'),
|
||||||
dict(id='nava-50009007-1-0'),
|
dict(id='nava-50009007-1-0'),
|
||||||
dict(id='navli-50009007-1-0'),
|
dict(id='navli-50009007-1-0'),
|
||||||
|
dict(id='Box5000534-0-0-0'),
|
||||||
|
dict(id='ExpV-1-0-0-1'),
|
||||||
|
dict(id='ExpV-1-0-0-0'),
|
||||||
|
dict(id='PollExpV-2-0-0-0'),
|
||||||
dict(id='starRating'),
|
dict(id='starRating'),
|
||||||
dict(id='saveRating'),
|
dict(id='saveRating'),
|
||||||
dict(id='yLayer'),
|
dict(id='yLayer'),
|
||||||
@ -44,14 +50,20 @@ class FTDe(BasicNewsRecipe):
|
|||||||
dict(name='ul', attrs={'class':'nav'}),
|
dict(name='ul', attrs={'class':'nav'}),
|
||||||
dict(name='p', attrs={'class':'articleOptionHead'}),
|
dict(name='p', attrs={'class':'articleOptionHead'}),
|
||||||
dict(name='p', attrs={'class':'articleOptionFoot'}),
|
dict(name='p', attrs={'class':'articleOptionFoot'}),
|
||||||
|
dict(name='p', attrs={'class':'moreInfo'}),
|
||||||
dict(name='div', attrs={'class':'chartBox'}),
|
dict(name='div', attrs={'class':'chartBox'}),
|
||||||
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
|
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
|
||||||
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
|
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
|
||||||
dict(name='div', attrs={'class':'box boxNavTabs '}),
|
dict(name='div', attrs={'class':'box boxNavTabs'}),
|
||||||
|
dict(name='div', attrs={'class':'boxMMRgtLow'}),
|
||||||
dict(name='span', attrs={'class':'vote_455857'}),
|
dict(name='span', attrs={'class':'vote_455857'}),
|
||||||
dict(name='div', attrs={'class':'relatedhalb'}),
|
dict(name='div', attrs={'class':'relatedhalb'}),
|
||||||
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
|
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
|
||||||
|
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
|
||||||
|
dict(name='div', attrs={'class':'box boxTeaser boxPhotoshow boxImgWide'}),
|
||||||
|
dict(name='div', attrs={'class':'box boxTeaser'}),
|
||||||
dict(name='div', attrs={'class':'tagCloud'}),
|
dict(name='div', attrs={'class':'tagCloud'}),
|
||||||
|
dict(name='div', attrs={'class':'pollView'}),
|
||||||
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
|
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
|
||||||
dict(name='div', attrs={'class':'ftdHpNav'}),
|
dict(name='div', attrs={'class':'ftdHpNav'}),
|
||||||
dict(name='div', attrs={'class':'ftdHead'}),
|
dict(name='div', attrs={'class':'ftdHead'}),
|
||||||
@ -67,11 +79,12 @@ class FTDe(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'wertungoben'}),
|
dict(name='div', attrs={'class':'wertungoben'}),
|
||||||
dict(name='div', attrs={'class':'artikelfuss'}),
|
dict(name='div', attrs={'class':'artikelfuss'}),
|
||||||
dict(name='a', attrs={'class':'rating'}),
|
dict(name='a', attrs={'class':'rating'}),
|
||||||
|
dict(name='a', attrs={'href':'#rt'}),
|
||||||
dict(name='div', attrs={'class':'articleOptionFootFrame'}),
|
dict(name='div', attrs={'class':'articleOptionFootFrame'}),
|
||||||
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
||||||
remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
#remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
||||||
|
|
||||||
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
||||||
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
||||||
('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
|
('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
|
||||||
('Politik', 'http://www.ftd.de/rss2/politik'),
|
('Politik', 'http://www.ftd.de/rss2/politik'),
|
||||||
@ -82,8 +95,8 @@ class FTDe(BasicNewsRecipe):
|
|||||||
('Auto', 'http://www.ftd.de/rss2/auto'),
|
('Auto', 'http://www.ftd.de/rss2/auto'),
|
||||||
('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
|
('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?mode=print'
|
return url.replace('.html', '.html?mode=print')
|
||||||
|
@ -32,7 +32,7 @@ class GlobeAndMail(BasicNewsRecipe):
|
|||||||
'gallery-controls', 'video', 'galleryLoading','deck','header',
|
'gallery-controls', 'video', 'galleryLoading','deck','header',
|
||||||
'toolsBottom'] },
|
'toolsBottom'] },
|
||||||
{'class':['credit','inline-img-caption','tab-pointer'] },
|
{'class':['credit','inline-img-caption','tab-pointer'] },
|
||||||
dict(name='div', attrs={'id':'lead-photo'}),
|
dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
|
||||||
dict(name='div', attrs={'class':'right'}),
|
dict(name='div', attrs={'class':'right'}),
|
||||||
dict(name='div', attrs={'id':'footer'}),
|
dict(name='div', attrs={'id':'footer'}),
|
||||||
dict(name='div', attrs={'id':'beta-msg'}),
|
dict(name='div', attrs={'id':'beta-msg'}),
|
||||||
|
44
resources/recipes/kitsapun.recipe
Normal file
44
resources/recipes/kitsapun.recipe
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.kitsapsun.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kitsapsun(BasicNewsRecipe):
    """Recipe that downloads the Kitsap Sun from its RSS headline feeds."""

    # Publication metadata.
    title = 'Kitsap Sun'
    __author__ = 'Darko Miletic'
    description = 'News from Kitsap County'
    publisher = 'Scripps Interactive Newspapers Group'
    category = 'news, Kitsap county, USA'
    language = 'en'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Keep only the story header and body containers, then drop embedded
    # media, link and form elements from what is left.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['story_meta', 'story_content']}},
    ]
    remove_tags = [
        {'name': ['object', 'link', 'embed', 'form', 'iframe']},
    ]

    feeds = [
        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
    ]

    def print_version(self, url):
        """Return the print URL for ``url``.

        Everything after the last ``/`` of the article URL is discarded and
        ``/?print=1`` is appended to what remains.
        """
        parent, _slash, _tail = url.rpartition('/')
        return parent + '/?print=1'
|
@ -1,79 +1,79 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Lorenzo Vigentini'
|
__author__ = 'Lorenzo Vigentini'
|
||||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
__version__ = 'v1.01'
|
__version__ = 'v1.01'
|
||||||
__date__ = '14, January 2010'
|
__date__ = '14, January 2010'
|
||||||
__description__ = 'Canadian Paper '
|
__description__ = 'Canadian Paper '
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.ledevoir.com/
|
http://www.ledevoir.com/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ledevoir(BasicNewsRecipe):
|
class ledevoir(BasicNewsRecipe):
|
||||||
author = 'Lorenzo Vigentini'
|
author = 'Lorenzo Vigentini'
|
||||||
description = 'Canadian Paper'
|
description = 'Canadian Paper'
|
||||||
|
|
||||||
cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
|
cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
|
||||||
title = u'Le Devoir'
|
title = u'Le Devoir'
|
||||||
publisher = 'leDevoir.com'
|
publisher = 'leDevoir.com'
|
||||||
category = 'News, finance, economy, politics'
|
category = 'News, finance, economy, politics'
|
||||||
|
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 10
|
recursion = 10
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'article'}),
|
dict(name='div', attrs={'id':'article'}),
|
||||||
dict(name='ul', attrs={'id':'ariane'})
|
dict(name='ul', attrs={'id':'ariane'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'dialog'}),
|
dict(name='div', attrs={'id':'dialog'}),
|
||||||
dict(name='div', attrs={'class':['interesse_actions','reactions']}),
|
dict(name='div', attrs={'class':['interesse_actions','reactions']}),
|
||||||
dict(name='ul', attrs={'class':'mots_cles'}),
|
dict(name='ul', attrs={'class':'mots_cles'}),
|
||||||
dict(name='a', attrs={'class':'haut'}),
|
dict(name='a', attrs={'class':'haut'}),
|
||||||
dict(name='h5', attrs={'class':'interesse_actions'})
|
dict(name='h5', attrs={'class':'interesse_actions'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
|
(u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
|
||||||
(u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
|
(u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
|
||||||
(u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
|
(u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
|
||||||
(u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
|
(u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
|
||||||
(u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
|
(u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
|
||||||
(u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
|
(u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
|
||||||
(u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
|
(u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
|
||||||
(u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
|
(u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
|
||||||
(u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
|
(u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
|
||||||
(u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
|
(u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
|
||||||
(u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
|
(u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
|
||||||
(u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
|
(u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
|
h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
|
||||||
h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
|
h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
|
||||||
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
|
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
|
||||||
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
|
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
|
||||||
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
|
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
|
||||||
.specs {line-height:1em;margin:1px 0;}
|
.specs {line-height:1em;margin:1px 0;}
|
||||||
.specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
.specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
||||||
.specs span.auteur a,
|
.specs span.auteur a,
|
||||||
.specs span.auteur span {text-transform:uppercase;color:#787878;}
|
.specs span.auteur span {text-transform:uppercase;color:#787878;}
|
||||||
.specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
.specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
||||||
ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
|
ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
|
||||||
ul#ariane li {display:inline;}
|
ul#ariane li {display:inline;}
|
||||||
ul#ariane a {color:#2E2E2E;text-decoration:underline;}
|
ul#ariane a {color:#2E2E2E;text-decoration:underline;}
|
||||||
.credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
|
.credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
|
||||||
.texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
|
.texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
|
||||||
'''
|
'''
|
||||||
|
96
resources/recipes/montreal_gazette.recipe
Normal file
96
resources/recipes/montreal_gazette.recipe
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the Montreal Gazette, built from its "today's paper" index page."""

    # Paper-specific settings. ``url_prefix`` is the site root from which
    # the index URL and every article URL are built.
    title = u'Montreal Gazette'
    url_prefix = 'http://www.montrealgazette.com'
    description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp { font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large; font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return ``soup``."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections were seen on the page. Each article is a dict with the
        keys ``title``, ``url``, ``date``, ``description``, ``author`` and
        ``content``. Sections that ended up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles seen before the first section heading are filed under 'News'.
        key = 'News'
        ans = ['News']

        # The page interleaves section headings (class "section_title02")
        # with article teasers (class "featurecontent"); walk them in
        # document order so each article lands in the preceding section.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: its <h3> text becomes the current section.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue

            # Article teaser: the headline link lives in <h1><a href=...>.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag, False)
            pubdate = ''

            # Optional summary (<p>) and byline (<h4>).
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag, False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag, False)

            # ``in``/``setdefault`` instead of dict.has_key(), which no
            # longer exists in Python 3.
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))

        return [(section, articles[section]) for section in ans if section in articles]
|
101
resources/recipes/ottawa_citizen.recipe
Normal file
101
resources/recipes/ottawa_citizen.recipe
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the Ottawa Citizen, built from its "today's paper" index page."""

    # Paper-specific settings. ``url_prefix`` is the site root from which
    # the index URL and every article URL are built.
    title = u'Ottawa Citizen'
    url_prefix = 'http://www.ottawacitizen.com'
    description = u'News from Ottawa, ON'

    # To build the Montreal Gazette with this recipe instead, replace the
    # three values above with:
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp { font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large; font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return ``soup``."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections were seen on the page. Each article is a dict with the
        keys ``title``, ``url``, ``date``, ``description``, ``author`` and
        ``content``. Sections that ended up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles seen before the first section heading are filed under 'News'.
        key = 'News'
        ans = ['News']

        # The page interleaves section headings (class "section_title02")
        # with article teasers (class "featurecontent"); walk them in
        # document order so each article lands in the preceding section.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: its <h3> text becomes the current section.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue

            # Article teaser: the headline link lives in <h1><a href=...>.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag, False)
            pubdate = ''

            # Optional summary (<p>) and byline (<h4>).
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag, False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag, False)

            # ``in``/``setdefault`` instead of dict.has_key(), which no
            # longer exists in Python 3.
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))

        return [(section, articles[section]) for section in ans if section in articles]
|
48
resources/recipes/pajama.recipe
Normal file
48
resources/recipes/pajama.recipe
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
class PajamasMedia(BasicNewsRecipe):
    """Recipe for Pajamas Media, fetched from its FeedBurner feed."""

    title = u'Pajamas Media'
    description = u'Provides exclusive news and opinion for forty countries.'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # Follow links one level deep, restricted to URLs matching the pattern
    # below (presumably the second page of a multi-page post -- confirm
    # against the live site).
    recursions = 1
    match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']

    # The original set ``remove_stylesheets``, which is not a
    # BasicNewsRecipe option and therefore had no effect; the option that
    # drops the site's CSS is ``no_stylesheets``.
    no_stylesheets = True
    remove_tags_after = dict(name='div', attrs={'class':'paged-nav'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['pages']}),
    ]

    feeds = [
        ('pajamas Media',
         'http://feeds.feedburner.com/PajamasMedia'),
    ]

    def preprocess_html(self, soup):
        """Return a minimal document containing only ``div#innerpage-content``.

        If the page has no such ``div`` the soup is returned unchanged;
        previously ``None`` was inserted into the new body, which raised.
        """
        story = soup.find(name='div', attrs={'id':'innerpage-content'})
        if story is None:
            return soup

        # Re-home the story in a bare HTML skeleton so nothing else from the
        # original page survives.
        page = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = page.find(name='body')
        body.insert(0, story)
        return page

    def postprocess_html(self, soup, first):
        """Strip the repeated header and author block from follow-on pages.

        ``first`` is true for the first page of an article; only later pages
        (``first`` false) have the ``innerpage-header`` and ``author``
        elements removed.
        """
        if not first:
            for css_class in ('innerpage-header', 'author'):
                tag = soup.find(attrs={'class': css_class})
                if tag:
                    tag.extract()
        return soup
|
@ -8,8 +8,7 @@ class Physicstoday(BasicNewsRecipe):
|
|||||||
description = u'Physics Today magazine'
|
description = u'Physics Today magazine'
|
||||||
publisher = 'American Institute of Physics'
|
publisher = 'American Institute of Physics'
|
||||||
category = 'Physics'
|
category = 'Physics'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
|
cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -30,11 +29,11 @@ class Physicstoday(BasicNewsRecipe):
|
|||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://www.physicstoday.org/pt/sso_login.jsp')
|
br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
|
||||||
br.select_form(name='login')
|
br.select_form(name='login_form')
|
||||||
br['username'] = self.username
|
br['username'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
feeds = [(u'All', u'http://www.physicstoday.org/feed.xml')]
|
feeds = [(u'All', u'http://www.physicstoday.org/feed.xml')]
|
||||||
|
188
resources/recipes/readers_digest.recipe
Normal file
188
resources/recipes/readers_digest.recipe
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
'''
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.web.feeds import Feed
|
||||||
|
|
||||||
|
|
||||||
|
class ReadersDigest(BasicNewsRecipe):
    """Recipe for Reader's Digest combining scraped channel pages with RSS feeds.

    ``parse_index`` scrapes a few channel pages, then merges in the RSS
    feeds listed in ``feeds``. Articles whose title contains "recipe" are
    pulled out of the RSS feeds into a separate virtual "Recipes" feed.
    """

    title = 'Readers Digest'
    __author__ = 'BrianG'
    language = 'en'
    description = 'Readers Digest Feeds'
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 60
    max_articles_per_feed = 200
    remove_javascript = True

    extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
                    .mainHd{font-family:georgia,serif;color:#000000;}
                    h2 {font-family:Arial,Sans-serif;}
                    .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
                    .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
                    .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
                    .photoBkt{ font-size:x-small ;}
                    .vertPhoto{font-size:x-small ;}
                    .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
                    .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
                    .artTxt{font-family:georgia,serif;}
                    .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
                    .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
                    a:link{color:#CC0000;}
                    .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
                    '''

    remove_tags = [
        dict(name='h4', attrs={'class':'close'}),
        dict(name='div', attrs={'class':'fromLine'}),
        dict(name='img', attrs={'class':'colorTag'}),
        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
        dict(name='div', attrs={'class':'horizontalAd'}),
        dict(name='div', attrs={'id':'imageCounterLeft'}),
        dict(name='div', attrs={'id':'commentsPrint'})
    ]

    feeds = [
        ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
        ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
        ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
        ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
    ]

    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'

    def print_version(self, url):
        """Return the print-friendly URL for an article or blog post.

        ``/article`` URLs are rewritten from the content id embedded in the
        URL. ``/post`` URLs require fetching the page to read its print
        link. Any other URL is returned unchanged.
        """
        if url.find('/article') > 0:
            # The content id sits between '/article' and the text that ends
            # four characters before '.html?' (presumably a short slug --
            # confirm against current feed URLs).
            ident = url[url.find('/article')+8:url.find('.html?')-4]
            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
        elif url.find('/post') > 0:
            # Blog posts: the print link is only available on the page itself.
            soup = self.index_to_soup(url)
            newsoup = soup.find('ul',attrs={'class':'printBlock'})
            url = 'http://www.rd.com' + newsoup('a')[0]['href']
            # Drop the trailing '&Keep...' parameter from the print link.
            url = url[0:url.find('&Keep')]

        return url

    def parse_index(self):
        """Return the full index as a list of ``(section, articles)`` tuples.

        Scraped channel pages come first, then the RSS feeds are merged in:
        'New in RD' is moved to position 0, 'Blogs' to position 1, and all
        remaining feeds are appended.
        """
        # (section title, page URL, divider, attrs selecting article teasers)
        pages = [
            ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
            # Left out on purpose (original note: "useless recipes"):
            # ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
            ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
        ]

        feeds = []
        for section, url, divider, attrList in pages:
            feeds.append((section, self.page_parse(url, divider, attrList)))

        # parse_rss() returns Feed objects; convert each feed's articles to
        # the plain dicts that parse_index is expected to return.
        for feed in self.parse_rss():
            newArticles = [
                {
                    'title'       : article.title,
                    'url'         : article.url,
                    'date'        : article.date,
                    'description' : article.text_summary
                }
                for article in feed.articles
            ]

            # New and Blogs should be the first two feeds.
            if feed.title == 'New in RD':
                feeds.insert(0,(feed.title,newArticles))
            elif feed.title == 'Blogs':
                feeds.insert(1,(feed.title,newArticles))
            else:
                feeds.append((feed.title,newArticles))

        return feeds

    def page_parse(self, mainurl, divider, attrList):
        """Scrape article teasers from a channel page.

        ``attrList`` is the attribute filter used to find teaser elements.
        ``divider`` is accepted for interface compatibility but is not used.
        Each teaser's first ``img`` alt text becomes the title and its first
        link the URL. Returns a list of article dicts.
        """
        mainsoup = self.index_to_soup(mainurl)
        return [
            {
                'title'       : item('img')[0]['alt'],
                'url'         : 'http://www.rd.com'+item('a')[0]['href'],
                'date'        : '',
                'description' : ''
            }
            for item in mainsoup.findAll(attrs=attrList)
        ]

    def parse_rss (self):
        """Parse the RSS feeds and split recipe articles into their own feed.

        Runs the standard ``parse_feeds``, moves every article whose title
        contains "recipe" (case-insensitive) out of its feed, and, if any
        were found, appends a virtual 'Recipes' feed holding them. Returns
        the list of Feed objects.
        """
        # Do the "official" parse_feeds first
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Partition each feed's articles in a single pass; the article list
        # is updated in place so the Feed keeps the same list object.
        recipeArticles = []
        for curfeed in feeds:
            kept = []
            for curarticle in curfeed.articles:
                if 'RECIPE' in curarticle.title.upper():
                    recipeArticles.append(curarticle)
                else:
                    kept.append(curarticle)
            curfeed.articles[:] = kept

        # If there are any recipes found, create a new Feed object and append.
        if recipeArticles:
            pfeed = Feed()
            pfeed.title = 'Recipes'
            # Was misspelled 'descrition', so the description was never set.
            pfeed.description = 'Recipe Feed (Virtual)'
            pfeed.image_url  = None
            pfeed.oldest_article = 30
            pfeed.id_counter = len(recipeArticles)
            pfeed.articles = recipeArticles[:]
            feeds.append(pfeed)

        return feeds
|
||||||
|
|
116
resources/recipes/regina_leader_post.recipe
Normal file
116
resources/recipes/regina_leader_post.recipe
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Regina Leader-Post; to target another
    paper, comment out the active three lines and un-comment one of the
    alternative groups below.
    """

    # Active configuration: Regina Leader-Post
    title = u'Regina Leader-Post'
    url_prefix = 'http://www.leaderpost.com'
    description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
111
resources/recipes/saskatoon_star_phoenix.recipe
Normal file
111
resources/recipes/saskatoon_star_phoenix.recipe
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Saskatoon Star-Phoenix; to target
    another paper, comment out the active three lines and un-comment one of
    the alternative groups below.
    """

    # Active configuration: Saskatoon Star-Phoenix
    title = u'Saskatoon Star-Phoenix'
    url_prefix = 'http://www.thestarphoenix.com'
    description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
136
resources/recipes/vancouver_provice.recipe
Normal file
136
resources/recipes/vancouver_provice.recipe
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Vancouver Province; to target another
    paper, comment out the active three lines and un-comment one of the
    alternative groups below.
    """

    # Active configuration: Vancouver Province
    title = u'Vancouver Province'
    url_prefix = 'http://www.theprovince.com'
    description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Vancouver Sun
    #title = u'Vancouver Sun'
    #url_prefix = 'http://www.vancouversun.com'
    #description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'

    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
131
resources/recipes/vancouver_sun.recipe
Normal file
131
resources/recipes/vancouver_sun.recipe
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Vancouver Sun; to target another paper,
    comment out the active three lines and un-comment one of the
    alternative groups below.
    """

    # Active configuration: Vancouver Sun
    title = u'Vancouver Sun'
    url_prefix = 'http://www.vancouversun.com'
    description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'

    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
141
resources/recipes/vic_times.recipe
Normal file
141
resources/recipes/vic_times.recipe
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Victoria Times Colonist; to target
    another paper, comment out the active three lines and un-comment one of
    the alternative groups below.
    """

    # Active configuration: Victoria Times Colonist
    title = u'Victoria Times Colonist'
    url_prefix = 'http://www.timescolonist.com'
    description = u'News from Victoria, BC'

    # un-comment the following three lines for the Vancouver Province
    #title = u'Vancouver Province'
    #url_prefix = 'http://www.theprovince.com'
    #description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Vancouver Sun
    #title = u'Vancouver Sun'
    #url_prefix = 'http://www.vancouversun.com'
    #description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'

    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
106
resources/recipes/windows_star.recipe
Normal file
106
resources/recipes/windows_star.recipe
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of a CanWest (canada.com) site.

    Only ``title``, ``url_prefix`` and ``description`` are paper-specific.
    This copy is configured for the Windsor Star; to target another paper,
    comment out the active three lines and un-comment one of the
    alternative groups below.
    """

    # Active configuration: Windsor Star
    title = u'Windsor Star'
    url_prefix = 'http://www.windsorstar.com'
    description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    # The CSS lines are kept flush-left so the stylesheet text is unchanged.
    extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Remove empty ``id`` attributes from every ``div`` and return *soup*."""
        # Empty id attributes break the generated TOC (reason unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the feed list from the site's "today's paper" index page.

        Returns a list of ``(section_title, articles)`` tuples in the order
        the sections first appear on the page.  Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description``, ``author``
        and ``content``.  Sections that end up with no articles are omitted.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        # Articles found before the first section heading are filed under 'News'.
        key = 'News'
        sections = ['News']

        # Walk section headings (class="section_title02") and article
        # teasers (class="featurecontent") in document order.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # Record each section once so a repeated heading cannot
                # produce a duplicate feed.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        return [(name, articles[name]) for name in sections if name in articles]
|
@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre import strftime
|
||||||
|
|
||||||
# http://online.wsj.com/page/us_in_todays_paper.html
|
# http://online.wsj.com/page/us_in_todays_paper.html
|
||||||
|
|
||||||
@ -67,6 +68,13 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.wsj_get_index()
|
soup = self.wsj_get_index()
|
||||||
|
|
||||||
|
year = strftime('%Y')
|
||||||
|
for x in soup.findAll('td', attrs={'class':'b14'}):
|
||||||
|
txt = self.tag_to_string(x).strip()
|
||||||
|
if year in txt:
|
||||||
|
self.timefmt = ' [%s]'%txt
|
||||||
|
break
|
||||||
|
|
||||||
left_column = soup.find(
|
left_column = soup.find(
|
||||||
text=lambda t: 'begin ITP Left Column' in str(t))
|
text=lambda t: 'begin ITP Left Column' in str(t))
|
||||||
|
|
||||||
@ -91,7 +99,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
url = url.partition('#')[0]
|
url = url.partition('#')[0]
|
||||||
desc = ''
|
desc = ''
|
||||||
d = x.findNextSibling(True)
|
d = x.findNextSibling(True)
|
||||||
if d.get('class', None) == 'arialResize':
|
if d is not None and d.get('class', None) == 'arialResize':
|
||||||
desc = self.tag_to_string(d)
|
desc = self.tag_to_string(d)
|
||||||
desc = desc.partition(u'\u2022')[0]
|
desc = desc.partition(u'\u2022')[0]
|
||||||
self.log('\t\tFound article:', title)
|
self.log('\t\tFound article:', title)
|
||||||
|
@ -3,47 +3,139 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
online.wsj.com.com
|
online.wsj.com
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||||
|
from datetime import timedelta, date
|
||||||
|
|
||||||
class WSJ(BasicNewsRecipe):
|
class WSJ(BasicNewsRecipe):
|
||||||
# formatting adapted from original recipe by Kovid Goyal and Sujata Raman
|
# formatting adapted from original recipe by Kovid Goyal and Sujata Raman
|
||||||
title = u'Wall Street Journal (free)'
|
title = u'Wall Street Journal (free)'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = ('All the free content from the Wall Street Journal (business'
|
description = ('All the free content from the Wall Street Journal (business, financial and political news)')
|
||||||
', financial and political news)')
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
timefmt = ' [%b %d]'
|
timefmt = ' [%b %d]'
|
||||||
extra_css = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
|
|
||||||
h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
# customization notes: delete sections you are not interested in
|
||||||
.subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
|
# set omit_paid_content to False if you want the paid content article snippets
|
||||||
.insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
|
# set oldest_article to the maximum number of days back from today to include articles
|
||||||
.targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
|
sectionlist = [
|
||||||
.article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
['/home-page','Front Page'],
|
||||||
.tagline { ont-size:xx-small;}
|
['/public/page/news-opinion-commentary.html','Commentary'],
|
||||||
.dateStamp {font-family:Arial,Helvetica,sans-serif;}
|
['/public/page/news-global-world.html','World News'],
|
||||||
h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
['/public/page/news-world-business.html','US News'],
|
||||||
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
|
['/public/page/news-business-us.html','Business'],
|
||||||
|
['/public/page/news-financial-markets-stock.html','Markets'],
|
||||||
|
['/public/page/news-tech-technology.html','Technology'],
|
||||||
|
['/public/page/news-personal-finance.html','Personal Finance'],
|
||||||
|
['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
|
||||||
|
['/public/page/news-real-estate-homes.html','Real Estate'],
|
||||||
|
['/public/page/news-career-jobs.html','Careers'],
|
||||||
|
['/public/page/news-small-business-marketing.html','Small Business']
|
||||||
|
]
|
||||||
|
oldest_article = 2
|
||||||
|
omit_paid_content = True
|
||||||
|
|
||||||
|
extra_css = '''h1{font-size:large; font-family:Times,serif;}
|
||||||
|
h2{font-family:Times,serif; font-size:small; font-style:italic;}
|
||||||
|
.subhead{font-family:Times,serif; font-size:small; font-style:italic;}
|
||||||
|
.insettipUnit {font-family:Times,serif;font-size:xx-small;}
|
||||||
|
.targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
|
||||||
|
.article{font-family:Times,serif; font-size:x-small;}
|
||||||
|
.tagline { font-size:xx-small;}
|
||||||
|
.dateStamp {font-family:Times,serif;}
|
||||||
|
h3{font-family:Times,serif; font-size:xx-small;}
|
||||||
|
.byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
|
||||||
.metadataType-articleCredits {list-style-type: none;}
|
.metadataType-articleCredits {list-style-type: none;}
|
||||||
h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
|
h6{font-family:Times,serif; font-size:small; font-style:italic;}
|
||||||
.paperLocation{font-size:xx-small;}'''
|
.paperLocation{font-size:xx-small;}'''
|
||||||
|
|
||||||
remove_tags_before = dict(name='h1')
|
|
||||||
remove_tags = [ dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
|
remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
|
||||||
"articleTabs_tab_interactive","articleTabs_tab_video",
|
remove_tags = [ dict({'id':re.compile('^articleTabs_tab_')}),
|
||||||
"articleTabs_tab_map","articleTabs_tab_slideshow"]),
|
#dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
|
||||||
{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
|
# "articleTabs_tab_interactive","articleTabs_tab_video",
|
||||||
'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
|
# "articleTabs_tab_map","articleTabs_tab_slideshow"]),
|
||||||
'adSummary', 'nav-inline','insetFullBracket']},
|
{'class': ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
|
||||||
dict(rel='shortcut icon'),
|
'insettip','insetClose','more_in', "insetContent",
|
||||||
|
# 'articleTools_bottom','articleTools_bottom mjArticleTools',
|
||||||
|
'aTools', 'tooltip',
|
||||||
|
'adSummary', 'nav-inline','insetFullBracket']},
|
||||||
|
dict({'class':re.compile('^articleTools_bottom')}),
|
||||||
|
dict(rel='shortcut icon')
|
||||||
]
|
]
|
||||||
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
|
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
|
||||||
|
|
||||||
|
def get_browser(self):
    '''
    Return the stock browser provided by BasicNewsRecipe; this recipe
    applies no extra browser configuration.
    '''
    return BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self,soup):
|
def preprocess_html(self,soup):
|
||||||
|
|
||||||
|
def decode_us_date(datestr):
    '''
    Convert a US style date string such as "January 5 2010" into a
    datetime.date.

    The string is split on whitespace into month name, day and year.
    Matching is case-insensitive, trailing commas/periods on each token
    are ignored ("Jan. 5, 2010") and a month may be abbreviated to three
    or more letters.

    Raises ValueError if the month name is not recognised or the day/year
    are not integers, and IndexError if fewer than three tokens are given.
    '''
    months = ['january','february','march','april','may','june','july',
              'august','september','october','november','december']
    # Drop punctuation that commonly trails the tokens of a dateline
    udate = [token.strip('.,') for token in datestr.strip().lower().split()]
    name = udate[0]
    if name in months:
        m = months.index(name) + 1
    else:
        # Fall back to an abbreviation; three letters is always unambiguous
        m = None
        if len(name) >= 3:
            for number, full_name in enumerate(months):
                if full_name.startswith(name):
                    m = number + 1
                    break
        if m is None:
            raise ValueError('Unrecognised month name: %r' % udate[0])
    d = int(udate[1])
    y = int(udate[2])
    return date(y, m, d)
|
||||||
|
|
||||||
|
# check if article is paid content
|
||||||
|
if self.omit_paid_content:
|
||||||
|
divtags = soup.findAll('div','tooltip')
|
||||||
|
if divtags:
|
||||||
|
for divtag in divtags:
|
||||||
|
if divtag.find(text="Subscriber Content"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# check if article is too old
|
||||||
|
datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
|
||||||
|
if datetag:
|
||||||
|
dateline_string = self.tag_to_string(datetag,False)
|
||||||
|
date_items = dateline_string.split(',')
|
||||||
|
datestring = date_items[0]+date_items[1]
|
||||||
|
article_date = decode_us_date(datestring)
|
||||||
|
earliest_date = date.today() - timedelta(days=self.oldest_article)
|
||||||
|
if article_date < earliest_date:
|
||||||
|
self.log("Skipping article dated %s" % datestring)
|
||||||
|
return None
|
||||||
|
datetag.parent.extract()
|
||||||
|
|
||||||
|
# place dateline in article heading
|
||||||
|
|
||||||
|
bylinetag = soup.find('h3','byline')
|
||||||
|
if bylinetag:
|
||||||
|
h3bylinetag = bylinetag
|
||||||
|
else:
|
||||||
|
bylinetag = soup.find('li','byline')
|
||||||
|
if bylinetag:
|
||||||
|
h3bylinetag = bylinetag.h3
|
||||||
|
if not h3bylinetag:
|
||||||
|
h3bylinetag = bylinetag
|
||||||
|
bylinetag = bylinetag.parent
|
||||||
|
if bylinetag:
|
||||||
|
if h3bylinetag.a:
|
||||||
|
bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
|
||||||
|
else:
|
||||||
|
bylinetext = self.tag_to_string(h3bylinetag,False)
|
||||||
|
h3byline = Tag(soup,'h3',[('class','byline')])
|
||||||
|
if bylinetext.isspace() or (bylinetext == ''):
|
||||||
|
h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
|
||||||
|
else:
|
||||||
|
h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
|
||||||
|
bylinetag.replaceWith(h3byline)
|
||||||
|
else:
|
||||||
|
headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
|
||||||
|
if headlinetag:
|
||||||
|
dateline = Tag(soup,'h3', [('class','byline')])
|
||||||
|
dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
|
||||||
|
headlinetag.insert(len(headlinetag),dateline)
|
||||||
|
else: # if no date tag, don't process this page--it's not a news item
|
||||||
|
return None
|
||||||
# This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
|
# This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
|
||||||
ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
|
ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
|
||||||
if ultag:
|
if ultag:
|
||||||
@ -58,7 +150,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
key = None
|
key = None
|
||||||
ans = []
|
ans = []
|
||||||
|
|
||||||
def parse_index_page(page_name,page_title,omit_paid_content):
|
def parse_index_page(page_name,page_title):
|
||||||
|
|
||||||
def article_title(tag):
|
def article_title(tag):
|
||||||
atag = tag.find('h2') # title is usually in an h2 tag
|
atag = tag.find('h2') # title is usually in an h2 tag
|
||||||
@ -119,7 +211,6 @@ class WSJ(BasicNewsRecipe):
|
|||||||
soup = self.index_to_soup(pageurl)
|
soup = self.index_to_soup(pageurl)
|
||||||
# Find each instance of div with class including "headlineSummary"
|
# Find each instance of div with class including "headlineSummary"
|
||||||
for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
|
for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
|
||||||
|
|
||||||
# divtag contains all article data as ul's and li's
|
# divtag contains all article data as ul's and li's
|
||||||
# first, check if there is an h3 tag which provides a section name
|
# first, check if there is an h3 tag which provides a section name
|
||||||
stag = divtag.find('h3')
|
stag = divtag.find('h3')
|
||||||
@ -162,7 +253,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
# now skip paid subscriber articles if desired
|
# now skip paid subscriber articles if desired
|
||||||
subscriber_tag = litag.find(text="Subscriber Content")
|
subscriber_tag = litag.find(text="Subscriber Content")
|
||||||
if subscriber_tag:
|
if subscriber_tag:
|
||||||
if omit_paid_content:
|
if self.omit_paid_content:
|
||||||
continue
|
continue
|
||||||
# delete the tip div so it doesn't get in the way
|
# delete the tip div so it doesn't get in the way
|
||||||
tiptag = litag.find("div", { "class" : "tipTargetBox" })
|
tiptag = litag.find("div", { "class" : "tipTargetBox" })
|
||||||
@ -185,7 +276,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
if url.startswith("/article"):
|
if url.startswith("/article"):
|
||||||
url = mainurl+url
|
url = mainurl+url
|
||||||
if not url.startswith("http"):
|
if not url.startswith("http://online.wsj.com"):
|
||||||
continue
|
continue
|
||||||
if not url.endswith(".html"):
|
if not url.endswith(".html"):
|
||||||
continue
|
continue
|
||||||
@ -214,48 +305,10 @@ class WSJ(BasicNewsRecipe):
|
|||||||
articles[page_title] = []
|
articles[page_title] = []
|
||||||
articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
|
articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
|
||||||
|
|
||||||
# customization notes: delete sections you are not interested in
|
|
||||||
# set omit_paid_content to False if you want the paid content article previews
|
|
||||||
sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
|
|
||||||
'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
|
|
||||||
omit_paid_content = True
|
|
||||||
|
|
||||||
if 'Front Page' in sectionlist:
|
for page_name,page_title in self.sectionlist:
|
||||||
parse_index_page('/home-page','Front Page',omit_paid_content)
|
parse_index_page(page_name,page_title)
|
||||||
ans.append('Front Page')
|
ans.append(page_title)
|
||||||
if 'Commentary' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-opinion-commentary.html','Commentary',omit_paid_content)
|
|
||||||
ans.append('Commentary')
|
|
||||||
if 'World News' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-global-world.html','World News',omit_paid_content)
|
|
||||||
ans.append('World News')
|
|
||||||
if 'US News' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-world-business.html','US News',omit_paid_content)
|
|
||||||
ans.append('US News')
|
|
||||||
if 'Business' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-business-us.html','Business',omit_paid_content)
|
|
||||||
ans.append('Business')
|
|
||||||
if 'Markets' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-financial-markets-stock.html','Markets',omit_paid_content)
|
|
||||||
ans.append('Markets')
|
|
||||||
if 'Technology' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-tech-technology.html','Technology',omit_paid_content)
|
|
||||||
ans.append('Technology')
|
|
||||||
if 'Personal Finance' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-personal-finance.html','Personal Finance',omit_paid_content)
|
|
||||||
ans.append('Personal Finance')
|
|
||||||
if 'Life & Style' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-lifestyle-arts-entertainment.html','Life & Style',omit_paid_content)
|
|
||||||
ans.append('Life & Style')
|
|
||||||
if 'Real Estate' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-real-estate-homes.html','Real Estate',omit_paid_content)
|
|
||||||
ans.append('Real Estate')
|
|
||||||
if 'Careers' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-career-jobs.html','Careers',omit_paid_content)
|
|
||||||
ans.append('Careers')
|
|
||||||
if 'Small Business' in sectionlist:
|
|
||||||
parse_index_page('/public/page/news-small-business-marketing.html','Small Business',omit_paid_content)
|
|
||||||
ans.append('Small Business')
|
|
||||||
|
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
125
resources/recipes/yementimes.recipe
Normal file
125
resources/recipes/yementimes.recipe
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
class YemenTimesRecipe(BasicNewsRecipe):
    '''
    Recipe for the Yemen Times (yementimes.com).

    The site offers no usable feeds here, so each section page listed in
    ``feeds`` is scraped for article links in parse_index(), and every
    article page is rebuilt from scratch in preprocess_html().
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_YE'
    country = 'YE'
    version = 1

    title = u'Yemen Times'
    publisher = u'yementimes.com'
    category = u'News, Opinion, Yemen'
    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
                                                      'class': 'DMAIN2'}))
    remove_attributes = ['style']

    # Base url; the relative article links found on section pages are appended to it
    INDEX = 'http://www.yementimes.com/'
    # (section title, section page url) pairs; these are html pages, not RSS feeds
    feeds = []
    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))

    extra_css = '''
                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
                div.yemen_byline {font-size: medium; font-weight: bold;}
                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
                '''

    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}

    def get_browser(self):
        '''
        Return the stock browser with gzip handling switched on.
        '''
        br = BasicNewsRecipe.get_browser()
        br.set_handle_gzip(True)

        return br

    def parse_index(self):
        '''
        Scrape every section page in ``feeds`` and return a list of
        (section title, articles) tuples, where each article is a dict with
        the keys title, date, url and description.

        A section page that lacks the expected 'newsbox' div followed by a
        table is logged and skipped instead of aborting the whole download.
        '''
        answer = []
        for feed_title, feed in self.feeds:
            soup = self.index_to_soup(feed)

            # The article list is the first table following the 'newsbox' div
            newsbox = soup.find('div', 'newsbox')
            main = None
            if newsbox is not None:
                main = newsbox.findNextSibling('table')
            if main is None:
                self.log('Could not find the article list for section:', feed_title)
                continue

            articles = []
            for li in main.findAll('li'):
                link = li.a
                # Ignore list items that do not carry a usable link
                if link is None or not link.get('href', None):
                    continue
                title = self.tag_to_string(link)
                url = self.INDEX + link['href']
                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/>&nbsp;'})

            answer.append((feed_title, articles))

        return answer

    def preprocess_html(self, soup):
        '''
        Rebuild the article in an empty document, in this order: headline
        (as h1), lead image, publication date, byline and story body. Parts
        that are missing from the page are left out.
        '''
        freshSoup = self.getFreshSoup(soup)

        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
        if headline:
            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
            img = None
            if div:
                img = div.find('img')

            headline.name = 'h1'
            freshSoup.body.append(headline)
            if img is not None:
                freshSoup.body.append(img)

        byline = soup.find('div', attrs = {'id': 'DVTIT'})
        if byline:
            # Stays None when the byline carries no date span; without this
            # default the check further down raised UnboundLocalError.
            date_div = None
            date_el = byline.find('span')
            if date_el:
                pub_date = self.tag_to_string(date_el)
                date_div = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
                date_div.append(pub_date)
                # Take the date out of the byline so it is not repeated below
                date_el.extract()

            raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')

            if date_div is not None:
                freshSoup.body.append(date_div)
            freshSoup.body.append(author)

        story = soup.find('div', attrs = {'id': 'DVDET'})
        if story:
            # Tables that wrap an image are styled as captions
            for table in story.findAll('table'):
                if table.find('img'):
                    table['class'] = 'yemen_caption'

            freshSoup.body.append(story)

        return freshSoup

    def getFreshSoup(self, oldSoup):
        '''
        Return a new, empty html document whose title is copied from
        oldSoup when oldSoup has one.
        '''
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        # Guard against documents without a head element
        if oldSoup.head is not None and oldSoup.head.title:
            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
        return freshSoup
|
23
resources/viewer/images.js
Normal file
23
resources/viewer/images.js
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* images management
|
||||||
|
* Copyright 2008 Kovid Goyal
|
||||||
|
* License: GNU GPL v3
|
||||||
|
*/
|
||||||
|
|
||||||
|
function scale_images() {
    // Constrain every visible image to the space left in the window:
    // max-width is the window width minus the image's left offset,
    // max-height is the window height; both keep a 5px margin.
    $("img:visible").each(function() {
        var img = $(this);
        var left = img.offset().left;
        img.css("max-width", (window.innerWidth-left-5)+"px")
           .css("max-height", (window.innerHeight-5)+"px");
    });
}
|
||||||
|
|
||||||
|
function setup_image_scaling_handlers() {
    // Scale the images once now, then again whenever the window is resized.
    var rescale = function() {
        scale_images();
    };
    rescale();
    $(window).resize(rescale);
}
|
||||||
|
|
||||||
|
|
@ -2,10 +2,11 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import sys
|
import atexit, os, shutil, sys, tempfile, zipfile
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
|
||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
class Plugin(object):
|
class Plugin(object):
|
||||||
'''
|
'''
|
||||||
@ -225,12 +226,14 @@ class MetadataWriterPlugin(Plugin):
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class CatalogPlugin(Plugin):
|
class CatalogPlugin(Plugin):
|
||||||
'''
|
'''
|
||||||
A plugin that implements a catalog generator.
|
A plugin that implements a catalog generator.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
resources_path = None
|
||||||
|
|
||||||
#: Output file type for which this plugin should be run
|
#: Output file type for which this plugin should be run
|
||||||
#: For example: 'epub' or 'xml'
|
#: For example: 'epub' or 'xml'
|
||||||
file_types = set([])
|
file_types = set([])
|
||||||
@ -248,15 +251,19 @@ class CatalogPlugin(Plugin):
|
|||||||
#: '%default' + "'"))]
|
#: '%default' + "'"))]
|
||||||
|
|
||||||
cli_options = []
|
cli_options = []
|
||||||
|
|
||||||
|
|
||||||
def search_sort_db(self, db, opts):
|
def search_sort_db(self, db, opts):
|
||||||
if opts.search_text:
|
|
||||||
|
# If declared, --ids overrides any declared search criteria
|
||||||
|
if not opts.ids and opts.search_text:
|
||||||
db.search(opts.search_text)
|
db.search(opts.search_text)
|
||||||
|
|
||||||
if opts.sort_by:
|
if opts.sort_by:
|
||||||
# 2nd arg = ascending
|
# 2nd arg = ascending
|
||||||
db.sort(opts.sort_by, True)
|
db.sort(opts.sort_by, True)
|
||||||
|
|
||||||
return db.get_data_as_dict()
|
return db.get_data_as_dict(ids=opts.ids)
|
||||||
|
|
||||||
def get_output_fields(self, opts):
|
def get_output_fields(self, opts):
|
||||||
# Return a list of requested fields, with opts.sort_by first
|
# Return a list of requested fields, with opts.sort_by first
|
||||||
@ -272,11 +279,40 @@ class CatalogPlugin(Plugin):
|
|||||||
fields = list(all_fields & requested_fields)
|
fields = list(all_fields & requested_fields)
|
||||||
else:
|
else:
|
||||||
fields = list(all_fields)
|
fields = list(all_fields)
|
||||||
|
|
||||||
fields.sort()
|
fields.sort()
|
||||||
fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
|
if opts.sort_by:
|
||||||
|
fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
def run(self, path_to_output, opts, db):
|
def initialize(self):
    '''
    If plugin is not a built-in, copy the plugin's .ui and .py files from
    the zip file to $TMPDIR.
    Tab will be dynamically generated and added to the Catalog Options dialog in
    calibre.gui2.dialogs.catalog.py:Catalog

    Nothing is done for built-in plugins or for plugins listed as
    disabled. A file that cannot be extracted is reported on stdout and
    skipped; the remaining files are still processed.
    '''
    from calibre.customize.builtins import plugins as builtin_plugins
    from calibre.customize.ui import config
    from calibre.ptempfile import PersistentTemporaryDirectory

    if type(self) in builtin_plugins or \
            self.name in config['disabled_plugins']:
        return

    files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
    resources = zipfile.ZipFile(self.plugin_path,'r')
    # try/finally so the archive is closed even if creating the temporary
    # directory or an extraction fails unexpectedly
    try:
        if self.resources_path is None:
            self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')

        for resource_name in files_to_copy:
            try:
                resources.extract(resource_name, self.resources_path)
            except Exception:
                # Best effort: a plugin is not required to ship both files.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                print(" customize:__init__.initialize(): %s not found in %s" % (resource_name, os.path.basename(self.plugin_path)))
                continue
    finally:
        resources.close()
|
||||||
|
|
||||||
|
def run(self, path_to_output, opts, db, ids):
|
||||||
'''
|
'''
|
||||||
Run the plugin. Must be implemented in subclasses.
|
Run the plugin. Must be implemented in subclasses.
|
||||||
It should generate the catalog in the format specified
|
It should generate the catalog in the format specified
|
||||||
|
@ -18,7 +18,7 @@ class BLACKBERRY(USBMS):
|
|||||||
|
|
||||||
VENDOR_ID = [0x0fca]
|
VENDOR_ID = [0x0fca]
|
||||||
PRODUCT_ID = [0x8004, 0x0004]
|
PRODUCT_ID = [0x8004, 0x0004]
|
||||||
BCD = [0x0200, 0x0107]
|
BCD = [0x0200, 0x0107, 0x0201]
|
||||||
|
|
||||||
VENDOR_NAME = 'RIM'
|
VENDOR_NAME = 'RIM'
|
||||||
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
|
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
|
||||||
|
@ -86,4 +86,5 @@ class NOOK(USBMS):
|
|||||||
|
|
||||||
return drives
|
return drives
|
||||||
|
|
||||||
|
def sanitize_path_components(self, components):
    '''
    Return a new list of the path components in which every '#' has been
    replaced by '_'.
    '''
    sanitized = []
    for component in components:
        sanitized.append(component.replace('#', '_'))
    return sanitized
|
||||||
|
@ -782,6 +782,13 @@ class Device(DeviceConfig, DevicePlugin):
|
|||||||
'''
|
'''
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
def sanitize_path_components(self, components):
    '''
    Hook for device specific clean-up of the path components of a file
    that is about to be uploaded to the device. This implementation
    returns the components unchanged.
    '''
    return components
|
||||||
|
|
||||||
def create_upload_path(self, path, mdata, fname):
|
def create_upload_path(self, path, mdata, fname):
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
extra_components = []
|
extra_components = []
|
||||||
@ -834,6 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
|
|||||||
|
|
||||||
extra_components = list(map(remove_trailing_periods, extra_components))
|
extra_components = list(map(remove_trailing_periods, extra_components))
|
||||||
components = shorten_components_to(250 - len(path), extra_components)
|
components = shorten_components_to(250 - len(path), extra_components)
|
||||||
|
components = self.sanitize_path_components(components)
|
||||||
filepath = os.path.join(path, *components)
|
filepath = os.path.join(path, *components)
|
||||||
filedir = os.path.dirname(filepath)
|
filedir = os.path.dirname(filepath)
|
||||||
|
|
||||||
|
@ -132,7 +132,8 @@ class FB2MLizer(object):
|
|||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
|
self.opts, self.opts.output_profile)
|
||||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@ -152,7 +153,7 @@ class FB2MLizer(object):
|
|||||||
text = []
|
text = []
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
text.append(self.add_page_anchor(item))
|
text.append(self.add_page_anchor(item))
|
||||||
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||||
return ''.join(text)
|
return ''.join(text)
|
||||||
|
@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
|
|||||||
mangler(oeb, opts)
|
mangler(oeb, opts)
|
||||||
rasterizer = SVGRasterizer()
|
rasterizer = SVGRasterizer()
|
||||||
rasterizer(oeb, opts)
|
rasterizer(oeb, opts)
|
||||||
lit = LitWriter()
|
lit = LitWriter(self.opts)
|
||||||
lit(oeb, output_path)
|
lit(oeb, output_path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -134,7 +134,7 @@ def warn(x):
|
|||||||
class ReBinary(object):
|
class ReBinary(object):
|
||||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||||
|
|
||||||
def __init__(self, root, item, oeb, map=HTML_MAP):
|
def __init__(self, root, item, oeb, opts, map=HTML_MAP):
|
||||||
self.item = item
|
self.item = item
|
||||||
self.logger = oeb.logger
|
self.logger = oeb.logger
|
||||||
self.manifest = oeb.manifest
|
self.manifest = oeb.manifest
|
||||||
@ -143,7 +143,7 @@ class ReBinary(object):
|
|||||||
self.anchors = []
|
self.anchors = []
|
||||||
self.page_breaks = []
|
self.page_breaks = []
|
||||||
self.is_html = is_html = map is HTML_MAP
|
self.is_html = is_html = map is HTML_MAP
|
||||||
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
|
self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
|
||||||
self.tree_to_binary(root)
|
self.tree_to_binary(root)
|
||||||
self.content = self.buf.getvalue()
|
self.content = self.buf.getvalue()
|
||||||
self.ahc = self.build_ahc() if is_html else None
|
self.ahc = self.build_ahc() if is_html else None
|
||||||
@ -295,9 +295,8 @@ def preserve(function):
|
|||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
class LitWriter(object):
|
class LitWriter(object):
|
||||||
def __init__(self):
|
def __init__(self, opts):
|
||||||
# Wow, no options
|
self.opts = opts
|
||||||
pass
|
|
||||||
|
|
||||||
def _litize_oeb(self):
|
def _litize_oeb(self):
|
||||||
oeb = self._oeb
|
oeb = self._oeb
|
||||||
@ -469,7 +468,7 @@ class LitWriter(object):
|
|||||||
secnum = 0
|
secnum = 0
|
||||||
if isinstance(data, etree._Element):
|
if isinstance(data, etree._Element):
|
||||||
self._add_folder(name)
|
self._add_folder(name)
|
||||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
|
rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
|
||||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||||
self._add_file(name + '/aht', rebin.aht, 0)
|
self._add_file(name + '/aht', rebin.aht, 0)
|
||||||
item.page_breaks = rebin.page_breaks
|
item.page_breaks = rebin.page_breaks
|
||||||
@ -562,7 +561,7 @@ class LitWriter(object):
|
|||||||
meta.attrib['ms--minimum_level'] = '0'
|
meta.attrib['ms--minimum_level'] = '0'
|
||||||
meta.attrib['ms--attr5'] = '1'
|
meta.attrib['ms--attr5'] = '1'
|
||||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
|
rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
|
||||||
meta = rebin.content
|
meta = rebin.content
|
||||||
self._meta = meta
|
self._meta = meta
|
||||||
self._add_file('/meta', meta)
|
self._add_file('/meta', meta)
|
||||||
|
@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
|
|||||||
mi.title_sort = title_sort(opts.title)
|
mi.title_sort = title_sort(opts.title)
|
||||||
if getattr(opts, 'tags', None) is not None:
|
if getattr(opts, 'tags', None) is not None:
|
||||||
mi.tags = [t.strip() for t in opts.tags.split(',')]
|
mi.tags = [t.strip() for t in opts.tags.split(',')]
|
||||||
|
if getattr(opts, 'series', None) is not None:
|
||||||
|
mi.series = opts.series.strip()
|
||||||
|
if getattr(opts, 'series_index', None) is not None:
|
||||||
|
mi.series_index = float(opts.series_index.strip())
|
||||||
|
|
||||||
if getattr(opts, 'cover', None) is not None:
|
if getattr(opts, 'cover', None) is not None:
|
||||||
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
||||||
|
@ -134,7 +134,10 @@ def metadata_from_filename(name, pat=None):
|
|||||||
mi.authors = aus
|
mi.authors = aus
|
||||||
if prefs['swap_author_names'] and mi.authors:
|
if prefs['swap_author_names'] and mi.authors:
|
||||||
def swap(a):
|
def swap(a):
|
||||||
parts = a.split()
|
if ',' in a:
|
||||||
|
parts = a.split(',', 1)
|
||||||
|
else:
|
||||||
|
parts = a.split(None, 1)
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
t = parts[-1]
|
t = parts[-1]
|
||||||
parts = parts[:-1]
|
parts = parts[:-1]
|
||||||
|
@ -92,6 +92,7 @@ class MobiMLizer(object):
|
|||||||
def __call__(self, oeb, context):
|
def __call__(self, oeb, context):
|
||||||
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
|
self.opts = context
|
||||||
self.profile = profile = context.dest
|
self.profile = profile = context.dest
|
||||||
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
|
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
|
||||||
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
|
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
|
||||||
@ -114,7 +115,7 @@ class MobiMLizer(object):
|
|||||||
def mobimlize_spine(self):
|
def mobimlize_spine(self):
|
||||||
'Iterate over the spine and convert it to MOBIML'
|
'Iterate over the spine and convert it to MOBIML'
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
|
||||||
body = item.data.find(XHTML('body'))
|
body = item.data.find(XHTML('body'))
|
||||||
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
|
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
|
||||||
nbody = etree.SubElement(nroot, XHTML('body'))
|
nbody = etree.SubElement(nroot, XHTML('body'))
|
||||||
|
@ -563,6 +563,16 @@ class MobiReader(object):
|
|||||||
recindex = attrib.pop(attr, None) or recindex
|
recindex = attrib.pop(attr, None) or recindex
|
||||||
if recindex is not None:
|
if recindex is not None:
|
||||||
attrib['src'] = 'images/%s.jpg' % recindex
|
attrib['src'] = 'images/%s.jpg' % recindex
|
||||||
|
for attr in ('width', 'height'):
|
||||||
|
if attr in attrib:
|
||||||
|
val = attrib[attr]
|
||||||
|
if val.lower().endswith('em'):
|
||||||
|
try:
|
||||||
|
nval = float(val[:-2])
|
||||||
|
nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile
|
||||||
|
attrib[attr] = "%dpx"%int(nval)
|
||||||
|
except:
|
||||||
|
del attrib[attr]
|
||||||
elif tag.tag == 'pre':
|
elif tag.tag == 'pre':
|
||||||
if not tag.text:
|
if not tag.text:
|
||||||
tag.tag = 'div'
|
tag.tag = 'div'
|
||||||
|
@ -1,99 +0,0 @@
|
|||||||
'''
|
|
||||||
Registry associating file extensions with Reader classes.
|
|
||||||
'''
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
|
||||||
|
|
||||||
import sys, os, logging
|
|
||||||
from itertools import chain
|
|
||||||
import calibre
|
|
||||||
from calibre.ebooks.oeb.base import OEBError
|
|
||||||
from calibre.ebooks.oeb.reader import OEBReader
|
|
||||||
from calibre.ebooks.oeb.writer import OEBWriter
|
|
||||||
from calibre.ebooks.lit.reader import LitReader
|
|
||||||
from calibre.ebooks.lit.writer import LitWriter
|
|
||||||
from calibre.ebooks.mobi.reader import MobiReader
|
|
||||||
from calibre.ebooks.mobi.writer import MobiWriter
|
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
|
||||||
from calibre.ebooks.oeb.profile import Context
|
|
||||||
from calibre.utils.config import Config
|
|
||||||
|
|
||||||
__all__ = ['get_reader']
|
|
||||||
|
|
||||||
REGISTRY = {
|
|
||||||
'.opf': (OEBReader, None),
|
|
||||||
'.lit': (LitReader, LitWriter),
|
|
||||||
'.mobi': (MobiReader, MobiWriter),
|
|
||||||
}
|
|
||||||
|
|
||||||
def ReaderFactory(path):
|
|
||||||
if os.path.isdir(path):
|
|
||||||
return OEBReader
|
|
||||||
ext = os.path.splitext(path)[1].lower()
|
|
||||||
Reader = REGISTRY.get(ext, (None, None))[0]
|
|
||||||
if Reader is None:
|
|
||||||
raise OEBError('Unknown e-book file extension %r' % ext)
|
|
||||||
return Reader
|
|
||||||
|
|
||||||
def WriterFactory(path):
|
|
||||||
if os.path.isdir(path):
|
|
||||||
return OEBWriter
|
|
||||||
ext = os.path.splitext(path)[1].lower()
|
|
||||||
if not os.path.exists(path) and not ext:
|
|
||||||
return OEBWriter
|
|
||||||
Writer = REGISTRY.get(ext, (None, None))[1]
|
|
||||||
if Writer is None:
|
|
||||||
raise OEBError('Unknown e-book file extension %r' % ext)
|
|
||||||
return Writer
|
|
||||||
|
|
||||||
|
|
||||||
def option_parser(Reader, Writer):
|
|
||||||
cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
|
|
||||||
Reader.config(cfg)
|
|
||||||
for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
|
|
||||||
Transform.config(cfg)
|
|
||||||
Writer.config(cfg)
|
|
||||||
parser = cfg.option_parser()
|
|
||||||
parser.add_option('--encoding', default=None,
|
|
||||||
help=_('Character encoding for input. Default is to auto detect.'))
|
|
||||||
parser.add_option('-o', '--output', default=None,
|
|
||||||
help=_('Output file. Default is derived from input filename.'))
|
|
||||||
parser.add_option('-p', '--pretty-print', action='store_true',
|
|
||||||
default=False, help=_('Produce more human-readable XML output.'))
|
|
||||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
|
||||||
help=_('Useful for debugging.'))
|
|
||||||
return parser
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
|
||||||
if len(argv) < 3:
|
|
||||||
print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
|
|
||||||
return 1
|
|
||||||
inpath, outpath = argv[1], argv[2]
|
|
||||||
Reader = ReaderFactory(inpath)
|
|
||||||
Writer = WriterFactory(outpath)
|
|
||||||
parser = option_parser(Reader, Writer)
|
|
||||||
opts, args = parser.parse_args(argv[3:])
|
|
||||||
if len(args) != 0:
|
|
||||||
parser.print_help()
|
|
||||||
return 1
|
|
||||||
logger = logging.getLogger('ebook-convert')
|
|
||||||
calibre.setup_cli_handlers(logger, logging.DEBUG)
|
|
||||||
encoding = opts.encoding
|
|
||||||
pretty_print = opts.pretty_print
|
|
||||||
oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
|
|
||||||
context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
|
|
||||||
reader = Reader.generate(opts)
|
|
||||||
writer = Writer.generate(opts)
|
|
||||||
transforms = []
|
|
||||||
for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
|
|
||||||
transforms.append(Transform.generate(opts))
|
|
||||||
reader(oeb, inpath)
|
|
||||||
for transform in transforms:
|
|
||||||
transform(oeb, context)
|
|
||||||
writer(oeb, outpath)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
|
|||||||
class Stylizer(object):
|
class Stylizer(object):
|
||||||
STYLESHEETS = WeakKeyDictionary()
|
STYLESHEETS = WeakKeyDictionary()
|
||||||
|
|
||||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
|
def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
|
||||||
extra_css='', user_css=''):
|
extra_css='', user_css=''):
|
||||||
self.oeb = oeb
|
self.oeb, self.opts = oeb, opts
|
||||||
self.profile = profile
|
self.profile = profile
|
||||||
self.logger = oeb.logger
|
self.logger = oeb.logger
|
||||||
item = oeb.manifest.hrefs[path]
|
item = oeb.manifest.hrefs[path]
|
||||||
@ -249,6 +249,8 @@ class Stylizer(object):
|
|||||||
style.update(self._normalize_font(prop.cssValue))
|
style.update(self._normalize_font(prop.cssValue))
|
||||||
elif name == 'list-style':
|
elif name == 'list-style':
|
||||||
style.update(self._normalize_list_style(prop.cssValue))
|
style.update(self._normalize_list_style(prop.cssValue))
|
||||||
|
elif name == 'text-align':
|
||||||
|
style.update(self._normalize_text_align(prop.cssValue))
|
||||||
else:
|
else:
|
||||||
style[name] = prop.value
|
style[name] = prop.value
|
||||||
if 'font-size' in style:
|
if 'font-size' in style:
|
||||||
@ -306,6 +308,19 @@ class Stylizer(object):
|
|||||||
|
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
def _normalize_text_align(self, cssvalue):
|
||||||
|
style = {}
|
||||||
|
text = cssvalue.cssText
|
||||||
|
if text == 'inherit':
|
||||||
|
style['text-align'] = 'inherit'
|
||||||
|
else:
|
||||||
|
if text in ('left', 'justify'):
|
||||||
|
val = 'left' if self.opts.dont_justify else 'justify'
|
||||||
|
style['text-align'] = val
|
||||||
|
else:
|
||||||
|
style['text-align'] = text
|
||||||
|
return style
|
||||||
|
|
||||||
def _normalize_font(self, cssvalue):
|
def _normalize_font(self, cssvalue):
|
||||||
composition = ('font-style', 'font-variant', 'font-weight',
|
composition = ('font-style', 'font-variant', 'font-weight',
|
||||||
'font-size', 'line-height', 'font-family')
|
'font-size', 'line-height', 'font-family')
|
||||||
@ -411,6 +426,7 @@ class Style(object):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def _unit_convert(self, value, base=None, font=None):
|
def _unit_convert(self, value, base=None, font=None):
|
||||||
|
' Return value in pts'
|
||||||
if isinstance(value, (int, long, float)):
|
if isinstance(value, (int, long, float)):
|
||||||
return value
|
return value
|
||||||
try:
|
try:
|
||||||
@ -447,6 +463,9 @@ class Style(object):
|
|||||||
result = value * 0.40
|
result = value * 0.40
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def pt_to_px(self, value):
|
||||||
|
return (self._profile.dpi / 72.0) * value
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def fontSize(self):
|
def fontSize(self):
|
||||||
def normalize_fontsize(value, base):
|
def normalize_fontsize(value, base):
|
||||||
|
@ -141,7 +141,7 @@ class CSSFlattener(object):
|
|||||||
bs.append('text-align: '+ \
|
bs.append('text-align: '+ \
|
||||||
('left' if self.context.dont_justify else 'justify'))
|
('left' if self.context.dont_justify else 'justify'))
|
||||||
body.set('style', '; '.join(bs))
|
body.set('style', '; '.join(bs))
|
||||||
stylizer = Stylizer(html, item.href, self.oeb, profile,
|
stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
|
||||||
user_css=self.context.extra_css,
|
user_css=self.context.extra_css,
|
||||||
extra_css=css)
|
extra_css=css)
|
||||||
self.stylizers[item] = stylizer
|
self.stylizers[item] = stylizer
|
||||||
|
@ -29,13 +29,14 @@ class CaseMangler(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def generate(cls, opts):
|
def generate(cls, opts):
|
||||||
return cls()
|
return cls()
|
||||||
|
|
||||||
def __call__(self, oeb, context):
|
def __call__(self, oeb, context):
|
||||||
oeb.logger.info('Applying case-transforming CSS...')
|
oeb.logger.info('Applying case-transforming CSS...')
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
|
self.opts = context
|
||||||
self.profile = context.source
|
self.profile = context.source
|
||||||
self.mangle_spine()
|
self.mangle_spine()
|
||||||
|
|
||||||
def mangle_spine(self):
|
def mangle_spine(self):
|
||||||
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
|
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
|
||||||
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
|
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
|
||||||
@ -44,9 +45,9 @@ class CaseMangler(object):
|
|||||||
relhref = item.relhref(href)
|
relhref = item.relhref(href)
|
||||||
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
|
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
|
||||||
rel='stylesheet', href=relhref, type=CSS_MIME)
|
rel='stylesheet', href=relhref, type=CSS_MIME)
|
||||||
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
|
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
|
||||||
self.mangle_elem(html.find(XHTML('body')), stylizer)
|
self.mangle_elem(html.find(XHTML('body')), stylizer)
|
||||||
|
|
||||||
def text_transform(self, transform, text):
|
def text_transform(self, transform, text):
|
||||||
if transform == 'capitalize':
|
if transform == 'capitalize':
|
||||||
return text.title()
|
return text.title()
|
||||||
@ -55,7 +56,7 @@ class CaseMangler(object):
|
|||||||
elif transform == 'lowercase':
|
elif transform == 'lowercase':
|
||||||
return text.lower()
|
return text.lower()
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def split_text(self, text):
|
def split_text(self, text):
|
||||||
results = ['']
|
results = ['']
|
||||||
isupper = text[0].isupper()
|
isupper = text[0].isupper()
|
||||||
@ -66,7 +67,7 @@ class CaseMangler(object):
|
|||||||
isupper = not isupper
|
isupper = not isupper
|
||||||
results.append(char)
|
results.append(char)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def smallcaps_elem(self, elem, attr):
|
def smallcaps_elem(self, elem, attr):
|
||||||
texts = self.split_text(getattr(elem, attr))
|
texts = self.split_text(getattr(elem, attr))
|
||||||
setattr(elem, attr, None)
|
setattr(elem, attr, None)
|
||||||
@ -90,7 +91,7 @@ class CaseMangler(object):
|
|||||||
last.tail = tail
|
last.tail = tail
|
||||||
child.tail = None
|
child.tail = None
|
||||||
last = child
|
last = child
|
||||||
|
|
||||||
def mangle_elem(self, elem, stylizer):
|
def mangle_elem(self, elem, stylizer):
|
||||||
if not isinstance(elem.tag, basestring) or \
|
if not isinstance(elem.tag, basestring) or \
|
||||||
namespace(elem.tag) != XHTML_NS:
|
namespace(elem.tag) != XHTML_NS:
|
||||||
|
@ -44,6 +44,7 @@ class SVGRasterizer(object):
|
|||||||
def __call__(self, oeb, context):
|
def __call__(self, oeb, context):
|
||||||
oeb.logger.info('Rasterizing SVG images...')
|
oeb.logger.info('Rasterizing SVG images...')
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
|
self.opts = context
|
||||||
self.profile = context.dest
|
self.profile = context.dest
|
||||||
self.images = {}
|
self.images = {}
|
||||||
self.dataize_manifest()
|
self.dataize_manifest()
|
||||||
@ -102,7 +103,7 @@ class SVGRasterizer(object):
|
|||||||
def rasterize_spine(self):
|
def rasterize_spine(self):
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
html = item.data
|
html = item.data
|
||||||
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
|
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
|
||||||
self.rasterize_item(item, stylizer)
|
self.rasterize_item(item, stylizer)
|
||||||
|
|
||||||
def rasterize_item(self, item, stylizer):
|
def rasterize_item(self, item, stylizer):
|
||||||
|
@ -20,6 +20,10 @@ class Font(object):
|
|||||||
|
|
||||||
class Column(object):
|
class Column(object):
|
||||||
|
|
||||||
|
# A column contains an element is the element bulges out to
|
||||||
|
# the left or the right by at most HFUZZ*col width.
|
||||||
|
HFUZZ = 0.2
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.left = self.right = self.top = self.bottom = 0
|
self.left = self.right = self.top = self.bottom = 0
|
||||||
self.width = self.height = 0
|
self.width = self.height = 0
|
||||||
@ -41,6 +45,10 @@ class Column(object):
|
|||||||
for x in self.elements:
|
for x in self.elements:
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def contains(self, elem):
|
||||||
|
return elem.left > self.left - self.HFUZZ*self.width and \
|
||||||
|
elem.right < self.right + self.HFUZZ*self.width
|
||||||
|
|
||||||
class Element(object):
|
class Element(object):
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
@ -132,6 +140,18 @@ class Interval(object):
|
|||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return hash('(%f,%f)'%self.left, self.right)
|
return hash('(%f,%f)'%self.left, self.right)
|
||||||
|
|
||||||
|
class Region(object):
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.columns = []
|
||||||
|
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
||||||
|
|
||||||
|
def add_columns(self, columns):
|
||||||
|
if not self.columns:
|
||||||
|
for x in sorted(columns, cmp=lambda x,y: cmp(x.left, y.left)):
|
||||||
|
self.columns.append(x)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
|
||||||
@ -238,11 +258,10 @@ class Page(object):
|
|||||||
return columns
|
return columns
|
||||||
|
|
||||||
def find_elements_in_row_of(self, x):
|
def find_elements_in_row_of(self, x):
|
||||||
interval = Interval(x.top - self.YFUZZ * self.average_text_height,
|
interval = Interval(x.top,
|
||||||
x.top + self.YFUZZ*(1+self.average_text_height))
|
x.top + self.YFUZZ*(1+self.average_text_height))
|
||||||
h_interval = Interval(x.left, x.right)
|
h_interval = Interval(x.left, x.right)
|
||||||
m = max(0, x.idx-15)
|
for y in self.elements[x.idx:x.idx+15]:
|
||||||
for y in self.elements[m:x.idx+15]:
|
|
||||||
if y is not x:
|
if y is not x:
|
||||||
y_interval = Interval(y.top, y.bottom)
|
y_interval = Interval(y.top, y.bottom)
|
||||||
x_interval = Interval(y.left, y.right)
|
x_interval = Interval(y.left, y.right)
|
||||||
|
@ -113,7 +113,8 @@ class PMLMLizer(object):
|
|||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
|
self.opts, self.opts.output_profile)
|
||||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
@ -90,7 +90,8 @@ class RBMLizer(object):
|
|||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
|
self.opts, self.opts.output_profile)
|
||||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@ -111,7 +112,7 @@ class RBMLizer(object):
|
|||||||
output = [u'']
|
output = [u'']
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
output.append(self.add_page_anchor(item))
|
output.append(self.add_page_anchor(item))
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
|
@ -111,12 +111,13 @@ class RTFMLizer(object):
|
|||||||
href = self.oeb_book.guide['titlepage'].href
|
href = self.oeb_book.guide['titlepage'].href
|
||||||
item = self.oeb_book.manifest.hrefs[href]
|
item = self.oeb_book.manifest.hrefs[href]
|
||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
|
self.opts, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += '{\\page } '
|
output += '{\\page } '
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += self.footer()
|
output += self.footer()
|
||||||
output = self.insert_images(output)
|
output = self.insert_images(output)
|
||||||
|
@ -54,7 +54,7 @@ class TXTMLizer(object):
|
|||||||
output.append(self.get_toc())
|
output.append(self.get_toc())
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to TXT...' % item.href)
|
self.log.debug('Converting %s to TXT...' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
||||||
content = self.remove_newlines(content)
|
content = self.remove_newlines(content)
|
||||||
output += self.dump_text(etree.fromstring(content), stylizer)
|
output += self.dump_text(etree.fromstring(content), stylizer)
|
||||||
|
@ -4,9 +4,14 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import Plumber
|
import os
|
||||||
from calibre.utils.logging import Log
|
from optparse import OptionParser
|
||||||
|
|
||||||
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
||||||
|
from calibre.ebooks.conversion.plumber import Plumber
|
||||||
|
from calibre.customize.ui import plugin_for_catalog_format
|
||||||
|
from calibre.utils.logging import Log
|
||||||
|
from calibre.gui2 import choose_dir, Application
|
||||||
|
|
||||||
def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
||||||
abort_after_input_dump=False, log=None):
|
abort_after_input_dump=False, log=None):
|
||||||
@ -20,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
|||||||
|
|
||||||
plumber.run()
|
plumber.run()
|
||||||
|
|
||||||
def gui_catalog(fmt, title, dbspec, ids, out_file_name,
|
def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
|
||||||
notification=DummyReporter(), log=None):
|
notification=DummyReporter(), log=None):
|
||||||
if log is None:
|
if log is None:
|
||||||
log = Log()
|
log = Log()
|
||||||
@ -31,8 +36,28 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
|
|||||||
db = LibraryDatabase2(dbpath)
|
db = LibraryDatabase2(dbpath)
|
||||||
else: # To be implemented in the future
|
else: # To be implemented in the future
|
||||||
pass
|
pass
|
||||||
# Implement the interface to the catalog generating code here
|
|
||||||
db
|
# Create a minimal OptionParser that we can append to
|
||||||
|
parser = OptionParser()
|
||||||
|
args = []
|
||||||
|
parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
|
||||||
|
opts, args = parser.parse_args()
|
||||||
|
|
||||||
|
# Populate opts
|
||||||
|
opts.ids = ids
|
||||||
|
opts.search_text = None
|
||||||
|
opts.sort_by = None
|
||||||
|
|
||||||
|
# Extract the option dictionary to comma-separated lists
|
||||||
|
for option in fmt_options:
|
||||||
|
setattr(opts,option, ','.join(fmt_options[option]))
|
||||||
|
|
||||||
|
# Fetch and run the plugin for fmt
|
||||||
|
plugin = plugin_for_catalog_format(fmt)
|
||||||
|
plugin.run(out_file_name, opts, db)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,39 +6,131 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from PyQt4.Qt import QDialog
|
import os, shutil, sys, tempfile
|
||||||
|
|
||||||
|
from PyQt4.Qt import QDialog, QWidget
|
||||||
|
|
||||||
|
from calibre.customize.ui import config
|
||||||
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
|
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
|
||||||
from calibre.gui2 import dynamic
|
from calibre.gui2 import gprefs, dynamic
|
||||||
from calibre.customize.ui import available_catalog_formats
|
from calibre.customize.ui import available_catalog_formats, catalog_plugins
|
||||||
|
from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
|
||||||
|
|
||||||
class Catalog(QDialog, Ui_Dialog):
|
class Catalog(QDialog, Ui_Dialog):
|
||||||
|
''' Catalog Dialog builder'''
|
||||||
|
widgets = []
|
||||||
|
|
||||||
def __init__(self, parent, dbspec, ids):
|
def __init__(self, parent, dbspec, ids):
|
||||||
|
import re, cStringIO
|
||||||
|
from calibre import prints as info
|
||||||
|
from calibre.gui2 import dynamic
|
||||||
|
from PyQt4.uic import compileUi
|
||||||
|
|
||||||
QDialog.__init__(self, parent)
|
QDialog.__init__(self, parent)
|
||||||
|
|
||||||
|
# Run the dialog setup generated from catalog.ui
|
||||||
self.setupUi(self)
|
self.setupUi(self)
|
||||||
self.dbspec, self.ids = dbspec, ids
|
self.dbspec, self.ids = dbspec, ids
|
||||||
|
|
||||||
|
# Display the number of books we've been passed
|
||||||
self.count.setText(unicode(self.count.text()).format(len(ids)))
|
self.count.setText(unicode(self.count.text()).format(len(ids)))
|
||||||
|
|
||||||
|
# Display the last-used title
|
||||||
self.title.setText(dynamic.get('catalog_last_used_title',
|
self.title.setText(dynamic.get('catalog_last_used_title',
|
||||||
_('My Books')))
|
_('My Books')))
|
||||||
fmts = sorted([x.upper() for x in available_catalog_formats()])
|
|
||||||
|
|
||||||
|
# GwR *** Add option tabs for built-in formats
|
||||||
|
# This code models #69 in calibre/gui2/dialogs/config/__init__.py
|
||||||
|
|
||||||
|
self.fmts = []
|
||||||
|
|
||||||
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
|
from calibre.customize import CatalogPlugin
|
||||||
|
|
||||||
|
for plugin in catalog_plugins():
|
||||||
|
if plugin.name in config['disabled_plugins']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = plugin.name.lower().replace(' ', '_')
|
||||||
|
if type(plugin) in builtin_plugins:
|
||||||
|
#info("Adding widget for builtin Catalog plugin %s" % plugin.name)
|
||||||
|
try:
|
||||||
|
catalog_widget = __import__('calibre.gui2.catalog.'+name,
|
||||||
|
fromlist=[1])
|
||||||
|
pw = catalog_widget.PluginWidget()
|
||||||
|
pw.initialize(name)
|
||||||
|
pw.ICON = I('forward.svg')
|
||||||
|
self.widgets.append(pw)
|
||||||
|
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
|
||||||
|
except ImportError:
|
||||||
|
info("ImportError with %s" % name)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Load dynamic tab
|
||||||
|
form = os.path.join(plugin.resources_path,'%s.ui' % name)
|
||||||
|
klass = os.path.join(plugin.resources_path,'%s.py' % name)
|
||||||
|
compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
|
||||||
|
|
||||||
|
if os.path.exists(form) and os.path.exists(klass):
|
||||||
|
#info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
|
||||||
|
|
||||||
|
# Compile the .ui form provided in plugin.zip
|
||||||
|
if not os.path.exists(compiled_form):
|
||||||
|
# info('\tCompiling form', form)
|
||||||
|
buf = cStringIO.StringIO()
|
||||||
|
compileUi(form, buf)
|
||||||
|
dat = buf.getvalue()
|
||||||
|
dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)',
|
||||||
|
re.DOTALL).sub(r'_("\1")', dat)
|
||||||
|
open(compiled_form, 'wb').write(dat)
|
||||||
|
|
||||||
|
# Import the dynamic PluginWidget() from .py file provided in plugin.zip
|
||||||
|
try:
|
||||||
|
sys.path.insert(0, plugin.resources_path)
|
||||||
|
catalog_widget = __import__(name, fromlist=[1])
|
||||||
|
pw = catalog_widget.PluginWidget()
|
||||||
|
pw.initialize(name)
|
||||||
|
pw.ICON = I('forward.svg')
|
||||||
|
self.widgets.append(pw)
|
||||||
|
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
|
||||||
|
except ImportError:
|
||||||
|
info("ImportError with %s" % name)
|
||||||
|
continue
|
||||||
|
finally:
|
||||||
|
sys.path.remove(plugin.resources_path)
|
||||||
|
|
||||||
|
else:
|
||||||
|
info("No dynamic tab resources found for %s" % name)
|
||||||
|
|
||||||
|
self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
|
||||||
|
for pw in self.widgets:
|
||||||
|
page = self.tabs.addTab(pw,pw.TITLE)
|
||||||
|
|
||||||
|
# Generate a sorted list of installed catalog formats/sync_enabled pairs
|
||||||
|
fmts = sorted([x[0] for x in self.fmts])
|
||||||
|
|
||||||
|
self.sync_enabled_formats = []
|
||||||
|
for fmt in self.fmts:
|
||||||
|
if fmt[1]:
|
||||||
|
self.sync_enabled_formats.append(fmt[0])
|
||||||
|
|
||||||
|
# Callback when format changes
|
||||||
self.format.currentIndexChanged.connect(self.format_changed)
|
self.format.currentIndexChanged.connect(self.format_changed)
|
||||||
|
|
||||||
|
# Add the installed catalog format list to the format QComboBox
|
||||||
self.format.addItems(fmts)
|
self.format.addItems(fmts)
|
||||||
|
|
||||||
pref = dynamic.get('catalog_preferred_format', 'EPUB')
|
pref = dynamic.get('catalog_preferred_format', 'CSV')
|
||||||
idx = self.format.findText(pref)
|
idx = self.format.findText(pref)
|
||||||
if idx > -1:
|
if idx > -1:
|
||||||
self.format.setCurrentIndex(idx)
|
self.format.setCurrentIndex(idx)
|
||||||
|
|
||||||
if self.sync.isEnabled():
|
if self.sync.isEnabled():
|
||||||
self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
|
self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
|
||||||
|
|
||||||
def format_changed(self, idx):
|
def format_changed(self, idx):
|
||||||
cf = unicode(self.format.currentText())
|
cf = unicode(self.format.currentText())
|
||||||
if cf in ('EPUB', 'MOBI'):
|
if cf in self.sync_enabled_formats:
|
||||||
self.sync.setEnabled(True)
|
self.sync.setEnabled(True)
|
||||||
else:
|
else:
|
||||||
self.sync.setDisabled(True)
|
self.sync.setDisabled(True)
|
||||||
|
@ -6,105 +6,121 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>628</width>
|
<width>611</width>
|
||||||
<height>503</height>
|
<height>514</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
<string>Generate catalog</string>
|
<string>Generate catalog</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowIcon">
|
<property name="windowIcon">
|
||||||
<iconset resource="../../../work/calibre/resources/images.qrc">
|
<iconset>
|
||||||
<normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
|
<normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<widget class="QDialogButtonBox" name="buttonBox">
|
||||||
<item row="2" column="0">
|
<property name="geometry">
|
||||||
<widget class="QDialogButtonBox" name="buttonBox">
|
<rect>
|
||||||
<property name="orientation">
|
<x>430</x>
|
||||||
<enum>Qt::Horizontal</enum>
|
<y>470</y>
|
||||||
</property>
|
<width>164</width>
|
||||||
<property name="standardButtons">
|
<height>32</height>
|
||||||
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
<property name="orientation">
|
||||||
</item>
|
<enum>Qt::Horizontal</enum>
|
||||||
<item row="1" column="0">
|
</property>
|
||||||
<widget class="QTabWidget" name="tabs">
|
<property name="standardButtons">
|
||||||
<property name="currentIndex">
|
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
|
||||||
<number>0</number>
|
</property>
|
||||||
</property>
|
</widget>
|
||||||
<widget class="QWidget" name="tab">
|
<widget class="QTabWidget" name="tabs">
|
||||||
<attribute name="title">
|
<property name="geometry">
|
||||||
<string>Catalog options</string>
|
<rect>
|
||||||
</attribute>
|
<x>12</x>
|
||||||
<layout class="QGridLayout" name="gridLayout_2">
|
<y>39</y>
|
||||||
<item row="0" column="0">
|
<width>579</width>
|
||||||
<widget class="QLabel" name="label">
|
<height>411</height>
|
||||||
<property name="text">
|
</rect>
|
||||||
<string>Catalog &format:</string>
|
</property>
|
||||||
</property>
|
<property name="currentIndex">
|
||||||
<property name="buddy">
|
<number>0</number>
|
||||||
<cstring>format</cstring>
|
</property>
|
||||||
</property>
|
<widget class="QWidget" name="tab">
|
||||||
</widget>
|
<attribute name="title">
|
||||||
</item>
|
<string>Catalog options</string>
|
||||||
<item row="0" column="2">
|
</attribute>
|
||||||
<widget class="QComboBox" name="format"/>
|
<layout class="QGridLayout" name="gridLayout_2">
|
||||||
</item>
|
<item row="0" column="0">
|
||||||
<item row="1" column="0">
|
<widget class="QLabel" name="label">
|
||||||
<widget class="QLabel" name="label_2">
|
<property name="text">
|
||||||
<property name="text">
|
<string>Catalog &format:</string>
|
||||||
<string>Catalog &title (existing catalog with the same title will be replaced):</string>
|
</property>
|
||||||
</property>
|
<property name="buddy">
|
||||||
<property name="wordWrap">
|
<cstring>format</cstring>
|
||||||
<bool>true</bool>
|
</property>
|
||||||
</property>
|
</widget>
|
||||||
<property name="buddy">
|
</item>
|
||||||
<cstring>title</cstring>
|
<item row="0" column="2">
|
||||||
</property>
|
<widget class="QComboBox" name="format"/>
|
||||||
</widget>
|
</item>
|
||||||
</item>
|
<item row="1" column="0">
|
||||||
<item row="2" column="1">
|
<widget class="QLabel" name="label_2">
|
||||||
<spacer name="verticalSpacer">
|
<property name="text">
|
||||||
<property name="orientation">
|
<string>Catalog &title (existing catalog with the same title will be replaced):</string>
|
||||||
<enum>Qt::Vertical</enum>
|
</property>
|
||||||
</property>
|
<property name="wordWrap">
|
||||||
<property name="sizeHint" stdset="0">
|
<bool>true</bool>
|
||||||
<size>
|
</property>
|
||||||
<width>20</width>
|
<property name="buddy">
|
||||||
<height>299</height>
|
<cstring>title</cstring>
|
||||||
</size>
|
</property>
|
||||||
</property>
|
</widget>
|
||||||
</spacer>
|
</item>
|
||||||
</item>
|
<item row="1" column="2">
|
||||||
<item row="3" column="0">
|
<widget class="QLineEdit" name="title"/>
|
||||||
<widget class="QCheckBox" name="sync">
|
</item>
|
||||||
<property name="text">
|
<item row="3" column="0">
|
||||||
<string>&Send catalog to device automatically</string>
|
<widget class="QCheckBox" name="sync">
|
||||||
</property>
|
<property name="text">
|
||||||
</widget>
|
<string>&Send catalog to device automatically</string>
|
||||||
</item>
|
</property>
|
||||||
<item row="1" column="2">
|
</widget>
|
||||||
<widget class="QLineEdit" name="title"/>
|
</item>
|
||||||
</item>
|
<item row="2" column="1">
|
||||||
</layout>
|
<spacer name="verticalSpacer">
|
||||||
</widget>
|
<property name="orientation">
|
||||||
</widget>
|
<enum>Qt::Vertical</enum>
|
||||||
</item>
|
</property>
|
||||||
<item row="0" column="0">
|
<property name="sizeHint" stdset="0">
|
||||||
<widget class="QLabel" name="count">
|
<size>
|
||||||
<property name="font">
|
<width>20</width>
|
||||||
<font>
|
<height>299</height>
|
||||||
<weight>75</weight>
|
</size>
|
||||||
<bold>true</bold>
|
</property>
|
||||||
</font>
|
</spacer>
|
||||||
</property>
|
</item>
|
||||||
<property name="text">
|
</layout>
|
||||||
<string>Generate catalog for {0} books</string>
|
</widget>
|
||||||
</property>
|
</widget>
|
||||||
</widget>
|
<widget class="QLabel" name="count">
|
||||||
</item>
|
<property name="geometry">
|
||||||
</layout>
|
<rect>
|
||||||
|
<x>12</x>
|
||||||
|
<y>12</y>
|
||||||
|
<width>205</width>
|
||||||
|
<height>17</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="font">
|
||||||
|
<font>
|
||||||
|
<weight>75</weight>
|
||||||
|
<bold>true</bold>
|
||||||
|
</font>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Generate catalog for {0} books</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
</widget>
|
</widget>
|
||||||
<resources>
|
<resources>
|
||||||
<include location="../../../work/calibre/resources/images.qrc"/>
|
<include location="../../../work/calibre/resources/images.qrc"/>
|
||||||
|
@ -532,7 +532,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
if self.cover_fetcher.exception is not None:
|
if self.cover_fetcher.exception is not None:
|
||||||
err = self.cover_fetcher.exception
|
err = self.cover_fetcher.exception
|
||||||
error_dialog(self, _('Cannot fetch cover'),
|
error_dialog(self, _('Cannot fetch cover'),
|
||||||
_('<b>Could not fetch cover.</b><br/>')+repr(err)).exec_()
|
_('<b>Could not fetch cover.</b><br/>')+unicode(err)).exec_()
|
||||||
return
|
return
|
||||||
|
|
||||||
pix = QPixmap()
|
pix = QPixmap()
|
||||||
|
@ -215,7 +215,7 @@ class TagsModel(QAbstractItemModel):
|
|||||||
return QModelIndex()
|
return QModelIndex()
|
||||||
|
|
||||||
child_item = index.internalPointer()
|
child_item = index.internalPointer()
|
||||||
parent_item = child_item.parent
|
parent_item = getattr(child_item, 'parent', None)
|
||||||
|
|
||||||
if parent_item is self.root_item or parent_item is None:
|
if parent_item is self.root_item or parent_item is None:
|
||||||
return QModelIndex()
|
return QModelIndex()
|
||||||
|
@ -238,19 +238,36 @@ def fetch_scheduled_recipe(arg):
|
|||||||
|
|
||||||
def generate_catalog(parent, dbspec, ids):
|
def generate_catalog(parent, dbspec, ids):
|
||||||
from calibre.gui2.dialogs.catalog import Catalog
|
from calibre.gui2.dialogs.catalog import Catalog
|
||||||
|
|
||||||
|
# Build the Catalog dialog in gui2.dialogs.catalog
|
||||||
d = Catalog(parent, dbspec, ids)
|
d = Catalog(parent, dbspec, ids)
|
||||||
|
|
||||||
if d.exec_() != d.Accepted:
|
if d.exec_() != d.Accepted:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Create the output file
|
||||||
out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
|
out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
|
||||||
|
|
||||||
|
# Retrieve plugin options
|
||||||
|
fmt_options = {}
|
||||||
|
for x in range(d.tabs.count()):
|
||||||
|
if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
|
||||||
|
for fmt in d.fmts:
|
||||||
|
if fmt[0] == d.catalog_format:
|
||||||
|
fmt_options = fmt[2].options()
|
||||||
|
# print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
d.catalog_format,
|
d.catalog_format,
|
||||||
d.catalog_title,
|
d.catalog_title,
|
||||||
dbspec,
|
dbspec,
|
||||||
ids,
|
ids,
|
||||||
out.name,
|
out.name,
|
||||||
|
fmt_options
|
||||||
]
|
]
|
||||||
out.close()
|
out.close()
|
||||||
|
|
||||||
|
# This calls gui2.convert.gui_conversion:gui_catalog()
|
||||||
return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
|
return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
|
||||||
d.catalog_title
|
d.catalog_title
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
'''The main GUI'''
|
'''The main GUI'''
|
||||||
|
|
||||||
import os, sys, textwrap, collections, time
|
import atexit, os, shutil, sys, tempfile, textwrap, collections, time
|
||||||
from xml.parsers.expat import ExpatError
|
from xml.parsers.expat import ExpatError
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
cm.addAction(_('Bulk convert'))
|
cm.addAction(_('Bulk convert'))
|
||||||
cm.addSeparator()
|
cm.addSeparator()
|
||||||
ac = cm.addAction(
|
ac = cm.addAction(
|
||||||
_('Create catalog of the books in your calibre library'))
|
_('Create catalog of books in your calibre library'))
|
||||||
ac.triggered.connect(self.generate_catalog)
|
ac.triggered.connect(self.generate_catalog)
|
||||||
self.action_convert.setMenu(cm)
|
self.action_convert.setMenu(cm)
|
||||||
self._convert_single_hook = partial(self.convert_ebook, bulk=False)
|
self._convert_single_hook = partial(self.convert_ebook, bulk=False)
|
||||||
@ -1359,26 +1359,32 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
|
|
||||||
############################### Generate catalog ###########################
|
############################### Generate catalog ###########################
|
||||||
|
|
||||||
def generate_catalog(self):
|
def generate_catalog(self):
|
||||||
rows = self.library_view.selectionModel().selectedRows()
|
rows = self.library_view.selectionModel().selectedRows()
|
||||||
if not rows:
|
if not rows or len(rows) < 2:
|
||||||
rows = xrange(self.library_view.model().rowCount(QModelIndex()))
|
rows = xrange(self.library_view.model().rowCount(QModelIndex()))
|
||||||
ids = map(self.library_view.model().id, rows)
|
ids = map(self.library_view.model().id, rows)
|
||||||
|
|
||||||
dbspec = None
|
dbspec = None
|
||||||
if not ids:
|
if not ids:
|
||||||
return error_dialog(self, _('No books selected'),
|
return error_dialog(self, _('No books selected'),
|
||||||
_('No books selected to generate catalog for'),
|
_('No books selected to generate catalog for'),
|
||||||
show=True)
|
show=True)
|
||||||
|
|
||||||
|
# Calling gui2.tools:generate_catalog()
|
||||||
ret = generate_catalog(self, dbspec, ids)
|
ret = generate_catalog(self, dbspec, ids)
|
||||||
if ret is None:
|
if ret is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
func, args, desc, out, sync, title = ret
|
func, args, desc, out, sync, title = ret
|
||||||
|
|
||||||
fmt = os.path.splitext(out)[1][1:].upper()
|
fmt = os.path.splitext(out)[1][1:].upper()
|
||||||
job = self.job_manager.run_job(
|
job = self.job_manager.run_job(
|
||||||
Dispatcher(self.catalog_generated), func, args=args,
|
Dispatcher(self.catalog_generated), func, args=args,
|
||||||
description=desc)
|
description=desc)
|
||||||
job.catalog_file_path = out
|
job.catalog_file_path = out
|
||||||
job.catalog_sync, job.catalog_title = sync, title
|
job.fmt = fmt
|
||||||
|
job.catalog_sync, job.catalog_title = sync, title
|
||||||
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
||||||
|
|
||||||
def catalog_generated(self, job):
|
def catalog_generated(self, job):
|
||||||
@ -1392,8 +1398,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
dynamic.set('catalogs_to_be_synced', sync)
|
dynamic.set('catalogs_to_be_synced', sync)
|
||||||
self.status_bar.showMessage(_('Catalog generated.'), 3000)
|
self.status_bar.showMessage(_('Catalog generated.'), 3000)
|
||||||
self.sync_catalogs()
|
self.sync_catalogs()
|
||||||
|
if job.fmt in ['CSV','XML']:
|
||||||
|
export_dir = choose_dir(self, 'Export Catalog Directory',
|
||||||
|
'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
|
||||||
|
if export_dir:
|
||||||
|
destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
|
||||||
|
shutil.copyfile(job.catalog_file_path, destination)
|
||||||
|
|
||||||
############################### Fetch news #################################
|
############################### Fetch news #################################
|
||||||
|
|
||||||
def download_scheduled_recipe(self, arg):
|
def download_scheduled_recipe(self, arg):
|
||||||
|
@ -7,14 +7,14 @@
|
|||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>479</width>
|
<width>479</width>
|
||||||
<height>574</height>
|
<height>606</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
<string>Configure Ebook viewer</string>
|
<string>Configure Ebook viewer</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowIcon">
|
<property name="windowIcon">
|
||||||
<iconset resource="../../../../resources/images.qrc">
|
<iconset>
|
||||||
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
|
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout_4">
|
<layout class="QGridLayout" name="gridLayout_4">
|
||||||
@ -164,7 +164,7 @@
|
|||||||
</item>
|
</item>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="6" column="0" colspan="2">
|
<item row="7" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_remember_window_size">
|
<widget class="QCheckBox" name="opt_remember_window_size">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Remember last used &window size</string>
|
<string>Remember last used &window size</string>
|
||||||
@ -218,6 +218,13 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="6" column="0" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="opt_fit_images">
|
||||||
|
<property name="text">
|
||||||
|
<string>&Resize images larger than the viewer window (needs restart)</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0">
|
<item row="3" column="0">
|
||||||
|
@ -10,7 +10,7 @@ from base64 import b64encode
|
|||||||
from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
|
from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
|
||||||
QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
|
QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
|
||||||
QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
|
QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
|
||||||
QFont, QObject, QApplication, pyqtSignature, QAction
|
QFont, pyqtSignature, QAction
|
||||||
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
||||||
|
|
||||||
from calibre.utils.config import Config, StringConfig
|
from calibre.utils.config import Config, StringConfig
|
||||||
@ -21,7 +21,7 @@ from calibre.constants import iswindows
|
|||||||
from calibre import prints, guess_type
|
from calibre import prints, guess_type
|
||||||
from calibre.gui2.viewer.keys import SHORTCUTS
|
from calibre.gui2.viewer.keys import SHORTCUTS
|
||||||
|
|
||||||
bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = None
|
bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = images =None
|
||||||
|
|
||||||
def load_builtin_fonts():
|
def load_builtin_fonts():
|
||||||
base = P('fonts/liberation/*.ttf')
|
base = P('fonts/liberation/*.ttf')
|
||||||
@ -42,6 +42,8 @@ def config(defaults=None):
|
|||||||
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
||||||
c.add_opt('max_view_width', default=6000,
|
c.add_opt('max_view_width', default=6000,
|
||||||
help=_('Maximum width of the viewer window, in pixels.'))
|
help=_('Maximum width of the viewer window, in pixels.'))
|
||||||
|
c.add_opt('fit_images', default=True,
|
||||||
|
help=_('Resize images larger than the viewer window to fit inside it'))
|
||||||
c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
|
c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
|
||||||
c.add_opt('hyphenate_default_lang', default='en',
|
c.add_opt('hyphenate_default_lang', default='en',
|
||||||
help=_('Default language for hyphenation rules'))
|
help=_('Default language for hyphenation rules'))
|
||||||
@ -59,20 +61,6 @@ def config(defaults=None):
|
|||||||
|
|
||||||
return c
|
return c
|
||||||
|
|
||||||
class PythonJS(QObject):
|
|
||||||
|
|
||||||
def __init__(self, callback):
|
|
||||||
QObject.__init__(self, QApplication.instance())
|
|
||||||
self.setObjectName("py_bridge")
|
|
||||||
self._callback = callback
|
|
||||||
|
|
||||||
@pyqtSignature("QString")
|
|
||||||
def callback(self, msg):
|
|
||||||
print "callback called"
|
|
||||||
self._callback(msg)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ConfigDialog(QDialog, Ui_Dialog):
|
class ConfigDialog(QDialog, Ui_Dialog):
|
||||||
|
|
||||||
def __init__(self, shortcuts, parent=None):
|
def __init__(self, shortcuts, parent=None):
|
||||||
@ -110,6 +98,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
|
self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
|
||||||
p = self.tabs.widget(1)
|
p = self.tabs.widget(1)
|
||||||
p.layout().addWidget(self.shortcut_config)
|
p.layout().addWidget(self.shortcut_config)
|
||||||
|
self.opt_fit_images.setChecked(opts.fit_images)
|
||||||
|
|
||||||
|
|
||||||
def accept(self, *args):
|
def accept(self, *args):
|
||||||
@ -122,6 +111,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
|
c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
|
||||||
c.set('user_css', unicode(self.css.toPlainText()))
|
c.set('user_css', unicode(self.css.toPlainText()))
|
||||||
c.set('remember_window_size', self.opt_remember_window_size.isChecked())
|
c.set('remember_window_size', self.opt_remember_window_size.isChecked())
|
||||||
|
c.set('fit_images', self.opt_fit_images.isChecked())
|
||||||
c.set('max_view_width', int(self.max_view_width.value()))
|
c.set('max_view_width', int(self.max_view_width.value()))
|
||||||
c.set('hyphenate', self.hyphenate.isChecked())
|
c.set('hyphenate', self.hyphenate.isChecked())
|
||||||
idx = self.hyphenate_default_lang.currentIndex()
|
idx = self.hyphenate_default_lang.currentIndex()
|
||||||
@ -157,7 +147,6 @@ class Document(QWebPage):
|
|||||||
self.setObjectName("py_bridge")
|
self.setObjectName("py_bridge")
|
||||||
self.debug_javascript = False
|
self.debug_javascript = False
|
||||||
self.current_language = None
|
self.current_language = None
|
||||||
#self.js_bridge = PythonJS(self.js_callback)
|
|
||||||
|
|
||||||
self.setLinkDelegationPolicy(self.DelegateAllLinks)
|
self.setLinkDelegationPolicy(self.DelegateAllLinks)
|
||||||
self.scroll_marks = []
|
self.scroll_marks = []
|
||||||
@ -197,9 +186,14 @@ class Document(QWebPage):
|
|||||||
opts = config().parse()
|
opts = config().parse()
|
||||||
self.hyphenate = opts.hyphenate
|
self.hyphenate = opts.hyphenate
|
||||||
self.hyphenate_default_lang = opts.hyphenate_default_lang
|
self.hyphenate_default_lang = opts.hyphenate_default_lang
|
||||||
|
self.do_fit_images = opts.fit_images
|
||||||
|
|
||||||
|
def fit_images(self):
|
||||||
|
if self.do_fit_images:
|
||||||
|
self.javascript('setup_image_scaling_handlers()')
|
||||||
|
|
||||||
def load_javascript_libraries(self):
|
def load_javascript_libraries(self):
|
||||||
global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator
|
global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator, images
|
||||||
self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
|
self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
|
||||||
if jquery is None:
|
if jquery is None:
|
||||||
jquery = P('content_server/jquery.js', data=True)
|
jquery = P('content_server/jquery.js', data=True)
|
||||||
@ -215,6 +209,9 @@ class Document(QWebPage):
|
|||||||
if referencing is None:
|
if referencing is None:
|
||||||
referencing = P('viewer/referencing.js', data=True)
|
referencing = P('viewer/referencing.js', data=True)
|
||||||
self.javascript(referencing)
|
self.javascript(referencing)
|
||||||
|
if images is None:
|
||||||
|
images = P('viewer/images.js', data=True)
|
||||||
|
self.javascript(images)
|
||||||
if hyphenation is None:
|
if hyphenation is None:
|
||||||
hyphenation = P('viewer/hyphenation.js', data=True)
|
hyphenation = P('viewer/hyphenation.js', data=True)
|
||||||
self.javascript(hyphenation)
|
self.javascript(hyphenation)
|
||||||
@ -353,7 +350,13 @@ class Document(QWebPage):
|
|||||||
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
|
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
|
||||||
|
|
||||||
def set_bottom_padding(self, amount):
|
def set_bottom_padding(self, amount):
|
||||||
self.javascript('$("body").css("padding-bottom", "%dpx")' % amount)
|
padding = '%dpx'%amount
|
||||||
|
try:
|
||||||
|
old_padding = unicode(self.javascript('$("body").css("padding-bottom")').toString())
|
||||||
|
except:
|
||||||
|
old_padding = ''
|
||||||
|
if old_padding != padding:
|
||||||
|
self.javascript('$("body").css("padding-bottom", "%s")' % padding)
|
||||||
|
|
||||||
|
|
||||||
class EntityDeclarationProcessor(object):
|
class EntityDeclarationProcessor(object):
|
||||||
@ -541,6 +544,7 @@ class DocumentView(QWebView):
|
|||||||
return
|
return
|
||||||
self.loading_url = None
|
self.loading_url = None
|
||||||
self.document.set_bottom_padding(0)
|
self.document.set_bottom_padding(0)
|
||||||
|
self.document.fit_images()
|
||||||
self._size_hint = self.document.mainFrame().contentsSize()
|
self._size_hint = self.document.mainFrame().contentsSize()
|
||||||
scrolled = False
|
scrolled = False
|
||||||
if self.to_bottom:
|
if self.to_bottom:
|
||||||
|
@ -40,8 +40,9 @@ class CSV_XML(CatalogPlugin):
|
|||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
|
||||||
log = Log()
|
log = Log()
|
||||||
self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
|
self.fmt = path_to_output.rpartition('.')[2]
|
||||||
if opts.verbose:
|
|
||||||
|
if False and opts.verbose:
|
||||||
log("%s:run" % self.name)
|
log("%s:run" % self.name)
|
||||||
log(" path_to_output: %s" % path_to_output)
|
log(" path_to_output: %s" % path_to_output)
|
||||||
log(" Output format: %s" % self.fmt)
|
log(" Output format: %s" % self.fmt)
|
||||||
@ -53,7 +54,7 @@ class CSV_XML(CatalogPlugin):
|
|||||||
log(" opts:")
|
log(" opts:")
|
||||||
for key in keys:
|
for key in keys:
|
||||||
log(" %s: %s" % (key, opts_dict[key]))
|
log(" %s: %s" % (key, opts_dict[key]))
|
||||||
|
|
||||||
# Get the sorted, filtered database as a dictionary
|
# Get the sorted, filtered database as a dictionary
|
||||||
data = self.search_sort_db(db, opts)
|
data = self.search_sort_db(db, opts)
|
||||||
|
|
||||||
|
@ -644,6 +644,10 @@ def catalog_option_parser(args):
|
|||||||
output, fmt = validate_command_line(parser, args, log)
|
output, fmt = validate_command_line(parser, args, log)
|
||||||
|
|
||||||
# Add options common to all catalog plugins
|
# Add options common to all catalog plugins
|
||||||
|
parser.add_option('-i', '--ids', default=None, dest='ids',
|
||||||
|
help=_("Comma-separated list of database IDs to catalog.\n"
|
||||||
|
"If declared, --search is ignored.\n"
|
||||||
|
"Default: all"))
|
||||||
parser.add_option('-s', '--search', default=None, dest='search_text',
|
parser.add_option('-s', '--search', default=None, dest='search_text',
|
||||||
help=_("Filter the results by the search query. "
|
help=_("Filter the results by the search query. "
|
||||||
"For the format of the search query, please see "
|
"For the format of the search query, please see "
|
||||||
@ -656,31 +660,6 @@ def catalog_option_parser(args):
|
|||||||
# Add options specific to fmt plugin
|
# Add options specific to fmt plugin
|
||||||
plugin = add_plugin_parser_options(fmt, parser, log)
|
plugin = add_plugin_parser_options(fmt, parser, log)
|
||||||
|
|
||||||
# Merge options from GUI Preferences
|
|
||||||
'''
|
|
||||||
# Placeholder sample code until we implement GUI preferences
|
|
||||||
from calibre.library.save_to_disk import config
|
|
||||||
c = config()
|
|
||||||
for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--dont-'+pref.replace('_', '-')
|
|
||||||
parser.add_option(switch, default=True, action='store_false',
|
|
||||||
help=opt.help+' '+_('Specifying this switch will turn '
|
|
||||||
'this behavior off.'), dest=pref)
|
|
||||||
|
|
||||||
for pref in ['timefmt', 'template', 'formats']:
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--'+pref
|
|
||||||
parser.add_option(switch, default=opt.default,
|
|
||||||
help=opt.help, dest=pref)
|
|
||||||
|
|
||||||
for pref in ('replace_whitespace', 'to_lowercase'):
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--'+pref.replace('_', '-')
|
|
||||||
parser.add_option(switch, default=False, action='store_true',
|
|
||||||
help=opt.help)
|
|
||||||
'''
|
|
||||||
|
|
||||||
return parser, plugin, log
|
return parser, plugin, log
|
||||||
|
|
||||||
def command_catalog(args, dbpath):
|
def command_catalog(args, dbpath):
|
||||||
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
|
|||||||
return 1
|
return 1
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
|
log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
|
||||||
|
if opts.ids:
|
||||||
|
opts.ids = [int(id) for id in opts.ids.split(',')]
|
||||||
|
|
||||||
with plugin:
|
with plugin:
|
||||||
plugin.run(args[1], opts, get_db(dbpath, opts))
|
plugin.run(args[1], opts, get_db(dbpath, opts))
|
||||||
return 0
|
return 0
|
||||||
|
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
for i in iter(self):
|
for i in iter(self):
|
||||||
yield i[x]
|
yield i[x]
|
||||||
|
|
||||||
def get_data_as_dict(self, prefix=None, authors_as_string=False):
|
def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
|
||||||
'''
|
'''
|
||||||
Return all metadata stored in the database as a dict. Includes paths to
|
Return all metadata stored in the database as a dict. Includes paths to
|
||||||
the cover and each format.
|
the cover and each format.
|
||||||
|
|
||||||
:param prefix: The prefix for all paths. By default, the prefix is the absolute path
|
:param prefix: The prefix for all paths. By default, the prefix is the absolute path
|
||||||
to the library folder.
|
to the library folder.
|
||||||
|
:param ids: Set of ids to return the data for. If None return data for
|
||||||
|
all entries in database.
|
||||||
'''
|
'''
|
||||||
if prefix is None:
|
if prefix is None:
|
||||||
prefix = self.library_path
|
prefix = self.library_path
|
||||||
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
data = []
|
data = []
|
||||||
for record in self.data:
|
for record in self.data:
|
||||||
if record is None: continue
|
if record is None: continue
|
||||||
|
db_id = record[FIELD_MAP['id']]
|
||||||
|
if ids is not None and db_id not in ids:
|
||||||
|
continue
|
||||||
x = {}
|
x = {}
|
||||||
for field in FIELDS:
|
for field in FIELDS:
|
||||||
x[field] = record[FIELD_MAP[field]]
|
x[field] = record[FIELD_MAP[field]]
|
||||||
data.append(x)
|
data.append(x)
|
||||||
x['id'] = record[FIELD_MAP['id']]
|
x['id'] = db_id
|
||||||
x['formats'] = []
|
x['formats'] = []
|
||||||
if not x['authors']:
|
if not x['authors']:
|
||||||
x['authors'] = _('Unknown')
|
x['authors'] = _('Unknown')
|
||||||
|
@ -524,6 +524,7 @@ class DynamicConfig(dict):
|
|||||||
pass
|
pass
|
||||||
except:
|
except:
|
||||||
import traceback
|
import traceback
|
||||||
|
print 'Failed to unpickle stored object:'
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
d = {}
|
d = {}
|
||||||
self.clear()
|
self.clear()
|
||||||
|
@ -104,6 +104,7 @@ _extra_lang_codes = {
|
|||||||
'en_CY' : _('English (Cyprus)'),
|
'en_CY' : _('English (Cyprus)'),
|
||||||
'en_PK' : _('English (Pakistan)'),
|
'en_PK' : _('English (Pakistan)'),
|
||||||
'en_SG' : _('English (Singapore)'),
|
'en_SG' : _('English (Singapore)'),
|
||||||
|
'en_YE' : _('English (Yemen)'),
|
||||||
'de_AT' : _('German (AT)'),
|
'de_AT' : _('German (AT)'),
|
||||||
'nl' : _('Dutch (NL)'),
|
'nl' : _('Dutch (NL)'),
|
||||||
'nl_BE' : _('Dutch (BE)'),
|
'nl_BE' : _('Dutch (BE)'),
|
||||||
|
@ -9,9 +9,22 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import __builtin__, sys, os
|
import __builtin__, sys, os
|
||||||
|
|
||||||
|
_dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
|
||||||
|
if _dev_path is not None:
|
||||||
|
_dev_path = os.path.join(os.path.abspath(os.path.dirname(_dev_path)), 'resources')
|
||||||
|
if not os.path.exists(_dev_path):
|
||||||
|
_dev_path = None
|
||||||
|
|
||||||
def get_path(path, data=False):
|
def get_path(path, data=False):
|
||||||
|
global _dev_path
|
||||||
path = path.replace(os.sep, '/')
|
path = path.replace(os.sep, '/')
|
||||||
path = os.path.join(sys.resources_location, *path.split('/'))
|
base = None
|
||||||
|
if _dev_path is not None:
|
||||||
|
if os.path.exists(os.path.join(_dev_path, *path.split('/'))):
|
||||||
|
base = _dev_path
|
||||||
|
if base is None:
|
||||||
|
base = sys.resources_location
|
||||||
|
path = os.path.join(base, *path.split('/'))
|
||||||
if data:
|
if data:
|
||||||
return open(path, 'rb').read()
|
return open(path, 'rb').read()
|
||||||
return path
|
return path
|
||||||
|
@ -357,9 +357,17 @@ class BasicNewsRecipe(Recipe):
|
|||||||
Override in a subclass to customize extraction of the :term:`URL` that points
|
Override in a subclass to customize extraction of the :term:`URL` that points
|
||||||
to the content for each article. Return the
|
to the content for each article. Return the
|
||||||
article URL. It is called with `article`, an object representing a parsed article
|
article URL. It is called with `article`, an object representing a parsed article
|
||||||
from a feed. See `feedsparser <http://www.feedparser.org/docs/>`_.
|
from a feed. See `feedparser <http://www.feedparser.org/docs/>`_.
|
||||||
By default it returns `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
|
By default it looks for the original link (for feeds syndicated via a
|
||||||
|
service like feedburner or pheedo) and if found,
|
||||||
|
returns that or else returns
|
||||||
|
`article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
|
||||||
'''
|
'''
|
||||||
|
for key in article.keys():
|
||||||
|
if key.endswith('_origlink'):
|
||||||
|
url = article[key]
|
||||||
|
if url and url.startswith('http://'):
|
||||||
|
return url
|
||||||
return article.get('link', None)
|
return article.get('link', None)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user