IGN:Easier development environment using the binary build on windows

This commit is contained in:
Kovid Goyal 2009-09-26 11:28:28 -06:00
parent 0c678d9f05
commit c9aeb3fd09
9 changed files with 318 additions and 321 deletions

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cStringIO, re
import os
from setup import Command, __appname__
@ -17,6 +17,8 @@ class GUI(Command):
@classmethod
def find_forms(cls):
from calibre.gui2 import find_forms
return find_forms(cls.SRC)
forms = []
for root, _, files in os.walk(cls.PATH):
for name in files:
@ -27,7 +29,8 @@ class GUI(Command):
@classmethod
def form_to_compiled_form(cls, form):
return form.rpartition('.')[0]+'_ui.py'
from calibre.gui2 import form_to_compiled_form
return form_to_compiled_form(form)
def run(self, opts):
self.build_forms()
@ -53,38 +56,8 @@ class GUI(Command):
def build_forms(self):
from PyQt4.uic import compileUi
forms = self.find_forms()
pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
def sub(match):
ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
return ans
for form in forms:
compiled_form = self.form_to_compiled_form(form)
if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
self.info('\tCompiling form', form)
buf = cStringIO.StringIO()
compileUi(form, buf)
dat = buf.getvalue()
dat = dat.replace('__appname__', __appname__)
dat = dat.replace('import images_rc', '')
dat = dat.replace('from library import', 'from calibre.gui2.library import')
dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
dat = dat.replace('from convert.xpath_wizard import',
'from calibre.gui2.convert.xpath_wizard import')
dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
dat = pat.sub(sub, dat)
if form.endswith('viewer%smain.ui'%os.sep):
self.info('\t\tPromoting WebView')
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
dat += '\nQtWebKit'
open(compiled_form, 'wb').write(dat)
from calibre.gui2 import build_forms
build_forms(self.SRC, info=self.info)
def clean(self):
forms = self.find_forms()

View File

@ -121,6 +121,9 @@ base = os.path.dirname(sys.executable.decode(fenc))
sys.resources_location = os.path.join(base, 'resources')
sys.extensions_location = os.path.join(base, 'plugins')
dv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
if dv and os.path.exists(dv):
sys.path.insert(0, os.path.abspath(dv))
del sys
'''
@ -278,7 +281,9 @@ def main(args=sys.argv):
'packages' : ['PIL', 'lxml', 'cherrypy',
'dateutil', 'dns'],
'excludes' : ["Tkconstants", "Tkinter", "tcl",
"_imagingtk", "ImageTk", "FixTk"
"_imagingtk", "ImageTk",
"FixTk",
'PyQt4.uic.port_v3.proxy_base'
],
'dll_excludes' : ['mswsock.dll', 'tcl85.dll',
'tk85.dll'],

View File

@ -6,10 +6,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Embedded console for debugging.
'''
import sys, os, re, shutil
import sys, os
from calibre.utils.config import OptionParser
from calibre.constants import iswindows, isosx
from calibre.libunzip import update
from calibre import prints
def option_parser():
@ -18,11 +17,6 @@ def option_parser():
Run an embedded python interpreter.
''')
parser.add_option('-u', '--update-module', default=False,
action='store_true',
help='Update the specified module in the frozen library. '+
'Module specifications are of the form full.name.of.module path_to_module.py',
)
parser.add_option('-c', '--command', help='Run python code.', default=None)
parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.')
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
@ -41,39 +35,17 @@ Run an embedded python interpreter.
parser.add_option('--pdfreflow', default=None,
help='Path to PDF file to try and reflow. Output will be placed in '
'current directory. ')
parser.add_option('-f', '--develop-from', default=None,
help=('Develop calibre from the specified path. '
'The path should point to the src sub-directory in the '
'calibre source tree.'))
return parser
def update_zipfile(zipfile, mod, path):
if 'win32' in sys.platform:
print 'WARNING: On Windows Vista using this option may cause windows to put library.zip into the Virtual Store (typically located in c:\Users\username\AppData\Local\VirtualStore). If it does this you must delete it from there after you\'re done debugging).'
pat = re.compile(mod.replace('.', '/')+r'\.py[co]*')
name = mod.replace('.', '/') + os.path.splitext(path)[-1]
update(zipfile, [pat], [path], [name])
def update_site_packages(sp, mod, path):
    '''
    Copy the module source at ``path`` into the directory tree ``sp``.

    :param sp: Root directory that receives the file.
    :param mod: Dotted module name; every component becomes one path
                element below ``sp`` and ``.py`` is appended to the last.
    :param path: File to copy (copied with ``shutil.copy2``).
    '''
    components = mod.split('.')
    target = os.path.join(sp, *components) + '.py'
    shutil.copy2(path, target)
def update_module(mod, path):
    '''
    Replace the module ``mod`` of a frozen install with the file at ``path``.

    The place that gets updated is chosen from the platform flags:

      * ``iswindows``: ``library.zip`` next to ``sys.executable``
      * ``sys.new_app_bundle`` is set: the file is copied below
        ``sys.site_packages`` by update_site_packages()
      * ``isosx``: ``site-packages.zip`` below the frameworks directory
      * otherwise: ``loader.zip`` inside ``sys.frozen_path``

    :param mod: Dotted module name.
    :param path: Path to the replacement source file.
    :raises RuntimeError: When ``sys`` has no ``frozen`` attribute.
    :raises ValueError: When no archive to update could be determined.
    '''
    if not hasattr(sys, 'frozen'):
        raise RuntimeError('Modules can only be updated in frozen installs.')
    zp = None
    if iswindows:
        zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
    elif getattr(sys, 'new_app_bundle', False):
        update_site_packages(sys.site_packages, mod, path)
        # The update is complete at this point. Without this return the
        # code below saw zp == None and raised ValueError even though the
        # module had just been copied successfully.
        return
    elif isosx:
        zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
                          'Resources', 'lib',
                          'python'+'.'.join(map(str, sys.version_info[:2])),
                          'site-packages.zip')
    else:
        zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
    if zp is None:
        raise ValueError('Updating modules is not supported on this platform.')
    update_zipfile(zp, mod, path)
def develop_from(path):
    '''
    Compile the Qt Designer ``.ui`` forms of the source tree at ``path``.

    :param path: Handed unchanged to ``calibre.gui2.build_forms``. The
                 ``--develop-from`` help text asks for the ``src``
                 sub-directory of the calibre source tree.
    '''
    from calibre.gui2 import build_forms as compile_ui_forms
    print('Compiling .ui forms...')
    compile_ui_forms(path)
def migrate(old, new):
from calibre.utils.config import prefs
@ -189,9 +161,6 @@ def main(args=sys.argv):
if opts.gui:
from calibre.gui2.main import main
main(['calibre'])
elif opts.update_module:
mod, path = args[1:3]
update_module(mod, os.path.expanduser(path))
elif opts.command:
sys.argv = args[:1]
exec opts.command
@ -218,6 +187,8 @@ def main(args=sys.argv):
from calibre.utils.logging import default_log
opts2, args = px().parse_args(['xxxx', '-vvvv', opts.pdfreflow])
run(opts2, opts.pdfreflow, default_log)
elif opts.develop_from is not None:
develop_from(opts.develop_from)
else:
from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed()

View File

@ -525,3 +525,53 @@ def is_ok_to_use_qt():
gui_thread = QThread.currentThread()
return gui_thread is QThread.currentThread()
def find_forms(srcdir):
    '''
    Return the absolute paths of all ``.ui`` files found anywhere below
    ``<srcdir>/calibre/gui2``.

    :param srcdir: Directory that contains the ``calibre`` package.
    :return: List of absolute paths, in ``os.walk`` order.
    '''
    gui_dir = os.path.join(srcdir, 'calibre', 'gui2')
    return [os.path.abspath(os.path.join(dirpath, fname))
            for dirpath, _subdirs, fnames in os.walk(gui_dir)
            for fname in fnames
            if fname.endswith('.ui')]
def form_to_compiled_form(form):
    '''
    Map the path of a ``.ui`` form to the path of its compiled module.

    Everything from the last ``.`` onwards is replaced by ``_ui.py``
    (``main.ui`` becomes ``main_ui.py``).
    '''
    stem, _dot, _extension = form.rpartition('.')
    return stem + '_ui.py'
def build_forms(srcdir, info=None):
    '''
    Compile every ``.ui`` form returned by find_forms() into its
    ``*_ui.py`` module, skipping forms whose compiled module exists and is
    not older than the form.

    The code generated by ``PyQt4.uic.compileUi`` is post-processed before
    it is written: imports are rewritten to absolute ``calibre.gui2``
    imports, ``QApplication.translate`` calls become ``_()`` calls and
    ``:/images/...`` resource strings are wrapped in ``I()``.

    :param srcdir: Directory that contains the ``calibre`` package.
    :param info: Callable used to report progress. Defaults to
                 ``calibre.prints``.
    '''
    import re, cStringIO
    from PyQt4.uic import compileUi
    forms = find_forms(srcdir)
    if info is None:
        from calibre import prints
        info = prints

    # Both patterns are loop invariant, so they are compiled once up front.
    pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
    translate_pat = re.compile(
        r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)',
        re.DOTALL)

    def sub(match):
        # Re-emit the matched image path, with its original quote
        # character, as the argument of I().
        ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
        return ans

    for form in forms:
        compiled_form = form_to_compiled_form(form)
        if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
            info('\tCompiling form', form)
            buf = cStringIO.StringIO()
            compileUi(form, buf)
            dat = buf.getvalue()
            dat = dat.replace('__appname__', 'calibre')
            dat = dat.replace('import images_rc', '')
            dat = dat.replace('from library import', 'from calibre.gui2.library import')
            dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
            dat = dat.replace('from convert.xpath_wizard import',
                'from calibre.gui2.convert.xpath_wizard import')
            dat = translate_pat.sub(r'_("\1")', dat)
            # This string is a date format, so it is taken out of _() again.
            dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
            dat = pat.sub(sub, dat)

            if form.endswith('viewer%smain.ui'%os.sep):
                info('\t\tPromoting WebView')
                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
                # NOTE(review): the bare name presumably keeps the generated
                # QtWebKit import referenced - confirm.
                dat += '\nQtWebKit'

            # Close the file explicitly instead of relying on garbage
            # collection to flush and release the handle.
            out = open(compiled_form, 'wb')
            try:
                out.write(dat)
            finally:
                out.close()

View File

@ -1,57 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.business-standard.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessStandard(BasicNewsRecipe):
    # Recipe for the Business Standard (www.business-standard.com) RSS
    # feeds. Articles are fetched through the site's print page.
    title                 = 'Business Standard'
    __author__            = 'Darko Miletic'
    description           = "India's most respected business daily"
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    publisher             = 'Business Standard Limited'
    category              = 'news, business, money, india, world'
    language              = 'en_IN'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_attributes = ['style']
    remove_tags = [dict(name=['object','link','script','iframe'])]

    feeds = [
        (u'News Now', u'http://feeds.business-standard.com/News-Now.xml'),
        (u'Banking & finance', u'http://feeds.business-standard.com/Banking-Finance-All.xml'),
        (u'Companies & Industry', u'http://feeds.business-standard.com/Companies-Industry-All.xml'),
        (u'Economy & Policy', u'http://feeds.business-standard.com/Economy-Policy-All.xml'),
        (u'Tech World', u'http://feeds.business-standard.com/Tech-World-All.xml'),
        (u'Life & Leisure', u'http://feeds.business-standard.com/Life-Leisure-All.xml'),
        (u'Markets & Investing', u'http://feeds.business-standard.com/Markets-Investing-All.xml'),
        (u'Management & Mktg', u'http://feeds.business-standard.com/Management-Mktg-All.xml'),
        (u'Automobiles', u'http://feeds.business-standard.com/Automobiles.xml'),
        (u'Aviation', u'http://feeds.business-standard.com/Aviation.xml'),
    ]

    def print_version(self, url):
        # Everything after the last 'autono=' identifies the article; when
        # the marker is absent the whole URL is used.
        article_no = url.rpartition('autono=')[2]
        # 'tp' is switched on only for URLs that carry a 'bKeyFlag='
        # parameter.
        tp = 'on' if 'bKeyFlag=' in url else ''
        return ('http://www.business-standard.com/india/printpage.php?autono='
                + article_no + '&tp=' + tp)

    def get_article_url(self, article):
        # The feed entry's guid is used as the article URL.
        return article.get('guid', None)
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.business-standard.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessStandard(BasicNewsRecipe):
    # Recipe for the Business Standard (www.business-standard.com) RSS
    # feeds. Articles are fetched through the site's print page.
    title                 = 'Business Standard'
    __author__            = 'Darko Miletic'
    description           = "India's most respected business daily"
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    publisher             = 'Business Standard Limited'
    category              = 'news, business, money, india, world'
    language              = 'en_IN'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_attributes = ['style']
    remove_tags = [dict(name=['object','link','script','iframe'])]

    feeds = [
        (u'News Now', u'http://feeds.business-standard.com/News-Now.xml'),
        (u'Banking & finance', u'http://feeds.business-standard.com/Banking-Finance-All.xml'),
        (u'Companies & Industry', u'http://feeds.business-standard.com/Companies-Industry-All.xml'),
        (u'Economy & Policy', u'http://feeds.business-standard.com/Economy-Policy-All.xml'),
        (u'Tech World', u'http://feeds.business-standard.com/Tech-World-All.xml'),
        (u'Life & Leisure', u'http://feeds.business-standard.com/Life-Leisure-All.xml'),
        (u'Markets & Investing', u'http://feeds.business-standard.com/Markets-Investing-All.xml'),
        (u'Management & Mktg', u'http://feeds.business-standard.com/Management-Mktg-All.xml'),
        (u'Automobiles', u'http://feeds.business-standard.com/Automobiles.xml'),
        (u'Aviation', u'http://feeds.business-standard.com/Aviation.xml'),
    ]

    def print_version(self, url):
        # Everything after the last 'autono=' identifies the article; when
        # the marker is absent the whole URL is used.
        article_no = url.rpartition('autono=')[2]
        # 'tp' is switched on only for URLs that carry a 'bKeyFlag='
        # parameter.
        tp = 'on' if 'bKeyFlag=' in url else ''
        return ('http://www.business-standard.com/india/printpage.php?autono='
                + article_no + '&tp=' + tp)

    def get_article_url(self, article):
        # The feed entry's guid is used as the article URL.
        return article.get('guid', None)

View File

@ -1,73 +1,72 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
mondediplo.com
'''
import re, urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
    # Recipe for the English edition of Le Monde diplomatique
    # (mondediplo.com). The article list is scraped from an index page by
    # parse_index() instead of being read from RSS feeds.
    title                 = 'Le Monde diplomatique - English edition'
    __author__            = 'Darko Miletic'
    description           = 'Real journalism making sense of the world around us'
    publisher             = 'Le Monde diplomatique'
    category              = 'news, politics, world'
    no_stylesheets        = True
    oldest_article        = 31
    delay                 = 1
    encoding              = 'utf-8'
    needs_subscription    = True
    PREFIX                = 'http://mondediplo.com/'
    # Page that the login form data is posted to.
    LOGIN                 = PREFIX + '2009/09/02congo'
    # Index URL: PREFIX followed by a year/month path, computed once when
    # the class body is executed.
    INDEX                 = PREFIX + strftime('%Y/%m/')
    use_embedded_content  = False
    language              = 'en'

    # The key is 'comments' (not 'comment'), matching the key used by the
    # other recipes for the description.
    conversion_options = {
        'comments'  : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    def get_browser(self):
        # Open the login page first, then post the credentials to it when
        # both a username and a password have been supplied.
        br = BasicNewsRecipe.get_browser()
        br.open(self.LOGIN)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({ 'login':self.username
                                     ,'pass':self.password
                                     ,'enter':'enter'
                                   })
            br.open(self.LOGIN,data)
        return br

    keep_only_tags = [dict(name='div', attrs={'id':'contenu'})]
    remove_tags = [dict(name=['object','link','script','iframe','base'])]

    def parse_index(self):
        # Build a single feed, titled after the index page's <title>, from
        # the <li> entries of the 'som_num' div.
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cnt = soup.find('div',attrs={'class':'som_num'})
        for item in cnt.findAll('li'):
            description = ''
            feed_link = item.find('a')
            desc = item.find('div',attrs={'class':'chapo'})
            if desc:
                description = desc.string
            if feed_link and feed_link.has_key('href'):
                # Only the part of the href after '/../' is appended to
                # PREFIX.
                url = self.PREFIX + feed_link['href'].partition('/../')[2]
                title = self.tag_to_string(feed_link)
                date = strftime(self.timefmt)
                articles.append({
                    'title'       :title
                    ,'date'        :date
                    ,'url'         :url
                    ,'description':description
                })
        return [(soup.head.title.string, articles)]
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
mondediplo.com
'''
import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
    # Recipe for the English edition of Le Monde diplomatique
    # (mondediplo.com). The article list is scraped from an index page by
    # parse_index() instead of being read from RSS feeds.
    title                 = 'Le Monde diplomatique - English edition'
    __author__            = 'Darko Miletic'
    description           = 'Real journalism making sense of the world around us'
    publisher             = 'Le Monde diplomatique'
    category              = 'news, politics, world'
    no_stylesheets        = True
    oldest_article        = 31
    delay                 = 1
    encoding              = 'utf-8'
    needs_subscription    = True
    PREFIX                = 'http://mondediplo.com/'
    # Page that the login form data is posted to.
    LOGIN                 = PREFIX + '2009/09/02congo'
    # Index URL: PREFIX followed by a year/month path, computed once when
    # the class body is executed.
    INDEX                 = PREFIX + strftime('%Y/%m/')
    use_embedded_content  = False
    language              = 'en'

    # The key is 'comments' (not 'comment'), matching the key used by the
    # other recipes for the description.
    conversion_options = {
        'comments'  : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    def get_browser(self):
        # Open the login page first, then post the credentials to it when
        # both a username and a password have been supplied.
        br = BasicNewsRecipe.get_browser()
        br.open(self.LOGIN)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({ 'login':self.username
                                     ,'pass':self.password
                                     ,'enter':'enter'
                                   })
            br.open(self.LOGIN,data)
        return br

    keep_only_tags = [dict(name='div', attrs={'id':'contenu'})]
    remove_tags = [dict(name=['object','link','script','iframe','base'])]

    def parse_index(self):
        # Build a single feed, titled after the index page's <title>, from
        # the <li> entries of the 'som_num' div.
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cnt = soup.find('div',attrs={'class':'som_num'})
        for item in cnt.findAll('li'):
            description = ''
            feed_link = item.find('a')
            desc = item.find('div',attrs={'class':'chapo'})
            if desc:
                description = desc.string
            if feed_link and feed_link.has_key('href'):
                # Only the part of the href after '/../' is appended to
                # PREFIX.
                url = self.PREFIX + feed_link['href'].partition('/../')[2]
                title = self.tag_to_string(feed_link)
                date = strftime(self.timefmt)
                articles.append({
                    'title'       :title
                    ,'date'        :date
                    ,'url'         :url
                    ,'description':description
                })
        return [(soup.head.title.string, articles)]

View File

@ -16,7 +16,7 @@ class NYTimes(BasicNewsRecipe):
__author__ = 'GRiker'
language = _('English')
description = 'Top Stories from the New York Times'
# List of sections typically included in Top Stories. Use a keyword from the
# right column in the excludeSectionKeywords[] list to skip downloading that section
sections = {
@ -39,7 +39,7 @@ class NYTimes(BasicNewsRecipe):
'world' : 'World'
}
# By default, no sections are skipped.
# By default, no sections are skipped.
excludeSectionKeywords = []
# Add section keywords from the right column above to skip that section
@ -49,7 +49,7 @@ class NYTimes(BasicNewsRecipe):
# excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
# Fetch only Top Stories
# excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
# The maximum number of articles that will be downloaded
max_articles_per_feed = 40
@ -63,7 +63,7 @@ class NYTimes(BasicNewsRecipe):
dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles',
'portfolioInline','articleInline','readerscomment',
'nytRating']}) ]
encoding = 'cp1252'
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \
@ -105,13 +105,13 @@ class NYTimes(BasicNewsRecipe):
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
@ -122,7 +122,7 @@ class NYTimes(BasicNewsRecipe):
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
soup = get_the_soup(docEncoding, url_or_raw)
return soup
@ -133,7 +133,7 @@ class NYTimes(BasicNewsRecipe):
feed = key = 'All Top Stories'
articles[key] = []
ans.append(key)
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the outer table
@ -242,10 +242,10 @@ class NYTimes(BasicNewsRecipe):
if url == article['url'] :
duplicateFound = True
break
if duplicateFound:
if duplicateFound:
# Continue fetching, don't add this article
continue
continue
if not articles.has_key(feed):
articles[feed] = []
@ -254,7 +254,7 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author, content=''))
ans = self.sort_index_by(ans, {'Top Stories':-1})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans
def strip_anchors(self,soup):
@ -270,7 +270,7 @@ class NYTimes(BasicNewsRecipe):
# refresh = soup.find('meta', {'http-equiv':'refresh'})
# if refresh is None:
# return self.strip_anchors(soup)
#
#
# content = refresh.get('content').partition('=')[2]
# raw = self.browser.open('http://www.nytimes.com'+content).read()
# soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
@ -280,7 +280,7 @@ class NYTimes(BasicNewsRecipe):
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('http://www.nytimes.com'+content).read()
soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
soup = self.strip_anchors(soup)
# Test for empty content
@ -291,7 +291,7 @@ class NYTimes(BasicNewsRecipe):
return soup
else:
print "no allowed content found, removing article"
raise StringError
raise Exception()
def postprocess_html(self,soup, True):
@ -334,7 +334,7 @@ class NYTimes(BasicNewsRecipe):
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
# Synthesize a section header
dsk = soup.find('meta', attrs={'name':'dsk'})
if dsk is not None and dsk.has_key('content'):
@ -343,12 +343,12 @@ class NYTimes(BasicNewsRecipe):
hTag.insert(0,NavigableString(dsk['content']))
articleTag = soup.find(True, attrs={'id':'article'})
articleTag.insert(0,hTag)
# Add class="articleBody" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag is not None :
divTag['class'] = divTag['id']
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag is not None :

View File

@ -1,51 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.smashingmagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class SmashingMagazine(BasicNewsRecipe):
    # Recipe for the Smashing Magazine (www.smashingmagazine.com) article
    # feed.
    title                 = 'Smashing Magazine'
    __author__            = 'Darko Miletic'
    description           = 'We smash you with the information that will make your life easier, really'
    oldest_article        = 20
    language              = 'en'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'Smashing Magazine'
    category              = 'news, web, IT, css, javascript, html'
    encoding              = 'utf-8'

    conversion_options = {
        'comments'   : description
        ,'tags'      : category
        ,'publisher' : publisher
    }

    keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
    remove_tags_after = dict(name='ul',attrs={'class':'social'})
    remove_tags = [
        dict(name=['link','object'])
        ,dict(name='h1',attrs={'class':'logo'})
        ,dict(name='div',attrs={'id':'booklogosec'})
        ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
    ]

    feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]

    def preprocess_html(self, soup):
        # Remove every 'leftframe' div that does not contain an <h1>.
        for frame in soup.findAll('div',attrs={'class':'leftframe'}):
            if frame.find('h1') is None:
                frame.extract()
        # Turn an <a> that directly wraps an image into a <div>.
        for img in soup.findAll('img'):
            parent = img.parent
            if parent.name == 'a':
                parent.name = 'div'
        return soup
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.smashingmagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class SmashingMagazine(BasicNewsRecipe):
    # Recipe for the Smashing Magazine (www.smashingmagazine.com) article
    # feed.
    title                 = 'Smashing Magazine'
    __author__            = 'Darko Miletic'
    description           = 'We smash you with the information that will make your life easier, really'
    oldest_article        = 20
    language              = 'en'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'Smashing Magazine'
    category              = 'news, web, IT, css, javascript, html'
    encoding              = 'utf-8'

    conversion_options = {
        'comments'   : description
        ,'tags'      : category
        ,'publisher' : publisher
    }

    keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
    remove_tags_after = dict(name='ul',attrs={'class':'social'})
    remove_tags = [
        dict(name=['link','object'])
        ,dict(name='h1',attrs={'class':'logo'})
        ,dict(name='div',attrs={'id':'booklogosec'})
        ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
    ]

    feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]

    def preprocess_html(self, soup):
        # Remove every 'leftframe' div that does not contain an <h1>.
        for frame in soup.findAll('div',attrs={'class':'leftframe'}):
            if frame.find('h1') is None:
                frame.extract()
        # Turn an <a> that directly wraps an image into a <div>.
        for img in soup.findAll('img'):
            parent = img.parent
            if parent.name == 'a':
                parent.name = 'div'
        return soup

View File

@ -1,47 +1,47 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thestar.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheTorontoStar(BasicNewsRecipe):
    # Recipe for the Toronto Star (www.thestar.com) RSS feeds. Articles are
    # fetched through the site's print view.
    title                 = 'The Toronto Star'
    __author__            = 'Darko Miletic'
    description           = "Canada's largest daily newspaper"
    oldest_article        = 2
    language              = 'en_CA'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'The Toronto Star'
    category              = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson"
    encoding              = 'utf-8'
    extra_css             = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} '

    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})]
    remove_attributes = ['style']

    # NOTE(review): 'Entertainment' and 'Living' use the same category id
    # (296) as 'News' - confirm that this is intended.
    feeds = [
        (u'News', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Opinions', u'http://www.thestar.com/rss/0?searchMode=Query&categories=311'),
        (u'Business', u'http://www.thestar.com/rss/0?searchMode=Query&categories=294'),
        (u'Sports', u'http://www.thestar.com/rss/0?searchMode=Query&categories=295'),
        (u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Living', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Travel', u'http://www.thestar.com/rss/82858?searchMode=Lineup'),
        (u'Science', u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300'),
    ]

    def print_version(self, url):
        # The print view differs from the article URL only in this path
        # segment.
        article_segment, print_segment = '/article/', '/printArticle/'
        return url.replace(article_segment, print_segment)
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thestar.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheTorontoStar(BasicNewsRecipe):
    # Recipe for the Toronto Star (www.thestar.com) RSS feeds. Articles are
    # fetched through the site's print view.
    title                 = 'The Toronto Star'
    __author__            = 'Darko Miletic'
    description           = "Canada's largest daily newspaper"
    oldest_article        = 2
    language              = 'en_CA'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'The Toronto Star'
    category              = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson"
    encoding              = 'utf-8'
    extra_css             = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} '

    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})]
    remove_attributes = ['style']

    # NOTE(review): 'Entertainment' and 'Living' use the same category id
    # (296) as 'News' - confirm that this is intended.
    feeds = [
        (u'News', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Opinions', u'http://www.thestar.com/rss/0?searchMode=Query&categories=311'),
        (u'Business', u'http://www.thestar.com/rss/0?searchMode=Query&categories=294'),
        (u'Sports', u'http://www.thestar.com/rss/0?searchMode=Query&categories=295'),
        (u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Living', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296'),
        (u'Travel', u'http://www.thestar.com/rss/82858?searchMode=Lineup'),
        (u'Science', u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300'),
    ]

    def print_version(self, url):
        # The print view differs from the article URL only in this path
        # segment.
        article_segment, print_segment = '/article/', '/printArticle/'
        return url.replace(article_segment, print_segment)