IGN:Easier development environment using the binary build on windows

Kovid Goyal 2009-09-26 11:28:28 -06:00
parent 0c678d9f05
commit c9aeb3fd09
9 changed files with 318 additions and 321 deletions

View File

@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, cStringIO, re
+import os
 
 from setup import Command, __appname__
@@ -17,6 +17,8 @@ class GUI(Command):
 
     @classmethod
     def find_forms(cls):
+        from calibre.gui2 import find_forms
+        return find_forms(cls.SRC)
         forms = []
         for root, _, files in os.walk(cls.PATH):
            for name in files:
@@ -27,7 +29,8 @@ class GUI(Command):
 
     @classmethod
     def form_to_compiled_form(cls, form):
-        return form.rpartition('.')[0]+'_ui.py'
+        from calibre.gui2 import form_to_compiled_form
+        return form_to_compiled_form(form)
 
     def run(self, opts):
         self.build_forms()
@@ -53,38 +56,8 @@ class GUI(Command):
 
     def build_forms(self):
-        from PyQt4.uic import compileUi
-        forms = self.find_forms()
-        pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
-        def sub(match):
-            ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
-            return ans
-        for form in forms:
-            compiled_form = self.form_to_compiled_form(form)
-            if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
-                self.info('\tCompiling form', form)
-                buf = cStringIO.StringIO()
-                compileUi(form, buf)
-                dat = buf.getvalue()
-                dat = dat.replace('__appname__', __appname__)
-                dat = dat.replace('import images_rc', '')
-                dat = dat.replace('from library import', 'from calibre.gui2.library import')
-                dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
-                dat = dat.replace('from convert.xpath_wizard import',
-                        'from calibre.gui2.convert.xpath_wizard import')
-                dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
-                dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
-                dat = pat.sub(sub, dat)
-                if form.endswith('viewer%smain.ui'%os.sep):
-                    self.info('\t\tPromoting WebView')
-                    dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
-                    dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
-                    dat += '\nQtWebKit'
-                open(compiled_form, 'wb').write(dat)
+        from calibre.gui2 import build_forms
+        build_forms(self.SRC, info=self.info)
 
     def clean(self):
         forms = self.find_forms()
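Note: the setup command now simply delegates to helpers that live inside the calibre package itself (added to calibre.gui2 further down in this commit), so the same form-compilation code is importable from a binary install. A minimal sketch of driving those helpers directly; the checkout path is a hypothetical example:

    from calibre.gui2 import find_forms, build_forms

    src = r'C:\calibre-src\src'  # hypothetical path to the src dir of a checkout
    print 'Found %d .ui forms'%len(find_forms(src))
    build_forms(src)  # recompiles only forms whose .ui is newer than the _ui.py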

View File

@@ -121,6 +121,9 @@ base = os.path.dirname(sys.executable.decode(fenc))
 sys.resources_location = os.path.join(base, 'resources')
 sys.extensions_location = os.path.join(base, 'plugins')
+dv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
+if dv and os.path.exists(dv):
+    sys.path.insert(0, os.path.abspath(dv))
 del sys
 '''
@@ -278,7 +281,9 @@ def main(args=sys.argv):
             'packages'   : ['PIL', 'lxml', 'cherrypy',
                             'dateutil', 'dns'],
             'excludes'   : ["Tkconstants", "Tkinter", "tcl",
-                            "_imagingtk", "ImageTk", "FixTk"
+                            "_imagingtk", "ImageTk",
+                            "FixTk",
+                            'PyQt4.uic.port_v3.proxy_base'
                             ],
             'dll_excludes' : ['mswsock.dll', 'tcl85.dll',
                             'tk85.dll'],
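Note: the boot script embedded in the frozen Windows binary now checks the CALIBRE_DEVELOP_FROM environment variable and, if it points at an existing directory, inserts it at the front of sys.path, ahead of the frozen library. A sketch of the resulting workflow; both paths below are hypothetical examples:

    import os, subprocess

    os.environ['CALIBRE_DEVELOP_FROM'] = r'C:\calibre-src\src'   # source checkout
    # The installed binary now imports the calibre package from the checkout
    subprocess.call([r'C:\Program Files\Calibre2\calibre.exe'])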

View File

@@ -6,10 +6,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Embedded console for debugging.
 '''
 
-import sys, os, re, shutil
+import sys, os
 from calibre.utils.config import OptionParser
 from calibre.constants import iswindows, isosx
-from calibre.libunzip import update
 from calibre import prints
@@ -18,11 +17,6 @@ def option_parser():
 Run an embedded python interpreter.
 ''')
-    parser.add_option('-u', '--update-module', default=False,
-                      action='store_true',
-                      help='Update the specified module in the frozen library. '+
-                      'Module specifications are of the form full.name.of.module path_to_module.py',
-                      )
     parser.add_option('-c', '--command', help='Run python code.', default=None)
     parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.')
     parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
@@ -41,39 +35,17 @@ Run an embedded python interpreter.
     parser.add_option('--pdfreflow', default=None,
                       help='Path to PDF file to try and reflow. Output will be placed in '
                       'current directory. ')
+    parser.add_option('-f', '--develop-from', default=None,
+                      help=('Develop calibre from the specified path. '
+                            'The path should point to the src sub-directory in the '
+                            'calibre source tree.'))
 
     return parser
 
-def update_zipfile(zipfile, mod, path):
-    if 'win32' in sys.platform:
-        print 'WARNING: On Windows Vista using this option may cause windows to put library.zip into the Virtual Store (typically located in c:\Users\username\AppData\Local\VirtualStore). If it does this you must delete it from there after you\'re done debugging).'
-    pat = re.compile(mod.replace('.', '/')+r'\.py[co]*')
-    name = mod.replace('.', '/') + os.path.splitext(path)[-1]
-    update(zipfile, [pat], [path], [name])
-
-def update_site_packages(sp, mod, path):
-    dest = os.path.join(sp, *mod.split('.'))+'.py'
-    shutil.copy2(path, dest)
-
-def update_module(mod, path):
-    if not hasattr(sys, 'frozen'):
-        raise RuntimeError('Modules can only be updated in frozen installs.')
-    zp = None
-    if iswindows:
-        zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
-    elif getattr(sys, 'new_app_bundle', False):
-        update_site_packages(sys.site_packages, mod, path)
-    elif isosx:
-        zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
-                          'Resources', 'lib',
-                          'python'+'.'.join(map(str, sys.version_info[:2])),
-                          'site-packages.zip')
-    else:
-        zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
-    if zp is not None:
-        update_zipfile(zp, mod, path)
-    else:
-        raise ValueError('Updating modules is not supported on this platform.')
+def develop_from(path):
+    from calibre.gui2 import build_forms
+    print 'Compiling .ui forms...'
+    build_forms(path)
 
 def migrate(old, new):
     from calibre.utils.config import prefs
@@ -189,9 +161,6 @@ def main(args=sys.argv):
     if opts.gui:
         from calibre.gui2.main import main
         main(['calibre'])
-    elif opts.update_module:
-        mod, path = args[1:3]
-        update_module(mod, os.path.expanduser(path))
     elif opts.command:
         sys.argv = args[:1]
         exec opts.command
@@ -218,6 +187,8 @@ def main(args=sys.argv):
         from calibre.utils.logging import default_log
         opts2, args = px().parse_args(['xxxx', '-vvvv', opts.pdfreflow])
         run(opts2, opts.pdfreflow, default_log)
+    elif opts.develop_from is not None:
+        develop_from(opts.develop_from)
     else:
         from IPython.Shell import IPShellEmbed
         ipshell = IPShellEmbed()
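Note: calibre-debug loses the old --update-module machinery (no longer needed once the binary can run straight from a checkout) and gains -f/--develop-from, which compiles the .ui forms in a source tree. A sketch of exercising the new option programmatically; the checkout path is a hypothetical example:

    from calibre.debug import option_parser, develop_from

    opts, args = option_parser().parse_args(['-f', r'C:\calibre-src\src'])
    if opts.develop_from is not None:
        develop_from(opts.develop_from)  # compiles the .ui forms under the checkout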

View File

@@ -525,3 +525,53 @@ def is_ok_to_use_qt():
     gui_thread = QThread.currentThread()
     return gui_thread is QThread.currentThread()
 
+def find_forms(srcdir):
+    base = os.path.join(srcdir, 'calibre', 'gui2')
+    forms = []
+    for root, _, files in os.walk(base):
+        for name in files:
+            if name.endswith('.ui'):
+                forms.append(os.path.abspath(os.path.join(root, name)))
+    return forms
+
+def form_to_compiled_form(form):
+    return form.rpartition('.')[0]+'_ui.py'
+
+def build_forms(srcdir, info=None):
+    import re, cStringIO
+    from PyQt4.uic import compileUi
+    forms = find_forms(srcdir)
+    if info is None:
+        from calibre import prints
+        info = prints
+    pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
+    def sub(match):
+        ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
+        return ans
+    for form in forms:
+        compiled_form = form_to_compiled_form(form)
+        if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
+            info('\tCompiling form', form)
+            buf = cStringIO.StringIO()
+            compileUi(form, buf)
+            dat = buf.getvalue()
+            dat = dat.replace('__appname__', 'calibre')
+            dat = dat.replace('import images_rc', '')
+            dat = dat.replace('from library import', 'from calibre.gui2.library import')
+            dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
+            dat = dat.replace('from convert.xpath_wizard import',
+                    'from calibre.gui2.convert.xpath_wizard import')
+            dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
+            dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
+            dat = pat.sub(sub, dat)
+            if form.endswith('viewer%smain.ui'%os.sep):
+                info('\t\tPromoting WebView')
+                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
+                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
+                dat += '\nQtWebKit'
+            open(compiled_form, 'wb').write(dat)
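Note: the helpers added above encode two conventions: a form foo.ui compiles to foo_ui.py next to it, and a form is recompiled only when the .ui file's mtime is newer than that of the generated module. A small illustration, using a hypothetical path:

    from calibre.gui2 import form_to_compiled_form

    ui = '/checkout/src/calibre/gui2/dialogs/config.ui'  # hypothetical form
    print form_to_compiled_form(ui)
    # -> /checkout/src/calibre/gui2/dialogs/config_ui.py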

View File

@@ -1,57 +1,57 @@
 #!/usr/bin/env python
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.business-standard.com
 '''
 
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class BusinessStandard(BasicNewsRecipe):
     title                 = 'Business Standard'
     __author__            = 'Darko Miletic'
     description           = "India's most respected business daily"
     oldest_article        = 7
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     encoding              = 'cp1252'
     publisher             = 'Business Standard Limited'
     category              = 'news, business, money, india, world'
     language              = 'en_IN'
 
     conversion_options = {
                           'comments'        : description
                          ,'tags'            : category
                          ,'language'        : language
                          ,'publisher'       : publisher
                          ,'linearize_tables': True
                          }
 
     remove_attributes=['style']
     remove_tags = [dict(name=['object','link','script','iframe'])]
 
     feeds = [
              (u'News Now'             , u'http://feeds.business-standard.com/News-Now.xml'              )
             ,(u'Banking & finance'    , u'http://feeds.business-standard.com/Banking-Finance-All.xml'   )
             ,(u'Companies & Industry' , u'http://feeds.business-standard.com/Companies-Industry-All.xml')
             ,(u'Economy & Policy'     , u'http://feeds.business-standard.com/Economy-Policy-All.xml'    )
             ,(u'Tech World'           , u'http://feeds.business-standard.com/Tech-World-All.xml'        )
             ,(u'Life & Leisure'       , u'http://feeds.business-standard.com/Life-Leisure-All.xml'      )
             ,(u'Markets & Investing'  , u'http://feeds.business-standard.com/Markets-Investing-All.xml' )
             ,(u'Management & Mktg'    , u'http://feeds.business-standard.com/Management-Mktg-All.xml'   )
             ,(u'Automobiles'          , u'http://feeds.business-standard.com/Automobiles.xml'           )
             ,(u'Aviation'             , u'http://feeds.business-standard.com/Aviation.xml'              )
             ]
 
     def print_version(self, url):
         autono = url.rpartition('autono=')[2]
         tp = 'on'
         hk = url.rpartition('bKeyFlag=')[1]
         if hk == '':
             tp = ''
         return 'http://www.business-standard.com/india/printpage.php?autono=' + autono + '&tp=' + tp
 
     def get_article_url(self, article):
         return article.get('guid', None)

View File

@@ -1,73 +1,72 @@
 #!/usr/bin/env python
 
 __license__   = 'GPL v3'
 __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 mondediplo.com
 '''
 
-import re, urllib
+import urllib
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class LeMondeDiplomatiqueEn(BasicNewsRecipe):
     title                 = 'Le Monde diplomatique - English edition'
     __author__            = 'Darko Miletic'
     description           = 'Real journalism making sense of the world around us'
     publisher             = 'Le Monde diplomatique'
     category              = 'news, politics, world'
     no_stylesheets        = True
     oldest_article        = 31
     delay                 = 1
     encoding              = 'utf-8'
     needs_subscription    = True
     PREFIX                = 'http://mondediplo.com/'
     LOGIN                 = PREFIX + '2009/09/02congo'
     INDEX                 = PREFIX + strftime('%Y/%m/')
     use_embedded_content  = False
     language              = 'en'
 
     conversion_options = {
                            'comment'   : description
                          , 'tags'      : category
                          , 'publisher' : publisher
                          , 'language'  : language
                          }
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         br.open(self.LOGIN)
         if self.username is not None and self.password is not None:
             data = urllib.urlencode({ 'login':self.username
                                      ,'pass':self.password
                                      ,'enter':'enter'
                                     })
             br.open(self.LOGIN,data)
         return br
 
     keep_only_tags =[dict(name='div', attrs={'id':'contenu'})]
     remove_tags = [dict(name=['object','link','script','iframe','base'])]
 
     def parse_index(self):
         articles = []
         soup = self.index_to_soup(self.INDEX)
         cnt = soup.find('div',attrs={'class':'som_num'})
         for item in cnt.findAll('li'):
             description = ''
             feed_link = item.find('a')
             desc = item.find('div',attrs={'class':'chapo'})
             if desc:
                 description = desc.string
             if feed_link and feed_link.has_key('href'):
                 url   = self.PREFIX + feed_link['href'].partition('/../')[2]
                 title = self.tag_to_string(feed_link)
                 date  = strftime(self.timefmt)
                 articles.append({
                                   'title'      :title
                                  ,'date'       :date
                                  ,'url'        :url
                                  ,'description':description
                                 })
         return [(soup.head.title.string, articles)]

View File

@@ -16,7 +16,7 @@ class NYTimes(BasicNewsRecipe):
     __author__  = 'GRiker'
     language = _('English')
     description = 'Top Stories from the New York Times'
 
     # List of sections typically included in Top Stories. Use a keyword from the
     # right column in the excludeSectionKeywords[] list to skip downloading that section
     sections = {
@@ -39,7 +39,7 @@ class NYTimes(BasicNewsRecipe):
                  'world'             : 'World'
                }
 
     # By default, no sections are skipped.
     excludeSectionKeywords = []
 
     # Add section keywords from the right column above to skip that section
@@ -49,7 +49,7 @@ class NYTimes(BasicNewsRecipe):
     # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
     # Fetch only Top Stories
     # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
 
     # The maximum number of articles that will be downloaded
     max_articles_per_feed = 40
@@ -63,7 +63,7 @@ class NYTimes(BasicNewsRecipe):
                 dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles',
                              'portfolioInline','articleInline','readerscomment',
                              'nytRating']}) ]
 
     encoding = 'cp1252'
     no_stylesheets = True
     extra_css = '.headline {text-align: left;}\n \
@@ -105,13 +105,13 @@ class NYTimes(BasicNewsRecipe):
             _raw = url_or_raw
             if raw:
                 return _raw
 
             if not isinstance(_raw, unicode) and self.encoding:
                 _raw = _raw.decode(docEncoding, 'replace')
             massage = list(BeautifulSoup.MARKUP_MASSAGE)
             massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
             return BeautifulSoup(_raw, markupMassage=massage)
 
         # Entry point
         soup = get_the_soup( self.encoding, url_or_raw )
         contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
@@ -122,7 +122,7 @@ class NYTimes(BasicNewsRecipe):
         if self.verbose > 2:
             self.log( " document encoding: '%s'" % docEncoding)
         if docEncoding != self.encoding :
             soup = get_the_soup(docEncoding, url_or_raw)
 
         return soup
@@ -133,7 +133,7 @@ class NYTimes(BasicNewsRecipe):
         feed = key = 'All Top Stories'
         articles[key] = []
         ans.append(key)
 
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
         # Fetch the outer table
@@ -242,10 +242,10 @@ class NYTimes(BasicNewsRecipe):
                         if url == article['url'] :
                             duplicateFound = True
                             break
 
                 if duplicateFound:
                     # Continue fetching, don't add this article
                     continue
 
                 if not articles.has_key(feed):
                     articles[feed] = []
@@ -254,7 +254,7 @@ class NYTimes(BasicNewsRecipe):
                                 description=description, author=author, content=''))
 
         ans = self.sort_index_by(ans, {'Top Stories':-1})
         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
 
         return ans
 
     def strip_anchors(self,soup):
@@ -270,7 +270,7 @@ class NYTimes(BasicNewsRecipe):
 #        refresh = soup.find('meta', {'http-equiv':'refresh'})
 #        if refresh is None:
 #            return self.strip_anchors(soup)
 #
 #        content = refresh.get('content').partition('=')[2]
 #        raw = self.browser.open('http://www.nytimes.com'+content).read()
 #        soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
@@ -280,7 +280,7 @@ class NYTimes(BasicNewsRecipe):
             content = refresh.get('content').partition('=')[2]
             raw = self.browser.open('http://www.nytimes.com'+content).read()
             soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
 
         soup = self.strip_anchors(soup)
 
         # Test for empty content
@@ -291,7 +291,7 @@ class NYTimes(BasicNewsRecipe):
             return soup
         else:
             print "no allowed content found, removing article"
-            raise StringError
+            raise Exception()
 
     def postprocess_html(self,soup, True):
@@ -334,7 +334,7 @@ class NYTimes(BasicNewsRecipe):
                     bTag = Tag(soup, "b")
                     bTag.insert(0, subhead.contents[0])
                     subhead.replaceWith(bTag)
 
         # Synthesize a section header
         dsk = soup.find('meta', attrs={'name':'dsk'})
         if dsk is not None and dsk.has_key('content'):
@@ -343,12 +343,12 @@ class NYTimes(BasicNewsRecipe):
             hTag.insert(0,NavigableString(dsk['content']))
             articleTag = soup.find(True, attrs={'id':'article'})
             articleTag.insert(0,hTag)
 
         # Add class="articleBody" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'articleBody'})
         if divTag is not None :
             divTag['class'] = divTag['id']
 
         # Add class="authorId" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'authorId'})
         if divTag is not None :

View File

@@ -1,51 +1,50 @@
 #!/usr/bin/env python
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.smashingmagazine.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class SmashingMagazine(BasicNewsRecipe):
     title                 = 'Smashing Magazine'
     __author__            = 'Darko Miletic'
     description           = 'We smash you with the information that will make your life easier, really'
     oldest_article        = 20
     language              = 'en'
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     publisher             = 'Smashing Magazine'
     category              = 'news, web, IT, css, javascript, html'
     encoding              = 'utf-8'
 
     conversion_options = {
                           'comments'  : description
                          ,'tags'      : category
                          ,'publisher' : publisher
                          }
 
     keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
     remove_tags_after = dict(name='ul',attrs={'class':'social'})
     remove_tags = [
                     dict(name=['link','object'])
                    ,dict(name='h1',attrs={'class':'logo'})
                    ,dict(name='div',attrs={'id':'booklogosec'})
                    ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
                   ]
 
     feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
 
     def preprocess_html(self, soup):
         for iter in soup.findAll('div',attrs={'class':'leftframe'}):
             it = iter.find('h1')
             if it == None:
                 iter.extract()
         for item in soup.findAll('img'):
             oldParent = item.parent
             if oldParent.name == 'a':
                 oldParent.name = 'div'
         return soup

View File

@@ -1,47 +1,47 @@
 #!/usr/bin/env python
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.thestar.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class TheTorontoStar(BasicNewsRecipe):
     title                 = 'The Toronto Star'
     __author__            = 'Darko Miletic'
     description           = "Canada's largest daily newspaper"
     oldest_article        = 2
     language              = 'en_CA'
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     publisher             = 'The Toronto Star'
     category              = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson"
     encoding              = 'utf-8'
     extra_css             = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} '
 
     conversion_options = {
                           'comments'  : description
                          ,'tags'      : category
                          ,'publisher' : publisher
                          }
 
     keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})]
     remove_attributes= ['style']
 
     feeds = [
              (u'News'         , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
             ,(u'Opinions'     , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' )
             ,(u'Business'     , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' )
             ,(u'Sports'       , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' )
             ,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
             ,(u'Living'       , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
             ,(u'Travel'       , u'http://www.thestar.com/rss/82858?searchMode=Lineup' )
             ,(u'Science'      , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300')
            ]
 
     def print_version(self, url):
         return url.replace('/article/','/printArticle/')