mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN: Easier development environment using the binary build on Windows
This commit is contained in:
parent
0c678d9f05
commit
c9aeb3fd09
41
setup/gui.py
41
setup/gui.py
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, cStringIO, re
|
import os
|
||||||
|
|
||||||
from setup import Command, __appname__
|
from setup import Command, __appname__
|
||||||
|
|
||||||
@ -17,6 +17,8 @@ class GUI(Command):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def find_forms(cls):
|
def find_forms(cls):
|
||||||
|
from calibre.gui2 import find_forms
|
||||||
|
return find_forms(cls.SRC)
|
||||||
forms = []
|
forms = []
|
||||||
for root, _, files in os.walk(cls.PATH):
|
for root, _, files in os.walk(cls.PATH):
|
||||||
for name in files:
|
for name in files:
|
||||||
@ -27,7 +29,8 @@ class GUI(Command):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def form_to_compiled_form(cls, form):
|
def form_to_compiled_form(cls, form):
|
||||||
return form.rpartition('.')[0]+'_ui.py'
|
from calibre.gui2 import form_to_compiled_form
|
||||||
|
return form_to_compiled_form(form)
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
self.build_forms()
|
self.build_forms()
|
||||||
@ -53,38 +56,8 @@ class GUI(Command):
|
|||||||
|
|
||||||
|
|
||||||
def build_forms(self):
|
def build_forms(self):
|
||||||
from PyQt4.uic import compileUi
|
from calibre.gui2 import build_forms
|
||||||
forms = self.find_forms()
|
build_forms(self.SRC, info=self.info)
|
||||||
pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
|
|
||||||
def sub(match):
|
|
||||||
ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
|
|
||||||
return ans
|
|
||||||
|
|
||||||
for form in forms:
|
|
||||||
compiled_form = self.form_to_compiled_form(form)
|
|
||||||
if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
|
|
||||||
self.info('\tCompiling form', form)
|
|
||||||
buf = cStringIO.StringIO()
|
|
||||||
compileUi(form, buf)
|
|
||||||
dat = buf.getvalue()
|
|
||||||
dat = dat.replace('__appname__', __appname__)
|
|
||||||
dat = dat.replace('import images_rc', '')
|
|
||||||
dat = dat.replace('from library import', 'from calibre.gui2.library import')
|
|
||||||
dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
|
|
||||||
dat = dat.replace('from convert.xpath_wizard import',
|
|
||||||
'from calibre.gui2.convert.xpath_wizard import')
|
|
||||||
dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
|
|
||||||
dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
|
|
||||||
dat = pat.sub(sub, dat)
|
|
||||||
|
|
||||||
if form.endswith('viewer%smain.ui'%os.sep):
|
|
||||||
self.info('\t\tPromoting WebView')
|
|
||||||
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
|
|
||||||
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
|
|
||||||
dat += '\nQtWebKit'
|
|
||||||
|
|
||||||
open(compiled_form, 'wb').write(dat)
|
|
||||||
|
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
forms = self.find_forms()
|
forms = self.find_forms()
|
||||||
|
@ -121,6 +121,9 @@ base = os.path.dirname(sys.executable.decode(fenc))
|
|||||||
sys.resources_location = os.path.join(base, 'resources')
|
sys.resources_location = os.path.join(base, 'resources')
|
||||||
sys.extensions_location = os.path.join(base, 'plugins')
|
sys.extensions_location = os.path.join(base, 'plugins')
|
||||||
|
|
||||||
|
dv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
|
||||||
|
if dv and os.path.exists(dv):
|
||||||
|
sys.path.insert(0, os.path.abspath(dv))
|
||||||
|
|
||||||
del sys
|
del sys
|
||||||
'''
|
'''
|
||||||
@ -278,7 +281,9 @@ def main(args=sys.argv):
|
|||||||
'packages' : ['PIL', 'lxml', 'cherrypy',
|
'packages' : ['PIL', 'lxml', 'cherrypy',
|
||||||
'dateutil', 'dns'],
|
'dateutil', 'dns'],
|
||||||
'excludes' : ["Tkconstants", "Tkinter", "tcl",
|
'excludes' : ["Tkconstants", "Tkinter", "tcl",
|
||||||
"_imagingtk", "ImageTk", "FixTk"
|
"_imagingtk", "ImageTk",
|
||||||
|
"FixTk",
|
||||||
|
'PyQt4.uic.port_v3.proxy_base'
|
||||||
],
|
],
|
||||||
'dll_excludes' : ['mswsock.dll', 'tcl85.dll',
|
'dll_excludes' : ['mswsock.dll', 'tcl85.dll',
|
||||||
'tk85.dll'],
|
'tk85.dll'],
|
||||||
|
@ -6,10 +6,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Embedded console for debugging.
|
Embedded console for debugging.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, os, re, shutil
|
import sys, os
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.constants import iswindows, isosx
|
from calibre.constants import iswindows, isosx
|
||||||
from calibre.libunzip import update
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
@ -18,11 +17,6 @@ def option_parser():
|
|||||||
|
|
||||||
Run an embedded python interpreter.
|
Run an embedded python interpreter.
|
||||||
''')
|
''')
|
||||||
parser.add_option('-u', '--update-module', default=False,
|
|
||||||
action='store_true',
|
|
||||||
help='Update the specified module in the frozen library. '+
|
|
||||||
'Module specifications are of the form full.name.of.module path_to_module.py',
|
|
||||||
)
|
|
||||||
parser.add_option('-c', '--command', help='Run python code.', default=None)
|
parser.add_option('-c', '--command', help='Run python code.', default=None)
|
||||||
parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.')
|
parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.')
|
||||||
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
|
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
|
||||||
@ -41,39 +35,17 @@ Run an embedded python interpreter.
|
|||||||
parser.add_option('--pdfreflow', default=None,
|
parser.add_option('--pdfreflow', default=None,
|
||||||
help='Path to PDF file to try and reflow. Output will be placed in '
|
help='Path to PDF file to try and reflow. Output will be placed in '
|
||||||
'current directory. ')
|
'current directory. ')
|
||||||
|
parser.add_option('-f', '--develop-from', default=None,
|
||||||
|
help=('Develop calibre from the specified path. '
|
||||||
|
'The path should point to the src sub-directory in the '
|
||||||
|
'calibre source tree.'))
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def update_zipfile(zipfile, mod, path):
|
def develop_from(path):
|
||||||
if 'win32' in sys.platform:
|
from calibre.gui2 import build_forms
|
||||||
print 'WARNING: On Windows Vista using this option may cause windows to put library.zip into the Virtual Store (typically located in c:\Users\username\AppData\Local\VirtualStore). If it does this you must delete it from there after you\'re done debugging).'
|
print 'Compiling .ui forms...'
|
||||||
pat = re.compile(mod.replace('.', '/')+r'\.py[co]*')
|
build_forms(path)
|
||||||
name = mod.replace('.', '/') + os.path.splitext(path)[-1]
|
|
||||||
update(zipfile, [pat], [path], [name])
|
|
||||||
|
|
||||||
def update_site_packages(sp, mod, path):
|
|
||||||
dest = os.path.join(sp, *mod.split('.'))+'.py'
|
|
||||||
shutil.copy2(path, dest)
|
|
||||||
|
|
||||||
def update_module(mod, path):
|
|
||||||
if not hasattr(sys, 'frozen'):
|
|
||||||
raise RuntimeError('Modules can only be updated in frozen installs.')
|
|
||||||
zp = None
|
|
||||||
if iswindows:
|
|
||||||
zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
|
|
||||||
elif getattr(sys, 'new_app_bundle', False):
|
|
||||||
update_site_packages(sys.site_packages, mod, path)
|
|
||||||
elif isosx:
|
|
||||||
zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
|
|
||||||
'Resources', 'lib',
|
|
||||||
'python'+'.'.join(map(str, sys.version_info[:2])),
|
|
||||||
'site-packages.zip')
|
|
||||||
else:
|
|
||||||
zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
|
|
||||||
if zp is not None:
|
|
||||||
update_zipfile(zp, mod, path)
|
|
||||||
else:
|
|
||||||
raise ValueError('Updating modules is not supported on this platform.')
|
|
||||||
|
|
||||||
def migrate(old, new):
|
def migrate(old, new):
|
||||||
from calibre.utils.config import prefs
|
from calibre.utils.config import prefs
|
||||||
@ -189,9 +161,6 @@ def main(args=sys.argv):
|
|||||||
if opts.gui:
|
if opts.gui:
|
||||||
from calibre.gui2.main import main
|
from calibre.gui2.main import main
|
||||||
main(['calibre'])
|
main(['calibre'])
|
||||||
elif opts.update_module:
|
|
||||||
mod, path = args[1:3]
|
|
||||||
update_module(mod, os.path.expanduser(path))
|
|
||||||
elif opts.command:
|
elif opts.command:
|
||||||
sys.argv = args[:1]
|
sys.argv = args[:1]
|
||||||
exec opts.command
|
exec opts.command
|
||||||
@ -218,6 +187,8 @@ def main(args=sys.argv):
|
|||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
opts2, args = px().parse_args(['xxxx', '-vvvv', opts.pdfreflow])
|
opts2, args = px().parse_args(['xxxx', '-vvvv', opts.pdfreflow])
|
||||||
run(opts2, opts.pdfreflow, default_log)
|
run(opts2, opts.pdfreflow, default_log)
|
||||||
|
elif opts.develop_from is not None:
|
||||||
|
develop_from(opts.develop_from)
|
||||||
else:
|
else:
|
||||||
from IPython.Shell import IPShellEmbed
|
from IPython.Shell import IPShellEmbed
|
||||||
ipshell = IPShellEmbed()
|
ipshell = IPShellEmbed()
|
||||||
|
@ -525,3 +525,53 @@ def is_ok_to_use_qt():
|
|||||||
gui_thread = QThread.currentThread()
|
gui_thread = QThread.currentThread()
|
||||||
return gui_thread is QThread.currentThread()
|
return gui_thread is QThread.currentThread()
|
||||||
|
|
||||||
|
def find_forms(srcdir):
    '''
    Return the absolute paths of all ``.ui`` files found anywhere below
    ``<srcdir>/calibre/gui2``.

    :param srcdir: Directory that contains the ``calibre`` package.
    :return: List of absolute paths, in the order ``os.walk`` yields them.
    '''
    gui_root = os.path.join(srcdir, 'calibre', 'gui2')
    return [
        os.path.abspath(os.path.join(dirpath, fname))
        for dirpath, _dirs, fnames in os.walk(gui_root)
        for fname in fnames
        if fname.endswith('.ui')
    ]
|
||||||
|
|
||||||
|
def form_to_compiled_form(form):
    '''
    Map the path of a ``.ui`` form to the path of its compiled module.

    Everything after the last ``.`` in ``form`` is replaced by ``_ui.py``.
    '''
    stem, _sep, _ext = form.rpartition('.')
    return stem + '_ui.py'
|
||||||
|
|
||||||
|
def build_forms(srcdir, info=None):
    '''
    Compile every out-of-date ``.ui`` form below ``srcdir`` into a
    ``*_ui.py`` module stored next to the form.

    A form is (re)compiled when its compiled module does not exist or is
    older than the form. The code produced by ``PyQt4.uic.compileUi`` is
    post-processed (import paths rewritten, translation calls replaced by
    ``_()``, ``:/images/...`` resource strings wrapped in ``I()``) before it
    is written to disk.

    :param srcdir: Source directory handed to :func:`find_forms`.
    :param info: Callable used to report progress, called as
                 ``info(message)`` or ``info(message, form)``. Defaults to
                 ``calibre.prints``.
    '''
    import re, cStringIO
    from PyQt4.uic import compileUi
    forms = find_forms(srcdir)
    if info is None:
        from calibre import prints
        info = prints

    # Quoted resource paths of the form ":/images/<name>"
    pat = re.compile(r'''(['"]):/images/([^'"]+)\1''')
    # Compiled once here rather than once per form inside the loop
    translate_pat = re.compile(
        r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)',
        re.DOTALL)

    def sub(match):
        # Re-emit the image name, with its original quote character,
        # wrapped in a call to I()
        ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1))
        return ans

    for form in forms:
        compiled_form = form_to_compiled_form(form)
        if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
            info('\tCompiling form', form)
            buf = cStringIO.StringIO()
            compileUi(form, buf)
            dat = buf.getvalue()
            dat = dat.replace('__appname__', 'calibre')
            dat = dat.replace('import images_rc', '')
            # Make the generated relative imports absolute
            dat = dat.replace('from library import', 'from calibre.gui2.library import')
            dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import')
            dat = dat.replace('from convert.xpath_wizard import',
                'from calibre.gui2.convert.xpath_wizard import')
            dat = translate_pat.sub(r'_("\1")', dat)
            # This particular string must stay a plain literal, not _()
            dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
            dat = pat.sub(sub, dat)

            if form.endswith('viewer%smain.ui'%os.sep):
                info('\t\tPromoting WebView')
                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
                dat += '\nQtWebKit'

            # Close the output file deterministically instead of relying on
            # garbage collection to release the handle.
            out = open(compiled_form, 'wb')
            try:
                out.write(dat)
            finally:
                out.close()
|
||||||
|
|
||||||
|
@ -1,57 +1,57 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.business-standard.com
|
www.business-standard.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class BusinessStandard(BasicNewsRecipe):
    '''
    News recipe for www.business-standard.com, built from the site's RSS
    feeds and pointing each article at its print page.
    '''
    title = 'Business Standard'
    __author__ = 'Darko Miletic'
    description = "India's most respected business daily"
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    publisher = 'Business Standard Limited'
    category = 'news, business, money, india, world'
    language = 'en_IN'

    # Metadata reused from the attributes defined above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_attributes = ['style']
    remove_tags = [dict(name=['object', 'link', 'script', 'iframe'])]

    # (feed title, feed URL) pairs
    feeds = [
        (u'News Now', u'http://feeds.business-standard.com/News-Now.xml'),
        (u'Banking & finance', u'http://feeds.business-standard.com/Banking-Finance-All.xml'),
        (u'Companies & Industry', u'http://feeds.business-standard.com/Companies-Industry-All.xml'),
        (u'Economy & Policy', u'http://feeds.business-standard.com/Economy-Policy-All.xml'),
        (u'Tech World', u'http://feeds.business-standard.com/Tech-World-All.xml'),
        (u'Life & Leisure', u'http://feeds.business-standard.com/Life-Leisure-All.xml'),
        (u'Markets & Investing', u'http://feeds.business-standard.com/Markets-Investing-All.xml'),
        (u'Management & Mktg', u'http://feeds.business-standard.com/Management-Mktg-All.xml'),
        (u'Automobiles', u'http://feeds.business-standard.com/Automobiles.xml'),
        (u'Aviation', u'http://feeds.business-standard.com/Aviation.xml'),
    ]

    def print_version(self, url):
        '''
        Build the print-page URL for an article URL.

        The text after the last ``autono=`` in ``url`` is used as the article
        number; ``tp`` is ``on`` only when ``url`` contains ``bKeyFlag=``.
        '''
        article_no = url.rpartition('autono=')[2]
        tp = 'on' if 'bKeyFlag=' in url else ''
        return ('http://www.business-standard.com/india/printpage.php?autono='
                + article_no + '&tp=' + tp)

    def get_article_url(self, article):
        '''Return the ``guid`` entry of the feed article, or None if absent.'''
        return article.get('guid')
|
||||||
|
@ -1,73 +1,72 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
mondediplo.com
|
mondediplo.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re, urllib
|
import urllib
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
    '''
    News recipe for the English edition of Le Monde diplomatique
    (mondediplo.com). Logs in with the configured credentials and builds
    the article list from the index page of the current month.
    '''
    title = 'Le Monde diplomatique - English edition'
    __author__ = 'Darko Miletic'
    description = 'Real journalism making sense of the world around us'
    publisher = 'Le Monde diplomatique'
    category = 'news, politics, world'
    no_stylesheets = True
    oldest_article = 31
    delay = 1
    encoding = 'utf-8'
    needs_subscription = True
    PREFIX = 'http://mondediplo.com/'
    # Page that is opened, and posted to, in order to log in
    LOGIN = PREFIX + '2009/09/02congo'
    # Evaluated once, when the class body is executed
    INDEX = PREFIX + strftime('%Y/%m/')
    use_embedded_content = False
    language = 'en'

    # NOTE(review): the key here is 'comment' (singular) -- confirm this is
    # the option name the conversion pipeline expects.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def get_browser(self):
        '''
        Return a browser that has opened the login page and, when both a
        username and a password are set, has posted them to it.
        '''
        # NOTE(review): called on the class without an instance; presumably
        # get_browser is a classmethod on the base class -- confirm.
        br = BasicNewsRecipe.get_browser()
        br.open(self.LOGIN)
        if self.username is None or self.password is None:
            return br
        form_data = urllib.urlencode({ 'login':self.username
                                      ,'pass':self.password
                                      ,'enter':'enter'
                                    })
        br.open(self.LOGIN, form_data)
        return br

    keep_only_tags = [dict(name='div', attrs={'id':'contenu'})]
    remove_tags = [dict(name=['object', 'link', 'script', 'iframe', 'base'])]

    def parse_index(self):
        '''
        Build a single feed from the ``<li>`` entries inside the
        ``div.som_num`` element of the index page.

        :return: One-element list ``[(page title, list of article dicts)]``.
        '''
        soup = self.index_to_soup(self.INDEX)
        toc = soup.find('div', attrs={'class':'som_num'})
        articles = []
        for entry in toc.findAll('li'):
            link = entry.find('a')
            summary = entry.find('div', attrs={'class':'chapo'})
            description = summary.string if summary else ''
            # Entries without a usable link are skipped
            if not (link and link.has_key('href')):
                continue
            url = self.PREFIX + link['href'].partition('/../')[2]
            title = self.tag_to_string(link)
            date = strftime(self.timefmt)
            articles.append({
                'title': title,
                'date': date,
                'url': url,
                'description': description,
            })
        return [(soup.head.title.string, articles)]
|
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
__author__ = 'GRiker'
|
__author__ = 'GRiker'
|
||||||
language = _('English')
|
language = _('English')
|
||||||
description = 'Top Stories from the New York Times'
|
description = 'Top Stories from the New York Times'
|
||||||
|
|
||||||
# List of sections typically included in Top Stories. Use a keyword from the
|
# List of sections typically included in Top Stories. Use a keyword from the
|
||||||
# right column in the excludeSectionKeywords[] list to skip downloading that section
|
# right column in the excludeSectionKeywords[] list to skip downloading that section
|
||||||
sections = {
|
sections = {
|
||||||
@ -39,7 +39,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'world' : 'World'
|
'world' : 'World'
|
||||||
}
|
}
|
||||||
|
|
||||||
# By default, no sections are skipped.
|
# By default, no sections are skipped.
|
||||||
excludeSectionKeywords = []
|
excludeSectionKeywords = []
|
||||||
|
|
||||||
# Add section keywords from the right column above to skip that section
|
# Add section keywords from the right column above to skip that section
|
||||||
@ -49,7 +49,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
# excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
|
# excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
|
||||||
# Fetch only Top Stories
|
# Fetch only Top Stories
|
||||||
# excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
|
# excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
|
||||||
|
|
||||||
# The maximum number of articles that will be downloaded
|
# The maximum number of articles that will be downloaded
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
|
|
||||||
@ -63,7 +63,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles',
|
dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles',
|
||||||
'portfolioInline','articleInline','readerscomment',
|
'portfolioInline','articleInline','readerscomment',
|
||||||
'nytRating']}) ]
|
'nytRating']}) ]
|
||||||
|
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = '.headline {text-align: left;}\n \
|
extra_css = '.headline {text-align: left;}\n \
|
||||||
@ -105,13 +105,13 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
_raw = url_or_raw
|
_raw = url_or_raw
|
||||||
if raw:
|
if raw:
|
||||||
return _raw
|
return _raw
|
||||||
|
|
||||||
if not isinstance(_raw, unicode) and self.encoding:
|
if not isinstance(_raw, unicode) and self.encoding:
|
||||||
_raw = _raw.decode(docEncoding, 'replace')
|
_raw = _raw.decode(docEncoding, 'replace')
|
||||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||||
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
||||||
return BeautifulSoup(_raw, markupMassage=massage)
|
return BeautifulSoup(_raw, markupMassage=massage)
|
||||||
|
|
||||||
# Entry point
|
# Entry point
|
||||||
soup = get_the_soup( self.encoding, url_or_raw )
|
soup = get_the_soup( self.encoding, url_or_raw )
|
||||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||||
@ -122,7 +122,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
if self.verbose > 2:
|
if self.verbose > 2:
|
||||||
self.log( " document encoding: '%s'" % docEncoding)
|
self.log( " document encoding: '%s'" % docEncoding)
|
||||||
if docEncoding != self.encoding :
|
if docEncoding != self.encoding :
|
||||||
soup = get_the_soup(docEncoding, url_or_raw)
|
soup = get_the_soup(docEncoding, url_or_raw)
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
@ -133,7 +133,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
feed = key = 'All Top Stories'
|
feed = key = 'All Top Stories'
|
||||||
articles[key] = []
|
articles[key] = []
|
||||||
ans.append(key)
|
ans.append(key)
|
||||||
|
|
||||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
||||||
|
|
||||||
# Fetch the outer table
|
# Fetch the outer table
|
||||||
@ -242,10 +242,10 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
if url == article['url'] :
|
if url == article['url'] :
|
||||||
duplicateFound = True
|
duplicateFound = True
|
||||||
break
|
break
|
||||||
|
|
||||||
if duplicateFound:
|
if duplicateFound:
|
||||||
# Continue fetching, don't add this article
|
# Continue fetching, don't add this article
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not articles.has_key(feed):
|
if not articles.has_key(feed):
|
||||||
articles[feed] = []
|
articles[feed] = []
|
||||||
@ -254,7 +254,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
description=description, author=author, content=''))
|
description=description, author=author, content=''))
|
||||||
|
|
||||||
ans = self.sort_index_by(ans, {'Top Stories':-1})
|
ans = self.sort_index_by(ans, {'Top Stories':-1})
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def strip_anchors(self,soup):
|
def strip_anchors(self,soup):
|
||||||
@ -270,7 +270,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
# refresh = soup.find('meta', {'http-equiv':'refresh'})
|
# refresh = soup.find('meta', {'http-equiv':'refresh'})
|
||||||
# if refresh is None:
|
# if refresh is None:
|
||||||
# return self.strip_anchors(soup)
|
# return self.strip_anchors(soup)
|
||||||
#
|
#
|
||||||
# content = refresh.get('content').partition('=')[2]
|
# content = refresh.get('content').partition('=')[2]
|
||||||
# raw = self.browser.open('http://www.nytimes.com'+content).read()
|
# raw = self.browser.open('http://www.nytimes.com'+content).read()
|
||||||
# soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
|
# soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||||
@ -280,7 +280,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
content = refresh.get('content').partition('=')[2]
|
content = refresh.get('content').partition('=')[2]
|
||||||
raw = self.browser.open('http://www.nytimes.com'+content).read()
|
raw = self.browser.open('http://www.nytimes.com'+content).read()
|
||||||
soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
|
soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||||
|
|
||||||
soup = self.strip_anchors(soup)
|
soup = self.strip_anchors(soup)
|
||||||
|
|
||||||
# Test for empty content
|
# Test for empty content
|
||||||
@ -291,7 +291,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
else:
|
else:
|
||||||
print "no allowed content found, removing article"
|
print "no allowed content found, removing article"
|
||||||
raise StringError
|
raise Exception()
|
||||||
|
|
||||||
def postprocess_html(self,soup, True):
|
def postprocess_html(self,soup, True):
|
||||||
|
|
||||||
@ -334,7 +334,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
bTag = Tag(soup, "b")
|
bTag = Tag(soup, "b")
|
||||||
bTag.insert(0, subhead.contents[0])
|
bTag.insert(0, subhead.contents[0])
|
||||||
subhead.replaceWith(bTag)
|
subhead.replaceWith(bTag)
|
||||||
|
|
||||||
# Synthesize a section header
|
# Synthesize a section header
|
||||||
dsk = soup.find('meta', attrs={'name':'dsk'})
|
dsk = soup.find('meta', attrs={'name':'dsk'})
|
||||||
if dsk is not None and dsk.has_key('content'):
|
if dsk is not None and dsk.has_key('content'):
|
||||||
@ -343,12 +343,12 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
hTag.insert(0,NavigableString(dsk['content']))
|
hTag.insert(0,NavigableString(dsk['content']))
|
||||||
articleTag = soup.find(True, attrs={'id':'article'})
|
articleTag = soup.find(True, attrs={'id':'article'})
|
||||||
articleTag.insert(0,hTag)
|
articleTag.insert(0,hTag)
|
||||||
|
|
||||||
# Add class="articleBody" to <div> so we can format with CSS
|
# Add class="articleBody" to <div> so we can format with CSS
|
||||||
divTag = soup.find('div',attrs={'id':'articleBody'})
|
divTag = soup.find('div',attrs={'id':'articleBody'})
|
||||||
if divTag is not None :
|
if divTag is not None :
|
||||||
divTag['class'] = divTag['id']
|
divTag['class'] = divTag['id']
|
||||||
|
|
||||||
# Add class="authorId" to <div> so we can format with CSS
|
# Add class="authorId" to <div> so we can format with CSS
|
||||||
divTag = soup.find('div',attrs={'id':'authorId'})
|
divTag = soup.find('div',attrs={'id':'authorId'})
|
||||||
if divTag is not None :
|
if divTag is not None :
|
||||||
|
@ -1,51 +1,50 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.smashingmagazine.com
|
www.smashingmagazine.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
class SmashingMagazine(BasicNewsRecipe):
|
||||||
class SmashingMagazine(BasicNewsRecipe):
|
title = 'Smashing Magazine'
|
||||||
title = 'Smashing Magazine'
|
__author__ = 'Darko Miletic'
|
||||||
__author__ = 'Darko Miletic'
|
description = 'We smash you with the information that will make your life easier, really'
|
||||||
description = 'We smash you with the information that will make your life easier, really'
|
oldest_article = 20
|
||||||
oldest_article = 20
|
language = 'en'
|
||||||
language = 'en'
|
max_articles_per_feed = 100
|
||||||
max_articles_per_feed = 100
|
no_stylesheets = True
|
||||||
no_stylesheets = True
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
publisher = 'Smashing Magazine'
|
||||||
publisher = 'Smashing Magazine'
|
category = 'news, web, IT, css, javascript, html'
|
||||||
category = 'news, web, IT, css, javascript, html'
|
encoding = 'utf-8'
|
||||||
encoding = 'utf-8'
|
|
||||||
|
conversion_options = {
|
||||||
conversion_options = {
|
'comments' : description
|
||||||
'comments' : description
|
,'tags' : category
|
||||||
,'tags' : category
|
,'publisher' : publisher
|
||||||
,'publisher' : publisher
|
}
|
||||||
}
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
|
remove_tags_after = dict(name='ul',attrs={'class':'social'})
|
||||||
remove_tags_after = dict(name='ul',attrs={'class':'social'})
|
remove_tags = [
|
||||||
remove_tags = [
|
dict(name=['link','object'])
|
||||||
dict(name=['link','object'])
|
,dict(name='h1',attrs={'class':'logo'})
|
||||||
,dict(name='h1',attrs={'class':'logo'})
|
,dict(name='div',attrs={'id':'booklogosec'})
|
||||||
,dict(name='div',attrs={'id':'booklogosec'})
|
,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
|
||||||
,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
|
]
|
||||||
]
|
|
||||||
|
feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
|
||||||
feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
def preprocess_html(self, soup):
|
for iter in soup.findAll('div',attrs={'class':'leftframe'}):
|
||||||
for iter in soup.findAll('div',attrs={'class':'leftframe'}):
|
it = iter.find('h1')
|
||||||
it = iter.find('h1')
|
if it == None:
|
||||||
if it == None:
|
iter.extract()
|
||||||
iter.extract()
|
for item in soup.findAll('img'):
|
||||||
for item in soup.findAll('img'):
|
oldParent = item.parent
|
||||||
oldParent = item.parent
|
if oldParent.name == 'a':
|
||||||
if oldParent.name == 'a':
|
oldParent.name = 'div'
|
||||||
oldParent.name = 'div'
|
return soup
|
||||||
return soup
|
|
||||||
|
@ -1,47 +1,47 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.thestar.com
|
www.thestar.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class TheTorontoStar(BasicNewsRecipe):
|
class TheTorontoStar(BasicNewsRecipe):
|
||||||
title = 'The Toronto Star'
|
title = 'The Toronto Star'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "Canada's largest daily newspaper"
|
description = "Canada's largest daily newspaper"
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
publisher = 'The Toronto Star'
|
publisher = 'The Toronto Star'
|
||||||
category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson"
|
category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson"
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
extra_css = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} '
|
extra_css = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})]
|
||||||
remove_attributes= ['style']
|
remove_attributes= ['style']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
(u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
||||||
,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' )
|
,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' )
|
||||||
,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' )
|
,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' )
|
||||||
,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' )
|
,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' )
|
||||||
,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
||||||
,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' )
|
||||||
,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' )
|
,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' )
|
||||||
,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300')
|
,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/article/','/printArticle/')
|
return url.replace('/article/','/printArticle/')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user