diff --git a/setup/gui.py b/setup/gui.py index a73c3466e3..058a3f052f 100644 --- a/setup/gui.py +++ b/setup/gui.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, cStringIO, re +import os from setup import Command, __appname__ @@ -17,6 +17,8 @@ class GUI(Command): @classmethod def find_forms(cls): + from calibre.gui2 import find_forms + return find_forms(cls.SRC) forms = [] for root, _, files in os.walk(cls.PATH): for name in files: @@ -27,7 +29,8 @@ class GUI(Command): @classmethod def form_to_compiled_form(cls, form): - return form.rpartition('.')[0]+'_ui.py' + from calibre.gui2 import form_to_compiled_form + return form_to_compiled_form(form) def run(self, opts): self.build_forms() @@ -53,38 +56,8 @@ class GUI(Command): def build_forms(self): - from PyQt4.uic import compileUi - forms = self.find_forms() - pat = re.compile(r'''(['"]):/images/([^'"]+)\1''') - def sub(match): - ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1)) - return ans - - for form in forms: - compiled_form = self.form_to_compiled_form(form) - if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime: - self.info('\tCompiling form', form) - buf = cStringIO.StringIO() - compileUi(form, buf) - dat = buf.getvalue() - dat = dat.replace('__appname__', __appname__) - dat = dat.replace('import images_rc', '') - dat = dat.replace('from library import', 'from calibre.gui2.library import') - dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import') - dat = dat.replace('from convert.xpath_wizard import', - 'from calibre.gui2.convert.xpath_wizard import') - dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?' Embedded console for debugging. ''' -import sys, os, re, shutil +import sys, os from calibre.utils.config import OptionParser from calibre.constants import iswindows, isosx -from calibre.libunzip import update from calibre import prints def option_parser(): @@ -18,11 +17,6 @@ def option_parser(): Run an embedded python interpreter. ''') - parser.add_option('-u', '--update-module', default=False, - action='store_true', - help='Update the specified module in the frozen library. '+ - 'Module specifications are of the form full.name.of.module path_to_module.py', - ) parser.add_option('-c', '--command', help='Run python code.', default=None) parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.') parser.add_option('-d', '--debug-device-driver', default=False, action='store_true', @@ -41,39 +35,17 @@ Run an embedded python interpreter. parser.add_option('--pdfreflow', default=None, help='Path to PDF file to try and reflow. Output will be placed in ' 'current directory. ') + parser.add_option('-f', '--develop-from', default=None, + help=('Develop calibre from the specified path. ' + 'The path should point to the src sub-directory in the ' + 'calibre source tree.')) return parser -def update_zipfile(zipfile, mod, path): - if 'win32' in sys.platform: - print 'WARNING: On Windows Vista using this option may cause windows to put library.zip into the Virtual Store (typically located in c:\Users\username\AppData\Local\VirtualStore). If it does this you must delete it from there after you\'re done debugging).' - pat = re.compile(mod.replace('.', '/')+r'\.py[co]*') - name = mod.replace('.', '/') + os.path.splitext(path)[-1] - update(zipfile, [pat], [path], [name]) - -def update_site_packages(sp, mod, path): - dest = os.path.join(sp, *mod.split('.'))+'.py' - shutil.copy2(path, dest) - -def update_module(mod, path): - if not hasattr(sys, 'frozen'): - raise RuntimeError('Modules can only be updated in frozen installs.') - zp = None - if iswindows: - zp = os.path.join(os.path.dirname(sys.executable), 'library.zip') - elif getattr(sys, 'new_app_bundle', False): - update_site_packages(sys.site_packages, mod, path) - elif isosx: - zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), - 'Resources', 'lib', - 'python'+'.'.join(map(str, sys.version_info[:2])), - 'site-packages.zip') - else: - zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip') - if zp is not None: - update_zipfile(zp, mod, path) - else: - raise ValueError('Updating modules is not supported on this platform.') +def develop_from(path): + from calibre.gui2 import build_forms + print 'Compiling .ui forms...' + build_forms(path) def migrate(old, new): from calibre.utils.config import prefs @@ -189,9 +161,6 @@ def main(args=sys.argv): if opts.gui: from calibre.gui2.main import main main(['calibre']) - elif opts.update_module: - mod, path = args[1:3] - update_module(mod, os.path.expanduser(path)) elif opts.command: sys.argv = args[:1] exec opts.command @@ -218,6 +187,8 @@ def main(args=sys.argv): from calibre.utils.logging import default_log opts2, args = px().parse_args(['xxxx', '-vvvv', opts.pdfreflow]) run(opts2, opts.pdfreflow, default_log) + elif opts.develop_from is not None: + develop_from(opts.develop_from) else: from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed() diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 2d3a294c5b..ff30fff2c1 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -525,3 +525,53 @@ def is_ok_to_use_qt(): gui_thread = QThread.currentThread() return gui_thread is QThread.currentThread() +def find_forms(srcdir): + base = os.path.join(srcdir, 'calibre', 'gui2') + forms = [] + for root, _, files in os.walk(base): + for name in files: + if name.endswith('.ui'): + forms.append(os.path.abspath(os.path.join(root, name))) + + return forms + +def form_to_compiled_form(form): + return form.rpartition('.')[0]+'_ui.py' + +def build_forms(srcdir, info=None): + import re, cStringIO + from PyQt4.uic import compileUi + forms = find_forms(srcdir) + if info is None: + from calibre import prints + info = prints + pat = re.compile(r'''(['"]):/images/([^'"]+)\1''') + def sub(match): + ans = 'I(%s%s%s)'%(match.group(1), match.group(2), match.group(1)) + return ans + + for form in forms: + compiled_form = form_to_compiled_form(form) + if not os.path.exists(compiled_form) or os.stat(form).st_mtime > os.stat(compiled_form).st_mtime: + info('\tCompiling form', form) + buf = cStringIO.StringIO() + compileUi(form, buf) + dat = buf.getvalue() + dat = dat.replace('__appname__', 'calibre') + dat = dat.replace('import images_rc', '') + dat = dat.replace('from library import', 'from calibre.gui2.library import') + dat = dat.replace('from widgets import', 'from calibre.gui2.widgets import') + dat = dat.replace('from convert.xpath_wizard import', + 'from calibre.gui2.convert.xpath_wizard import') + dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?' -''' -www.business-standard.com -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - -class BusinessStandard(BasicNewsRecipe): - title = 'Business Standard' - __author__ = 'Darko Miletic' - description = "India's most respected business daily" - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - publisher = 'Business Standard Limited' - category = 'news, business, money, india, world' - language = 'en_IN' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher - ,'linearize_tables': True - } - - remove_attributes=['style'] - remove_tags = [dict(name=['object','link','script','iframe'])] - - feeds = [ - (u'News Now' , u'http://feeds.business-standard.com/News-Now.xml' ) - ,(u'Banking & finance' , u'http://feeds.business-standard.com/Banking-Finance-All.xml' ) - ,(u'Companies & Industry', u'http://feeds.business-standard.com/Companies-Industry-All.xml') - ,(u'Economy & Policy' , u'http://feeds.business-standard.com/Economy-Policy-All.xml' ) - ,(u'Tech World' , u'http://feeds.business-standard.com/Tech-World-All.xml' ) - ,(u'Life & Leisure' , u'http://feeds.business-standard.com/Life-Leisure-All.xml' ) - ,(u'Markets & Investing' , u'http://feeds.business-standard.com/Markets-Investing-All.xml' ) - ,(u'Management & Mktg' , u'http://feeds.business-standard.com/Management-Mktg-All.xml' ) - ,(u'Automobiles' , u'http://feeds.business-standard.com/Automobiles.xml' ) - ,(u'Aviation' , u'http://feeds.business-standard.com/Aviation.xml' ) - ] - - def print_version(self, url): - autono = url.rpartition('autono=')[2] - tp = 'on' - hk = url.rpartition('bKeyFlag=')[1] - if hk == '': - tp = '' - return 'http://www.business-standard.com/india/printpage.php?autono=' + autono + '&tp=' + tp - - def get_article_url(self, article): - return article.get('guid', None) +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.business-standard.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class BusinessStandard(BasicNewsRecipe): + title = 'Business Standard' + __author__ = 'Darko Miletic' + description = "India's most respected business daily" + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + publisher = 'Business Standard Limited' + category = 'news, business, money, india, world' + language = 'en_IN' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + + remove_attributes=['style'] + remove_tags = [dict(name=['object','link','script','iframe'])] + + feeds = [ + (u'News Now' , u'http://feeds.business-standard.com/News-Now.xml' ) + ,(u'Banking & finance' , u'http://feeds.business-standard.com/Banking-Finance-All.xml' ) + ,(u'Companies & Industry', u'http://feeds.business-standard.com/Companies-Industry-All.xml') + ,(u'Economy & Policy' , u'http://feeds.business-standard.com/Economy-Policy-All.xml' ) + ,(u'Tech World' , u'http://feeds.business-standard.com/Tech-World-All.xml' ) + ,(u'Life & Leisure' , u'http://feeds.business-standard.com/Life-Leisure-All.xml' ) + ,(u'Markets & Investing' , u'http://feeds.business-standard.com/Markets-Investing-All.xml' ) + ,(u'Management & Mktg' , u'http://feeds.business-standard.com/Management-Mktg-All.xml' ) + ,(u'Automobiles' , u'http://feeds.business-standard.com/Automobiles.xml' ) + ,(u'Aviation' , u'http://feeds.business-standard.com/Aviation.xml' ) + ] + + def print_version(self, url): + autono = url.rpartition('autono=')[2] + tp = 'on' + hk = url.rpartition('bKeyFlag=')[1] + if hk == '': + tp = '' + return 'http://www.business-standard.com/india/printpage.php?autono=' + autono + '&tp=' + tp + + def get_article_url(self, article): + return article.get('guid', None) diff --git a/src/calibre/web/feeds/recipes/recipe_lemonde_dip.py b/src/calibre/web/feeds/recipes/recipe_lemonde_dip.py index 4a114a09ae..fa20e43aa2 100644 --- a/src/calibre/web/feeds/recipes/recipe_lemonde_dip.py +++ b/src/calibre/web/feeds/recipes/recipe_lemonde_dip.py @@ -1,73 +1,72 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -mondediplo.com -''' - -import re, urllib -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - -class LeMondeDiplomatiqueEn(BasicNewsRecipe): - title = 'Le Monde diplomatique - English edition' - __author__ = 'Darko Miletic' - description = 'Real journalism making sense of the world around us' - publisher = 'Le Monde diplomatique' - category = 'news, politics, world' - no_stylesheets = True - oldest_article = 31 - delay = 1 - encoding = 'utf-8' - needs_subscription = True - PREFIX = 'http://mondediplo.com/' - LOGIN = PREFIX + '2009/09/02congo' - INDEX = PREFIX + strftime('%Y/%m/') - use_embedded_content = False - language = 'en' - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - br.open(self.LOGIN) - if self.username is not None and self.password is not None: - data = urllib.urlencode({ 'login':self.username - ,'pass':self.password - ,'enter':'enter' - }) - br.open(self.LOGIN,data) - return br - - keep_only_tags =[dict(name='div', attrs={'id':'contenu'})] - remove_tags = [dict(name=['object','link','script','iframe','base'])] - - def parse_index(self): - articles = [] - soup = self.index_to_soup(self.INDEX) - cnt = soup.find('div',attrs={'class':'som_num'}) - for item in cnt.findAll('li'): - description = '' - feed_link = item.find('a') - desc = item.find('div',attrs={'class':'chapo'}) - if desc: - description = desc.string - if feed_link and feed_link.has_key('href'): - url = self.PREFIX + feed_link['href'].partition('/../')[2] - title = self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':description - }) - return [(soup.head.title.string, articles)] - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +mondediplo.com +''' + +import urllib +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class LeMondeDiplomatiqueEn(BasicNewsRecipe): + title = 'Le Monde diplomatique - English edition' + __author__ = 'Darko Miletic' + description = 'Real journalism making sense of the world around us' + publisher = 'Le Monde diplomatique' + category = 'news, politics, world' + no_stylesheets = True + oldest_article = 31 + delay = 1 + encoding = 'utf-8' + needs_subscription = True + PREFIX = 'http://mondediplo.com/' + LOGIN = PREFIX + '2009/09/02congo' + INDEX = PREFIX + strftime('%Y/%m/') + use_embedded_content = False + language = 'en' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open(self.LOGIN) + if self.username is not None and self.password is not None: + data = urllib.urlencode({ 'login':self.username + ,'pass':self.password + ,'enter':'enter' + }) + br.open(self.LOGIN,data) + return br + + keep_only_tags =[dict(name='div', attrs={'id':'contenu'})] + remove_tags = [dict(name=['object','link','script','iframe','base'])] + + def parse_index(self): + articles = [] + soup = self.index_to_soup(self.INDEX) + cnt = soup.find('div',attrs={'class':'som_num'}) + for item in cnt.findAll('li'): + description = '' + feed_link = item.find('a') + desc = item.find('div',attrs={'class':'chapo'}) + if desc: + description = desc.string + if feed_link and feed_link.has_key('href'): + url = self.PREFIX + feed_link['href'].partition('/../')[2] + title = self.tag_to_string(feed_link) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + return [(soup.head.title.string, articles)] + diff --git a/src/calibre/web/feeds/recipes/recipe_nytimes.py b/src/calibre/web/feeds/recipes/recipe_nytimes.py index c21145ac07..af78856010 100644 --- a/src/calibre/web/feeds/recipes/recipe_nytimes.py +++ b/src/calibre/web/feeds/recipes/recipe_nytimes.py @@ -16,7 +16,7 @@ class NYTimes(BasicNewsRecipe): __author__ = 'GRiker' language = _('English') description = 'Top Stories from the New York Times' - + # List of sections typically included in Top Stories. Use a keyword from the # right column in the excludeSectionKeywords[] list to skip downloading that section sections = { @@ -39,7 +39,7 @@ class NYTimes(BasicNewsRecipe): 'world' : 'World' } - # By default, no sections are skipped. + # By default, no sections are skipped. excludeSectionKeywords = [] # Add section keywords from the right column above to skip that section @@ -49,7 +49,7 @@ class NYTimes(BasicNewsRecipe): # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World'] # Fetch only Top Stories # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World'] - + # The maximum number of articles that will be downloaded max_articles_per_feed = 40 @@ -63,7 +63,7 @@ class NYTimes(BasicNewsRecipe): dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles', 'portfolioInline','articleInline','readerscomment', 'nytRating']}) ] - + encoding = 'cp1252' no_stylesheets = True extra_css = '.headline {text-align: left;}\n \ @@ -105,13 +105,13 @@ class NYTimes(BasicNewsRecipe): _raw = url_or_raw if raw: return _raw - + if not isinstance(_raw, unicode) and self.encoding: _raw = _raw.decode(docEncoding, 'replace') massage = list(BeautifulSoup.MARKUP_MASSAGE) massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) return BeautifulSoup(_raw, markupMassage=massage) - + # Entry point soup = get_the_soup( self.encoding, url_or_raw ) contentType = soup.find(True,attrs={'http-equiv':'Content-Type'}) @@ -122,7 +122,7 @@ class NYTimes(BasicNewsRecipe): if self.verbose > 2: self.log( " document encoding: '%s'" % docEncoding) if docEncoding != self.encoding : - soup = get_the_soup(docEncoding, url_or_raw) + soup = get_the_soup(docEncoding, url_or_raw) return soup @@ -133,7 +133,7 @@ class NYTimes(BasicNewsRecipe): feed = key = 'All Top Stories' articles[key] = [] ans.append(key) - + soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/') # Fetch the outer table @@ -242,10 +242,10 @@ class NYTimes(BasicNewsRecipe): if url == article['url'] : duplicateFound = True break - - if duplicateFound: + + if duplicateFound: # Continue fetching, don't add this article - continue + continue if not articles.has_key(feed): articles[feed] = [] @@ -254,7 +254,7 @@ class NYTimes(BasicNewsRecipe): description=description, author=author, content='')) ans = self.sort_index_by(ans, {'Top Stories':-1}) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] + ans = [(key, articles[key]) for key in ans if articles.has_key(key)] return ans def strip_anchors(self,soup): @@ -270,7 +270,7 @@ class NYTimes(BasicNewsRecipe): # refresh = soup.find('meta', {'http-equiv':'refresh'}) # if refresh is None: # return self.strip_anchors(soup) -# +# # content = refresh.get('content').partition('=')[2] # raw = self.browser.open('http://www.nytimes.com'+content).read() # soup = BeautifulSoup(raw.decode('cp1252', 'replace')) @@ -280,7 +280,7 @@ class NYTimes(BasicNewsRecipe): content = refresh.get('content').partition('=')[2] raw = self.browser.open('http://www.nytimes.com'+content).read() soup = BeautifulSoup(raw.decode('cp1252', 'replace')) - + soup = self.strip_anchors(soup) # Test for empty content @@ -291,7 +291,7 @@ class NYTimes(BasicNewsRecipe): return soup else: print "no allowed content found, removing article" - raise StringError + raise Exception() def postprocess_html(self,soup, True): @@ -334,7 +334,7 @@ class NYTimes(BasicNewsRecipe): bTag = Tag(soup, "b") bTag.insert(0, subhead.contents[0]) subhead.replaceWith(bTag) - + # Synthesize a section header dsk = soup.find('meta', attrs={'name':'dsk'}) if dsk is not None and dsk.has_key('content'): @@ -343,12 +343,12 @@ class NYTimes(BasicNewsRecipe): hTag.insert(0,NavigableString(dsk['content'])) articleTag = soup.find(True, attrs={'id':'article'}) articleTag.insert(0,hTag) - + # Add class="articleBody" to
so we can format with CSS divTag = soup.find('div',attrs={'id':'articleBody'}) if divTag is not None : divTag['class'] = divTag['id'] - + # Add class="authorId" to
so we can format with CSS divTag = soup.find('div',attrs={'id':'authorId'}) if divTag is not None : diff --git a/src/calibre/web/feeds/recipes/recipe_smashing.py b/src/calibre/web/feeds/recipes/recipe_smashing.py index cc4edd2c77..04436a05ef 100644 --- a/src/calibre/web/feeds/recipes/recipe_smashing.py +++ b/src/calibre/web/feeds/recipes/recipe_smashing.py @@ -1,51 +1,50 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.smashingmagazine.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - -class SmashingMagazine(BasicNewsRecipe): - title = 'Smashing Magazine' - __author__ = 'Darko Miletic' - description = 'We smash you with the information that will make your life easier, really' - oldest_article = 20 - language = 'en' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - publisher = 'Smashing Magazine' - category = 'news, web, IT, css, javascript, html' - encoding = 'utf-8' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'publisher' : publisher - } - - keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})] - remove_tags_after = dict(name='ul',attrs={'class':'social'}) - remove_tags = [ - dict(name=['link','object']) - ,dict(name='h1',attrs={'class':'logo'}) - ,dict(name='div',attrs={'id':'booklogosec'}) - ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'}) - ] - - feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')] - - def preprocess_html(self, soup): - for iter in soup.findAll('div',attrs={'class':'leftframe'}): - it = iter.find('h1') - if it == None: - iter.extract() - for item in soup.findAll('img'): - oldParent = item.parent - if oldParent.name == 'a': - oldParent.name = 'div' - return soup +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.smashingmagazine.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class SmashingMagazine(BasicNewsRecipe): + title = 'Smashing Magazine' + __author__ = 'Darko Miletic' + description = 'We smash you with the information that will make your life easier, really' + oldest_article = 20 + language = 'en' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + publisher = 'Smashing Magazine' + category = 'news, web, IT, css, javascript, html' + encoding = 'utf-8' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'publisher' : publisher + } + + keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})] + remove_tags_after = dict(name='ul',attrs={'class':'social'}) + remove_tags = [ + dict(name=['link','object']) + ,dict(name='h1',attrs={'class':'logo'}) + ,dict(name='div',attrs={'id':'booklogosec'}) + ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'}) + ] + + feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')] + + def preprocess_html(self, soup): + for iter in soup.findAll('div',attrs={'class':'leftframe'}): + it = iter.find('h1') + if it == None: + iter.extract() + for item in soup.findAll('img'): + oldParent = item.parent + if oldParent.name == 'a': + oldParent.name = 'div' + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_thestar.py b/src/calibre/web/feeds/recipes/recipe_thestar.py index 99f024e964..5e662441ef 100644 --- a/src/calibre/web/feeds/recipes/recipe_thestar.py +++ b/src/calibre/web/feeds/recipes/recipe_thestar.py @@ -1,47 +1,47 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.thestar.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class TheTorontoStar(BasicNewsRecipe): - title = 'The Toronto Star' - __author__ = 'Darko Miletic' - description = "Canada's largest daily newspaper" - oldest_article = 2 - language = 'en_CA' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - publisher = 'The Toronto Star' - category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson" - encoding = 'utf-8' - extra_css = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} ' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'publisher' : publisher - } - - keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})] - remove_attributes= ['style'] - - feeds = [ - (u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' ) - ,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' ) - ,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' ) - ,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' ) - ,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300') - ] - - def print_version(self, url): - return url.replace('/article/','/printArticle/') - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.thestar.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TheTorontoStar(BasicNewsRecipe): + title = 'The Toronto Star' + __author__ = 'Darko Miletic' + description = "Canada's largest daily newspaper" + oldest_article = 2 + language = 'en_CA' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + publisher = 'The Toronto Star' + category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson" + encoding = 'utf-8' + extra_css = ' .headlineArticle{font-size: x-large; font-weight: bold} .navbar{text-align:center} ' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'publisher' : publisher + } + + keep_only_tags = [dict(name='div', attrs={'id':'AssetWebPart1'})] + remove_attributes= ['style'] + + feeds = [ + (u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' ) + ,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' ) + ,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' ) + ,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' ) + ,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300') + ] + + def print_version(self, url): + return url.replace('/article/','/printArticle/') +