Merge from trunk

This commit is contained in:
Charles Haley 2010-08-13 04:50:44 +01:00
commit 04c3ed8434
4 changed files with 62 additions and 32 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 370 B

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.esquire.com
@ -9,7 +7,6 @@ www.esquire.com
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Esquire(BasicNewsRecipe):
title = 'Esquire'
@ -23,21 +20,19 @@ class Esquire(BasicNewsRecipe):
encoding = 'cp1250'
use_embedded_content = False
language = 'en'
lang = 'en-US'
cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg'
publication_type = 'magazine'
masthead_url = 'http://www.esquire.com/cm/shared/site_images/print_this/esquire_logo.gif'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [dict(name=['object','link','embed','iframe'])]
keep_only_tags = [dict(name='div', attrs={'id':['article_header','article_content']})]
remove_tags = [dict(name=['object','link','embed','iframe','base'])]
remove_attributes = ['width','height']
feeds = [
(u'Style' , u'http://www.esquire.com/style/rss/' )
@ -47,17 +42,7 @@ class Esquire(BasicNewsRecipe):
,(u'Frontpage', u'http://www.esquire.com/rss/' )
]
def print_version(self, url):
rest = url.rpartition('?')[0]
article = rest.rpartition('/')[2]
return 'http://www.esquire.com/print-this/' + article
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
financialexpress.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FE_India(BasicNewsRecipe):
    '''
    News recipe for The Financial Express (financialexpress.com).

    Articles are listed from a single syndication feed and each article
    link is rewritten to its printer-friendly page before download.
    '''

    # Descriptive metadata for the publication.
    title       = 'The Financial Express'
    __author__  = 'Darko Miletic'
    description = 'Financial news from India'
    publisher   = 'The Indian Express Limited'
    category    = 'news, politics, finances, India'
    language    = 'en_IN'
    publication_type = 'magazine'
    masthead_url = 'http://static.expressindia.com/frontend/fe/images/fe_logo.jpg'

    # Feed selection limits.
    oldest_article        = 30
    max_articles_per_feed = 200
    remove_empty_feeds    = True
    use_embedded_content  = False

    # Page decoding and styling.
    encoding       = 'cp1252'
    no_stylesheets = True
    extra_css      = ' body{font-family: Arial,Helvetica,sans-serif } '

    # Conversion settings are filled in from the metadata declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Only elements whose class is 'txt' are kept; width/height attributes
    # are stripped from whatever remains.
    keep_only_tags    = [{'attrs': {'class': 'txt'}}]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Articles', u'http://www.expressindia.com/syndications/fe.xml'),
    ]

    def print_version(self, url):
        '''
        Build the printer-friendly URL for the article at `url`.

        The article id is taken to be the second-to-last '/'-separated
        segment of `url` (i.e. the last segment before a trailing slash).
        '''
        prefix = 'http://www.financialexpress.com/printer/news/'
        # Drop everything after the final '/', then keep the last segment
        # of what is left.
        parent, _sep, _tail = url.rpartition('/')
        _head, _sep, article_id = parent.rpartition('/')
        return prefix + article_id + '/'

    def preprocess_html(self, soup):
        '''
        Remove every inline `style` attribute from `soup` and return it.
        '''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup

View File

@ -515,15 +515,15 @@ class ConfigDialog(ResizableDialog, Ui_Dialog):
self.reset_confirmation_button.clicked.connect(self.reset_confirmation)
deft, curt = read_raw_tweaks()
self.current_tweaks.setPlainText(curt)
self.default_tweaks.setPlainText(deft)
self.current_tweaks.setPlainText(curt.decode('utf-8'))
self.default_tweaks.setPlainText(deft.decode('utf-8'))
self.restore_tweaks_to_default_button.clicked.connect(self.restore_tweaks_to_default)
self.category_view.setCurrentIndex(self.category_view.model().index_for_name(initial_category))
def restore_tweaks_to_default(self, *args):
deft, curt = read_raw_tweaks()
self.current_tweaks.setPlainText(deft)
self.current_tweaks.setPlainText(deft.decode('utf-8'))
def reset_confirmation(self):
@ -698,8 +698,7 @@ class ConfigDialog(ResizableDialog, Ui_Dialog):
self.input_order.setCurrentRow(idx-1)
def set_tweaks(self):
raw = unicode(self.current_tweaks.toPlainText())
raw = re.sub(r'(?m)^#.*fileencoding.*', '# ', raw)
raw = unicode(self.current_tweaks.toPlainText()).encode('utf-8')
try:
exec raw
except: