Merge from trunk

This commit is contained in:
Charles Haley 2011-05-08 09:30:15 +01:00
commit f997a54203
2 changed files with 38 additions and 3 deletions

View File

@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe
class KoreaHerald(BasicNewsRecipe):
    '''
    Recipe configuration for fetching Korea Herald articles from the
    koreaherald.com RSS feeds listed in ``feeds``.
    '''

    # Recipe identification.
    title = u'KoreaHerald'
    description = u'Korea Herald News articles'
    language = 'en'
    __author__ = 'Seongkyoun Yoo'

    # Fetch limits. Units and exact semantics are defined by
    # BasicNewsRecipe (oldest_article is presumably in days and
    # recursions a link-follow depth -- confirm against the calibre docs).
    oldest_article = 10
    max_articles_per_feed = 10
    recursions = 3

    no_stylesheets = True

    # Tag specifications: elements selected by id for keep_only_tags.
    keep_only_tags = [
        {'id': ['contentLeft', '_article']},
    ]

    # Tag specifications for remove_tags: every iframe, plus div and ul
    # elements carrying any of the listed classes.
    remove_tags = [
        {'name': 'iframe'},
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'left',
                    'htit2',
                    'navigation',
                    'banner_txt',
                    'banner_img',
                ],
            },
        },
        {
            'name': 'ul',
            'attrs': {
                'class': [
                    'link_icon',
                    'flow_icon',
                    'detailTextAD110113',
                ],
            },
        },
    ]

    # (feed title, RSS URL) pairs, one per section of the site.
    feeds = [
        ('All News',
         'http://www.koreaherald.com/rss/020000000000.xml'),
        ('National',
         'http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business',
         'http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style',
         'http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment',
         'http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports',
         'http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion',
         'http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe',
         'http://www.koreaherald.com/rss/021000000000.xml'),
    ]

View File

@ -16,7 +16,6 @@ from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.metadata import title_sort as _title_sort
from calibre.library.save_to_disk import preprocess_template
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.bibtex import BibTeX
@ -128,7 +127,7 @@ class CSV_XML(CatalogPlugin): # {{{
if field.startswith('#'):
item = db.get_field(entry['id'],field,index_is_id=True)
elif field == 'title_sort':
item = _title_sort(unicode(entry['title']))
item = entry['sort']
else:
item = entry[field]
@ -183,7 +182,7 @@ class CSV_XML(CatalogPlugin): # {{{
record.append(item)
if 'title' in fields:
title = E.title(r['title'], sort=_title_sort(unicode(r['title'])))
title = E.title(r['title'], sort=r['sort'])
record.append(title)
if 'authors' in fields: