Sync to trunk.

John Schember 2011-05-18 18:07:56 -04:00
commit f95544e7b5
18 changed files with 383 additions and 36 deletions


@@ -0,0 +1,33 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Börse-online'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.dpv.de/images/1995/source.gif'
    masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
    remove_tags_before = dict(name='h3')
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
    remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
                   dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
                   dict(name=['h2', 'Gesamtranking', 'h3', ''])]

    def print_version(self, url):
        return url.replace('.html#nv=rss', '.html?mode=print')

    feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]

recipes/capital_de.recipe Normal file (61 lines)

@@ -0,0 +1,61 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    title = u'Capital.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article = 7
    max_articles_per_feed = 35
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
    cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        return url.replace('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')

    remove_tags_before = dict(name='td', attrs={'class':'textcell'})
    remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})]

    feeds = [(u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
             (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
             (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')]

    def append_page(self, soup, appendtag, position):
        # Follow the pager link and stitch multi-page articles into one document.
        pager = soup.find('div', attrs={'class':'artikelsplit'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'printable'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div', attrs={'class':'artikelsplit'}):
            item.extract()
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'class':'artikelsplit'})
        if pager:
            pager.extract()
        return self.adeify_images(soup)

    remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
                   dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
                   dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
                   dict(rel=['canonical'])]


@@ -1,19 +1,21 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1302341394(BasicNewsRecipe):
     title = u'DvhN'
-    oldest_article = 1
+    __author__ = 'Reijndert'
+    oldest_article = 7
     max_articles_per_feed = 200
-    __author__ = 'Reijndert'
     no_stylesheets = True
-    cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
+    cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
     language = 'nl'
     country = 'NL'
     version = 1
     publisher = u'Dagblad van het Noorden'
     category = u'Nieuws'
     description = u'Nieuws uit Noord Nederland'
+    timefmt = ' %Y-%m-%d (%a)'

     keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
@@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
                      ]

     remove_tags = [
-        dict(name=['object','link','iframe','base'])
+        dict(name='span', attrs={'class':'location'})
+        ,dict(name='span', attrs={'class':'copyright'})
     ]

-    feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
+    preprocess_regexps = [
+        (re.compile(r'<a.*?>'), lambda h1: '')
+        ,(re.compile(r'</a>'), lambda h2: '')
+        ,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
+        ,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
+        ]
+
+    feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
+        , (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
+        , (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
+        , (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
+        , (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
+        , (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
+        , (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
+        , (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
+        ]

     extra_css = '''
     body {font-family: verdana, arial, helvetica, geneva, sans-serif;}

recipes/glamour.recipe Normal file (38 lines)

@@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    title = u'Glamour (US)'
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    remove_javascript = True
    __author__ = 'Anonymous'
    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]

    def print_version(self, url):
        return url + '?printable=true'

    def preprocess_html(self, soup):
        # Replace each link with its plain text so articles read cleanly offline.
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
             (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
             (u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
             (u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
             (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
             (u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
             (u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
             (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
             (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
             (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
             (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
             (u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
             (u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
             ]


@@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    title = u'Good to Know (uk)'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    __author__ = 'Anonymous'
    language = 'en_GB'
    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]

    def print_version(self, url):
        return url + '/print/1'

    def preprocess_html(self, soup):
        # Replace each link with its plain text.
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
             (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
             (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
             (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
             (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
             (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
             (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
             (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
             ]

BIN recipes/icons/osnews_pl.png Normal file (binary, 1006 B)
BIN (binary icon file, 722 B; name not shown)
BIN (binary icon file, 425 B; name not shown)

recipes/impulse_de.recipe Normal file (32 lines)

@@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    title = u'Impulse.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        return url.replace('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')

    remove_tags_before = dict(name='h1', attrs={'class':'h2'})
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]

    feeds = [(u'impulstest', u'http://www.impulse.de/rss/')]

    remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
                   dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
                   dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
                   dict(rel=['canonical'])]


@@ -0,0 +1,10 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305636254(BasicNewsRecipe):
    title = u'Mens Health (US)'
    language = 'en'
    __author__ = 'Anonymous'
    oldest_article = 14
    max_articles_per_feed = 100

    feeds = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')]


@@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
     BASE_URL = 'http://www.newsweek.com'

+    topics = {
+        'Culture' : '/tag/culture.html',
+        'Business' : '/tag/business.html',
+        'Society' : '/tag/society.html',
+        'Science' : '/tag/science.html',
+        'Education' : '/tag/education.html',
+        'Politics' : '/tag/politics.html',
+        'Health' : '/tag/health.html',
+        'World' : '/tag/world.html',
+        'Nation' : '/tag/nation.html',
+        'Technology' : '/tag/technology.html',
+        'Game Changers' : '/tag/game-changers.html',
+    }
+
     keep_only_tags = dict(name='article', attrs={'class':'article-text'})
     remove_tags = [dict(attrs={'data-dartad':True})]
     remove_attributes = ['property']
@@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
         return soup

     def newsweek_sections(self):
-        return [
-            ('Nation', 'http://www.newsweek.com/tag/nation.html'),
-            ('Society', 'http://www.newsweek.com/tag/society.html'),
-            ('Culture', 'http://www.newsweek.com/tag/culture.html'),
-            ('World', 'http://www.newsweek.com/tag/world.html'),
-            ('Politics', 'http://www.newsweek.com/tag/politics.html'),
-            ('Business', 'http://www.newsweek.com/tag/business.html'),
-            ]
+        for topic_name, topic_url in self.topics.iteritems():
+            yield (topic_name,
+                   self.BASE_URL+topic_url)

     def newsweek_parse_section_page(self, soup):
         for article in soup.findAll('article', about=True,


@@ -14,6 +14,7 @@ class UnitedDaily(BasicNewsRecipe):
              (u'生活', u'http://udn.com/udnrss/life.xml'),
              (u'綜合', u'http://udn.com/udnrss/education.xml'),
              (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
+             (u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
              (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
              (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
              (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
@@ -21,15 +22,21 @@ class UnitedDaily(BasicNewsRecipe):
              (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
              (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
              (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
+             (u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
              (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
              (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
              (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
              (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
+             (u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
              (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
              (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
              (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
              (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
              (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
+             (u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
+             (u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
+             (u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
+             (u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
              (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
              (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
              (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
@@ -40,19 +47,24 @@ class UnitedDaily(BasicNewsRecipe):
              (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
              (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
              (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
+             (u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
              (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
+             (u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
              (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
+             (u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
              (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
              (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
              (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
              (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
+             (u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
+             (u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
             ]

-    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
+    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''

     __author__ = 'Eddie Lau'
-    __version__ = '1.0'
+    __version__ = '1.1'
-    language = 'zh'
+    language = 'zh-TW'
     publisher = 'United Daily News Group'
     description = 'United Daily (Taiwan)'
     category = 'News, Chinese, Taiwan'
@@ -63,5 +75,12 @@ class UnitedDaily(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
     cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'

-    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
+    keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
+                      dict(name='div', attrs={'id':['story_title']}),
+                      dict(name='td', attrs={'class':['story_author']}),
+                      dict(name='div', attrs={'id':['story_author']}),
+                      dict(name='td', attrs={'class':['story']}),
+                      dict(name='div', attrs={'id':['story']}),
+                      ]

     remove_tags = [dict(name='div', attrs={'id':['mvouter']})]


@@ -41,14 +41,19 @@ authors_completer_append_separator = False

 #: Author sort name algorithm
 # The algorithm used to copy author to author_sort
 # Possible values are:
-#  invert: use "fn ln" -> "ln, fn" (the default algorithm)
+#  invert: use "fn ln" -> "ln, fn"
 #  copy  : copy author to author_sort without modification
 #  comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
 #  nocomma : "fn ln" -> "ln fn" (without the comma)
 # When this tweak is changed, the author_sort values stored with each author
 # must be recomputed by right-clicking on an author in the left-hand tags pane,
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
+# The author name suffixes are words that are ignored when they occur at the
+# end of an author name. The case of the suffix is ignored and trailing
+# periods are automatically handled.
 author_sort_copy_method = 'comma'
+author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
+                        'MD', 'M.D', 'I', 'II', 'III', 'IV')

 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
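For context, the mappings these tweak values describe look roughly like this. This is an illustrative sketch only; the authoritative logic is the reworked author_to_author_sort() further down in this commit.

# Illustrative expectations only -- actual behaviour is defined by author_to_author_sort().
expected = {
    ('invert',  'John Smith')     : 'Smith, John',
    ('copy',    'John Smith')     : 'John Smith',
    ('comma',   'Smith, John')    : 'Smith, John',      # already contains a comma
    ('nocomma', 'John Smith')     : 'Smith John',
    ('invert',  'John Smith Jr.') : 'Smith, John Jr.',  # recognised suffix stays at the end
}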


@@ -630,6 +630,24 @@ def human_readable(size):
         size = size[:-2]
     return size + " " + suffix

+def remove_bracketed_text(src,
+        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
+    from collections import Counter
+    counts = Counter()
+    buf = []
+    src = force_unicode(src)
+    rmap = dict([(v, k) for k, v in brackets.iteritems()])
+    for char in src:
+        if char in brackets:
+            counts[char] += 1
+        elif char in rmap:
+            idx = rmap[char]
+            if counts[idx] > 0:
+                counts[idx] -= 1
+        elif sum(counts.itervalues()) < 1:
+            buf.append(char)
+    return u''.join(buf)
+
 if isosx:
     import glob, shutil
     fdir = os.path.expanduser('~/.fonts')
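A quick illustration of what the new helper is meant to do, based on the code above (the import path matches the "from calibre import ... remove_bracketed_text" line added in the next file):

from calibre import remove_bracketed_text

# Drops any (...), [...] or {...} spans, including nested ones; everything
# outside the brackets is kept verbatim, so surrounding spaces remain.
print(remove_bracketed_text(u'Douglas Adams (1952-2001) [author]'))
# -> u'Douglas Adams  '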


@@ -10,7 +10,7 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse

-from calibre import relpath, guess_type
+from calibre import relpath, guess_type, remove_bracketed_text
 from calibre.utils.config import tweaks
@@ -27,20 +27,37 @@ def authors_to_string(authors):
     else:
         return ''

-_bracket_pat = re.compile(r'[\[({].*?[})\]]')
-def author_to_author_sort(author):
+def author_to_author_sort(author, method=None):
     if not author:
-        return ''
-    method = tweaks['author_sort_copy_method']
-    if method == 'copy' or (method == 'comma' and ',' in author):
+        return u''
+    sauthor = remove_bracketed_text(author).strip()
+    tokens = sauthor.split()
+    if len(tokens) < 2:
         return author
-    author = _bracket_pat.sub('', author).strip()
-    tokens = author.split()
-    if tokens and tokens[-1] not in ('Inc.', 'Inc'):
-        tokens = tokens[-1:] + tokens[:-1]
-    if len(tokens) > 1 and method != 'nocomma':
-        tokens[0] += ','
-    return ' '.join(tokens)
+    if method is None:
+        method = tweaks['author_sort_copy_method']
+    if method == u'copy':
+        return author
+    suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
+    suffixes |= set([x+u'.' for x in suffixes])
+
+    last = tokens[-1].lower()
+    suffix = None
+    if last in suffixes:
+        suffix = tokens[-1]
+        tokens = tokens[:-1]
+
+    if method == u'comma' and u',' in u''.join(tokens):
+        return author
+
+    atokens = tokens[-1:] + tokens[:-1]
+    if suffix:
+        atokens.append(suffix)
+
+    if method != u'nocomma' and len(atokens) > 1:
+        atokens[0] += u','
+
+    return u' '.join(atokens)

 def authors_to_sort_string(authors):
     return ' & '.join(map(author_to_author_sort, authors))
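Because the function now takes an optional method argument, callers can bypass the author_sort_copy_method tweak entirely. A rough sketch of the expected results, assuming this module is calibre.ebooks.metadata:

from calibre.ebooks.metadata import author_to_author_sort

# An explicit method overrides the author_sort_copy_method tweak.
author_to_author_sort(u'Douglas Adams', method=u'invert')    # -> u'Adams, Douglas'
author_to_author_sort(u'John Smith Jr.', method=u'nocomma')  # -> u'Smith John Jr.'
author_to_author_sort(u'Madonna', method=u'invert')          # -> u'Madonna'  (fewer than two tokens)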


@@ -631,10 +631,11 @@ class Application(QApplication):
         if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
             from PyQt4.Qt import QStyleFactory
             styles = set(map(unicode, QStyleFactory.keys()))
-            if 'Cleanlooks' in styles:
-                self.setStyle('Cleanlooks')
-            else:
+            if 'Plastique' in styles and os.environ.get('KDE_FULL_SESSION',
+                    False):
                 self.setStyle('Plastique')
+            elif 'Cleanlooks' in styles:
+                self.setStyle('Cleanlooks')

     def _send_file_open_events(self):
         with self._file_open_lock:


@@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format.

 *Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ

-*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ
+*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ

 .. note ::


@@ -50,6 +50,12 @@ PARALLEL_FUNCS = {
     'save_book' :
     ('calibre.ebooks.metadata.worker', 'save_book', 'notification'),

+    'arbitrary' :
+    ('calibre.utils.ipc.worker', 'arbitrary', None),
+
+    'arbitrary_n' :
+    ('calibre.utils.ipc.worker', 'arbitrary', 'notification'),
+
 }

 class Progress(Thread):
@@ -73,7 +79,55 @@ class Progress(Thread):
             except:
                 break

+def arbitrary(module_name, func_name, args, kwargs={}):
+    '''
+    An entry point that allows arbitrary functions to be run in a parallel
+    process. Useful for plugin developers that want to run jobs in a parallel
+    process.
+
+    To use this entry point, simply create a ParallelJob with the module and
+    function names for the real entry point.
+
+    Remember that args and kwargs must be serialized, so only use basic types
+    for them.
+
+    To use this, you will do something like:
+
+        from calibre.gui2 import Dispatcher
+        gui.job_manager.run_job(Dispatcher(job_done), 'arbitrary',
+                args=('calibre_plugins.myplugin.worker', 'do_work',
+                      ('arg1', 'arg2', 'arg3')),
+                description='Change the world')
+
+    The function job_done will be called on completion; see the code in
+    gui2.actions.catalog for an example of using run_job and Dispatcher.
+
+    :param module_name: The fully qualified name of the module that contains
+                        the actual function to be run. For example:
+                        calibre_plugins.myplugin.worker
+    :param func_name: The name of the function to be run.
+    :param args: A list (or tuple) of arguments that will be passed to the
+                 function ``func_name``
+    :param kwargs: A dictionary of keyword arguments to pass to func_name
+    '''
+    module = importlib.import_module(module_name)
+    func = getattr(module, func_name)
+    return func(*args, **kwargs)
+
+def arbitrary_n(module_name, func_name, args, kwargs={},
+                notification=lambda x, y: y):
+    '''
+    Same as :func:`arbitrary` above, except that func_name must support a
+    keyword argument "notification". This will be a function that accepts two
+    arguments. func_name should call it periodically with progress information.
+    The first argument is a float between 0 and 1 that represents the percent
+    completed, and the second is a string with a message (it can be an empty
+    string).
+    '''
+    module = importlib.import_module(module_name)
+    func = getattr(module, func_name)
+    kwargs['notification'] = notification
+    return func(*args, **kwargs)
+
 def get_func(name):
     module, func, notification = PARALLEL_FUNCS[name]
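To make the new entry points concrete, here is a minimal sketch of a plugin-side worker that could be dispatched through 'arbitrary_n'. The module path, function name and arguments are hypothetical, used only for illustration:

# calibre_plugins.myplugin.worker  (hypothetical module, for illustration only)

def do_work(ids, notification=lambda frac, msg: None):
    '''Runs in a separate worker process; reports progress via notification.'''
    results = []
    for i, book_id in enumerate(ids):
        results.append(book_id * 2)  # stand-in for real per-book work
        # frac is a float in [0, 1], msg a short status string (see arbitrary_n above).
        notification(float(i + 1) / len(ids), 'Processed %d of %d' % (i + 1, len(ids)))
    return results

# Dispatched from the GUI roughly as the arbitrary() docstring describes:
#   from calibre.gui2 import Dispatcher
#   gui.job_manager.run_job(Dispatcher(job_done), 'arbitrary_n',
#           args=('calibre_plugins.myplugin.worker', 'do_work', ([1, 2, 3],)),
#           description='Process books')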