mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
f95544e7b5
33
recipes/borse_online.recipe
Normal file
33
recipes/borse_online.recipe
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    # Recipe for the Boerse-online RSS feed; articles are fetched through the
    # site's print view (see print_version below).

    title = u'Börse-online'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.dpv.de/images/1995/source.gif'
    masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    # The documented BasicNewsRecipe attribute is ``remove_tags_before``; the
    # former spelling ``remove_tags_bevor`` was just an unused class attribute,
    # so nothing ahead of the first <h3> was ever trimmed.
    remove_tags_before = [dict(name='h3')]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
    remove_tags = [
        # Navigation, poll and teaser containers, matched by CSS class.
        dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
        # Newsletter overlay pieces and the comment area, matched by id.
        dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
        dict(name=['h2', 'Gesamtranking', 'h3','']),
    ]

    def print_version(self, url):
        '''Return the print-view URL for the feed article link ``url``.'''
        # Feed links end in '.html#nv=rss'; the print page is the same path
        # with '?mode=print' instead of the fragment.
        return url.replace('.html#nv=rss', '.html?mode=print')

    feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
|
||||||
|
|
61
recipes/capital_de.recipe
Normal file
61
recipes/capital_de.recipe
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    # Recipe for the Capital.de RSS feeds; articles are fetched through the
    # site's print view (see print_version below).

    title = u'Capital.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article =7
    max_articles_per_feed = 35
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
    cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        '''Return the print-view URL for the feed article link ``url``.'''
        # Swap the RSS tracking suffix of the feed link for the print-mode
        # query parameter.
        return url.replace ('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')

    # The documented BasicNewsRecipe attribute is ``remove_tags_before``; the
    # former spelling ``remove_tags_bevor`` was just an unused class attribute,
    # so this setting never took effect.
    remove_tags_before = [dict(name='td', attrs={'class':'textcell'})]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})]

    feeds = [ (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
              (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
              (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')]

    def append_page(self, soup, appendtag, position):
        '''
        Follow the 'artikelsplit' pager link in ``soup`` (if any), fetch the
        linked page and insert its 'printable' div into ``appendtag`` at
        index ``position``. Recurses so further pages are chained in turn.
        '''
        pager = soup.find('div',attrs={'class':'artikelsplit'})
        if pager:
            # NOTE(review): self.INDEX is not defined anywhere in this class
            # as shown; unless the base class provides it, reaching this line
            # raises AttributeError - confirm the intended site root.
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'printable'})
            # Drop inline styles from the fetched fragment.
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            # Append any following page to the end of this fragment first.
            self.append_page(soup2,texttag,newpos)
            texttag.extract()
            appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        '''
        Strip inline styles and 'artikelsplit' pager divs from ``soup``, try
        to append continuation pages, and return the soup after passing it
        through ``adeify_images``.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        # NOTE(review): every 'artikelsplit' div is extracted here, before
        # append_page() looks for one, so the pager lookup in append_page()
        # cannot find anything on this call - confirm whether pagination is
        # meant to be active.
        for item in soup.findAll('div', attrs={'class':'artikelsplit'}):
            item.extract()
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div',attrs={'class':'artikelsplit'})
        if pager:
            pager.extract()
        return self.adeify_images(soup)

    remove_tags = [
        dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
        dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
        # NOTE(review): ``span=`` is passed as an attribute filter, not a tag
        # name - presumably name='span' with a class list was intended; verify.
        dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
        dict(rel=['canonical']),
    ]
|
||||||
|
|
@ -1,19 +1,21 @@
|
|||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
||||||
title = u'DvhN'
|
title = u'DvhN'
|
||||||
oldest_article = 1
|
__author__ = 'Reijndert'
|
||||||
|
oldest_article = 7
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
|
|
||||||
__author__ = 'Reijndert'
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
|
cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
country = 'NL'
|
country = 'NL'
|
||||||
version = 1
|
version = 1
|
||||||
publisher = u'Dagblad van het Noorden'
|
publisher = u'Dagblad van het Noorden'
|
||||||
category = u'Nieuws'
|
category = u'Nieuws'
|
||||||
description = u'Nieuws uit Noord Nederland'
|
description = u'Nieuws uit Noord Nederland'
|
||||||
|
timefmt = ' %Y-%m-%d (%a)'
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
|
keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
|
||||||
@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','iframe','base'])
|
dict(name='span',attrs={'class':'location'})
|
||||||
,dict(name='span',attrs={'class':'copyright'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<a.*?>'), lambda h1: '')
|
||||||
|
,(re.compile(r'</a>'), lambda h2: '')
|
||||||
|
,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
|
||||||
|
,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
|
||||||
|
, (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
|
||||||
|
, (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
|
||||||
|
, (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
|
||||||
|
, (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
|
||||||
|
, (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
|
||||||
|
, (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
|
||||||
|
, (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
|
||||||
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
||||||
|
38
recipes/glamour.recipe
Normal file
38
recipes/glamour.recipe
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    # Recipe for the Glamour (US) RSS feeds; articles are fetched through the
    # site's printable view (see print_version below).

    title = u'Glamour (US)'
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    remove_javascript = True
    __author__ = 'Anonymous'

    # Both classes go in one list: a dict literal that repeats the 'class'
    # key keeps only the last value, which left 'articles_footer' unfiltered.
    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]

    def print_version(self, url):
        '''Return the printable-view URL for the article at ``url``.'''
        return url + '?printable=true'

    def preprocess_html(self, soup):
        '''
        Replace every <a> element that has a single string child with that
        plain text, and return the modified ``soup``.
        '''
        for alink in soup.findAll('a'):
            # ``string`` is None for anchors without a single text child
            # (e.g. wrapping an image); those are left untouched.
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    # 'Save the Date' used to be listed twice, which fetched the same feed
    # twice; it now appears once.
    feeds = [ (u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
              (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
              (u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
              (u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
              (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
              (u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
              (u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
              (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
              (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
              (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
              (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
              (u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
              (u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
            ]
|
32
recipes/good_to_know.recipe
Normal file
32
recipes/good_to_know.recipe
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    # Recipe for the Good to Know (UK) RSS feeds; articles are fetched through
    # the site's print view (see print_version below).

    title = u'Good to Know (uk)'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    __author__ = 'Anonymous'
    language = 'en_GB'

    # Both classes go in one list: a dict literal that repeats the 'class'
    # key keeps only the last value, which left 'articles_footer' unfiltered.
    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]

    def print_version(self, url):
        '''Return the print-view URL for the article at ``url``.'''
        return url + '/print/1'

    def preprocess_html(self, soup):
        '''
        Replace every <a> element that has a single string child with that
        plain text, and return the modified ``soup``.
        '''
        for alink in soup.findAll('a'):
            # ``string`` is None for anchors without a single text child
            # (e.g. wrapping an image); those are left untouched.
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [ (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
              (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
              (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
              (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
              (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
              (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
              (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
              (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
            ]
|
BIN
recipes/icons/osnews_pl.png
Normal file
BIN
recipes/icons/osnews_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1006 B |
BIN
recipes/icons/rmf24_opinie.png
Normal file
BIN
recipes/icons/rmf24_opinie.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 722 B |
BIN
recipes/icons/swiatkindle.png
Normal file
BIN
recipes/icons/swiatkindle.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 425 B |
32
recipes/impulse_de.recipe
Normal file
32
recipes/impulse_de.recipe
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    # Recipe for the Impulse.de RSS feed; articles are fetched through the
    # site's print view (see print_version below).

    title = u'Impulse.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article =14
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        '''Return the print-view URL for the feed article link ``url``.'''
        # Swap the RSS tracking fragment of the feed link for the print-mode
        # query string.
        return url.replace ('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')

    # The documented BasicNewsRecipe attribute is ``remove_tags_before``; the
    # former spelling ``remove_tags_bevor`` was just an unused class attribute,
    # so this setting never took effect.
    remove_tags_before = [dict(name='h1', attrs={'class':'h2'})]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]

    feeds = [ (u'impulstest', u'http://www.impulse.de/rss/')]

    remove_tags = [
        dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
        dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
        # NOTE(review): ``span=`` is passed as an attribute filter, not a tag
        # name - presumably name='span' with a class list was intended; verify.
        dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
        dict(rel=['canonical']),
    ]
|
||||||
|
|
10
recipes/mens_health.recipe
Normal file
10
recipes/mens_health.recipe
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1305636254(BasicNewsRecipe):
    # Minimal feed-only recipe: everything except the settings below is left
    # at the BasicNewsRecipe defaults.

    # Identification
    title = u'Mens Health (US)'
    __author__ = 'Anonymous'
    language = 'en'

    # Download limits
    max_articles_per_feed = 100
    oldest_article = 14

    # (section title, feed URL) pairs
    feeds = [
        (u'News', u'http://blogs.menshealth.com/health-headlines/feed'),
    ]
|
@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
|
|
||||||
BASE_URL = 'http://www.newsweek.com'
|
BASE_URL = 'http://www.newsweek.com'
|
||||||
|
|
||||||
|
topics = {
|
||||||
|
'Culture' : '/tag/culture.html',
|
||||||
|
'Business' : '/tag/business.html',
|
||||||
|
'Society' : '/tag/society.html',
|
||||||
|
'Science' : '/tag/science.html',
|
||||||
|
'Education' : '/tag/education.html',
|
||||||
|
'Politics' : '/tag/politics.html',
|
||||||
|
'Health' : '/tag/health.html',
|
||||||
|
'World' : '/tag/world.html',
|
||||||
|
'Nation' : '/tag/nation.html',
|
||||||
|
'Technology' : '/tag/technology.html',
|
||||||
|
'Game Changers' : '/tag/game-changers.html',
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
||||||
remove_tags = [dict(attrs={'data-dartad':True})]
|
remove_tags = [dict(attrs={'data-dartad':True})]
|
||||||
remove_attributes = ['property']
|
remove_attributes = ['property']
|
||||||
@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def newsweek_sections(self):
|
def newsweek_sections(self):
|
||||||
return [
|
for topic_name, topic_url in self.topics.iteritems():
|
||||||
('Nation', 'http://www.newsweek.com/tag/nation.html'),
|
yield (topic_name,
|
||||||
('Society', 'http://www.newsweek.com/tag/society.html'),
|
self.BASE_URL+topic_url)
|
||||||
('Culture', 'http://www.newsweek.com/tag/culture.html'),
|
|
||||||
('World', 'http://www.newsweek.com/tag/world.html'),
|
|
||||||
('Politics', 'http://www.newsweek.com/tag/politics.html'),
|
|
||||||
('Business', 'http://www.newsweek.com/tag/business.html'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def newsweek_parse_section_page(self, soup):
|
def newsweek_parse_section_page(self, soup):
|
||||||
for article in soup.findAll('article', about=True,
|
for article in soup.findAll('article', about=True,
|
||||||
|
@ -14,6 +14,7 @@ class UnitedDaily(BasicNewsRecipe):
|
|||||||
(u'生活', u'http://udn.com/udnrss/life.xml'),
|
(u'生活', u'http://udn.com/udnrss/life.xml'),
|
||||||
(u'綜合', u'http://udn.com/udnrss/education.xml'),
|
(u'綜合', u'http://udn.com/udnrss/education.xml'),
|
||||||
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
|
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
|
||||||
|
(u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
|
||||||
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
|
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
|
||||||
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
|
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
|
||||||
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
|
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
|
||||||
@ -21,15 +22,21 @@ class UnitedDaily(BasicNewsRecipe):
|
|||||||
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
|
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
|
||||||
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
|
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
|
||||||
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
|
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
|
||||||
|
(u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
|
||||||
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
|
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
|
||||||
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
|
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
|
||||||
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
|
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
|
||||||
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
|
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
|
||||||
|
(u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
|
||||||
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
|
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
|
||||||
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
|
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
|
||||||
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
|
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
|
||||||
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
|
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
|
||||||
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
|
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
|
||||||
|
(u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
|
||||||
|
(u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
|
||||||
|
(u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
|
||||||
|
(u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
|
||||||
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
|
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
|
||||||
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
|
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
|
||||||
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
|
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
|
||||||
@ -40,19 +47,24 @@ class UnitedDaily(BasicNewsRecipe):
|
|||||||
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
|
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
|
||||||
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
|
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
|
||||||
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
|
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
|
||||||
|
(u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
|
||||||
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
|
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
|
||||||
|
(u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
|
||||||
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
|
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
|
||||||
|
(u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
|
||||||
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
|
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
|
||||||
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
|
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
|
||||||
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
|
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
|
||||||
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
|
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
|
||||||
|
(u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
|
||||||
|
(u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
|
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''
|
||||||
|
|
||||||
__author__ = 'Eddie Lau'
|
__author__ = 'Eddie Lau'
|
||||||
__version__ = '1.0'
|
__version__ = '1.1'
|
||||||
language = 'zh'
|
language = 'zh-TW'
|
||||||
publisher = 'United Daily News Group'
|
publisher = 'United Daily News Group'
|
||||||
description = 'United Daily (Taiwan)'
|
description = 'United Daily (Taiwan)'
|
||||||
category = 'News, Chinese, Taiwan'
|
category = 'News, Chinese, Taiwan'
|
||||||
@ -63,5 +75,12 @@ class UnitedDaily(BasicNewsRecipe):
|
|||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables':True}
|
||||||
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
||||||
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
|
keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
|
||||||
|
dict(name='div', attrs={'id':['story_title']}),
|
||||||
|
dict(name='td', attrs={'class':['story_author']}),
|
||||||
|
dict(name='div', attrs={'id':['story_author']}),
|
||||||
|
dict(name='td', attrs={'class':['story']}),
|
||||||
|
dict(name='div', attrs={'id':['story']}),
|
||||||
|
]
|
||||||
remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
|
remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
|
||||||
|
|
||||||
|
@ -41,14 +41,19 @@ authors_completer_append_separator = False
|
|||||||
#: Author sort name algorithm
|
#: Author sort name algorithm
|
||||||
# The algorithm used to copy author to author_sort
|
# The algorithm used to copy author to author_sort
|
||||||
# Possible values are:
|
# Possible values are:
|
||||||
# invert: use "fn ln" -> "ln, fn" (the default algorithm)
|
# invert: use "fn ln" -> "ln, fn"
|
||||||
# copy : copy author to author_sort without modification
|
# copy : copy author to author_sort without modification
|
||||||
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
|
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
|
||||||
# nocomma : "fn ln" -> "ln fn" (without the comma)
|
# nocomma : "fn ln" -> "ln fn" (without the comma)
|
||||||
# When this tweak is changed, the author_sort values stored with each author
|
# When this tweak is changed, the author_sort values stored with each author
|
||||||
# must be recomputed by right-clicking on an author in the left-hand tags pane,
|
# must be recomputed by right-clicking on an author in the left-hand tags pane,
|
||||||
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
|
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
|
||||||
|
# The author name suffixes are words that are ignored when they occur at the
|
||||||
|
# end of an author name. The case of the suffix is ignored and trailing
|
||||||
|
# periods are automatically handled.
|
||||||
author_sort_copy_method = 'comma'
|
author_sort_copy_method = 'comma'
|
||||||
|
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
||||||
|
'MD', 'M.D', 'I', 'II', 'III', 'IV')
|
||||||
|
|
||||||
#: Use author sort in Tag Browser
|
#: Use author sort in Tag Browser
|
||||||
# Set which author field to display in the tags pane (the list of authors,
|
# Set which author field to display in the tags pane (the list of authors,
|
||||||
|
@ -630,6 +630,24 @@ def human_readable(size):
|
|||||||
size = size[:-2]
|
size = size[:-2]
|
||||||
return size + " " + suffix
|
return size + " " + suffix
|
||||||
|
|
||||||
|
def remove_bracketed_text(src,
        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
    '''
    Return ``src`` (coerced via ``force_unicode``) with all bracketed text
    removed, brackets included.

    :param src: The text to filter.
    :param brackets: Mapping of opening bracket character to its closing
                     character. It is only read, never modified.

    A character is kept only while no bracket of any kind is open. Closing
    brackets are always dropped, whether or not a matching opener is pending.
    '''
    from collections import Counter

    src = force_unicode(src)
    # closing char -> opening char, for looking up which counter to decrement
    openers_by_closer = dict((closer, opener)
            for opener, closer in brackets.iteritems())
    open_counts = Counter()
    kept = []

    for ch in src:
        if ch in brackets:
            open_counts[ch] += 1
            continue
        if ch in openers_by_closer:
            opener = openers_by_closer[ch]
            # Never let a stray closer push the count below zero.
            if open_counts[opener] > 0:
                open_counts[opener] -= 1
            continue
        # Ordinary character: keep it only when nothing is currently open.
        if sum(open_counts.itervalues()) < 1:
            kept.append(ch)

    return u''.join(kept)
|
||||||
|
|
||||||
if isosx:
|
if isosx:
|
||||||
import glob, shutil
|
import glob, shutil
|
||||||
fdir = os.path.expanduser('~/.fonts')
|
fdir = os.path.expanduser('~/.fonts')
|
||||||
|
@ -10,7 +10,7 @@ import os, sys, re
|
|||||||
from urllib import unquote, quote
|
from urllib import unquote, quote
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre import relpath, guess_type
|
from calibre import relpath, guess_type, remove_bracketed_text
|
||||||
|
|
||||||
from calibre.utils.config import tweaks
|
from calibre.utils.config import tweaks
|
||||||
|
|
||||||
@ -27,20 +27,37 @@ def authors_to_string(authors):
|
|||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
_bracket_pat = re.compile(r'[\[({].*?[})\]]')
|
def author_to_author_sort(author, method=None):
|
||||||
def author_to_author_sort(author):
|
|
||||||
if not author:
|
if not author:
|
||||||
return ''
|
return u''
|
||||||
method = tweaks['author_sort_copy_method']
|
sauthor = remove_bracketed_text(author).strip()
|
||||||
if method == 'copy' or (method == 'comma' and ',' in author):
|
tokens = sauthor.split()
|
||||||
|
if len(tokens) < 2:
|
||||||
return author
|
return author
|
||||||
author = _bracket_pat.sub('', author).strip()
|
if method is None:
|
||||||
tokens = author.split()
|
method = tweaks['author_sort_copy_method']
|
||||||
if tokens and tokens[-1] not in ('Inc.', 'Inc'):
|
if method == u'copy':
|
||||||
tokens = tokens[-1:] + tokens[:-1]
|
return author
|
||||||
if len(tokens) > 1 and method != 'nocomma':
|
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
|
||||||
tokens[0] += ','
|
suffixes |= set([x+u'.' for x in suffixes])
|
||||||
return ' '.join(tokens)
|
|
||||||
|
last = tokens[-1].lower()
|
||||||
|
suffix = None
|
||||||
|
if last in suffixes:
|
||||||
|
suffix = tokens[-1]
|
||||||
|
tokens = tokens[:-1]
|
||||||
|
|
||||||
|
if method == u'comma' and u',' in u''.join(tokens):
|
||||||
|
return author
|
||||||
|
|
||||||
|
atokens = tokens[-1:] + tokens[:-1]
|
||||||
|
if suffix:
|
||||||
|
atokens.append(suffix)
|
||||||
|
|
||||||
|
if method != u'nocomma' and len(atokens) > 1:
|
||||||
|
atokens[0] += u','
|
||||||
|
|
||||||
|
return u' '.join(atokens)
|
||||||
|
|
||||||
def authors_to_sort_string(authors):
|
def authors_to_sort_string(authors):
|
||||||
return ' & '.join(map(author_to_author_sort, authors))
|
return ' & '.join(map(author_to_author_sort, authors))
|
||||||
|
@ -631,10 +631,11 @@ class Application(QApplication):
|
|||||||
if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
|
if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
|
||||||
from PyQt4.Qt import QStyleFactory
|
from PyQt4.Qt import QStyleFactory
|
||||||
styles = set(map(unicode, QStyleFactory.keys()))
|
styles = set(map(unicode, QStyleFactory.keys()))
|
||||||
if 'Cleanlooks' in styles:
|
if 'Plastique' in styles and os.environ.get('KDE_FULL_SESSION',
|
||||||
self.setStyle('Cleanlooks')
|
False):
|
||||||
else:
|
|
||||||
self.setStyle('Plastique')
|
self.setStyle('Plastique')
|
||||||
|
elif 'Cleanlooks' in styles:
|
||||||
|
self.setStyle('Cleanlooks')
|
||||||
|
|
||||||
def _send_file_open_events(self):
|
def _send_file_open_events(self):
|
||||||
with self._file_open_lock:
|
with self._file_open_lock:
|
||||||
|
@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format.
|
|||||||
|
|
||||||
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
|
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
|
||||||
|
|
||||||
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ
|
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ
|
||||||
|
|
||||||
.. note ::
|
.. note ::
|
||||||
|
|
||||||
|
@ -50,6 +50,12 @@ PARALLEL_FUNCS = {
|
|||||||
|
|
||||||
'save_book' :
|
'save_book' :
|
||||||
('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
|
('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
|
||||||
|
|
||||||
|
'arbitrary' :
|
||||||
|
('calibre.utils.ipc.worker', 'arbitrary', None),
|
||||||
|
|
||||||
|
'arbitrary_n' :
|
||||||
|
('calibre.utils.ipc.worker', 'arbitrary', 'notification'),
|
||||||
}
|
}
|
||||||
|
|
||||||
class Progress(Thread):
|
class Progress(Thread):
|
||||||
@ -73,7 +79,55 @@ class Progress(Thread):
|
|||||||
except:
|
except:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
def arbitrary(module_name, func_name, args, kwargs=None):
    '''
    An entry point that allows arbitrary functions to be run in a parallel
    process. Useful for plugin developers that want to run jobs in a parallel
    process.

    To use this entry point, simply create a ParallelJob with the module and
    function names for the real entry point.

    Remember that args and kwargs must be serialized so only use basic types
    for them.

    To use this, you will do something like

    from calibre.gui2 import Dispatcher
    gui.job_manager.run_job(Dispatcher(job_done), 'arbitrary',
        args=('calibre_plugins.myplugin.worker', 'do_work',
            ('arg1', 'arg2', 'arg3')),
        description='Change the world')

    The function job_done will be called on completion, see the code in
    gui2.actions.catalog for an example of using run_job and Dispatcher.

    :param module_name: The fully qualified name of the module that contains
                        the actual function to be run. For example:
                        calibre_plugins.myplugin.worker
    :param func_name: The name of the function to be run.
    :param args: A list (or tuple) of arguments that will be passed to the
                 function ``func_name``
    :param kwargs: A dictionary of keyword arguments to pass to func_name.
                   ``None`` (the default) means no keyword arguments.
    :return: Whatever ``func_name`` returns.
    '''
    # None replaces the former ``{}`` default so no dict object is shared
    # between calls.
    if kwargs is None:
        kwargs = {}
    module = importlib.import_module(module_name)
    func = getattr(module, func_name)
    return func(*args, **kwargs)
|
||||||
|
|
||||||
|
def arbitrary_n(module_name, func_name, args, kwargs=None,
        notification=lambda x, y: y):
    '''
    Same as :func:`arbitrary`, except that func_name must support a
    keyword argument "notification". This will be a function that accepts two
    arguments. func_name should call it periodically with progress information.
    The first argument is a float between 0 and 1 that represents percent
    completed and the second is a string with a message (it can be an empty
    string).

    :param module_name: The fully qualified name of the module that contains
                        the function to be run.
    :param func_name: The name of the function to be run.
    :param args: A list (or tuple) of positional arguments for ``func_name``.
    :param kwargs: A dictionary of keyword arguments for ``func_name``.
                   ``None`` (the default) means none. The dictionary passed in
                   is not modified.
    :param notification: The progress callback handed to ``func_name`` as its
                         ``notification`` keyword argument.
    :return: Whatever ``func_name`` returns.
    '''
    # NOTE(review): the PARALLEL_FUNCS entry named 'arbitrary_n' in this file
    # lists the function name 'arbitrary', not 'arbitrary_n' - confirm that
    # this function is actually the one dispatched for that job name.

    # Work on a private copy so that adding 'notification' never leaks into
    # the caller's dict or into a default shared between calls.
    kwargs = dict(kwargs) if kwargs else {}
    kwargs['notification'] = notification
    module = importlib.import_module(module_name)
    func = getattr(module, func_name)
    return func(*args, **kwargs)
|
||||||
|
|
||||||
def get_func(name):
|
def get_func(name):
|
||||||
module, func, notification = PARALLEL_FUNCS[name]
|
module, func, notification = PARALLEL_FUNCS[name]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user