Sync to trunk.

John Schember 2011-05-15 17:55:02 -04:00
commit d69621b71d
16 changed files with 390 additions and 164 deletions

recipes/bild_de.recipe (new file)

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Bild.de'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    # get the cover from myspace
    cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'

    # set what to fetch on the site
    remove_tags_before = dict(name='h2', attrs={'id':'cover'})
    remove_tags_after = dict(name='div', attrs={'class':'back'})

    # thanks to kiklop74 for the code (see sticky thread -> Recipes - Re-usable code);
    # this removes a lot of direct links
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    # remove the ads
    filter_regexps = [r'.\.smartadserver\.com']

    def skip_ad_pages(self, soup):
        return None

    # get the real URL behind .feedsportal.com and fetch the articles
    def get_article_url(self, article):
        return article.get('id', article.get('guid', None))

    # list of the RSS sources from www.bild.de
    feeds = [(u'Überblick', u'http://rss.bild.de/bild.xml'),
             (u'News', u'http://rss.bild.de/bild-news.xml'),
             (u'Politik', u'http://rss.bild.de/bild-politik.xml'),
             (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
             (u'Sport', u'http://rss.bild.de/bild-sport.xml'),
             (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
             (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml')
             ]
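Two helpers carry the actual logic of this recipe: get_article_url unwraps the feedsportal.com redirect by preferring the feed item's id/guid, and preprocess_html flattens every <a> tag to plain text so the generated e-book is not littered with links. The same pair reappears in several of the recipes below. A minimal standalone sketch of the pattern follows; the class name, title and feed URL are placeholders, not part of this commit:

from calibre.web.feeds.recipes import BasicNewsRecipe

class LinkFlattenSketch(BasicNewsRecipe):
    # Hypothetical recipe, only to illustrate the two helpers described above.
    title = 'Link-flatten sketch'
    feeds = [('Demo', 'http://example.com/rss.xml')]  # placeholder feed

    def get_article_url(self, article):
        # feedsportal-style feeds keep the real article URL in id/guid,
        # so prefer those over the redirecting <link> element
        return article.get('id', article.get('guid', None))

    def preprocess_html(self, soup):
        # replace each <a>text</a> with just its text node
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup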


@@ -37,7 +37,7 @@ class DN_se(BasicNewsRecipe):
              ,(u'Kultur' , u'http://www.dn.se/kultur-rss' )
             ]

-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'article-content'})]
     remove_tags_before = dict(name='h1')
     remove_tags_after = dict(name='div',attrs={'id':'byline'})
     remove_tags = [

recipes/express_de.recipe (new file)

@@ -0,0 +1,74 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
    '''
    remove_javascript = True
    remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
    remove_tags = [dict(id='kalaydo'),
                   dict(id='Header'),
                   dict(id='Searchline'),
                   dict(id='MainNav'),
                   dict(id='Logo'),
                   dict(id='MainLinkSpacer'),
                   dict(id='MainLinks'),
                   dict(title='Diese Seite Bookmarken'),
                   dict(name='span'),
                   dict(name='div', attrs={'class':'spacer_leftneu'}),
                   dict(name='div', attrs={'class':'button kalaydologo'}),
                   dict(name='div', attrs={'class':'button stellenneu'}),
                   dict(name='div', attrs={'class':'button autoneu'}),
                   dict(name='div', attrs={'class':'button immobilienneu'}),
                   dict(name='div', attrs={'class':'button kleinanzeigen'}),
                   dict(name='div', attrs={'class':'button tiereneu'}),
                   dict(name='div', attrs={'class':'button ferienwohnungen'}),
                   dict(name='div', attrs={'class':'button inserierenneu'}),
                   dict(name='div', attrs={'class':'spacer_rightneu'}),
                   dict(name='div', attrs={'class':'spacer_rightcorner'}),
                   dict(name='div', attrs={'class':'HeaderMetaNav'}),
                   dict(name='div', attrs={'class':'HeaderSearchOption'}),
                   dict(name='div', attrs={'class':'HeaderSearch'}),
                   dict(name='div', attrs={'class':'sbutton'}),
                   dict(name='div', attrs={'class':'active'}),
                   ]

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
             (u'Regional - Köln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
             (u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
             (u'Regional - Düsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
             (u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
             (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
             (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
             (u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
             (u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
             (u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
             (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
             (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
             (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
             ]


@@ -1,51 +1,38 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
-'''
-Profile to download FAZ.net
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class FazNet(BasicNewsRecipe):
-    title = 'FAZ NET'
-    __author__ = 'Kovid Goyal, Darko Miletic'
-    description = 'Frankfurter Allgemeine Zeitung'
-    publisher = 'FAZ Electronic Media GmbH'
-    category = 'news, politics, Germany'
-    use_embedded_content = False
-    language = 'de'
-    max_articles_per_feed = 30
-    no_stylesheets = True
-    encoding = 'utf-8'
-    remove_javascript = True
-
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
-    ]
-
-    feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
-
-    def print_version(self, url):
-        article, sep, rest = url.partition('?')
-        return article.replace('.html', '~Afor~Eprint.html')
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Faz.net'
+    __author__ = 'schuster'
+    remove_tags = [dict(attrs={'class':['right', 'ArrowLinkRight', 'ModulVerlagsInfo', 'left', 'Head']}),
+                   dict(id=['BreadCrumbs', 'tstag', 'FazFooterPrint']),
+                   dict(name=['script', 'noscript', 'style'])]
+    oldest_article = 2
+    description = 'Frankfurter Allgemeine Zeitung'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    remove_javascript = True
+    cover_url = 'http://www.faz.net/f30/Images/Logos/logo.gif'
+
+    def print_version(self, url):
+        return url.replace('.html', '~Afor~Eprint.html')
+
+    feeds = [(u'Politik', u'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
+             (u'Wirtschaft', u'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
+             (u'Feuilleton', u'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
+             (u'Sport', u'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
+             (u'Gesellschaft', u'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
+             (u'Finanzen', u'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
+             (u'Wissen', u'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
+             (u'Reise', u'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
+             (u'Technik & Motor', u'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
+             (u'Beruf & Chance', u'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml'),
+             (u'Kunstmarkt', u'http://www.faz.net/s/RubBC09F7BF72A2405A96718ECBFB68FBFE/Tpl~Epartner~SRss_.xml'),
+             (u'Immobilien', u'http://www.faz.net/s/RubFED172A9E10F46B3A5F01B02098C0C8D/Tpl~Epartner~SRss_.xml'),
+             (u'Rhein-Main Zeitung', u'http://www.faz.net/s/RubABE881A6669742C2A5EBCB5D50D7EBEE/Tpl~Epartner~SRss_.xml'),
+             (u'Atomdebatte', u'http://www.faz.net/s/Rub469C43057F8C437CACC2DE9ED41B7950/Tpl~Epartner~SRss_.xml')
+             ]


@@ -1,50 +1,60 @@
-#!/usr/bin/env python
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class golem_ger(BasicNewsRecipe):
-    title          = u'Golem.de'
-    language       = 'de'
-    __author__     = 'Kovid Goyal'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    lang           = 'de-DE'
-    no_stylesheets = True
-    encoding       = 'iso-8859-1'
-    recursions     = 1
-    match_regexps  = [r'http://www.golem.de/.*.html']
-
-    keep_only_tags = [
-        dict(name='h1', attrs={'class':'artikelhead'}),
-        dict(name='p', attrs={'class':'teaser'}),
-        dict(name='div', attrs={'class':'artikeltext'}),
-        dict(name='h2', attrs={'id':'artikelhead'}),
-        ]
-
-    remove_tags = [
-        dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
-        dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
-        dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
-        dict(name='td', attrs={'class':['xsmall']}),
-        ]
-
-#    remove_tags_after = [
-#        dict(name='div', attrs={'id':['contentad2']})
-#        ]
-
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
-        (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
-        (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
-        (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-        (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Golem.de'
+    __author__ = 'schuster'
+    oldest_article = 7
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    cover_url = 'http://www.e-energy.de/images/logo_golem.jpg'
+    masthead_url = 'http://www.golem.de/staticrl/images/logo.png'
+    extra_css = '''
+        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
+        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
+    '''
+    remove_javascript = True
+    remove_tags_befor = [dict(name='header', attrs={'class':'cluster-header'})]
+    remove_tags_after = [dict(name='p', attrs={'class':'meta'})]
+    remove_tags = [dict(rel='nofollow'),
+                   dict(name='header', attrs={'id':'header'}),
+                   dict(name='div', attrs={'class':'dh1'}),
+                   dict(name='label', attrs={'class':'implied'}),
+                   dict(name='section', attrs={'id':'comments'}),
+                   dict(name='li', attrs={'class':'gg_prebackcounterItem'}),
+                   dict(name='li', attrs={'class':'gg_prebackcounterItem gg_embeddedIndexCounter'}),
+                   dict(name='img', attrs={'class':'gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer'}),
+                   dict(name='div', attrs={'target':'_blank'})
+                   ]
+
+    def get_browser(self, *args, **kwargs):
+        from calibre import browser
+        kwargs['user_agent'] = 'mozilla'
+        return browser(*args, **kwargs)
+
+    def get_article_url(self, article):
+        return article.get('id', article.get('guid', None))
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
+
+    feeds = [(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
+             (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
+             (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
+             (u'Handy', u'http://rss.golem.de/rss.php?tp=handy&feed=RSS2.0'),
+             (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS2.0'),
+             (u'Mobile', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
+             (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
+             (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
+             (u'Security', u'http://rss.golem.de/rss.php?tp=sec&feed=RSS2.0'),
+             (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+             (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+             (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
@@ -53,31 +63,8 @@ class golem_ger(BasicNewsRecipe):
             (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
             (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
             (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
-            (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
-            (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
+            (u'Wirtschaft', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
+            (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0')
             ]
-
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
-        (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-        ]
-
-    extra_css = '''
-        h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
-        h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
-        h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
-        h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
-        h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
-        .teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
-        .xsmall{font-style:italic;font-size:x-small;}
-        .td{font-style:italic;font-size:x-small;}
-        img {align:left;}
-    '''

recipes/max_planck.recipe (new file)

@@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Max-Planck-Inst.'
    __author__ = 'schuster'
    remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
                   dict(id=['ie_clearing', 'col2', 'col2_content']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    def print_version(self, url):
        split_url = url.split("/")
        print_url = 'http://www.mpg.de/print/' + split_url[3]
        return print_url

    feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

recipes/ngz.recipe (new file)

@@ -0,0 +1,29 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'NGZ-online'
    __author__ = 'schuster'
    remove_tags_before = dict(id='bu')
    remove_tags_after = dict(id='noblock')
    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix', 'liketext']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook', 'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag', 'Ihr Name', 'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.', 'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto', 'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung', 'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter', 'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast', 'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos', 'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong', 'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider', 'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'

    def print_version(self, url):
        return url + '?ot=de.circit.rpo.PopupPageLayout.ot'

    feeds = [
        (u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
        (u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
        (u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
        (u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
        (u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen')
    ]

recipes/pro_physik.recipe (new file)

@@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Pro Physik'
    __author__ = 'schuster'
    oldest_article = 4
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.pro-physik.de/Phy/images/site/prophysik_logo1.jpg'

    def print_version(self, url):
        return url.replace('leadArticle.do', 'print.do')

    feeds = [(u'Hightech', u'http://www.pro-physik.de/Phy/hightechfeed.xml'),
             (u'Forschung', u'http://www.pro-physik.de/Phy/forschungfeed.xml'),
             (u'Magazin', u'http://www.pro-physik.de/Phy/magazinfeed.xml')]

recipes/spektrum.recipe (new file)

@@ -0,0 +1,28 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Spektrum (der Wissenschaft)'
    __author__ = 'schuster'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'de'
    cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg'
    remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}),
                   dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
                   dict(name=['naservice'])]

    def print_version(self, url):
        newurl = url.replace('artikel/', 'sixcms/detail.php?id=')
        return newurl + '&_druckversion=1'

    feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'),
             (u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'),
             (u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'),
             (u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'),
             (u'epoc', u'http://www.epoc.de/artikel/982625')
             ]

    filter_regexps = [r'ads\.doubleclick\.net']
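To make the print_version rewrite concrete, this is what it would produce for one of the feed URLs above; the snippet only shows the string manipulation and is not part of the committed recipe:

# Worked example of the URL rewrite performed by print_version above (Python 2).
url = 'http://www.spektrum.de/artikel/982623'
print_url = url.replace('artikel/', 'sixcms/detail.php?id=') + '&_druckversion=1'
print print_url
# -> http://www.spektrum.de/sixcms/detail.php?id=982623&_druckversion=1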


@@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Technology Review'
    __author__ = 'schuster'
    remove_tags_before = dict(id='keywords')
    remove_tags_after = dict(id='kommentar')
    remove_tags = [dict(attrs={'class':['navi_oben_pvg', 'navi_oben_tarifr', 'navi_oben_itm', 'navi_oben_eve', 'navi_oben_whi', 'navi_oben_abo', 'navi_oben_shop', 'navi_top_logo', 'navi_top_abschnitt', 'first']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 4
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    def print_version(self, url):
        return url + '?view=print'

    feeds = [
        (u'Technik News', u'http://www.heise.de/tr/news-atom.xml') ]


@@ -32,7 +32,6 @@ class Win32(VMInstaller):
     FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
     INSTALLER_EXT = 'msi'
     SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
-    BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD

     def download_installer(self):
         installer = self.installer()


@@ -6,10 +6,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
+import os, cPickle, re, shutil, marshal, zipfile, glob
 from zlib import compress
-from setup import Command, basenames, __appname__, iswindows
+from setup import Command, basenames, __appname__

 def get_opts_from_parser(parser):
     def do_opt(opt):
@@ -34,12 +34,12 @@ class Kakasi(Command):
         self.records = {}
         src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
         dest = self.j(self.RESOURCES, 'localization',
-                'pykakasi','kanwadict2.db')
+                'pykakasi','kanwadict2.pickle')
         base = os.path.dirname(dest)
         if not os.path.exists(base):
             os.makedirs(base)
-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating Kanwadict')

             for line in open(src, "r"):
@@ -50,7 +50,7 @@ class Kakasi(Command):
         dest = self.j(self.RESOURCES, 'localization',
                 'pykakasi','itaijidict2.pickle')

-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating Itaijidict')
             self.mkitaiji(src, dest)
@@ -58,7 +58,7 @@ class Kakasi(Command):
         dest = self.j(self.RESOURCES, 'localization',
                 'pykakasi','kanadict2.pickle')

-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating kanadict')
             self.mkkanadict(src, dest)
@@ -75,7 +75,7 @@ class Kakasi(Command):
                 continue
             pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
             dic[pair[0]] = pair[1]
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+        cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle

     def mkkanadict(self, src, dst):
         dic = {}
@@ -87,7 +87,7 @@ class Kakasi(Command):
                 continue
             (alpha, kana) = line.split(' ')
             dic[kana] = alpha
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+        cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle

     def parsekdict(self, line):
         line = line.decode("utf-8").strip()
@@ -115,16 +115,11 @@ class Kakasi(Command):
             self.records[key][kanji]=[(yomi, tail)]

     def kanwaout(self, out):
-        try:
-            # Needed as otherwise anydbm tries to create a gdbm db when the db
-            # created on Unix is found
-            os.remove(out)
-        except:
-            pass
-        dic = anydbm.open(out, 'n')
-        for (k, v) in self.records.iteritems():
-            dic[k] = compress(marshal.dumps(v))
-        dic.close()
+        with open(out, 'wb') as f:
+            dic = {}
+            for k, v in self.records.iteritems():
+                dic[k] = compress(marshal.dumps(v))
+            cPickle.dump(dic, f, -1)

     def clean(self):
         kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
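With this change the kanwadict is no longer an anydbm database but a single pickled dict whose values are zlib-compressed marshal blobs. A minimal sketch of how such a file could be read back, mirroring the loading side of this commit (the path and function name are placeholders; calibre itself loads the data through P() in jisyo.py, as shown further down):

import cPickle, marshal
from zlib import decompress

def load_kanwa_records(path):
    # The file is a pickled dict; each value was written as
    # compress(marshal.dumps(list_of_records)) by kanwaout() above.
    with open(path, 'rb') as f:
        table = cPickle.load(f)
    records = {}
    for key, blob in table.iteritems():
        records[key] = marshal.loads(decompress(blob))
    return records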


@@ -690,6 +690,14 @@ class MobiReader(object):
                 lm = unit_convert('2em', 12, 500, 166)
             lm = self.left_margins.get(tag, lm)
             ti = self.text_indents.get(tag, ti)
+            try:
+                lm = float(lm)
+            except:
+                lm = 0.0
+            try:
+                ti = float(ti)
+            except:
+                ti = 0.0
             return lm + ti
         parent = tag


@@ -2,12 +2,8 @@
 # jisyo.py
 #
 # Copyright 2011 Hiroshi Miura <miurahr@linux.com>

-from cPickle import load
-import anydbm,marshal
+import cPickle, marshal
 from zlib import decompress
-import os
-import calibre.utils.resources as resources

 class jisyo (object):
     kanwadict = None
@@ -25,16 +21,14 @@ class jisyo (object):
     def __init__(self):
         if self.kanwadict is None:
-            dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
-            self.kanwadict = anydbm.open(dictpath,'r')
+            self.kanwadict = cPickle.loads(
+                    P('localization/pykakasi/kanwadict2.pickle', data=True))
         if self.itaijidict is None:
-            itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
-            itaiji_pkl = open(itaijipath, 'rb')
-            self.itaijidict = load(itaiji_pkl)
+            self.itaijidict = cPickle.loads(
+                    P('localization/pykakasi/itaijidict2.pickle', data=True))
         if self.kanadict is None:
-            kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
-            kanadict_pkl = open(kanadictpath, 'rb')
-            self.kanadict = load(kanadict_pkl)
+            self.kanadict = cPickle.loads(
+                    P('localization/pykakasi/kanadict2.pickle', data=True))

     def load_jisyo(self, char):
         try:#python2


@@ -19,8 +19,9 @@ class PreferencesAction(InterfaceAction):

     def genesis(self):
         pm = QMenu()
-        acname = _('Change calibre behavior') if isosx else _('Preferences')
-        pm.addAction(QIcon(I('config.png')), acname, self.do_config)
+        pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
+        if isosx:
+            pm.addAction(QIcon(I('config.png')), _('Change calibre behavior'), self.do_config)
         pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
                 self.gui.run_wizard)
         if not DEBUG:


@@ -10,7 +10,6 @@ License: http://www.opensource.org/licenses/mit-license.php

 import re
 from calibre.utils.icu import capitalize
-from calibre.utils.config import prefs

 __all__ = ['titlecase']
 __version__ = '0.5'
@@ -31,6 +30,17 @@ ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT)
 UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
 MAC_MC = re.compile(r"^([Mm]a?c)(.+)")

+_lang = None
+
+def lang():
+    global _lang
+    if _lang is None:
+        from calibre.utils.localization import get_lang
+        _lang = get_lang().lower()
+    return _lang
+
 def titlecase(text):

     """
@@ -68,7 +78,7 @@ def titlecase(text):
             line.append(icu_lower(word))
             continue

-        if prefs['language'].lower().startswith('en'):
+        if lang().startswith('en'):
             match = MAC_MC.match(word)
             if match and not match.group(2)[:3] in ('hin', 'ht'):
                 line.append("%s%s" % (capitalize(match.group(1)),