Sync to trunk.

John Schember 2011-05-15 17:55:02 -04:00
commit d69621b71d
16 changed files with 390 additions and 164 deletions

recipes/bild_de.recipe (new file)

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Bild.de'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    # get the cover from myspace
    cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'

    # set what to fetch on the site
    remove_tags_before = dict(name='h2', attrs={'id':'cover'})
    remove_tags_after = dict(name='div', attrs={'class':'back'})

    # thanks to kiklop74 for the code (see sticky thread -> Recipes - Re-usable code);
    # this removes a lot of direct links
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    # remove the ads
    filter_regexps = [r'.\.smartadserver\.com']

    def skip_ad_pages(self, soup):
        return None

    # get the real URL behind .feedsportal.com and fetch the articles
    def get_article_url(self, article):
        return article.get('id', article.get('guid', None))

    # list of the RSS sources from www.bild.de
    feeds = [(u'Überblick', u'http://rss.bild.de/bild.xml'),
             (u'News', u'http://rss.bild.de/bild-news.xml'),
             (u'Politik', u'http://rss.bild.de/bild-politik.xml'),
             (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
             (u'Sport', u'http://rss.bild.de/bild-sport.xml'),
             (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
             (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml')
             ]
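Two helpers carry the actual logic of this recipe: get_article_url unwraps the feedsportal.com redirect by preferring the feed item's id/guid, and preprocess_html flattens every <a> tag to plain text so the generated e-book is not littered with links. The same pair reappears in several of the recipes below. A minimal standalone sketch of the pattern follows; the class name, title and feed URL are placeholders, not part of this commit:

from calibre.web.feeds.recipes import BasicNewsRecipe

class LinkFlattenSketch(BasicNewsRecipe):
    # Hypothetical recipe, only to illustrate the two helpers described above.
    title = 'Link-flatten sketch'
    feeds = [('Demo', 'http://example.com/rss.xml')]  # placeholder feed

    def get_article_url(self, article):
        # feedsportal-style feeds keep the real article URL in id/guid,
        # so prefer those over the redirecting <link> element
        return article.get('id', article.get('guid', None))

    def preprocess_html(self, soup):
        # replace each <a>text</a> with just its text node
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup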


@@ -37,7 +37,7 @@ class DN_se(BasicNewsRecipe):
              ,(u'Kultur' , u'http://www.dn.se/kultur-rss' )
             ]

-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'article-content'})]
     remove_tags_before = dict(name='h1')
     remove_tags_after = dict(name='div',attrs={'id':'byline'})
     remove_tags = [

recipes/express_de.recipe (new file)

@@ -0,0 +1,74 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
    '''
    remove_javascript = True
    remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
    remove_tags = [dict(id='kalaydo'),
                   dict(id='Header'),
                   dict(id='Searchline'),
                   dict(id='MainNav'),
                   dict(id='Logo'),
                   dict(id='MainLinkSpacer'),
                   dict(id='MainLinks'),
                   dict(title='Diese Seite Bookmarken'),
                   dict(name='span'),
                   dict(name='div', attrs={'class':'spacer_leftneu'}),
                   dict(name='div', attrs={'class':'button kalaydologo'}),
                   dict(name='div', attrs={'class':'button stellenneu'}),
                   dict(name='div', attrs={'class':'button autoneu'}),
                   dict(name='div', attrs={'class':'button immobilienneu'}),
                   dict(name='div', attrs={'class':'button kleinanzeigen'}),
                   dict(name='div', attrs={'class':'button tiereneu'}),
                   dict(name='div', attrs={'class':'button ferienwohnungen'}),
                   dict(name='div', attrs={'class':'button inserierenneu'}),
                   dict(name='div', attrs={'class':'spacer_rightneu'}),
                   dict(name='div', attrs={'class':'spacer_rightcorner'}),
                   dict(name='div', attrs={'class':'HeaderMetaNav'}),
                   dict(name='div', attrs={'class':'HeaderSearchOption'}),
                   dict(name='div', attrs={'class':'HeaderSearch'}),
                   dict(name='div', attrs={'class':'sbutton'}),
                   dict(name='div', attrs={'class':'active'}),
                   ]

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
             (u'Regional - Köln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
             (u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
             (u'Regional - Düsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
             (u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
             (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
             (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
             (u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
             (u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
             (u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
             (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
             (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
             (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
             ]


@@ -1,51 +1,38 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
-'''
-Profile to download FAZ.net
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class FazNet(BasicNewsRecipe):
-    title = 'FAZ NET'
-    __author__ = 'Kovid Goyal, Darko Miletic'
-    description = 'Frankfurter Allgemeine Zeitung'
-    publisher = 'FAZ Electronic Media GmbH'
-    category = 'news, politics, Germany'
-    use_embedded_content = False
-    language = 'de'
-    max_articles_per_feed = 30
-    no_stylesheets = True
-    encoding = 'utf-8'
-    remove_javascript = True
-
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
-    ]
-
-    feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
-
-    def print_version(self, url):
-        article, sep, rest = url.partition('?')
-        return article.replace('.html', '~Afor~Eprint.html')
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Faz.net'
+    __author__ = 'schuster'
+    remove_tags = [dict(attrs={'class':['right', 'ArrowLinkRight', 'ModulVerlagsInfo', 'left', 'Head']}),
+                   dict(id=['BreadCrumbs', 'tstag', 'FazFooterPrint']),
+                   dict(name=['script', 'noscript', 'style'])]
+    oldest_article = 2
+    description = 'Frankfurter Allgemeine Zeitung'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    remove_javascript = True
+    cover_url = 'http://www.faz.net/f30/Images/Logos/logo.gif'
+
+    def print_version(self, url):
+        return url.replace('.html', '~Afor~Eprint.html')
+
+    feeds = [(u'Politik', u'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
+             (u'Wirtschaft', u'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
+             (u'Feuilleton', u'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
+             (u'Sport', u'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
+             (u'Gesellschaft', u'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
+             (u'Finanzen', u'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
+             (u'Wissen', u'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
+             (u'Reise', u'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
+             (u'Technik & Motor', u'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
+             (u'Beruf & Chance', u'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml'),
+             (u'Kunstmarkt', u'http://www.faz.net/s/RubBC09F7BF72A2405A96718ECBFB68FBFE/Tpl~Epartner~SRss_.xml'),
+             (u'Immobilien', u'http://www.faz.net/s/RubFED172A9E10F46B3A5F01B02098C0C8D/Tpl~Epartner~SRss_.xml'),
+             (u'Rhein-Main Zeitung', u'http://www.faz.net/s/RubABE881A6669742C2A5EBCB5D50D7EBEE/Tpl~Epartner~SRss_.xml'),
+             (u'Atomdebatte', u'http://www.faz.net/s/Rub469C43057F8C437CACC2DE9ED41B7950/Tpl~Epartner~SRss_.xml')
+             ]


@@ -1,50 +1,60 @@
-#!/usr/bin/env python
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class golem_ger(BasicNewsRecipe):
-    title          = u'Golem.de'
-    language       = 'de'
-    __author__     = 'Kovid Goyal'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    lang           = 'de-DE'
-    no_stylesheets = True
-    encoding       = 'iso-8859-1'
-    recursions     = 1
-    match_regexps  = [r'http://www.golem.de/.*.html']
-
-    keep_only_tags = [
-        dict(name='h1', attrs={'class':'artikelhead'}),
-        dict(name='p', attrs={'class':'teaser'}),
-        dict(name='div', attrs={'class':'artikeltext'}),
-        dict(name='h2', attrs={'id':'artikelhead'}),
-        ]
-
-    remove_tags = [
-        dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
-        dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
-        dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
-        dict(name='td', attrs={'class':['xsmall']}),
-        ]
-
-#    remove_tags_after = [
-#        dict(name='div', attrs={'id':['contentad2']})
-#        ]
-
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
-        (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
-        (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
-        (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-        (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Golem.de'
+    __author__ = 'schuster'
+    oldest_article = 7
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    cover_url = 'http://www.e-energy.de/images/logo_golem.jpg'
+    masthead_url = 'http://www.golem.de/staticrl/images/logo.png'
+    extra_css = '''
+        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
+        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
+    '''
+    remove_javascript = True
+    remove_tags_befor = [dict(name='header', attrs={'class':'cluster-header'})]
+    remove_tags_after = [dict(name='p', attrs={'class':'meta'})]
+    remove_tags = [dict(rel='nofollow'),
+                   dict(name='header', attrs={'id':'header'}),
+                   dict(name='div', attrs={'class':'dh1'}),
+                   dict(name='label', attrs={'class':'implied'}),
+                   dict(name='section', attrs={'id':'comments'}),
+                   dict(name='li', attrs={'class':'gg_prebackcounterItem'}),
+                   dict(name='li', attrs={'class':'gg_prebackcounterItem gg_embeddedIndexCounter'}),
+                   dict(name='img', attrs={'class':'gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer'}),
+                   dict(name='div', attrs={'target':'_blank'})
+                   ]
+
+    def get_browser(self, *args, **kwargs):
+        from calibre import browser
+        kwargs['user_agent'] = 'mozilla'
+        return browser(*args, **kwargs)
+
+    def get_article_url(self, article):
+        return article.get('id', article.get('guid', None))
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
+
+    feeds = [(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
+             (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
+             (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
+             (u'Handy', u'http://rss.golem.de/rss.php?tp=handy&feed=RSS2.0'),
+             (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS2.0'),
+             (u'Mobile', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
+             (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
+             (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
+             (u'Security', u'http://rss.golem.de/rss.php?tp=sec&feed=RSS2.0'),
+             (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+             (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+             (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
@@ -53,31 +63,8 @@ class golem_ger(BasicNewsRecipe):
             (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
             (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
             (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
-            (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
-            (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
+            (u'Wirtschaft', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
+            (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0')
             ]
-
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
-        (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-        ]
-
-    extra_css = '''
-        h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
-        h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
-        h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
-        h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
-        h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
-        .teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
-        .xsmall{font-style:italic;font-size:x-small;}
-        .td{font-style:italic;font-size:x-small;}
-        img {align:left;}
-    '''

recipes/max_planck.recipe (new file)

@@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Max-Planck-Inst.'
    __author__ = 'schuster'
    remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
                   dict(id=['ie_clearing', 'col2', 'col2_content']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    def print_version(self, url):
        split_url = url.split("/")
        print_url = 'http://www.mpg.de/print/' + split_url[3]
        return print_url

    feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

recipes/ngz.recipe (new file)

@@ -0,0 +1,29 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'NGZ-online'
    __author__ = 'schuster'
    remove_tags_before = dict(id='bu')
    remove_tags_after = dict(id='noblock')
    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix', 'liketext']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook', 'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag', 'Ihr Name', 'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.', 'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto', 'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung', 'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter', 'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast', 'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos', 'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong', 'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider', 'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'

    def print_version(self, url):
        return url + '?ot=de.circit.rpo.PopupPageLayout.ot'

    feeds = [
        (u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
        (u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
        (u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
        (u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
        (u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen')
    ]

recipes/pro_physik.recipe (new file)

@@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Pro Physik'
    __author__ = 'schuster'
    oldest_article = 4
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.pro-physik.de/Phy/images/site/prophysik_logo1.jpg'

    def print_version(self, url):
        return url.replace('leadArticle.do', 'print.do')

    feeds = [(u'Hightech', u'http://www.pro-physik.de/Phy/hightechfeed.xml'),
             (u'Forschung', u'http://www.pro-physik.de/Phy/forschungfeed.xml'),
             (u'Magazin', u'http://www.pro-physik.de/Phy/magazinfeed.xml')]

recipes/spektrum.recipe (new file)

@@ -0,0 +1,28 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Spektrum (der Wissenschaft)'
    __author__ = 'schuster'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'de'
    cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg'
    remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}),
                   dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
                   dict(name=['naservice'])]

    def print_version(self, url):
        newurl = url.replace('artikel/', 'sixcms/detail.php?id=')
        return newurl + '&_druckversion=1'

    feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'),
             (u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'),
             (u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'),
             (u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'),
             (u'epoc', u'http://www.epoc.de/artikel/982625')
             ]

    filter_regexps = [r'ads\.doubleclick\.net']
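To make the print_version rewrite concrete, this is what it would produce for one of the feed URLs above; the snippet only shows the string manipulation and is not part of the committed recipe:

# Worked example of the URL rewrite performed by print_version above (Python 2).
url = 'http://www.spektrum.de/artikel/982623'
print_url = url.replace('artikel/', 'sixcms/detail.php?id=') + '&_druckversion=1'
print print_url
# -> http://www.spektrum.de/sixcms/detail.php?id=982623&_druckversion=1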


@@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Technology Review'
    __author__ = 'schuster'
    remove_tags_before = dict(id='keywords')
    remove_tags_after = dict(id='kommentar')
    remove_tags = [dict(attrs={'class':['navi_oben_pvg', 'navi_oben_tarifr', 'navi_oben_itm', 'navi_oben_eve', 'navi_oben_whi', 'navi_oben_abo', 'navi_oben_shop', 'navi_top_logo', 'navi_top_abschnitt', 'first']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
                   dict(name=['script', 'noscript', 'style'])]
    oldest_article = 4
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    def print_version(self, url):
        return url + '?view=print'

    feeds = [
        (u'Technik News', u'http://www.heise.de/tr/news-atom.xml') ]


@@ -32,7 +32,6 @@ class Win32(VMInstaller):
     FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
     INSTALLER_EXT = 'msi'
     SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
-    BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD

     def download_installer(self):
         installer = self.installer()


@@ -6,10 +6,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
+import os, cPickle, re, shutil, marshal, zipfile, glob
 from zlib import compress
-from setup import Command, basenames, __appname__, iswindows
+from setup import Command, basenames, __appname__

 def get_opts_from_parser(parser):
     def do_opt(opt):
@@ -34,12 +34,12 @@ class Kakasi(Command):
         self.records = {}
         src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
         dest = self.j(self.RESOURCES, 'localization',
-                'pykakasi','kanwadict2.db')
+                'pykakasi','kanwadict2.pickle')
         base = os.path.dirname(dest)
         if not os.path.exists(base):
             os.makedirs(base)
-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating Kanwadict')

             for line in open(src, "r"):
@@ -50,7 +50,7 @@ class Kakasi(Command):
         dest = self.j(self.RESOURCES, 'localization',
                 'pykakasi','itaijidict2.pickle')

-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating Itaijidict')
             self.mkitaiji(src, dest)
@@ -58,7 +58,7 @@ class Kakasi(Command):
         dest = self.j(self.RESOURCES, 'localization',
                 'pykakasi','kanadict2.pickle')

-        if self.newer(dest, src) or iswindows:
+        if self.newer(dest, src):
             self.info('\tGenerating kanadict')
             self.mkkanadict(src, dest)
@@ -75,7 +75,7 @@ class Kakasi(Command):
                 continue
             pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
             dic[pair[0]] = pair[1]
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+        cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle

     def mkkanadict(self, src, dst):
         dic = {}
@@ -87,7 +87,7 @@ class Kakasi(Command):
                 continue
             (alpha, kana) = line.split(' ')
             dic[kana] = alpha
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+        cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle

     def parsekdict(self, line):
         line = line.decode("utf-8").strip()
@@ -115,16 +115,11 @@ class Kakasi(Command):
             self.records[key][kanji]=[(yomi, tail)]

     def kanwaout(self, out):
-        try:
-            # Needed as otherwise anydbm tries to create a gdbm db when the db
-            # created on Unix is found
-            os.remove(out)
-        except:
-            pass
-        dic = anydbm.open(out, 'n')
-        for (k, v) in self.records.iteritems():
-            dic[k] = compress(marshal.dumps(v))
-        dic.close()
+        with open(out, 'wb') as f:
+            dic = {}
+            for k, v in self.records.iteritems():
+                dic[k] = compress(marshal.dumps(v))
+            cPickle.dump(dic, f, -1)

     def clean(self):
         kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
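With this change the kanwadict is no longer an anydbm database but a single pickled dict whose values are zlib-compressed marshal blobs. A minimal sketch of how such a file could be read back, mirroring the loading side of this commit (the path and function name are placeholders; calibre itself loads the data through P() in jisyo.py, as shown further down):

import cPickle, marshal
from zlib import decompress

def load_kanwa_records(path):
    # The file is a pickled dict; each value was written as
    # compress(marshal.dumps(list_of_records)) by kanwaout() above.
    with open(path, 'rb') as f:
        table = cPickle.load(f)
    records = {}
    for key, blob in table.iteritems():
        records[key] = marshal.loads(decompress(blob))
    return records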


@@ -690,6 +690,14 @@ class MobiReader(object):
                 lm = unit_convert('2em', 12, 500, 166)
             lm = self.left_margins.get(tag, lm)
             ti = self.text_indents.get(tag, ti)
+            try:
+                lm = float(lm)
+            except:
+                lm = 0.0
+            try:
+                ti = float(ti)
+            except:
+                ti = 0.0
             return lm + ti
         parent = tag


@@ -2,12 +2,8 @@
 # jisyo.py
 #
 # Copyright 2011 Hiroshi Miura <miurahr@linux.com>

-from cPickle import load
-import anydbm,marshal
+import cPickle, marshal
 from zlib import decompress
-import os
-import calibre.utils.resources as resources

 class jisyo (object):
     kanwadict = None
@@ -25,16 +21,14 @@ class jisyo (object):
     def __init__(self):
         if self.kanwadict is None:
-            dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
-            self.kanwadict = anydbm.open(dictpath,'r')
+            self.kanwadict = cPickle.loads(
+                    P('localization/pykakasi/kanwadict2.pickle', data=True))
         if self.itaijidict is None:
-            itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
-            itaiji_pkl = open(itaijipath, 'rb')
-            self.itaijidict = load(itaiji_pkl)
+            self.itaijidict = cPickle.loads(
+                    P('localization/pykakasi/itaijidict2.pickle', data=True))
         if self.kanadict is None:
-            kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
-            kanadict_pkl = open(kanadictpath, 'rb')
-            self.kanadict = load(kanadict_pkl)
+            self.kanadict = cPickle.loads(
+                    P('localization/pykakasi/kanadict2.pickle', data=True))

     def load_jisyo(self, char):
         try:#python2


@@ -19,8 +19,9 @@ class PreferencesAction(InterfaceAction):

     def genesis(self):
         pm = QMenu()
-        acname = _('Change calibre behavior') if isosx else _('Preferences')
-        pm.addAction(QIcon(I('config.png')), acname, self.do_config)
+        pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
+        if isosx:
+            pm.addAction(QIcon(I('config.png')), _('Change calibre behavior'), self.do_config)
         pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
                 self.gui.run_wizard)
         if not DEBUG:


@@ -10,7 +10,6 @@ License: http://www.opensource.org/licenses/mit-license.php

 import re
 from calibre.utils.icu import capitalize
-from calibre.utils.config import prefs

 __all__ = ['titlecase']
 __version__ = '0.5'
@@ -31,6 +30,17 @@ ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT)
 UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
 MAC_MC = re.compile(r"^([Mm]a?c)(.+)")

+_lang = None
+
+def lang():
+    global _lang
+    if _lang is None:
+        from calibre.utils.localization import get_lang
+        _lang = get_lang().lower()
+    return _lang
+
 def titlecase(text):

     """
@@ -68,7 +78,7 @@ def titlecase(text):
             line.append(icu_lower(word))
             continue

-        if prefs['language'].lower().startswith('en'):
+        if lang().startswith('en'):
             match = MAC_MC.match(word)
             if match and not match.group(2)[:3] in ('hin', 'ht'):
                 line.append("%s%s" % (capitalize(match.group(1)),