Sync to trunk.

This commit is contained in:
John Schember 2011-08-24 18:10:41 -04:00
commit 140b1181d1
63 changed files with 2203 additions and 343 deletions

View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Android_com_pl(BasicNewsRecipe):
    """Fetch news from Android.com.pl via its frontpage RSS feed."""

    title                 = u'Android.com.pl'
    __author__            = 'fenuks'
    description           = 'Android.com.pl - biggest polish Android site'
    category              = 'Android, mobile'
    language              = 'pl'
    oldest_article        = 8
    max_articles_per_feed = 100
    # Generic Android robot logo used as the periodical cover.
    cover_url = u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'

    feeds = [
        (u'Android',
         u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss'),
    ]

View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Bash_org_pl(BasicNewsRecipe):
    """Download funny IRC quotations from bash.org.pl."""

    title          = u'Bash.org.pl'
    __author__     = 'fenuks'
    description    = 'Bash.org.pl - funny quotations from IRC discussions'
    category       = 'funny quotations, humour'
    language       = 'pl'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'

    # Keep just the quote body; everything else on the page is chrome.
    keep_only_tags = [dict(name='div', attrs={'class': 'quote post-content post-body'})]

    feeds = [(u'Cytaty', u'http://bash.org.pl/rss')]

View File

@ -36,8 +36,9 @@ class BBC(BasicNewsRecipe):
]
remove_tags = [
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
'story-feature wide ', 'story-feature narrow']})
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
'story-feature wide ', 'story-feature narrow']}),
dict(id=['hypertab', 'comment-form']),
]
remove_attributes = ['width','height']

View File

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BrasilDeFato(BasicNewsRecipe):
    """Recipe for Brasil de Fato (brasildefato.com.br)."""

    news        = True
    title       = u'Brasil de Fato'
    __author__  = 'Alex Mitrani'
    description = u'Uma visão popular do Brasil e do mundo.'
    publisher   = u'SOCIEDADE EDITORIAL BRASIL DE FATO'
    category    = 'news, politics, Brazil, rss, Portuguese'
    language    = 'pt_BR'

    oldest_article        = 10
    max_articles_per_feed = 100
    summary_length        = 1000
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    remove_empty_feeds    = True
    masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg'

    # Article body lives in #main; the trailing 'links' block is noise,
    # and also marks the end of the useful content.
    keep_only_tags    = [dict(name='div', attrs={'id': 'main'})]
    remove_tags       = [dict(name='div', attrs={'class': 'links'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'links'})]

    feeds = [
        (u'Nacional',      u'http://www.brasildefato.com.br/rss_nacional'),
        (u'Internacional', u'http://www.brasildefato.com.br/rss_internacional'),
        (u'Entrevista',    u'http://www.brasildefato.com.br/rss_entrevista'),
        (u'Cultura',       u'http://www.brasildefato.com.br/rss_cultura'),
        (u'Análise',       u'http://www.brasildefato.com.br/rss_analise'),
    ]

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Bugun(BasicNewsRecipe):
    """Recipe for the Turkish daily newspaper BUGÜN (bugun.com.tr)."""

    title      = u'BUGÜN Gazetesi'
    __author__ = u'thomass'
    publisher  = 'thomass'
    category   = 'news, haberler,TR,gazete'
    language   = 'tr'
    # FIX: was 'newspaper ' (trailing space) — normalized so the value
    # matches calibre's standard publication-type identifier.
    publication_type = 'newspaper'

    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    encoding              = 'UTF-8'
    remove_empty_feeds    = True

    # FIX: the original line carried stray, half-commented CSS fragments
    # after the closing quote; only the actual string is kept.
    extra_css = ' div{font-size: small} h2{font-size: small;font-weight: bold} #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '

    conversion_options = {
        'tags'             : category,
        'language'         : language,
        'publisher'        : publisher,
        'linearize_tables' : True,
    }

    cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
    masthead_url  = 'http://www.bugun.com.tr/images/bugunLogo2011.png'

    # Headline, summary, info bar and article text.
    keep_only_tags = [
        dict(name='h1',  attrs={'class': ['haberBaslik']}),
        dict(name='h2',  attrs={'class': ['haberOzet']}),
        dict(name='div', attrs={'class': ['haberGriDivvvv']}),
        dict(name='div', attrs={'id':    ['haberTextDiv']}),
    ]

    feeds = [
        (u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
        (u'Yazarlar',   u'http://www.bugun.com.tr/rss/yazarlar.xml'),
        (u'Gündem',     u'http://www.bugun.com.tr/rss/gundem.xml'),
        (u'Ekonomi',    u'http://www.bugun.com.tr/rss/ekonomi.xml'),
        (u'Spor',       u'http://www.bugun.com.tr/rss/spor.xml'),
        (u'Magazin',    u'http://www.bugun.com.tr/rss/magazin.xml'),
        (u'Teknoloji',  u'http://www.bugun.com.tr/rss/teknoloji.xml'),
        (u'Yaşam',      u'http://www.bugun.com.tr/rss/yasam.xml'),
        (u'Medya',      u'http://www.bugun.com.tr/rss/medya.xml'),
        (u'Dünya',      u'http://www.bugun.com.tr/rss/dunya.xml'),
        (u'Politika',   u'http://www.bugun.com.tr/rss/politika.xml'),
        (u'Sağlık',     u'http://www.bugun.com.tr/rss/saglik.xml'),
        (u'Tarifler',   u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
    ]

16
recipes/cd_action.recipe Normal file
View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CD_Action(BasicNewsRecipe):
    """Recipe for the Polish gaming magazine CD-Action (cdaction.pl)."""

    title          = u'CD-Action'
    __author__     = 'fenuks'
    description    = 'cdaction.pl - polish magazine about games site'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'

    # Article text sits in #news_content; drop everything after the body.
    keep_only_tags    = dict(id='news_content')
    remove_tags_after = dict(name='div', attrs={'class': 'tresc'})

    feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]

47
recipes/cvecezla.recipe Normal file
View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
cvecezla.wordpress.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CveceZla(BasicNewsRecipe):
    """Recipe for the Serbian blog 'Cvece zla i naopakog'."""

    title       = 'Cvece zla i naopakog'
    __author__  = 'Darko Miletic'
    description = 'Haoticnost razmisljanja poradja haoticnost pisanja. Muzika, stripovi, igre, knjige, generalno glupiranje...'
    language    = 'sr'
    encoding    = 'utf-8'
    publication_type = 'blog'

    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{display: block } '

    conversion_options = {
        'comment'   : description,
        'tags'      : 'igre, muzika, film, blog, Srbija',
        'publisher' : 'Mehmet Krljic',
        'language'  : language,
    }

    # Map U+0110 (Đ) to U+00D0 (Ð) in the downloaded text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(attrs={'class': 'navigation'})
    remove_tags_after  = dict(attrs={'class': 'commentlist'})
    remove_tags = [
        dict(attrs={'class': ['postmetadata alt', 'sharedaddy sharedaddy-dark sd-like-enabled sd-sharing-enabled', 'reply', 'navigation']}),
        dict(attrs={'id': 'respond'}),
    ]

    feeds = [(u'Clanci', u'http://cvecezla.wordpress.com/feed/')]

    def preprocess_html(self, soup):
        """Strip inline style attributes from every tag in the article."""
        for tag in soup.findAll(style=True):
            del tag['style']
        return soup

View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Dobreprogramy_pl(BasicNewsRecipe):
    """Recipe for dobreprogramy.pl news and reader blogs."""

    title       = 'Dobreprogramy.pl'
    __author__  = 'fenuks'
    __licence__ = 'GPL v3'  # NOTE(review): nonstandard spelling (__license__ is conventional); kept as-is
    category    = 'IT'
    # FIX: 'language' was assigned twice with the same value; one copy removed.
    language    = 'pl'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding    = 'utf-8'
    cover_url   = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'

    no_stylesheets        = True
    oldest_article        = 8
    max_articles_per_feed = 100
    extra_css = '.title {font-size:22px;}'

    keep_only_tags = [dict(name='div', attrs={'class': ['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
    remove_tags    = [dict(name='div', attrs={'class': ['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]

    feeds = [
        (u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
        ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow'),
    ]

View File

@ -0,0 +1,128 @@
#import re # Provides preprocess_regexps re.compile
from calibre.web.feeds.news import BasicNewsRecipe
class FairbanksDailyNewsminer(BasicNewsRecipe):
    """Recipe for the Fairbanks Daily News-Miner (newsminer.com)."""

    title      = u'Fairbanks Daily News-miner'
    __author__ = 'Roger'
    # FIX: was ''''The voice...''' which produced a string with an
    # unbalanced leading apostrophe; the slogan is now quoted symmetrically.
    description = "'The voice of interior Alaska since 1903'"
    publisher   = 'http://www.newsminer.com/'
    category    = 'news, Alaska, Fairbanks'
    # FIX: 'language' was assigned twice with the same value; one removed.
    language    = 'en'

    oldest_article        = 7
    max_articles_per_feed = 100
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
    conversion_options    = {'linearize_tables': True}

    # TODO: I don't see any photos in my Mobi file with this masthead_url!
    masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'

    # The page's "signature_line" div mixes the article date (the
    # "story_item_date updated" span) with view counts, comment counts and
    # share buttons; a regex or manual processing would be needed to keep
    # just the date. Left as a future improvement.

    # Keep only the article container, headline and body.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'hnews hentry item'}),
        dict(name='div', attrs={'class': 'story_item_headline entry-title'}),
        dict(name='div', attrs={'class': 'full_story'}),
    ]

    # Strip sharing widgets, counters, related content and comments.
    # FIX: an exact duplicate of the 'addthis_toolbox addthis_default_style'
    # entry was removed (the single-string and list forms both remain, as
    # they match different class attributes).
    remove_tags = [
        dict(name='img',  attrs={'class': 'dont_touch_me'}),
        dict(name='span', attrs={'class': 'number_recommendations'}),
        dict(name='div',  attrs={'class': 'addthis_toolbox addthis_default_style'}),
        dict(name='div',  attrs={'class': ['addthis_toolbox', 'addthis_default_style']}),
        dict(name='span', attrs={'class': 'addthis_separator'}),
        dict(name='div',  attrs={'class': 'related_content'}),
        dict(name='div',  attrs={'class': 'comments_container'}),
        dict(name='div',  attrs={'id': 'comments_container'}),
    ]

    # Comment-out or uncomment any of the following RSS feeds according to
    # your liking.
    #
    # TODO: With more than one RSS feed, the newline is omitted for entries
    # within the Table of Contents / Index of Articles.
    # TODO: Some random bits of public posts and comments trail the last
    # page (or the TOC on MOBI files) and still need to be removed.
    feeds = [
        (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
        (u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
        (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
        (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
        (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
        # (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
        (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
        # (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
        # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
        (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
        # (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
        # (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
        # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
        # (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
        (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin'),
    ]

40
recipes/film_web.recipe Normal file
View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Filmweb_pl(BasicNewsRecipe):
    """Recipe for FilmWeb (filmweb.pl), the biggest Polish movie site."""

    title       = u'FilmWeb'
    __author__  = 'fenuks'
    description = 'FilmWeb - biggest polish movie site'
    cover_url   = 'http://userlogos.org/files/logos/crudus/filmweb.png'
    category    = 'movies'
    language    = 'pl'
    oldest_article        = 8
    max_articles_per_feed = 100
    no_stylesheets        = True
    extra_css = '.hdrBig {font-size:22px;}'

    remove_tags    = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
    keep_only_tags = [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]

    feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
        (u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
        (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
        (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
        (u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
        (u'News / Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
        (u'News / Dystrybucja dvd / blu-ray', u'http://www.filmweb.pl/feed/news/category/video'),
        (u'News / Dystrybucja kinowa', u'http://www.filmweb.pl/feed/news/category/cinema'),
        (u'News / off', u'http://www.filmweb.pl/feed/news/category/off'),
        (u'News / Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
        (u'News / Organizacje branżowe', u'http://www.filmweb.pl/feed/news/category/organizations'),
        (u'News / Internet', u'http://www.filmweb.pl/feed/news/category/internet'),
        (u'News / Różne', u'http://www.filmweb.pl/feed/news/category/other'),
        (u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'),
        (u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
        (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
        (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')]

    def skip_ad_pages(self, soup):
        """Follow the welcome-screen interstitial through to the article.

        FIX: the original subscripted soup.find(...)['href'] before the
        None check, so pages without the welcome button raised TypeError.
        Returns the target page's raw soup, or None to keep this page.
        """
        link = soup.find('a', attrs={'class': 'welcomeScreenButton'})
        if link is not None and link.get('href'):
            return self.index_to_soup(link['href'], raw=True)
        return None

View File

@ -5,6 +5,7 @@ www.ft.com/uk-edition
'''
import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -22,6 +23,8 @@ class FinancialTimes(BasicNewsRecipe):
needs_subscription = True
encoding = 'utf8'
publication_type = 'newspaper'
articles_are_obfuscated = True
temp_files = []
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN2 = 'http://media.ft.com/h/subs3.html'
@ -47,7 +50,12 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
keep_only_tags = [
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
,dict(name='div', attrs={'class':'standfirst'})
,dict(name='div', attrs={'id' :'storyContent'})
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
@ -69,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
def get_artlinks(self, elem):
articles = []
count = 0
for item in elem.findAll('a',href=True):
count = count + 1
if self.test and count > 2:
return articles
rawlink = item['href']
if rawlink.startswith('http://'):
url = rawlink
else:
url = self.PREFIX + rawlink
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'url' :urlverified
,'description':''
})
return articles
@ -97,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
st = wide.find('h4',attrs={'class':'section-no-arrow'})
if st:
strest.insert(0,st)
count = 0
for item in strest:
count = count + 1
if self.test and count > 2:
return feeds
ftitle = self.tag_to_string(item)
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
feedarts = self.get_artlinks(item.parent.ul)
@ -136,4 +153,19 @@ class FinancialTimes(BasicNewsRecipe):
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
def get_obfuscated_article(self, url):
count = 0
while (count < 10):
try:
response = self.browser.open(url)
html = response.read()
count = 10
except:
print "Retrying download..."
count += 1
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name

39
recipes/fluter_de.recipe Normal file
View File

@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch fluter.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313693926(BasicNewsRecipe):
    """fluter.de — magazine of the Bundeszentrale für politische Bildung."""

    title       = u'Fluter'
    description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb'
    language    = 'de'
    encoding    = 'UTF-8'
    __author__  = 'Armin Geller'  # 2011-08-19

    oldest_article        = 7
    max_articles_per_feed = 50

    keep_only_tags = [
        dict(name='div', attrs={'class': ["grid_8 articleText"]}),
        dict(name='div', attrs={'class': ["articleTextInnerText"]}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ["comments"]}),
        dict(attrs={'class': ['commentlink']}),
    ]

    feeds = [
        (u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'),
    ]

    extra_css = '.cs_img {margin-right: 10pt;}'

16
recipes/gram_pl.recipe Normal file
View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Gram_pl(BasicNewsRecipe):
    """Recipe for gram.pl, a Polish computer-games site."""

    title          = u'Gram.pl'
    __author__     = 'fenuks'
    description    = 'Gram.pl - site about computer games'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://www.gram.pl/www/01/img/grampl_zima.png'

    keep_only_tags = [
        dict(name='div', attrs={'class': ['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}),
        dict(attrs={'class': ['contentheading', 'contentpaneopen']}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': ['extraText', 'must-log-in']}),
        dict(attrs={'class': ['el', 'headline', 'post-info']}),
        dict(name='div', attrs={'class': ['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}),
        dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title']),
    ]

    feeds = [
        (u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
        (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
    ]

View File

@ -13,6 +13,8 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')]
@ -34,6 +36,9 @@ class HBR(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
self.logout_url = None
#'''
br.open(self.LOGIN_URL)
br.select_form(name='signin-form')
br['signin-form:username'] = self.username
@ -41,10 +46,13 @@ class HBR(BasicNewsRecipe):
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
try:
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
except:
self.logout_url = self.LOGOUT_URL
#'''
return br
def cleanup(self):
@ -57,6 +65,8 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self):
#return self.index_to_soup(open('/t/hbr.html').read())
today = date.today()
future = today + timedelta(days=30)
for x in [x.strftime('%y%m') for x in (future, today)]:
@ -66,53 +76,43 @@ class HBR(BasicNewsRecipe):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
def hbr_parse_toc(self, soup):
feeds = []
features = soup.find(id='issueFeaturesContent')
self.hbr_parse_section(features, feeds)
departments = soup.find(id='issueDepartments')
self.hbr_parse_section(departments, feeds)
current_section = None
articles = []
for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
if x.name == 'h3':
if current_section is not None and articles:
feeds.append((current_section, articles))
current_section = self.tag_to_string(x).capitalize()
articles = []
self.log('\tFound section:', current_section)
else:
a = x.find('a', href=True)
if a is None: continue
title = self.tag_to_string(a)
url = a['href']
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org' + url
url = self.map_url(url)
p = x.parent.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
articles.append({'title':title, 'url':url, 'description':desc,
'date':''})
return feeds
def parse_index(self):
soup = self.hbr_get_toc()
#open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
feeds = self.hbr_parse_toc(soup)
return feeds

View File

@ -6,33 +6,21 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review Blogs'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal, enhanced by BrianG'
__author__ = 'Kovid Goyal'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
INDEX = 'http://hbr.org/current'
#
# Blog Stuff
#
INCLUDE_BLOGS = True
INCLUDE_ARTICLES = False
# option-specific settings.
if INCLUDE_BLOGS == True:
remove_tags_after = dict(id='articleBody')
remove_tags_before = dict(id='pageFeature')
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
oldest_article = 30
max_articles_per_feed = 100
use_embedded_content = False
else:
timefmt = ' [%B %Y]'
remove_tags_after = dict(id='articleBody')
remove_tags_before = dict(id='pageFeature')
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
oldest_article = 30
max_articles_per_feed = 100
use_embedded_content = False
keep_only_tags = [ dict(name='div', id='pageContainer')
]
@ -41,21 +29,15 @@ class HBR(BasicNewsRecipe):
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
'mailingListTout', 'partnerCenter', 'pageFooter']),
dict(name='iframe')]
'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop']),
dict(name=['iframe', 'style'])]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
#-------------------------------------------------------------------------------------------------
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
self.logout_url = None
#'''
br.open(self.LOGIN_URL)
br.select_form(name='signin-form')
br['signin-form:username'] = self.username
@ -63,11 +45,15 @@ class HBR(BasicNewsRecipe):
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
try:
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
except:
self.logout_url = self.LOGOUT_URL
#'''
return br
#-------------------------------------------------------------------------------------------------
def cleanup(self):
if self.logout_url is not None:
@ -76,99 +62,7 @@ class HBR(BasicNewsRecipe):
def map_url(self, url):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
#-------------------------------------------------------------------------------------------------
def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX)
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
return self.index_to_soup('http://hbr.org'+url)
#-------------------------------------------------------------------------------------------------
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
#-------------------------------------------------------------------------------------------------
def hbr_parse_toc(self, soup):
    # Build the feed list from the ToC soup: the issue's feature
    # articles first, then the department articles.
    feeds = []
    for container_id in ('issueFeaturesContent', 'issueDepartments'):
        self.hbr_parse_section(soup.find(id=container_id), feeds)
    return feeds
#-------------------------------------------------------------------------------------------------
def feed_to_index_append(self, feedObject, masterFeed):
    # Convert each downloaded Feed object into the plain
    # (title, list-of-article-dicts) tuple form that an index uses,
    # appending the results to masterFeed (mutated in place).
    for feed in feedObject:
        entries = [{
            'title': art.title,
            'url': art.url,
            'date': art.date,
            'description': art.text_summary,
        } for art in feed.articles]
        # Tag the feed title with the date span covered by its articles.
        startDate, endDate = self.get_feed_dates(feed, '%d-%b')
        labelled = feed.title + ' (' + startDate + ' thru ' + endDate + ')'
        masterFeed.append((labelled, entries))
#-------------------------------------------------------------------------------------------------
def get_feed_dates(self, feedObject, dateMask):
    # Return (oldest, newest) article dates of a feed, formatted with
    # dateMask. Articles are ordered newest-first, so the oldest entry
    # is the last one and the newest is the first.
    articles = feedObject.articles
    oldest = articles[-1].localtime
    newest = articles[0].localtime
    return oldest.strftime(dateMask), newest.strftime(dateMask)
#-------------------------------------------------------------------------------------------------
def parse_index(self):
    """Return the issue's table of contents.

    When INCLUDE_ARTICLES is set, scrape the HBR ToC page; otherwise
    fall back to the default RSS-driven index.
    """
    # Idiom fix: test the flag's truthiness instead of '== True'
    # (PEP 8); behaviour for the boolean flag is unchanged. The
    # redundant feeds temporary and trailing return are folded away.
    if self.INCLUDE_ARTICLES:
        soup = self.hbr_get_toc()
        return self.hbr_parse_toc(soup)
    return BasicNewsRecipe.parse_index(self)
#-------------------------------------------------------------------------------------------------
def get_cover_url(self):
cover_url = None
index = 'http://hbr.org/current'

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe):
@ -13,53 +11,28 @@ class HoustonChronicle(BasicNewsRecipe):
language = 'en'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
use_embedded_content = False
remove_attributes = ['style']
keep_only_tags = [
dict(id=['story-head', 'story'])
]
remove_tags = [
dict(id=['share-module', 'resource-box',
'resource-box-header'])
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
'''
def parse_index(self):
    # Build the index by scraping each section landing page of
    # chron.com; articles are identified by elements carrying
    # comptype='story' and a storyid attribute.
    categories = ['news', 'sports', 'business', 'entertainment', 'life',
            'travel']
    feeds = []
    for cat in categories:
        articles = []
        soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
        for elem in soup.findAll(comptype='story', storyid=True):
            a = elem.find('a', href=True)
            if a is None: continue
            url = a['href']
            # Relative links are anchored to the site root.
            if not url.startswith('http://'):
                url = 'http://www.chron.com'+url
            articles.append({'title':self.tag_to_string(a), 'url':url,
                'description':'', 'date':''})
            # NOTE(review): debug print left in — dumps every article
            # dict to stdout; consider removing.
            pprint.pprint(articles[-1])
        if articles:
            # Section name title-cased for the feed label.
            feeds.append((string.capwords(cat), articles))
    return feeds
oldest_article = 2.0
keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
'hst-articletext' in x or 'hst-galleryitem' in x)}
feeds = [
('News', "http://www.chron.com/rss/feed/News-270.php"),
('Sports',
'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'),
('Neighborhood',
'http://www.chron.com/rss/feed/Neighborhood-305.php'),
('Business', 'http://www.chron.com/rss/feed/Business-287.php'),
('Entertainment',
'http://www.chron.com/rss/feed/Entertainment-293.php'),
('Editorials',
'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'),
('Life', 'http://www.chron.com/rss/feed/Life-297.php'),
('Science & Tech',
'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'),
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 391 B

BIN
recipes/icons/cd_action.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 972 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
recipes/icons/film_web.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
recipes/icons/gram_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 795 B

BIN
recipes/icons/wnp.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 576 B

View File

@ -6,11 +6,13 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class MSDNMagazine_en(BasicNewsRecipe):
title = 'MSDN Magazine'
__author__ = 'Darko Miletic'
description = 'The Microsoft Journal for Developers'
masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
publisher = 'Microsoft Press'
category = 'news, IT, Microsoft, programming, windows'
oldest_article = 31
@ -20,24 +22,44 @@ class MSDNMagazine_en(BasicNewsRecipe):
encoding = 'utf-8'
language = 'en'
base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]
remove_tags = [
dict(name=['object','link','base','table'])
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
dict(name='div', attrs={'class':'DivRatingsOnly'})
,dict(name='div', attrs={'class':'ShareThisButton4'})
]
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
def preprocess_html(self, soup):
    # Promote the magazine's styled <div> headings to real heading
    # tags so the conversion pipeline recognises the structure.
    promotions = (
        ('h2', {'class': ['FeatureSmallHead', 'ColumnTypeSubTitle']}),
        ('h1', {'class': ['FeatureHeadline', 'ColumnTypeTitle']}),
        ('h3', {'class': 'ArticleTypeTitle'}),
    )
    for new_name, attrs in promotions:
        for div in soup.findAll('div', attrs=attrs):
            div.name = new_name
    return soup
def find_articles(self):
    # Generator yielding one article dict per <item> in the issue's
    # RSS feed. The description is parsed twice because the feed
    # embeds HTML-escaped markup inside the description element.
    idx_contents = self.browser.open(self.rss_url).read()
    idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    for article in idx.findAll('item'):
        desc_html = self.tag_to_string(article.find('description'))
        description = self.tag_to_string(BeautifulSoup(desc_html))
        a = {
            'title': self.tag_to_string(article.find('title')),
            'url': self.tag_to_string(article.find('link')),
            'description': description,
            # 'pubdate' is lower-case — presumably because the soup
            # parser lowercases tag names; verify before changing.
            'date' : self.tag_to_string(article.find('pubdate')),
        }
        yield a
def parse_index(self):
    # Single-section index: scrape the issue name and cover image from
    # the magazine landing page, then list the articles from the RSS
    # feed via find_articles().
    soup = self.index_to_soup(self.base_url)
    # The page's first <h1> holds the issue name, e.g. "August 2011".
    issue_name = self.tag_to_string(soup.find('h1'))
    # The cover image uses the issue name as its alt text.
    img = soup.find('img',attrs ={'alt':issue_name})
    if img is not None:
        self.cover_url = img['src']
    return [(issue_name, list(self.find_articles()))]

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Niebezpiecznik_pl(BasicNewsRecipe):
    # Recipe for niebezpiecznik.pl, a Polish IT-security news site.
    # Data-only recipe: all behaviour comes from BasicNewsRecipe.
    title = u'Niebezpiecznik.pl'
    __author__ = 'fenuks'
    description = 'Niebezpiecznik.pl'
    category = 'hacking, IT'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
    # Strip share buttons and related-post boxes; keep only the title
    # and article body containers.
    remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
    keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})]
    feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
            ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]

View File

@ -5,7 +5,6 @@ class PolitiFactCom(BasicNewsRecipe):
__author__ = u'Michael Heinz'
oldest_article = 21
max_articles_per_feed = 100
recursion = 0
language = 'en'
no_stylesheets = True

View File

@ -40,11 +40,11 @@ class SVD_se(BasicNewsRecipe):
,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss')
]
keep_only_tags = [dict(name='div', attrs={'id':'articlecontent'})]
remove_tags_after = dict(name='div',attrs={'class':'articlebody normal'})
keep_only_tags = [dict(name='div', attrs={'id':['article-content', 'articlecontent']})]
remove_tags_after = dict(name='div',attrs={'class':'articlebody'})
remove_tags = [
dict(name=['object','link','base'])
,dict(name='div',attrs={'class':['articlead','factcolumn']})
,dict(name='div',attrs={'class':['articlead','factcolumn', 'article-ad']})
,dict(name='ul', attrs={'class':'toolbar articletop clearfix'})
,dict(name='p', attrs={'class':'more'})
]

21
recipes/wnp.recipe Normal file
View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
    # Recipe for wnp.pl (Wirtualny Nowy Przemysł), a Polish
    # business/industry news portal.
    title = u'WNP'
    cover_url = 'http://k.wnp.pl/images/wnpLogo.gif'
    __author__ = 'fenuks'
    description = u'Wirtualny Nowy Przemysł'
    category = 'economy'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    # Consistency fix: keep_only_tags was a bare dict; every other
    # recipe supplies a list of tag specs, so wrap it in a one-element
    # list (backward compatible — same tag is matched).
    keep_only_tags = [dict(name='div', attrs={'id':'contentText'})]
    feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
        (u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
        (u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
        (u'Serwis Hutnictwo', u'http://www.wnp.pl/rss/serwis_rss_3.xml'),
        (u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
        (u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
        (u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]

View File

@ -53,6 +53,12 @@ class WallStreetJournal(BasicNewsRecipe):
return soup
def abs_wsj_url(self, href):
    # Return href unchanged when it is already absolute; otherwise
    # anchor the site-relative path to the WSJ host.
    return href if href.startswith('http') else 'http://online.wsj.com' + href
def wsj_get_index(self):
    # Fetch and parse the WSJ issue index page.
    return self.index_to_soup('http://online.wsj.com/itp')
@ -83,14 +89,14 @@ class WallStreetJournal(BasicNewsRecipe):
pageone = a['href'].endswith('pageone')
if pageone:
title = 'Front Section'
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
title = 'What''s News'
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
return feeds
@ -146,7 +152,7 @@ class WallStreetJournal(BasicNewsRecipe):
title = self.tag_to_string(a).strip() + ' [%s]'%meta
else:
title = self.tag_to_string(a).strip()
url = 'http://online.wsj.com'+a['href']
url = self.abs_wsj_url(a['href'])
desc = ''
for p in container.findAll('p'):
desc = self.tag_to_string(p)

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Yagmur(BasicNewsRecipe):
    # Recipe for Yağmur, a Turkish quarterly language/culture/literature
    # magazine, fed through an open.dapper.net screen-scraped feed.
    title = u'Yagmur Dergisi'
    __author__ = u'thomass'
    description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
    oldest_article = 90
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    #remove_tags_before = dict(id='content-right')
    #remove_empty_feeds= True
    #remove_attributes = ['width','height']

    feeds = [
        ( u'Yagmur', u'http://open.dapper.net/services/yagmur'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)

    def print_version(self, url): # table-based article layout causes a problem; use the print-format page instead
        return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class YeniUmit(BasicNewsRecipe):
    # Recipe for Yeni Ümit, a Turkish monthly religious-science and
    # culture magazine, fed through an open.dapper.net scraped feed.
    title = u'Yeni Umit Dergisi'
    __author__ = u'thomass'
    description = 'Aylık Dini İlimler ve Kültür Dergisi'
    oldest_article = 45
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    #remove_tags_before = dict(id='content-right')
    #remove_empty_feeds= True
    #remove_attributes = ['width','height']

    feeds = [
        ( u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)

    def print_version(self, url): # table-based article layout causes a problem; use the print-format page instead
        return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')

View File

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Bugun (BasicNewsRecipe):
    # Recipe for the Turkish daily Yeni Şafak.
    # NOTE(review): the class name 'Bugun' looks like a copy-paste
    # leftover from another recipe; the recipe itself is for Yeni Şafak.
    title = u'Yenişafak Gazetesi'
    __author__ = u'thomass'
    oldest_article = 2
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'ISO 8859-9' #'UTF-8'
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'newspaper '
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
    masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'
    # Keep only the article-detail container of the 2010 site layout.
    keep_only_tags = [dict(name='div', attrs={'id':[ 'ctghaberdetay2010']}) ]
    extra_css = ' h1{font-size:20px;font-weight: bold}h2{font-size: small;font-weight: bold}div{font-size: small} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'}
    #keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
    # Drop legal-notice and related-links boxes plus the date stamp.
    remove_tags = [ dict(name='div', attrs={'id':['yasaluyari2010','divhaberdetayilisik2010']}),dict(name='font', attrs={'class':['haberdetaytarih']})]#,'news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
    #remove_attributes = ['width','height']
    remove_empty_feeds= True

    feeds = [
        ( u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
        ( u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
        ( u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
        ( u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
        ( u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
        ( u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
        ( u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
        ( u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
        ( u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
        ( u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
        ( u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
        ( u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
        ( u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
        ( u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
        ( u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
        ( u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
        ( u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
        ( u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
        ( u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
        ( u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
        ( u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
    ]

View File

@ -62,10 +62,16 @@ authors_completer_append_separator = False
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled.
# The author name copy words are a set of words which if they occur in an
# author name cause the automatically geenrated author sort string to be
# identical to the author name. This means that the sort for a string like Acme
# Inc. will be Acme Inc. instead of Inc., Acme
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,

View File

@ -17,8 +17,8 @@ class GUI(Command):
@classmethod
def find_forms(cls):
from calibre.gui2 import find_forms
return find_forms(cls.SRC)
# We do not use the calibre function find_forms as
# mporting calibre.gui2 may not work
forms = []
for root, _, files in os.walk(cls.PATH):
for name in files:
@ -29,8 +29,9 @@ class GUI(Command):
@classmethod
def form_to_compiled_form(cls, form):
from calibre.gui2 import form_to_compiled_form
return form_to_compiled_form(form)
# We do not use the calibre function form_to_compiled_form as
# importing calibre.gui2 may not work
return form.rpartition('.')[0]+'_ui.py'
def run(self, opts):
self.build_forms()

View File

@ -55,7 +55,7 @@ class Develop(Command):
short_description = 'Setup a development environment for calibre'
MODE = 0755
sub_commands = ['build', 'resources', 'gui']
sub_commands = ['build', 'resources', 'iso639', 'gui',]
def add_postinstall_options(self, parser):
parser.add_option('--make-errors-fatal', action='store_true', default=False,

View File

@ -219,12 +219,17 @@ class Resources(Command):
json.dump(function_dict, open(dest, 'wb'), indent=4)
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
for x in ('scripts', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
from setup.commands import kakasi
kakasi.clean()
for x in ('builtin_recipes.xml', 'builtin_recipes.zip',
'template-functions.json'):
x = self.j(self.RESOURCES, x)
if os.path.exists(x):
os.remove(x)

View File

@ -206,6 +206,10 @@ class Translations(POT): # {{{
for x in (i, j, d):
if os.path.exists(x):
os.remove(x)
zf = self.DEST + '.zip'
if os.path.exists(zf):
os.remove(zf)
# }}}
class GetTranslations(Translations):
@ -273,13 +277,14 @@ class GetTranslations(Translations):
class ISO639(Command):
description = 'Compile translations for ISO 639 codes'
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
'iso639.pickle')
def run(self, opts):
src = self.j(self.d(self.SRC), 'setup', 'iso639.xml')
if not os.path.exists(src):
raise Exception(src + ' does not exist')
dest = self.j(self.d(self.SRC), 'resources', 'localization',
'iso639.pickle')
dest = self.DEST
if not self.newer(dest, src):
self.info('Pickled code is up to date')
return
@ -322,3 +327,8 @@ class ISO639(Command):
'3to2':m3to2, '3bto3t':m3bto3t, 'name_map':nm}
dump(x, open(dest, 'wb'), -1)
def clean(self):
if os.path.exists(self.DEST):
os.remove(self.DEST)

File diff suppressed because it is too large Load Diff

View File

@ -36,8 +36,15 @@ def author_to_author_sort(author, method=None):
return author
if method is None:
method = tweaks['author_sort_copy_method']
ltoks = frozenset(x.lower() for x in tokens)
copy_words = frozenset(x.lower() for x in tweaks['author_name_copywords'])
if ltoks.intersection(copy_words):
method = u'copy'
if method == u'copy':
return author
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
suffixes |= set([x+u'.' for x in suffixes])

View File

@ -1312,7 +1312,7 @@ class OPFCreator(Metadata):
ncx_stream.flush()
def metadata_to_opf(mi, as_string=True):
def metadata_to_opf(mi, as_string=True, default_lang=None):
from lxml import etree
import textwrap
from calibre.ebooks.oeb.base import OPF, DC
@ -1328,7 +1328,8 @@ def metadata_to_opf(mi, as_string=True):
'[http://calibre-ebook.com]'
if not mi.languages:
lang = get_lang().replace('_', '-').partition('-')[0]
lang = (get_lang().replace('_', '-').partition('-')[0] if default_lang
is None else default_lang)
mi.languages = [lang]
root = etree.fromstring(textwrap.dedent(

View File

@ -481,7 +481,7 @@ def identify(log, abort, # {{{
log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
log('Merging results from different sources and finding earliest',
'publication dates')
'publication dates from the xisbn service')
start_time = time.time()
results = merge_identify_results(results, log)

View File

@ -33,7 +33,7 @@ def serialize_metadata_for(formats, tdir, id_):
if not mi.application_id:
mi.application_id = '__calibre_dummy__'
with open(os.path.join(tdir, '%s.opf'%id_), 'wb') as f:
f.write(metadata_to_opf(mi))
f.write(metadata_to_opf(mi, default_lang='und'))
if cdata:
with open(os.path.join(tdir, str(id_)), 'wb') as f:
f.write(cdata)

View File

@ -308,6 +308,11 @@ class MobiMLizer(object):
istate = copy.copy(istates[-1])
istate.rendered = False
istate.list_num = 0
if tag == 'ol' and 'start' in elem.attrib:
try:
istate.list_num = int(elem.attrib['start'])-1
except:
pass
istates.append(istate)
left = 0
display = style['display']

View File

@ -504,6 +504,9 @@ class Indexer(object): # {{{
else:
self.indices = self.create_book_index()
if not self.indices:
raise ValueError('No valid entries in TOC, cannot generate index')
self.records.append(self.create_index_record())
self.records.insert(0, self.create_header())
self.records.extend(self.cncx.records)

View File

@ -590,7 +590,7 @@ class MobiWriter(object):
Write the PalmDB header
'''
title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace(
' ', '_')
' ', '_')[:32]
title = title + (b'\0' * (32 - len(title)))
now = int(time.time())
nrecords = len(self.records)

View File

@ -116,6 +116,12 @@ class Serializer(object):
buf.write(b'</html>')
self.end_offset = buf.tell()
self.fixup_links()
if self.start_offset is None:
# If we don't set a start offset, the stupid Kindle will
# open the book at the location of the first IndexEntry, which
# could be anywhere. So ensure the book is always opened at the
# beginning, instead.
self.start_offset = self.body_start_offset
return buf.getvalue()
def serialize_head(self):

View File

@ -27,6 +27,7 @@ from calibre import force_unicode
from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.cssselect import css_to_xpath_no_case
cssutils_log.setLevel(logging.WARN)
@ -98,32 +99,71 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large'])
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
class CSSSelector(object):
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css)
if isinstance(css, unicode):
# Workaround for bug in lxml on windows/OS X that causes a massive
# memory leak with non ASCII selectors
css = css.encode('ascii', 'ignore').decode('ascii')
try:
path = css_to_xpath(css)
except UnicodeEncodeError: # Bug in css_to_xpath
path = '/'
except NotImplementedError: # Probably a subselect like :hover
path = '/'
path = self.LOCAL_NAME_RE.sub(r"local-name() = '", path)
etree.XPath.__init__(self, path, namespaces=namespaces)
path = self.LOCAL_NAME_RE.sub(r"local-name() = '", css_to_xpath(css))
self.sel1 = etree.XPath(css_to_xpath(css), namespaces=namespaces)
except:
self.sel1 = lambda x: []
try:
path = self.LOCAL_NAME_RE.sub(r"local-name() = '",
css_to_xpath_no_case(css))
self.sel2 = etree.XPath(path, namespaces=namespaces)
except:
self.sel2 = lambda x: []
self.sel2_use_logged = False
self.css = css
def __call__(self, node, log):
    # Evaluate the selector on node. Try the case-sensitive XPath
    # first; only when it matches nothing, fall back to the
    # case-insensitive variant, warning once per selector when the
    # fallback actually produces matches.
    try:
        ans = self.sel1(node)
    except (AssertionError, ExpressionError, etree.XPathSyntaxError,
        NameError, # thrown on OS X instead of SelectorSyntaxError
        SelectorSyntaxError):
        return []
    if not ans:
        try:
            ans = self.sel2(node)
        except:
            return []
        else:
            if ans and not self.sel2_use_logged:
                self.sel2_use_logged = True
                log.warn('Interpreting class and tag selectors case'
                    ' insensitively in the CSS selector: %s'%self.css)
    return ans
def __repr__(self):
return '<%s %s for %r>' % (
self.__class__.__name__,
hex(abs(id(self)))[2:],
self.css)
_selector_cache = {}
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
def get_css_selector(raw_selector):
    # Return a (cached) CSSSelector for raw_selector. Combinator
    # whitespace is normalised first so equivalent selectors share a
    # cache entry.
    css = MIN_SPACE_RE.sub(r'\1', raw_selector)
    if isinstance(css, unicode):
        # Workaround for bug in lxml on windows/OS X that causes a massive
        # memory leak with non ASCII selectors
        css = css.encode('ascii', 'ignore').decode('ascii')
    ans = _selector_cache.get(css, None)
    if ans is None:
        ans = CSSSelector(css)
        _selector_cache[css] = ans
    return ans
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
@ -223,41 +263,12 @@ class Stylizer(object):
rules.sort()
self.rules = rules
self._styles = {}
class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE)
capital_sel_pat = re.compile(r'h|[A-Z]+')
for _, _, cssdict, text, _ in rules:
fl = ':first-letter' in text
if fl:
text = text.replace(':first-letter', '')
try:
selector = CSSSelector(text)
except (AssertionError, ExpressionError, etree.XPathSyntaxError,
NameError, # thrown on OS X instead of SelectorSyntaxError
SelectorSyntaxError):
continue
try:
matches = selector(tree)
except etree.XPathEvalError:
continue
if not matches:
ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text)
if ntext != text:
self.logger.warn('Transformed CSS selector', text, 'to',
ntext)
selector = CSSSelector(ntext)
matches = selector(tree)
if not matches and class_sel_pat.match(text) and text.lower() != text:
found = False
ltext = text.lower()
for x in tree.xpath('//*[@class]'):
if ltext.endswith('.'+x.get('class').lower()):
matches.append(x)
found = True
if found:
self.logger.warn('Ignoring case mismatches for CSS selector: %s in %s'
%(text, item.href))
selector = get_css_selector(text)
matches = selector(tree, self.logger)
if fl:
from lxml.builder import ElementMaker
E = ElementMaker(namespace=XHTML_NS)

View File

@ -320,7 +320,8 @@ class CSSFlattener(object):
if self.context.insert_blank_line:
cssdict['margin-top'] = cssdict['margin-bottom'] = \
'%fem'%self.context.insert_blank_line_size
if self.context.remove_paragraph_spacing:
if (self.context.remove_paragraph_spacing and
cssdict.get('text-align', None) not in ('center', 'right')):
cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
if cssdict:

View File

@ -98,6 +98,7 @@ gprefs.defaults['book_display_fields'] = [
]
gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}'
gprefs.defaults['preserve_date_on_ctl'] = True
gprefs.defaults['cb_fullscreen'] = False
# }}}
@ -173,6 +174,8 @@ def _config(): # {{{
help='Search history for the plugin preferences')
c.add_opt('shortcuts_search_history', default=[],
help='Search history for the keyboard preferences')
c.add_opt('tweaks_search_history', default=[],
help='Search history for tweaks')
c.add_opt('worker_limit', default=6,
help=_(
'Maximum number of simultaneous conversion/news download jobs. '
@ -186,7 +189,9 @@ def _config(): # {{{
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('gui_layout', choices=['wide', 'narrow'],
help=_('The layout of the user interface'), default='wide')
help=_('The layout of the user interface. Wide has the '
'book details panel on the right and narrow has '
'it at the bottom.'), default='wide')
c.add_opt('show_avg_rating', default=True,
help=_('Show the average rating per item indication in the tag browser'))
c.add_opt('disable_animations', default=False,

View File

@ -17,7 +17,7 @@ from calibre.gui2.actions import InterfaceAction
class GenerateCatalogAction(InterfaceAction):
name = 'Generate Catalog'
action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None)
action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', None)
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
def genesis(self):

View File

@ -9,8 +9,8 @@ Module to implement the Cover Flow feature
import sys, os, time
from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize,
QStackedLayout, QLabel, QByteArray, pyqtSignal)
from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize, QAction,
QStackedLayout, QLabel, QByteArray, pyqtSignal, QKeySequence)
from calibre import plugins
from calibre.gui2 import config, available_height, available_width, gprefs
@ -150,12 +150,39 @@ class CBDialog(QDialog):
if not self.restoreGeometry(geom):
h, w = available_height()-60, int(available_width()/1.5)
self.resize(w, h)
self.action_fs_toggle = a = QAction(self)
self.addAction(a)
a.setShortcuts([QKeySequence('F11', QKeySequence.PortableText),
QKeySequence('Ctrl+Shift+F', QKeySequence.PortableText)])
a.triggered.connect(self.toggle_fullscreen)
self.action_esc_fs = a = QAction(self)
a.triggered.connect(self.show_normal)
self.addAction(a)
a.setShortcuts([QKeySequence('Esc', QKeySequence.PortableText)])
self.pre_fs_geom = None
def closeEvent(self, *args):
geom = bytearray(self.saveGeometry())
gprefs['cover_browser_dialog_geometry'] = geom
if not self.isFullScreen():
geom = bytearray(self.saveGeometry())
gprefs['cover_browser_dialog_geometry'] = geom
self.closed.emit()
def show_normal(self):
    # Leave fullscreen and, if we saved the window geometry before
    # entering fullscreen, put the window back where it was.
    self.showNormal()
    geom = self.pre_fs_geom
    if geom is not None:
        self.restoreGeometry(geom)
        self.pre_fs_geom = None
def toggle_fullscreen(self, *args):
    # Switch between fullscreen and normal mode, remembering the
    # current geometry so it can be restored on the way back.
    if not self.isFullScreen():
        self.pre_fs_geom = bytearray(self.saveGeometry())
        self.showFullScreen()
    else:
        self.show_normal()
class CoverFlowMixin(object):
def __init__(self):
@ -228,7 +255,7 @@ class CoverFlowMixin(object):
d.addAction(self.cb_splitter.action_toggle)
self.cover_flow.setVisible(True)
self.cover_flow.setFocus(Qt.OtherFocusReason)
d.show()
d.showFullScreen() if gprefs['cb_fullscreen'] else d.show()
self.cb_splitter.button.set_state_to_hide()
d.closed.connect(self.cover_browser_closed)
self.cb_dialog = d

View File

@ -9,18 +9,20 @@ __docformat__ = 'restructuredtext en'
from calibre.gui2.complete import MultiCompleteComboBox
from calibre.utils.localization import lang_map
from calibre.utils.icu import sort_key
from calibre.utils.icu import sort_key, lower
class LanguagesEdit(MultiCompleteComboBox):
def __init__(self, parent=None):
MultiCompleteComboBox.__init__(self, parent)
self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
self.setMinimumContentsLength(20)
self._lang_map = lang_map()
self.names_with_commas = [x for x in self._lang_map.itervalues() if ',' in x]
self.comma_map = {k:k.replace(',', '|') for k in self.names_with_commas}
self.comma_rmap = {v:k for k, v in self.comma_map.iteritems()}
self._rmap = {v:k for k,v in self._lang_map.iteritems()}
self._rmap = {lower(v):k for k,v in self._lang_map.iteritems()}
all_items = sorted(self._lang_map.itervalues(),
key=sort_key)
@ -44,7 +46,7 @@ class LanguagesEdit(MultiCompleteComboBox):
ans = []
for name in vals:
if name:
code = self._rmap.get(name, None)
code = self._rmap.get(lower(name), None)
if code is not None:
ans.append(code)
return ans
@ -64,7 +66,7 @@ class LanguagesEdit(MultiCompleteComboBox):
bad = []
for name in vals:
if name:
code = self._rmap.get(name, None)
code = self._rmap.get(lower(name), None)
if code is None:
bad.append(name)
return bad

View File

@ -308,7 +308,7 @@ class AuthorSortEdit(EnLineEdit):
LABEL = _('Author s&ort:')
def __init__(self, parent, authors_edit, autogen_button, db,
copy_a_to_as_action, copy_as_to_a_action):
copy_a_to_as_action, copy_as_to_a_action, a_to_as, as_to_a):
EnLineEdit.__init__(self, parent)
self.authors_edit = authors_edit
self.db = db
@ -333,6 +333,8 @@ class AuthorSortEdit(EnLineEdit):
autogen_button.clicked.connect(self.auto_generate)
copy_a_to_as_action.triggered.connect(self.auto_generate)
copy_as_to_a_action.triggered.connect(self.copy_to_authors)
a_to_as.triggered.connect(self.author_to_sort)
as_to_a.triggered.connect(self.sort_to_author)
self.update_state()
@dynamic_property
@ -389,10 +391,21 @@ class AuthorSortEdit(EnLineEdit):
def auto_generate(self, *args):
au = unicode(self.authors_edit.text())
au = re.sub(r'\s+et al\.$', '', au)
au = re.sub(r'\s+et al\.$', '', au).strip()
authors = string_to_authors(au)
self.current_val = self.db.author_sort_from_authors(authors)
def author_to_sort(self, *args):
    # Copy the authors field verbatim into the author-sort field,
    # after stripping any trailing "et al." marker.
    text = re.sub(r'\s+et al\.$', '', unicode(self.authors_edit.text())).strip()
    if text:
        self.current_val = text
def sort_to_author(self, *args):
    # Copy the author-sort value back into the authors field as a
    # single author entry (only when non-empty).
    val = self.current_val
    if val:
        self.authors_edit.current_val = [val]
def initialize(self, db, id_):
self.current_val = db.author_sort(id_, index_is_id=True)

View File

@ -130,10 +130,15 @@ class MetadataSingleDialogBase(ResizableDialog):
ac = m.addAction(QIcon(I('forward.png')), _('Set author sort from author'))
ac2 = m.addAction(QIcon(I('back.png')), _('Set author from author sort'))
ac3 = m.addAction(QIcon(I('user_profile.png')), _('Manage authors'))
ac4 = m.addAction(QIcon(I('next.png')),
_('Copy author to author sort'))
ac5 = m.addAction(QIcon(I('previous.png')),
_('Copy author sort to author'))
b.setMenu(m)
self.authors = AuthorsEdit(self, ac3)
self.author_sort = AuthorSortEdit(self, self.authors, b, self.db, ac,
ac2)
ac2, ac4, ac5)
self.basic_metadata_widgets.extend([self.authors, self.author_sort])
self.swap_title_author_button = QToolButton(self)
@ -723,7 +728,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,
@ -859,7 +864,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 2, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,

View File

@ -6,16 +6,15 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog,
QAbstractListModel, Qt, QIcon)
QAbstractListModel, Qt, QIcon, QKeySequence)
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form
from calibre.gui2 import config, gprefs, qt_app
from calibre.gui2 import config, gprefs, qt_app, NONE
from calibre.utils.localization import (available_translations,
get_language, get_lang)
from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
from calibre.gui2 import NONE
from calibre.gui2.book_details import get_field_list
from calibre.gui2.preferences.coloring import EditRules
@ -130,6 +129,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('disable_tray_notification', config)
r('use_roman_numerals_for_series_number', config)
r('separate_cover_flow', config, restart_required=True)
r('cb_fullscreen', gprefs)
choices = [(_('Off'), 'off'), (_('Small'), 'small'),
(_('Medium'), 'medium'), (_('Large'), 'large')]
@ -171,6 +171,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.tabWidget.addTab(self.edit_rules,
QIcon(I('format-fill-color.png')), _('Column coloring'))
self.tabWidget.setCurrentIndex(0)
keys = [QKeySequence('F11', QKeySequence.PortableText), QKeySequence(
'Ctrl+Shift+F', QKeySequence.PortableText)]
keys = [unicode(x.toString(QKeySequence.NativeText)) for x in keys]
self.fs_help_msg.setText(unicode(self.fs_help_msg.text())%(
_(' or ').join(keys)))
def initialize(self):
ConfigWidgetBase.initialize(self)

View File

@ -417,7 +417,7 @@ then the tags will be displayed each on their own line.</string>
<item row="1" column="1">
<widget class="QSpinBox" name="opt_cover_flow_queue_length"/>
</item>
<item row="2" column="0" colspan="2">
<item row="4" column="0" colspan="2">
<spacer name="verticalSpacer_4">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -430,6 +430,26 @@ then the tags will be displayed each on their own line.</string>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_cb_fullscreen">
<property name="text">
<string>When showing cover browser in separate window, show it &amp;fullscreen</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QLabel" name="fs_help_msg">
<property name="styleSheet">
<string notr="true">margin-left: 1.5em</string>
</property>
<property name="text">
<string>You can press the %s keys to toggle full screen mode.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</widget>

View File

@ -9,14 +9,19 @@ import textwrap
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit
from calibre.gui2.preferences.tweaks_ui import Ui_Form
from calibre.gui2 import error_dialog, NONE
from calibre.gui2 import error_dialog, NONE, info_dialog
from calibre.utils.config import read_raw_tweaks, write_tweaks
from calibre.gui2.widgets import PythonHighlighter
from calibre import isbytestring
from calibre.utils.icu import lower
from calibre.utils.search_query_parser import (ParseException,
SearchQueryParser)
from PyQt4.Qt import (QAbstractListModel, Qt, QStyledItemDelegate, QStyle,
QStyleOptionViewItem, QFont, QDialogButtonBox, QDialog,
QVBoxLayout, QPlainTextEdit, QLabel)
QVBoxLayout, QPlainTextEdit, QLabel, QModelIndex)
ROOT = QModelIndex()
class Delegate(QStyledItemDelegate): # {{{
def __init__(self, view):
@ -35,7 +40,7 @@ class Delegate(QStyledItemDelegate): # {{{
class Tweak(object): # {{{
def __init__(self, name, doc, var_names, defaults, custom):
translate = __builtins__['_']
translate = _
self.name = translate(name)
self.doc = translate(doc.strip())
self.var_names = var_names
@ -87,10 +92,11 @@ class Tweak(object): # {{{
# }}}
class Tweaks(QAbstractListModel): # {{{
class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
def __init__(self, parent=None):
QAbstractListModel.__init__(self, parent)
SearchQueryParser.__init__(self, ['all'])
raw_defaults, raw_custom = read_raw_tweaks()
self.parse_tweaks(raw_defaults, raw_custom)
@ -223,6 +229,54 @@ class Tweaks(QAbstractListModel): # {{{
def set_plugin_tweaks(self, d):
self.plugin_tweaks = d
def universal_set(self):
    # All rows of the model are candidates for a search.
    return {row for row in xrange(self.rowCount())}
def get_matches(self, location, query, candidates=None):
    # Return the set of candidate rows whose tweak name contains
    # *query* (case-insensitive via ICU lower()).
    if candidates is None:
        candidates = self.universal_set()
    matches = set()
    if not query:
        return matches
    q = lower(query)
    for row in candidates:
        tweak = self.data(self.index(row), Qt.UserRole)
        # Deliberately matching only the name; doc matching was
        # disabled in the original: or q in lower(tweak.doc)
        if q in lower(tweak.name):
            matches.add(row)
    return matches
def find(self, query):
    # Return the index of the first row matching *query*, or the
    # invalid ROOT index when the query is empty or has no matches.
    query = query.strip()
    if not query:
        return ROOT
    matches = self.parse(query)
    if not matches:
        return ROOT
    return self.index(min(matches))
def find_next(self, idx, query, backwards=False):
    # Return the index of the next (or previous) match after *idx*,
    # wrapping around at either end of the match list.
    query = query.strip()
    if not query:
        return idx
    matches = self.parse(query)
    if not matches:
        return idx
    loc = idx.row()
    if loc not in matches:
        # Current row is not a match: restart from the first match.
        return self.find(query)
    if len(matches) == 1:
        # Only match is the current row; nothing to move to.
        return ROOT
    ordered = list(sorted(matches))
    pos = ordered.index(loc)
    step = -1 if backwards else 1
    # Modulo arithmetic implements the wrap-around in both directions.
    return self.index(ordered[(pos + step) % len(ordered)])
# }}}
class PluginTweaks(QDialog): # {{{
@ -257,12 +311,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.delegate = Delegate(self.tweaks_view)
self.tweaks_view.setItemDelegate(self.delegate)
self.tweaks_view.currentChanged = self.current_changed
self.view = self.tweaks_view
self.highlighter = PythonHighlighter(self.edit_tweak.document())
self.restore_default_button.clicked.connect(self.restore_to_default)
self.apply_button.clicked.connect(self.apply_tweak)
self.plugin_tweaks_button.clicked.connect(self.plugin_tweaks)
self.splitter.setStretchFactor(0, 1)
self.splitter.setStretchFactor(1, 100)
self.next_button.clicked.connect(self.find_next)
self.previous_button.clicked.connect(self.find_previous)
self.search.initialize('tweaks_search_history', help_text=
_('Search for tweak'))
self.search.search.connect(self.find)
def plugin_tweaks(self):
@ -290,7 +350,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.changed_signal.emit()
def initialize(self):
self.tweaks = Tweaks()
self.tweaks = self._model = Tweaks()
self.tweaks_view.setModel(self.tweaks)
def restore_to_default(self, *args):
@ -338,6 +398,45 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
ConfigWidgetBase.commit(self)
return True
def find(self, query):
    # Search the tweaks model for *query* and highlight the first hit.
    # Reports parse failures through the search box and shows an info
    # dialog when nothing matches.
    if not query:
        return
    try:
        idx = self._model.find(query)
    except ParseException:
        self.search.search_done(False)
        return
    self.search.search_done(True)
    if not idx.isValid():
        # Fixed copy-paste error: this panel searches tweaks, not
        # keyboard shortcuts.
        info_dialog(self, _('No matches'),
                _('Could not find any tweaks matching %s')%query,
                show=True, show_copy_button=False)
        return
    self.highlight_index(idx)
def highlight_index(self, idx):
    # Scroll to, select and focus the given model index; invalid
    # indices are ignored.
    if not idx.isValid():
        return
    self.view.scrollTo(idx)
    sel = self.view.selectionModel()
    sel.select(idx, sel.ClearAndSelect)
    self.view.setCurrentIndex(idx)
def find_next(self, *args):
    # Advance the selection to the next tweak matching the current
    # search text, starting from row 0 when nothing is selected.
    cur = self.view.currentIndex()
    if not cur.isValid():
        cur = self._model.index(0)
    nxt = self._model.find_next(cur, unicode(self.search.currentText()))
    self.highlight_index(nxt)
def find_previous(self, *args):
    # Move the selection to the previous tweak matching the current
    # search text, starting from row 0 when nothing is selected.
    cur = self.view.currentIndex()
    if not cur.isValid():
        cur = self._model.index(0)
    prev = self._model.find_next(cur, unicode(self.search.currentText()),
            backwards=True)
    self.highlight_index(prev)
if __name__ == '__main__':
from PyQt4.Qt import QApplication

View File

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>660</width>
<width>756</width>
<height>531</height>
</rect>
</property>
@ -14,8 +14,24 @@
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<widget class="QLabel" name="label_18">
<property name="text">
<string>Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect &lt;b&gt;after a restart&lt;/b&gt; of calibre.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QSplitter" name="splitter">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>10</verstretch>
</sizepolicy>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
@ -24,16 +40,6 @@
</property>
<widget class="QWidget" name="layoutWidget">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QLabel" name="label_18">
<property name="text">
<string>Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect &lt;b&gt;after a restart&lt;/b&gt; of calibre.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QListView" name="tweaks_view">
<property name="sizePolicy">
@ -72,8 +78,8 @@
</layout>
</widget>
<widget class="QWidget" name="layoutWidget">
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<layout class="QGridLayout" name="gridLayout_3">
<item row="1" column="0" colspan="3">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Help</string>
@ -92,7 +98,7 @@
</layout>
</widget>
</item>
<item>
<item row="2" column="0" colspan="3">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Edit tweak</string>
@ -128,12 +134,59 @@
</layout>
</widget>
</item>
<item row="0" column="0">
<widget class="SearchBox2" name="search">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="sizeAdjustPolicy">
<enum>QComboBox::AdjustToMinimumContentsLength</enum>
</property>
<property name="minimumContentsLength">
<number>10</number>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QPushButton" name="next_button">
<property name="text">
<string>&amp;Next</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-down.png</normaloff>:/images/arrow-down.png</iconset>
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QPushButton" name="previous_button">
<property name="text">
<string>&amp;Previous</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-up.png</normaloff>:/images/arrow-up.png</iconset>
</property>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</widget>
<resources/>
<customwidgets>
<customwidget>
<class>SearchBox2</class>
<extends>QComboBox</extends>
<header>calibre/gui2/search_box.h</header>
</customwidget>
</customwidgets>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections/>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs
from calibre.utils.date import parse_date, now, UNDEFINED_DATE
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
from calibre.utils.localization import canonicalize_lang
from calibre.utils.localization import canonicalize_lang, lang_map
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints
@ -728,7 +728,9 @@ class ResultCache(SearchQueryParser): # {{{
elif loc == db_col['languages']:
q = canonicalize_lang(query)
if q is None:
q = query
lm = lang_map()
rm = {v.lower():k for k,v in lm.iteritems()}
q = rm.get(query, query)
else:
q = query

View File

@ -290,7 +290,10 @@ class DatabaseException(Exception):
def __init__(self, err, tb):
tb = '\n\t'.join(('\tRemote'+tb).splitlines())
msg = unicode(err) +'\n' + tb
try:
msg = unicode(err) +'\n' + tb
except:
msg = repr(err) + '\n' + tb
Exception.__init__(self, msg)
self.orig_err = err
self.orig_tb = tb

View File

@ -35,7 +35,7 @@ def load_icu():
if _icu is None:
print plugins['icu'][1]
else:
if not _icu.ok:
if not getattr(_icu, 'ok', False):
print 'icu not ok'
_icu = None
return _icu

View File

@ -28,6 +28,7 @@ from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import now as nowf
from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image
from calibre.utils.localization import canonicalize_lang
class LoginFailed(ValueError):
pass
@ -1117,6 +1118,9 @@ class BasicNewsRecipe(Recipe):
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
mi.comments = self.description
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.pubdate = nowf()