Sync to trunk.

2025-11-18 20:43:04 -05:00 · 2012-01-30 21:29:30 -05:00 · 2012-01-30 21:29:30 -05:00 · 0690471a92
commit 0690471a92
parent 1e14274564 5248d9f134
125 changed files with 26380 additions and 23863 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,72 @@
 #   new recipes:
 #     - title: 

+- version: 0.8.37
+  date: 2012-01-27
+
+  new features:
+    - title: "Allow calibre to be run simultaneously in two different user accounts on windows."
+      tickets: [919856]
+
+    - title: "Driver for Motorola Photon and Point of View PlayTab"
+      tickets: [920582, 919080] 
+
+    - title: "Add a checkbox to preferences->plugins to show only user installed plugins"
+
+    - title: "Add a restart calibre button to the warning dialog that pops up after changing some preference that requires a restart"
+ 
+  bug fixes:
+    - title: "Fix regression in 0.8.36 that caused the remove format from book function to only delete the entry from the database and not delete the actual file from the disk"
+      tickets: [921721]
+
+    - title: "Fix regression in 0.8.36 that caused the calibredb command to not properly refresh the format information in the GUI"
+      tickets: [919494] 
+
+    - title: "E-book viewer: Preserve the current position more accurately when changing font size/other preferences."
+      tickets: [912406]
+
+    - title: "Conversion pipeline: Fix items in the <guide> that refer to files with URL unsafe filenames being ignored."
+      tickets: [920804]
+
+    - title: "Fix calibre not running on linux systems that set LANG to an empty string"
+
+    - title: "On first run of calibre, ensure the columns are sized appropriately"
+
+    - title: "MOBI Output: Do not collapse whitespace when setting the comments metadata in newly created MOBI files"
+
+    - title: "HTML Input: Fix handling of files with ä characters in their filenames."
+      tickets: [919931]
+
+    - title: "Fix the sort on startup tweak ignoring more than three levels"
+      tickets: [919584]
+
+    - title: "Edit metadata dialog: Fix a bug that broke adding of a file to the book that calibre did not previously know about in the books directory while simultaneously changing the author or title of the book."
+      tickets: [922003]
+
+  improved recipes:
+    - People's Daily
+    - Plus Info
+    - grantland.com
+    - Elet es Irodalom
+    - Sueddeutsche.de
+
+  new recipes:
+    - title: Mumbai Mirror 
+      author: Krittika Goyal
+
+    - title: Real Clear 
+      author: TMcN
+
+    - title: Gazeta Wyborcza 
+      author: ravcio
+
+    - title: The Daily News Egypt and al masry al youm 
+      author: Omm Mishmishah
+
+    - title: Klip.me 
+      author: Ken Sun
+
+
 - version: 0.8.36
  date: 2012-01-20

--- a/recipes/beppe_grillo.recipe
+++ b/recipes/beppe_grillo.recipe
@ -0,0 +1,16 @@
+__license__   = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1327747616(BasicNewsRecipe):
+    """Recipe that reads the Beppe Grillo blog from its FeedBurner Atom feed."""
+
+    # Identification
+    title = u'Beppe Grillo'
+    __author__ = 'faber1971'
+    description = 'Blog of the famous comedian and politician Beppe Grillo - v1.00 (28, January 2012)'
+    language = 'it'
+
+    # Download limits and clean-up
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    # (feed title, feed URL) pairs
+    feeds = [
+        (u'Beppe Grillo', u'http://feeds.feedburner.com/beppegrillo/atom'),
+    ]
+
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@ -0,0 +1,25 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1325006965(BasicNewsRecipe):
+    """Recipe for Countryfile.com, built from the site's RSS feeds."""
+
+    # last updated 29/1/12
+    title = u'Countryfile.com'
+    __author__ = 'Dave Asbury'
+    description = 'The official website of Countryfile Magazine'
+    language = 'en_GB'
+    cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
+
+    # Download limits and clean-up
+    oldest_article = 30
+    max_articles_per_feed = 25
+    remove_empty_feeds = True
+    no_stylesheets = True
+    auto_cleanup = True
+
+    # No extra tags are stripped at the moment.
+    remove_tags = []
+
+    # (feed title, feed URL) pairs
+    feeds = [
+        (u'Homepage', u'http://www.countryfile.com/rss/home'),
+        (u'Country News', u'http://www.countryfile.com/rss/news'),
+        (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
+    ]
--- a/recipes/elet_es_irodalom.recipe
+++ b/recipes/elet_es_irodalom.recipe
@ -1,16 +1,16 @@
 ################################################################################
 #Description:     http://es.hu/ RSS channel
 #Author:      Bigpapa (bigpapabig@hotmail.com)
-#Date:	  2010.12.01. - V1.0
+#Date:    2012.01.20. - V1.2
 ################################################################################

 from calibre.web.feeds.recipes import BasicNewsRecipe

 class elet_es_irodalom(BasicNewsRecipe):
-    title                  = u'Elet es Irodalom'
+    title                  = u'\u00c9let \u00e9s Irodalom'
    __author__             = 'Bigpapa'
    oldest_article         = 7
-    max_articles_per_feed  = 20	# Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
+    max_articles_per_feed  = 30 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
    no_stylesheets         = True
    #delay                  = 1
    use_embedded_content   = False
@ -19,21 +19,32 @@ class elet_es_irodalom(BasicNewsRecipe):
    language               = 'hu'
    publication_type       = 'newsportal'
    extra_css              = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
+    needs_subscription = 'optional'
+
+    masthead_url = 'http://www.es.hu/images/logo.jpg'
+    timefmt = ' [%Y %b %d, %a]'
+
+# Do not write the login credentials into this code; you supply them when you start the download.
+    def get_browser(self):
+        """Return a browser, submitting the es.hu login form when credentials exist.
+
+        If either the username or the password is missing, the plain
+        browser is returned and no page is opened.
+        """
+        browser = BasicNewsRecipe.get_browser()
+        if self.username is None or self.password is None:
+            return browser
+        # The login form lives on the front page and is named 'userfrmlogin'.
+        browser.open('http://www.es.hu/')
+        browser.select_form(name='userfrmlogin')
+        browser['cusername'] = self.username
+        browser['cpassword'] = self.password
+        browser.submit()
+        return browser

    keep_only_tags    = [
                       dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
-                    
    ]

    remove_tags = [
     dict(name='a', attrs={'target':['_TOP']}),
    dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),

-
    ]

-    
-
    feeds          = [
    (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
    (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
@ -44,5 +55,4 @@ class elet_es_irodalom(BasicNewsRecipe):
    (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
    (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
    (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
-
    ]
--- a/recipes/fhm_uk.recipe
+++ b/recipes/fhm_uk.recipe
@ -6,7 +6,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
-    # last updated 27/12/11
+    # last updated 27/1/12
    language = 'en_GB'
    oldest_article = 28
    max_articles_per_feed = 12
@ -22,9 +22,13 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

        ]

+    #remove_tags    = [
+                              #dict(attrs={'class' : ['player']}),

+    #]
    feeds          = [
    (u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
+    (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
    (u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
    (u'Gaming',u'http://feed43.com/0755006465351035.xml'),
            ]
--- a/recipes/grantland.recipe
+++ b/recipes/grantland.recipe
@ -7,40 +7,35 @@ class GrantLand(BasicNewsRecipe):
    language       = 'en'
    __author__     = 'barty on mobileread.com forum'
    max_articles_per_feed = 100
-	no_stylesheets = False
+    no_stylesheets = True
    # auto_cleanup is too aggressive sometimes and we end up with blank articles
    auto_cleanup   = False
    timefmt        = ' [%a, %d %b %Y]'
-	oldest_article = 365
+    oldest_article = 90

    cover_url      = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
    masthead_url   = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'

    INDEX          = 'http://www.grantland.com'
    CATEGORIES     = [
-		# comment out categories you don't want
+        # comment out second line if you don't want older articles
        # (user friendly name, url suffix, max number of articles to load)
        ('Today in Grantland','',20),
        ('In Case You Missed It','incaseyoumissedit',35),
        ]

    remove_tags    = [
-		{'name':['head','style','script']},
-		{'id':['header']},
-		{'class':re.compile(r'\bside|\bad\b|floatright|tags')}
+        {'name':['style','aside','nav','footer','script']},
+        {'name':'h1','text':'Grantland'},
+        {'id':['header','col-right']},
+        {'class':['connect_widget']},
+        {'name':'section','class':re.compile(r'\b(ad|module)\b')},
        ]
-	remove_tags_before = {'class':'wrapper'}
-	remove_tags_after  = [{'id':'content'}]

    preprocess_regexps = [
-		# <header> tags with an img inside are just blog banners, don't need them
-		# note: there are other useful <header> tags so we don't want to just strip all of them
-		(re.compile(r'<header class.+?<img .+?>.+?</header>', re.DOTALL|re.IGNORECASE),lambda m: ''),
-		# delete everything between the *last* <hr class="small" /> and </article>
-		(re.compile(r'<hr class="small"(?:(?!<hr class="small").)+</article>', re.DOTALL|re.IGNORECASE),lambda m: '<hr class="small" /></article>'),
+        # remove blog banners
+        (re.compile(r'<a href="/blog/(?:(?!</a>).)+</a>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        ]
-	extra_css = """cite, time { font-size: 0.8em !important; margin-right: 1em !important; }
-		img + cite { display:block; text-align:right}"""

    def parse_index(self):
        feeds = []
@ -54,45 +49,23 @@ class GrantLand(BasicNewsRecipe):

            page = "%s/%s" % (self.INDEX, tag)
            soup = self.index_to_soup(page)
-			headers = soup.findAll('h2' if tag=='' else 'h3')

-			for header in headers:
-				tag = header.find('a',href=True)
-				if tag is None:
-					continue
+            main = soup.find('div',id='col-main')
+            if main is None:
+                main = soup
+
+            for tag in main.findAll('a', href=re.compile(r'(story|post)/_/id/\d+')):
                url = tag['href']
                if url in seen_urls:
                    continue
-				title = self.tag_to_string(tag)
-				if 'Podcast:' in title or 'In Case You Missed It' in title:
+                title = tag.string
+                # blank title probably means <a href=".."><img /></a>.  skip
+                if not title:
                    continue
-				desc = dt = ''
-				# get at the div that contains description and other info
-				div = header.parent.find('div')
-				if div is not None:
-					desc = self.tag_to_string(div)
-					dt = div.find('time')
-					if dt is not None:
-						dt = self.tag_to_string( dt)
-
-				# if div contains the same url that is in h2/h3
-				# that means this is a series split into multiple articles
-				if div.find('a',href=url):
-					self.log('\tFound series:', title)
-					# grab all articles in series
-					for tag in div.findAll('a',href=True):
-						url = tag['href']
-						if url in seen_urls:
-							continue
-						self.log('\t', url)
-						seen_urls.add(url)
-						articles.append({'title':title+' - '+self.tag_to_string( tag),
-							'url':url,'description':desc,'date':dt})
-				else:
                self.log('\tFound article:', title)
                self.log('\t', url)
+                articles.append({'title':title,'url':url})
                seen_urls.add(url)
-					articles.append({'title':title,'url':url,'description':desc,'date':dt})

                if len(articles) >= max_articles:
                    break
@ -101,6 +74,3 @@ class GrantLand(BasicNewsRecipe):
                feeds.append((cat_name, articles))

        return feeds
-
-	def print_version(self, url):
-		return url+'?view=print'
--- a/recipes/high_country_news.recipe
+++ b/recipes/high_country_news.recipe
@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
+
+'''
+Fetch High Country News
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+class HighCountryNews(BasicNewsRecipe):
+    """Recipe for High Country News; articles are requested through their print view."""
+
+    # Identification
+    title = u'High Country News'
+    __author__ = 'Armin Geller' # 2012-01-28
+    description = u'News from the American West'
+    publisher = 'High Country News'
+    publication_type = 'newspaper'
+    language = 'en'
+    encoding = 'UTF-8'
+    timefmt = ' [%a, %d %b %Y]'
+
+    # Download limits and clean-up
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+    auto_cleanup = True
+
+    # (feed title, feed URL) pairs
+    feeds = [
+        (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent'),
+        (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue'),
+        (u'Writers on the Range', u'http://feeds.feedburner.com/hcn/wotr'),
+        (u'High Country Views', u'http://feeds.feedburner.com/hcn/HighCountryViews'),
+    ]
+
+    def print_version(self, url):
+        """Return *url* with the '/print_view' suffix appended."""
+        suffix = '/print_view'
+        return url + suffix
+
--- a/recipes/la_voce.recipe
+++ b/recipes/la_voce.recipe
@ -0,0 +1,15 @@
+__license__   = 'GPL v3'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1324114228(BasicNewsRecipe):
+    """Recipe that reads La Voce (lavoce.info) from its RSS feed."""
+
+    # Identification
+    title = u'La Voce'
+    __author__ = 'faber1971'
+    description = 'Italian website on Economy - v1.01 (17, December 2011)'
+    language = 'it'
+    masthead_url = 'http://www.lavoce.info/binary/la_voce/testata/lavoce.1184661635.gif'
+
+    # Download limits and clean-up
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    # (feed title, feed URL) pairs
+    feeds = [
+        (u'La Voce', u'http://www.lavoce.info/feed_rss.php?id_feed=1'),
+    ]
+
+
--- a/recipes/liberation_sub.recipe
+++ b/recipes/liberation_sub.recipe
@ -0,0 +1,103 @@
+#!/usr/bin/env  python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Rémi Vanicat <vanicat at debian.org>'
+'''
+liberation.fr
+'''
+# The cleaning rules are taken from the Liberation recipe by Darko Miletic
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Liberation(BasicNewsRecipe):
+    """Recipe for the subscriber edition of liberation.fr.
+
+    The browser submits the site's login form with the supplied
+    username/password, and the article list is built by scraping the page
+    at ``index`` (see ``parse_index``).
+    """
+
+    title                 = u'Libération: Édition abonnés'
+    __author__            = 'Rémi Vanicat'
+    description           = u'Actualités'
+    category              = 'Actualités, France, Monde'
+    language              = 'fr'
+    needs_subscription    = True
+
+    use_embedded_content   = False
+    no_stylesheets         = True
+    remove_empty_feeds     = True
+
+    # A selector list must not end with a comma (the whole rule is dropped
+    # otherwise) and a class selector needs its leading dot.
+    extra_css = '''
+                    h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
+                    p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
+                    h4, h5, h2.rubrique {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+                    .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+                    .mna-body, .entry-body  {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
+                '''
+
+    keep_only_tags    = [
+                  dict(name='div', attrs={'class':'article'})
+                  ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
+                  ,dict(name='div', attrs={'class':'entry'})
+                  ,dict(name='div', attrs={'class':'col_contenu'})
+    ]
+
+    remove_tags_after = [
+        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
+        ,dict(name='p',attrs={'class':['chapo']})
+        ,dict(id='_twitter_facebook')
+    ]
+
+    remove_tags    = [
+                        dict(name='iframe')
+                        ,dict(name='a', attrs={'class':'lnk-comments'})
+                        ,dict(name='div', attrs={'class':'toolbox'})
+                        ,dict(name='ul', attrs={'class':'share-box'})
+                        ,dict(name='ul', attrs={'class':'tool-box'})
+                        ,dict(name='ul', attrs={'class':'rub'})
+                        ,dict(name='p',attrs={'class':['chapo']})
+                        ,dict(name='p',attrs={'class':['tag']})
+                        ,dict(name='div',attrs={'class':['blokLies']})
+                        ,dict(name='div',attrs={'class':['alire']})
+                        ,dict(id='_twitter_facebook')
+                     ]
+
+    # Page that lists the subscriber-only articles, grouped by section.
+    index           = 'http://www.liberation.fr/abonnes/'
+
+    def get_browser(self):
+        """Return a browser, submitting the login form when credentials exist.
+
+        The first form on the login page is filled with the username (as
+        ``email``) and the password. Without both values the plain browser
+        is returned.
+        """
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://www.liberation.fr/jogger/login/')
+            br.select_form(nr=0)
+            br['email']    = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        """Build the list of ``(section title, articles)`` pairs from ``index``.
+
+        The direct children of the ``block-content`` div are walked in
+        order: a child whose class is ``headrest headrest-basic-rounded``
+        opens a new section (titled by its ``h5``); every other child is
+        read as one article of the current section. Children that lack the
+        expected title or link are skipped instead of aborting the run.
+
+        Returns:
+            A list of ``(title, [article dict, ...])`` tuples; each article
+            dict has the keys ``title``, ``url``, ``description`` and
+            ``content``.
+
+        Raises:
+            ValueError: if the index page has no ``block-content`` div.
+        """
+        soup = self.index_to_soup(self.index)
+
+        content = soup.find('div', { 'class':'block-content' })
+        if content is None:
+            raise ValueError('Could not find the article list on ' + self.index)
+
+        articles = []
+        cat_articles = []
+
+        for tag in content.findAll(recursive=False):
+            # .get() rather than [] so that a child without a class
+            # attribute is treated as an article candidate, not an error.
+            if tag.get('class') == 'headrest headrest-basic-rounded':
+                heading = tag.find('h5')
+                if heading is None or not heading.contents:
+                    continue
+                cat_articles = []
+                articles.append((heading.contents[0], cat_articles))
+                continue
+
+            heading = tag.find('h3')
+            link = tag.find('a')
+            if heading is None or not heading.contents or link is None:
+                continue
+            url = link.get('href')
+            if not url:
+                continue
+            self.log('\t', url)
+
+            subtitle = tag.find('p', { 'class':'subtitle' })
+            description = ''
+            if subtitle is not None and subtitle.contents:
+                description = subtitle.contents[0]
+
+            cat_articles.append({
+                'title': heading.contents[0],
+                'url': url,
+                'description': description,
+                'content': ''
+                })
+        return articles
+
+
+
+# Local Variables:
+# mode: python
+# End:
--- a/recipes/marketing_magazine.recipe
+++ b/recipes/marketing_magazine.recipe
@ -0,0 +1,16 @@
+__license__   = 'GPL v3'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1327062445(BasicNewsRecipe):
+    """Recipe that aggregates several Italian marketing sites into one publication."""
+
+    # Identification
+    title = u'Marketing Magazine'
+    __author__ = 'faber1971'
+    description = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
+    language = 'it'
+    masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
+
+    # Download limits and clean-up
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    remove_javascript = True
+
+    # (feed title, feed URL) pairs
+    feeds = [
+        (u'My Marketing', u'http://feed43.com/0537744466058428.xml'),
+        (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'),
+        (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'),
+        (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'),
+        (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'),
+        (u'Brandforum news', u'http://www.brandforum.it/rss/news'),
+        (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'),
+        (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/'),
+    ]
+
+
--- a/recipes/mumbai_mirror.recipe
+++ b/recipes/mumbai_mirror.recipe
@ -0,0 +1,59 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MumbaiMirror(BasicNewsRecipe):
+    """Recipe for the Mumbai Mirror newspaper, built from the site's RSS feeds."""
+
+    title          = u'Mumbai Mirror'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    __author__            = 'Krittika Goyal'
+
+    description           = 'Mumbai Mirror newspaper'
+    language = 'en_IN'
+    category              = 'News, Mumbai, India'
+    remove_javascript = True
+    use_embedded_content   = False
+    auto_cleanup = True
+    no_stylesheets = True
+    conversion_options = {'linearize_tables':True}
+
+    # (feed title, feed URL) pairs; every feed is served by rssfeeds.aspx
+    # and selected through its ``feed`` query parameter.
+    feeds          = [
+        ('Cover Story',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=latest'),
+        ('City Diary',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=citydiary'),
+        ('Columnists',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=mmcolumnists'),
+        ('Mumbai, The City',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=city'),
+        ('Nation',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=nation'),
+        ('Top Stories',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=topstories'),
+        ('Business',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=business'),
+        ('World',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=world'),
+        ('Chai Time',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=chaitime'),
+        ('Technology',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=technology'),
+        ('Entertainment',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=entertainment'),
+        ('Style',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=style'),
+        ('Ask the Sexpert',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=askthesexpert'),
+        ('Television',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=television'),
+        ('Lifestyle',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=lifestyle'),
+        ('Sports',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=sports'),
+        ('Travel: Travelers Diary',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=travellersdiaries'),
+        ('Travel: Domestic',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=traveldomestic'),
+        ('Travel: International',
+         'http://www.mumbaimirror.com/rssfeeds.aspx?feed=travelinternational'),
+    ]
--- a/recipes/oreilly_premium.recipe
+++ b/recipes/oreilly_premium.recipe
@ -14,6 +14,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class OReillyPremium(BasicNewsRecipe):
    title           = u'OReilly Premium'
    __author__      = 'TMcN'
+    language = 'en'
    description     = 'Retrieves Premium and News Letter content from BillOReilly.com.  Requires a Bill OReilly Premium Membership.'
    cover_url       = 'http://images.billoreilly.com/images/headers/billgray_header.png'
    auto_cleanup    = True
--- a/recipes/people_daily.recipe
+++ b/recipes/people_daily.recipe
@ -1,10 +1,11 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import os, time

 class AdvancedUserRecipe1277129332(BasicNewsRecipe):
-    title          = u'People Daily - China'
+    title          = u'人民日报'
    oldest_article = 2
    max_articles_per_feed = 100
-    __author__            = 'rty'
+    __author__            = 'zzh'

    pubisher  = 'people.com.cn'
    description           = 'People Daily Newspaper'
@ -14,21 +15,65 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
    use_embedded_content   = False
    no_stylesheets = True
    encoding               = 'GB2312'
+    language               = 'zh'
    conversion_options = {'linearize_tables':True}
+    masthead_url       = 'http://www.people.com.cn/img/2010wb/images/logo.gif'

-    feeds          = [(u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
-       (u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
-       (u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
-       (u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
-       (u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml')]
+    feeds          = [
+        (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
+        (u'国际', u'http://www.people.com.cn/rss/world.xml'),
+        (u'经济', u'http://www.people.com.cn/rss/finance.xml'),
+        (u'体育', u'http://www.people.com.cn/rss/sports.xml'),
+        (u'教育', u'http://www.people.com.cn/rss/edu.xml'),
+        (u'文化', u'http://www.people.com.cn/rss/culture.xml'),
+        (u'社会', u'http://www.people.com.cn/rss/society.xml'),
+        (u'传媒', u'http://www.people.com.cn/rss/media.xml'),
+        (u'娱乐', u'http://www.people.com.cn/rss/ent.xml'),
+       # (u'汽车', u'http://www.people.com.cn/rss/auto.xml'),
+        (u'海峡两岸', u'http://www.people.com.cn/rss/haixia.xml'),
+       # (u'IT频道', u'http://www.people.com.cn/rss/it.xml'),
+       # (u'环保', u'http://www.people.com.cn/rss/env.xml'),
+       # (u'科技', u'http://www.people.com.cn/rss/scitech.xml'),
+       # (u'新农村', u'http://www.people.com.cn/rss/nc.xml'),
+       # (u'天气频道', u'http://www.people.com.cn/rss/weather.xml'),
+        (u'生活提示', u'http://www.people.com.cn/rss/life.xml'),
+        (u'卫生', u'http://www.people.com.cn/rss/medicine.xml'),
+       # (u'人口', u'http://www.people.com.cn/rss/npmpc.xml'),
+       # (u'读书', u'http://www.people.com.cn/rss/booker.xml'),
+       # (u'食品', u'http://www.people.com.cn/rss/shipin.xml'),
+       # (u'女性新闻', u'http://www.people.com.cn/rss/women.xml'),
+       # (u'游戏', u'http://www.people.com.cn/rss/game.xml'),
+       # (u'家电频道', u'http://www.people.com.cn/rss/homea.xml'),
+       # (u'房产', u'http://www.people.com.cn/rss/house.xml'),
+       # (u'健康', u'http://www.people.com.cn/rss/health.xml'),
+       # (u'科学发展观', u'http://www.people.com.cn/rss/kxfz.xml'),
+       # (u'知识产权', u'http://www.people.com.cn/rss/ip.xml'),
+       # (u'高层动态', u'http://www.people.com.cn/rss/64094.xml'),
+       # (u'党的各项工作', u'http://www.people.com.cn/rss/64107.xml'),
+       # (u'党建聚焦', u'http://www.people.com.cn/rss/64101.xml'),
+       # (u'机关党建', u'http://www.people.com.cn/rss/117094.xml'),
+       # (u'事业党建', u'http://www.people.com.cn/rss/117095.xml'),
+       # (u'国企党建', u'http://www.people.com.cn/rss/117096.xml'),
+       # (u'非公党建', u'http://www.people.com.cn/rss/117097.xml'),
+       # (u'社区党建', u'http://www.people.com.cn/rss/117098.xml'),
+       # (u'高校党建', u'http://www.people.com.cn/rss/117099.xml'),
+       # (u'农村党建', u'http://www.people.com.cn/rss/117100.xml'),
+       # (u'军队党建', u'http://www.people.com.cn/rss/117101.xml'),
+       # (u'时代先锋', u'http://www.people.com.cn/rss/78693.xml'),
+       # (u'网友声音', u'http://www.people.com.cn/rss/64103.xml'),
+       # (u'反腐倡廉', u'http://www.people.com.cn/rss/64371.xml'),
+       # (u'综合报道', u'http://www.people.com.cn/rss/64387.xml'),
+       # (u'中国人大新闻', u'http://www.people.com.cn/rss/14576.xml'),
+       # (u'中国政协新闻', u'http://www.people.com.cn/rss/34948.xml'),
+     ]
    keep_only_tags = [
-                              dict(name='div', attrs={'class':'left_content'}),
+                              dict(name='div', attrs={'class':'text_c'}),
                               ]
    remove_tags = [
-                    dict(name='table', attrs={'class':'title'}),
+                    dict(name='div', attrs={'class':'tools'}),
                         ]
    remove_tags_after = [
-                  dict(name='table', attrs={'class':'bianji'}),
+                  dict(name='div', attrs={'id':'p_content'}),
                         ]

    def append_page(self, soup, appendtag, position):
@ -36,7 +81,7 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
        if pager:
           nexturl = self.INDEX + pager.a['href']
           soup2 = self.index_to_soup(nexturl)
-           texttag = soup2.find('div', attrs={'class':'left_content'})
+           texttag = soup2.find('div', attrs={'class':'text_c'})
           #for it in texttag.findAll(style=True):
           #   del it['style']
           newpos = len(texttag.contents)
@ -44,9 +89,15 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
           texttag.extract()
           appendtag.insert(position,texttag)

+    def skip_ad_pages(self, soup):
+        """Return replacement HTML when *soup* is an advertisement page.
+
+        A page counts as an advertisement when its ``<title>`` text
+        contains 'advertisement' (case-insensitive). In that case the
+        first link on the page is fetched and its body, decoded as GB2312
+        with undecodable bytes ignored, is returned.
+
+        Returns:
+            The decoded HTML of the linked page, or None when the page is
+            not an advertisement or offers no link to follow.
+        """
+        title = soup.find('title')
+        # .string is None for a missing or empty title; treat both as "not an ad".
+        title_text = title.string if title is not None else None
+        if not title_text or 'advertisement' not in title_text.lower():
+            return None
+
+        link = soup.find('a')
+        href = link.get('href') if link is not None else None
+        if not href:
+            return None
+        return self.browser.open(href).read().decode('GB2312', 'ignore')

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="utf-8" />'
+        mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="GB2312" />'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['form']
@ -55,3 +106,19 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
        #if pager:
        #   pager.extract()
        return soup
+
+    def get_cover_url(self):
+        """Return the URL of today's front-page image, or None if it cannot be opened.
+
+        The URL is built from the current date in China and then opened
+        once to check that the image is reachable.
+        """
+        # Asia/Shanghai is a fixed UTC+8 offset, so shift UTC by eight hours
+        # instead of setting TZ and calling time.tzset(): tzset() is only
+        # available on Unix, and changing TZ alters the time zone of the
+        # whole process.
+        now = time.gmtime(time.time() + 8 * 3600)
+        year = time.strftime('%Y', now)
+        month = time.strftime('%m', now)
+        day = time.strftime('%d', now)
+        cover = 'http://paper.people.com.cn/rmrb/page/'+year+'-'+month+'/'+day+'/01/RMRB'+year+month+day+'B001_b.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except Exception:
+            # Best effort: a missing cover must not abort the download.
+            self.log("\nCover unavailable: " + cover)
+            cover = None
+        return cover
--- a/recipes/plus_info.recipe
+++ b/recipes/plus_info.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

 __author__    = 'Darko Spasovski'
 __license__   = 'GPL v3'
@ -7,7 +8,6 @@ __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
 '''
 www.plusinfo.mk
 '''
-
 from calibre.web.feeds.news import BasicNewsRecipe

 class PlusInfo(BasicNewsRecipe):
@ -27,8 +27,11 @@ class PlusInfo(BasicNewsRecipe):
    oldest_article        = 1
    max_articles_per_feed = 100

-    keep_only_tags = [dict(name='div', attrs={'class': 'vest'})]
-    remove_tags = [dict(name='div', attrs={'class':['komentari_holder', 'objava']})]
+    remove_tags = []
+    remove_tags.append(dict(name='div', attrs={'class':['komentari_holder', 'objava', 'koment']}))
+    remove_tags.append(dict(name='ul', attrs={'class':['vest_meni']}))
+    remove_tags.append(dict(name='a', attrs={'name': ['fb_share']}))
+    keep_only_tags = [dict(name='div', attrs={'class': 'vest1'})]

    feeds          = [(u'Македонија', u'http://www.plusinfo.mk/rss/makedonija'),
                      (u'Бизнис', u'http://www.plusinfo.mk/rss/biznis'),
--- a/recipes/real_clear.recipe
+++ b/recipes/real_clear.recipe
@ -0,0 +1,170 @@
+#  Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
+import time
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import NavigableString
+
+class RealClear(BasicNewsRecipe):
+    # Recipe that builds its own article index (see parse_index) from the
+    # RSS feeds listed in feedsets.
+    title           = u'Real Clear'
+    __author__      = 'TMcN'
+    description     = 'Real Clear Politics/Science/etc... aggregation of news\n'
+    cover_url       = 'http://www.realclearpolitics.com/dev/mt-static/images/logo.gif'
+    custom_title    = 'Real Clear - '+ time.strftime('%d %b %Y')
+    auto_cleanup    = True
+    encoding        = 'utf8'
+    language        = 'en'
+    needs_subscription = False
+    no_stylesheets  = True
+    oldest_article  = 7
+    remove_javascript = True
+    remove_tags     = [dict(name='img', attrs={})]
+    # Don't go down
+    recursions      = 0
+    max_articles_per_feed = 400
+    # Set to True to print extra progress output from the methods below
+    debugMessages = False
+
+    # Each entry is [feed title, feed URL, type]. The numeric type is not read
+    # by any code in this class (the original note describing it was left
+    # unfinished).
+    feedsets = [
+                ["Politics",        "http://www.realclearpolitics.com/index.xml", 0],
+                ["Science",         "http://www.realclearscience.com/index.xml", 0],
+                ["Tech",            "http://www.realcleartechnology.com/index.xml", 0],
+                # The feedburner is essentially the same as the top feed, politics.
+                # ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
+                # ["Commentary",      "http://feeds.feedburner.com/Realclearpolitics-Articles", 1],
+                ["Markets Home",    "http://www.realclearmarkets.com/index.xml", 0],
+                ["Markets",         "http://www.realclearmarkets.com/articles/index.xml", 0],
+                ["World",           "http://www.realclearworld.com/index.xml", 0],
+                ["World Blog",           "http://www.realclearworld.com/blog/index.xml", 2]
+            ]
+    # Hints to extractPrintURL.
+    # Columns: [0] snippet that must occur in the article URL, [1] text of the
+    # print link to search for, [2] tag name to search for, [3] value passed as
+    # attrs to soup.find(). An empty string in [1] or [3] means "not used".
+    printhints = [
+                    ["billoreilly.com",     "Print this entry",            'a', ''],
+                    ["billoreilly.com",     "Print This Article",          'a', ''],
+                    ["politico.com",        "Print",                       'a', 'share-print'],
+                    ["nationalreview.com",  ">Print<",                     'a', ''],
+                    ["reason.com",          "",                       'a', 'printer']
+                    # The following are not supported due to JavaScripting, and would require obfuscated_article to handle
+                    # forbes,
+                    # usatoday - just prints with all current crap anyhow
+
+            ]
+
+    # Returns the best-guess print url.
+    # The second parameter (pageURL) is returned if nothing is found.
+    def extractPrintURL(self, pageURL):
+        tagURL = pageURL
+        hintsCount =len(self.printhints)
+        for x in range(0,hintsCount):
+            if pageURL.find(self.printhints[x][0])== -1 :
+                continue
+            print("Trying "+self.printhints[x][0])
+            # Only retrieve the soup if we have a match to check for the printed article with.
+            soup = self.index_to_soup(pageURL)
+            if soup is None:
+                return pageURL
+            if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
+                if self.debugMessages == True :
+                    print("search1")
+                printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
+            elif  len(self.printhints[x][3])>0 :
+                if self.debugMessages == True :
+                    print("search2")
+                printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
+            else :
+                printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
+            if printFind is None:
+                if self.debugMessages == True :
+                    print("Not Found")
+                continue
+            print(printFind)
+            if isinstance(printFind, NavigableString)==False:
+                if printFind['href'] is not None:
+                    return printFind['href']
+            tag = printFind.parent
+            print(tag)
+            if tag['href'] is None:
+                if self.debugMessages == True :
+                    print("Not in parent, trying skip-up")
+                if tag.parent['href'] is None:
+                    if self.debugMessages == True :
+                        print("Not in skip either, aborting")
+                    continue;
+                return tag.parent['href']
+            return tag['href']
+        return tagURL
+
+    def get_browser(self):
+        if self.debugMessages == True :
+            print("In get_browser")
+        br = BasicNewsRecipe.get_browser()
+        return br
+
+    def parseRSS(self, index) :
+        if self.debugMessages == True :
+            print("\n\nStarting "+self.feedsets[index][0])
+        articleList = []
+        soup = self.index_to_soup(self.feedsets[index][1])
+        for div in soup.findAll("item"):
+            title = div.find("title").contents[0]
+            urlEl = div.find("originalLink")
+            if urlEl is None or len(urlEl.contents)==0 :
+                urlEl = div.find("originallink")
+            if urlEl is None or len(urlEl.contents)==0 :
+                urlEl = div.find("link")
+            if urlEl is None or len(urlEl.contents)==0 :
+                urlEl = div.find("guid")
+            if urlEl is None or title is None  or len(urlEl.contents)==0 :
+                print("Error in feed "+ self.feedsets[index][0])
+                print(div)
+                continue
+            print(title)
+            print(urlEl)
+            url = urlEl.contents[0].encode("utf-8")
+            description = div.find("description")
+            if description is not None and description.contents is not None and len(description.contents)>0:
+                description = description.contents[0]
+            else :
+                description="None"
+            pubDateEl = div.find("pubDate")
+            if pubDateEl is None :
+                pubDateEl = div.find("pubdate")
+            if pubDateEl is None :
+                pubDate = time.strftime('%a, %d %b')
+            else :
+                pubDate = pubDateEl.contents[0]
+            if self.debugMessages == True :
+                print("Article");
+                print(title)
+                print(description)
+                print(pubDate)
+                print(url)
+            url = self.extractPrintURL(url)
+            print(url)
+            #url +=re.sub(r'\?.*', '', div['href'])
+            pubdate = time.strftime('%a, %d %b')
+            articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
+        return articleList
+
+    # calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
+    # returns a list of tuple ('feed title', list of articles)
+    # {
+    # 'title'       : article title,
+    # 'url'         : URL of print version,
+    # 'date'        : The publication date of the article as a string,
+    # 'description' : A summary of the article
+    # 'content'     : The full article (can be an empty string). This is used by FullContentProfile
+    # }
+    # this is used instead of BasicNewsRecipe.parse_feeds().
+    def parse_index(self):
+        # Parse the page into Python Soup
+
+        ans = []
+        feedsCount = len(self.feedsets)
+        for x in range(0,feedsCount): # should be ,4
+            feedarticles = self.parseRSS(x)
+            if feedarticles is not None:
+                ans.append((self.feedsets[x][0], feedarticles))
+        if self.debugMessages == True :
+            print(ans)
+        return ans
+
--- a/recipes/satira.recipe
+++ b/recipes/satira.recipe
@ -0,0 +1,14 @@
+__license__   = 'GPL v3'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1327351409(BasicNewsRecipe):
+    title          = u'Satira'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    feeds          = [(u'spinoza', u'http://feeds.feedburner.com/Spinoza'), (u'umore maligno', u'http://www.umoremaligno.it/feed/rss/'), (u'fed-ex', u'http://exfed.tumblr.com/rss'), (u'metilparaben', u'http://feeds.feedburner.com/metil'), (u'freddy nietzsche', u'http://feeds.feedburner.com/FreddyNietzsche')]
+    __author__    = 'faber1971'
+    description   = 'Collection of Italian satiric blogs - v1.00 (28, January 2012)'
+    language = 'it'
+
+
--- a/recipes/sueddeutsche.recipe
+++ b/recipes/sueddeutsche.recipe
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>' # 2012-01-26 AGe change to actual Year

 '''
 Fetch sueddeutsche.de
@ -8,19 +8,30 @@ Fetch sueddeutsche.de
 from calibre.web.feeds.news import BasicNewsRecipe
 class Sueddeutsche(BasicNewsRecipe):

-    title = u'sueddeutsche.de'
-    description = 'News from Germany'
-    __author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2011-12-16
-    use_embedded_content   = False
-    timefmt = ' [%d %b %Y]'
+    title                 = u'Süddeutsche.de'                 # 2012-01-26 AGe Correct Title
+    description           = 'News from Germany, Access to online content' # 2012-01-26 AGe
+    __author__            = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
+    publisher             = 'Süddeutsche Zeitung'             # 2012-01-26 AGe add
+    category              = 'news, politics, Germany'         # 2012-01-26 AGe add
+    timefmt               = ' [%a, %d %b %Y]'                 # 2012-01-26 AGe add %a
    oldest_article        = 7
-    max_articles_per_feed = 50
-    no_stylesheets = True
+    max_articles_per_feed = 100
    language              = 'de'
    encoding              = 'utf-8'
+    publication_type      = 'newspaper'                         # 2012-01-26 add
+    cover_source          = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source
+    masthead_url          = 'http://www.sueddeutsche.de/static_assets/build/img/sdesiteheader/logo_homepage.441d531c.png' # 2012-01-26 AGe add
+
+    use_embedded_content  = False
+    no_stylesheets        = True
    remove_javascript     = True
    auto_cleanup          = True
-    cover_url  = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
+
+    def get_cover_url(self):                                      # 2012-01-26 AGe add from Darko Miletic paid content source
+      cover_source_soup = self.index_to_soup(self.cover_source)
+      preview_image_div = cover_source_soup.find(attrs={'class':'preview-image'})
+      return preview_image_div.div.img['src']
+
    feeds = [
              (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
              (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
@ -29,6 +40,9 @@ class Sueddeutsche(BasicNewsRecipe):
              (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
              (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
              (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
+              (u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'),         #2012-01-26 AGe New
+              (u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'),   #2012-01-26 AGe New
+              (u'Stil', u'http://rss.sueddeutsche.de/rss/stil'),               #2012-01-26 AGe New
              (u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
              (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
              (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
@ -42,6 +56,7 @@ class Sueddeutsche(BasicNewsRecipe):
              (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'),         # sometimes only
              (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
              (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'),   # sometimes only
+
            ]
 # AGe 2011-12-16 Problem of Handling redirections solved by a solution of Recipes-Re-usable code from kiklop74.
 # Feed is:                    http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
--- a/recipes/tech_economy.recipe
+++ b/recipes/tech_economy.recipe
@ -0,0 +1,15 @@
+__license__   = 'GPL v3'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1327051385(BasicNewsRecipe):
+    title          = u'Tech Economy'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    masthead_url            = 'http://www.techeconomy.it/wp-content/uploads/2012/01/Logo-TE9.png'
+    feeds          = [(u'Tech Economy', u'http://www.techeconomy.it/feed/')]
+    remove_tags_after = [dict(name='div', attrs={'class':'cab-author-name'})]
+    __author__    = 'faber1971'
+    description   = 'Italian website on technology - v1.00 (28, January 2012)'
+    language = 'it'
+
--- a/recipes/tomshardware_it.recipe
+++ b/recipes/tomshardware_it.recipe
@ -0,0 +1,24 @@
+__license__   = 'GPL v3'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1327434170(BasicNewsRecipe):
+    # Recipe for the Italian Tom's Hardware feed.
+    title          = u"Tom's Hardware"
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    masthead_url            = 'http://userlogos.org/files/logos/spaljeni/tomshardwre.png'
+    # Return the article link. When the link ends in "story01.htm", the
+    # second to last path component is taken instead and its "0<letter>"
+    # escape sequences are replaced to rebuild the URL.
+    def get_article_url(self, article):
+       link = BasicNewsRecipe.get_article_url(self, article)
+       if link.split('/')[-1]=="story01.htm":
+           link=link.split('/')[-2]
+           # a[k] is the letter following '0' in an escape sequence and b[k]
+           # is the text it stands for.
+           a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L'      , 'N'   , 'S'   ]
+           b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
+           # Note the negative index: i == 0 handles 'A', after that the
+           # pairs are processed from the end of the lists backwards
+           # ('S', 'N', ..., 'B'), so every pair is applied exactly once.
+           for i in range(0,len(a)):
+              link=link.replace('0'+a[-i],b[-i])
+       return link
+    feeds          = [(u"Tom's Hardware", u'http://rss.feedsportal.com/c/32604/f/531080/index.rss')]
+    __author__    = 'faber1971'
+    description   = 'Italian website on technology - v1.00 (28, January 2012)'
+    language = 'it'
+
+
--- a/recipes/wyborcza_duzy_format.recipe
+++ b/recipes/wyborcza_duzy_format.recipe
@ -0,0 +1,144 @@
+#!/usr/bin/env  python
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class GazetaWyborczaDuzyForma(BasicNewsRecipe):
+    # Recipe that builds its article list by scraping the listing pages (see
+    # parse_index) instead of reading RSS feeds.
+    cover_url             = 'http://bi.gazeta.pl/im/8/5415/m5415058.gif'
+    title                 = u"Gazeta Wyborcza Duzy Format"
+    __author__            = 'ravcio - rlelusz[at]gmail.com'
+    description           = u"Articles from Gazeta's website"
+    language              = 'pl'
+    max_articles_per_feed = 50  #you can increase it, even up to maybe 600; it should still work
+    recursions            = 0
+    encoding              = 'iso-8859-2'
+    no_stylesheets        = True
+    remove_javascript     = True
+    use_embedded_content  = False
+
+
+    # Keep only the div with id 'k1' of each article page
+    keep_only_tags    = [
+            dict(name='div', attrs={'id':['k1']})
+                ]
+
+    # Elements dropped from the kept content
+    remove_tags = [
+            dict(name='div', attrs={'class':['zdjM', 'rel_video', 'zdjP', 'rel_box', 'index mod_zi_dolStrony']})
+            ,dict(name='div', attrs={'id':['source', 'banP4', 'article_toolbar', 'rel', 'inContext_disabled']})
+            ,dict(name='ul', attrs={'id':['articleToolbar']})
+            ,dict(name='img', attrs={'class':['brand']})
+            ,dict(name='h5', attrs={'class':['author']})
+            ,dict(name='h6', attrs={'class':['date']})
+            ,dict(name='p', attrs={'class':['txt_upl']})
+                ]
+
+    remove_tags_after = [
+            dict(name='div', attrs={'id':['Str']})                #page-number navigator (the pager followed by append_page)
+                ]
+
+    def load_article_links(self, url, count):
+        print '--- load_article_links', url, count
+
+		#page with link to articles
+        soup = self.index_to_soup(url)
+
+		#table with articles
+        list = soup.find('div', attrs={'class':'GWdalt'})
+
+		#single articles (link, title, ...)
+        links = list.findAll('div', attrs={'class':['GWdaltE']})
+
+        if len(links) < count:
+            #load links to more articles...
+
+			#remove new link
+            pages_nav = list.find('div', attrs={'class':'pages'})
+            next = pages_nav.find('a', attrs={'class':'next'})
+            if next:
+                print 'next=', next['href']
+                url = 'http://wyborcza.pl' + next['href']
+                #e.g. url = 'http://wyborcza.pl/0,75480.html?str=2'
+
+                older_links = self.load_article_links(url, count - len(links))
+                links.extend(older_links)
+
+        return links
+
+
+    #produce list of articles to download
+    def parse_index(self):
+        print '--- parse_index'
+
+        max_articles = 8000
+        links = self.load_article_links('http://wyborcza.pl/0,75480.html', max_articles)
+
+        ans = []
+        key = None
+        articles = {}
+
+        key = 'Uncategorized'
+        articles[key] = []
+
+        for div_art in links:
+            div_date = div_art.find('div', attrs={'class':'kL'})
+            div = div_art.find('div', attrs={'class':'kR'})
+
+            a = div.find('a', href=True)
+
+            url = a['href']
+            title = a.string
+            description = ''
+            pubdate = div_date.string.rstrip().lstrip()
+            summary = div.find('span', attrs={'class':'lead'})
+
+            desc = summary.find('a', href=True)
+            if desc:
+                desc.extract()
+
+            description = self.tag_to_string(summary, use_alt=False)
+            description = description.rstrip().lstrip()
+
+            feed = key if key is not None else 'Duzy Format'
+
+            if not articles.has_key(feed):
+                articles[feed] = []
+
+            if description != '':  # skip just pictures atricle
+                articles[feed].append(
+                                   dict(title=title, url=url, date=pubdate,
+                                        description=description,
+                                        content=''))
+
+        ans = [(key, articles[key])]
+        return ans
+
+    def append_page(self, soup, appendtag, position):
+        pager = soup.find('div',attrs={'id':'Str'})
+        if pager:
+			#seek for 'a' element with nast value (if not found exit)
+            list = pager.findAll('a')
+
+            for elem in list:
+                if 'nast' in elem.string:
+                    nexturl = elem['href']
+
+                    soup2 = self.index_to_soup('http://warszawa.gazeta.pl' + nexturl)
+
+                    texttag = soup2.find('div', attrs={'id':'artykul'})
+
+                    newpos = len(texttag.contents)
+                    self.append_page(soup2,texttag,newpos)
+                    texttag.extract()
+                    appendtag.insert(position,texttag)
+
+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body, 3)
+
+        # finally remove some tags
+        pager = soup.find('div',attrs={'id':'Str'})
+        if pager:
+           pager.extract()
+
+        pager = soup.find('div',attrs={'class':'tylko_int'})
+        if pager:
+           pager.extract()
+
+        return soup
--- a/resources/quick_start.epub
+++ b/resources/quick_start.epub
--- a/setup/iso_639/en_GB.po
+++ b/setup/iso_639/en_GB.po
@ -14,8 +14,8 @@ msgstr ""
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-01-19 04:39+0000\n"
-"X-Generator: Launchpad (build 14692)\n"
+"X-Launchpad-Export-Date: 2012-01-20 04:38+0000\n"
+"X-Generator: Launchpad (build 14700)\n"

 #. name for aaa
 msgid "Ghotuo"
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 8, 36)
+numeric_version = (0, 8, 37)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -161,4 +161,32 @@ def get_version():
        v += '*'
    return v

+def get_unicode_windows_env_var(name):
+    import ctypes
+    name = unicode(name)
+    n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0)
+    if n == 0:
+        return None
+    buf = ctypes.create_unicode_buffer(u'\0'*n)
+    ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n)
+    return buf.value

+def get_windows_username():
+    '''
+    Return the user name of the currently loggen in user as a unicode string.
+    Note that usernames on windows are case insensitive, the case of the value
+    returned depends on what the user typed into the login box at login time.
+    '''
+    import ctypes
+    try:
+        advapi32 = ctypes.windll.advapi32
+        GetUserName = getattr(advapi32, u'GetUserNameW')
+    except AttributeError:
+        pass
+    else:
+        buf = ctypes.create_unicode_buffer(257)
+        n = ctypes.c_int(257)
+        if GetUserName(buf, ctypes.byref(n)):
+            return buf.value
+
+    return get_unicode_windows_env_var(u'USERNAME')
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -38,6 +38,7 @@ class ANDROID(USBMS):
                       0xca4  : [0x100, 0x0227, 0x0226, 0x222],
                       0xca9  : [0x100, 0x0227, 0x0226, 0x222],
                       0xcac  : [0x100, 0x0227, 0x0226, 0x222],
+                       0x2910 : [0x222],
            },

            # Eken
@ -175,13 +176,13 @@ class ANDROID(USBMS):
            'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
-            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK']
+            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
            '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
            'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
-            'A1-07___C0541A4F', 'XT912']
+            'A1-07___C0541A4F', 'XT912', 'MB855']

    OSX_MAIN_MEM = 'Android Device Main Memory'

--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -209,8 +209,8 @@ class ALURATEK_COLOR(USBMS):

    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'books'

-    VENDOR_NAME = 'USB_2.0'
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB_FLASH_DRIVER'
+    VENDOR_NAME = ['USB_2.0', 'EZREADER']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['USB_FLASH_DRIVER', '.']

 class TREKSTOR(USBMS):

--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -591,26 +591,7 @@ class Device(DeviceConfig, DevicePlugin):
            mp = self.node_mountpoint(node)
            if mp is not None:
                return mp, 0
-            if type == 'main':
-                label = self.MAIN_MEMORY_VOLUME_LABEL
-            if type == 'carda':
-                label = self.STORAGE_CARD_VOLUME_LABEL
-            if type == 'cardb':
-                label = self.STORAGE_CARD2_VOLUME_LABEL
-                if not label:
-                    label = self.STORAGE_CARD_VOLUME_LABEL + ' 2'
-            if not label:
-                label = 'E-book Reader (%s)'%type
-            extra = 0
-            while True:
-                q = ' (%d)'%extra if extra else ''
-                if not os.path.exists('/media/'+label+q):
-                    break
-                extra += 1
-            if extra:
-                label += ' (%d)'%extra
-
-            def do_mount(node, label):
+            def do_mount(node):
                try:
                    from calibre.devices.udisks import mount
                    mount(node)
@ -621,8 +602,7 @@ class Device(DeviceConfig, DevicePlugin):
                    traceback.print_exc()
                    return 1

-
-            ret = do_mount(node, label)
+            ret = do_mount(node)
            if ret != 0:
                return None, ret
            return self.node_mountpoint(node)+'/', 0
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -157,7 +157,7 @@ class HeuristicProcessor(object):

        ITALICIZE_STYLE_PATS = [
            ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
-            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
+            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
            ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
            ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
            ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
@ -172,8 +172,11 @@ class HeuristicProcessor(object):
        for word in ITALICIZE_WORDS:
            html = re.sub(r'(?<=\s|>)' + re.escape(word) + r'(?=\s|<)', '<i>%s</i>' % word, html)

+        def sub(mo):
+            return '<i>%s</i>'%mo.group('words')
+
        for pat in ITALICIZE_STYLE_PATS:
-            html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), html)
+            html = re.sub(pat, sub, html)

        return html

--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -475,7 +475,9 @@ class HTMLInput(InputFormatPlugin):
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
-            item.html_input_href = quote(bhref)
+            if isinstance(bhref, unicode):
+                bhref = bhref.encode('utf-8')
+            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -494,7 +494,9 @@ class MobiWriter(object):
                    creators = [normalize(unicode(c)) for c in items]
                items = ['; '.join(creators)]
            for item in items:
-                data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
+                data = normalize(unicode(item))
+                if term != 'description':
+                    data = self.COLLAPSE_RE.sub(' ', data)
                if term == 'identifier':
                    if data.lower().startswith('urn:isbn:'):
                        data = data[9:]
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -29,14 +29,38 @@ class Extract(ODF2XHTML):
        root = etree.fromstring(html)
        self.epubify_markup(root, log)
        self.filter_css(root, log)
+        self.extract_css(root)
        html = etree.tostring(root, encoding='utf-8',
                xml_declaration=True)
        return html

+    def extract_css(self, root):
+        ans = []
+        for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
+            ans.append(s.text)
+            s.getparent().remove(s)
+
+        head = root.xpath('//*[local-name() = "head"]')
+        if head:
+            head = head[0]
+            ns = head.nsmap.get(None, '')
+            if ns:
+                ns = '{%s}'%ns
+            etree.SubElement(head, ns+'link', {'type':'text/css',
+                'rel':'stylesheet', 'href':'odfpy.css'})
+
+        with open('odfpy.css', 'wb') as f:
+            f.write((u'\n\n'.join(ans)).encode('utf-8'))
+
+
    def epubify_markup(self, root, log):
+        from calibre.ebooks.oeb.base import XPath, XHTML
+        # Fix empty title tags
+        for t in XPath('//h:title')(root):
+            if not t.text:
+                t.text = u' '
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
-        from calibre.ebooks.oeb.base import XPath, XHTML
        pdiv = XPath('//h:p/h:div')
        for div in pdiv(root):
            div.getparent().tag = XHTML('div')
@ -146,7 +170,8 @@ class Extract(ODF2XHTML):
            if not mi.authors:
                mi.authors = [_('Unknown')]
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
-            opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
+            opf.create_manifest([(os.path.abspath(f), None) for f in
+                walk(os.getcwdu())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -425,15 +425,24 @@ class DirContainer(object):
                    self.opfname = path
                    return

+    def _unquote(self, path):
+        # urlunquote must run on a bytestring and will return a bytestring
+        # If it runs on a unicode object, it returns a double encoded unicode
+        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
+        # and the latter is correct
+        if isinstance(path, unicode):
+            path = path.encode('utf-8')
+        return urlunquote(path).decode('utf-8')
+
    def read(self, path):
        if path is None:
            path = self.opfname
-        path = os.path.join(self.rootdir, path)
-        with open(urlunquote(path), 'rb') as f:
+        path = os.path.join(self.rootdir, self._unquote(path))
+        with open(path, 'rb') as f:
            return f.read()

    def write(self, path, data):
-        path = os.path.join(self.rootdir, urlunquote(path))
+        path = os.path.join(self.rootdir, self._unquote(path))
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)
@ -442,7 +451,7 @@ class DirContainer(object):

    def exists(self, path):
        try:
-            path = os.path.join(self.rootdir, urlunquote(path))
+            path = os.path.join(self.rootdir, self._unquote(path))
        except ValueError: #Happens if path contains quoted special chars
            return False
        return os.path.isfile(path)
--- a/src/calibre/ebooks/oeb/display/cfi.coffee
+++ b/src/calibre/ebooks/oeb/display/cfi.coffee
@ -153,7 +153,7 @@ class CanonicalFragmentIdentifier

    ###
    This class is a namespace to expose CFI functions via the window.cfi
-    object. The three most important functions are:
+    object. The most important functions are:

    is_compatible(): Throws an error if the browser is not compatible with
                     this script
@ -166,6 +166,8 @@ class CanonicalFragmentIdentifier
    ###

    constructor: () -> # {{{
+        if not (this instanceof arguments.callee) # parenthesized: 'not' binds tighter than 'instanceof', so the unparenthesized test is always false
+            throw new Error('CFI constructor called as function')
        this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version."
        this.IE_ERR = "Your browser is too old. You need Internet Explorer version 9 or newer."
        div = document.createElement('div')
@ -322,7 +324,7 @@ class CanonicalFragmentIdentifier
            point.time = r[1] - 0 # Coerce to number
            cfi = cfi.substr(r[0].length)

-        if (r = cfi.match(/^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)/)) != null
+        if (r = cfi.match(/^@(-?\d+(\.\d+)?):(-?\d+(\.\d+)?)/)) != null
            # Spatial offset
            point.x = r[1] - 0 # Coerce to number
            point.y = r[3] - 0 # Coerce to number
@ -416,7 +418,7 @@ class CanonicalFragmentIdentifier
            rect = target.getBoundingClientRect()
            px = ((x - rect.left)*100)/target.offsetWidth
            py = ((y - rect.top)*100)/target.offsetHeight
-            tail = "#{ tail }@#{ fstr px },#{ fstr py }"
+            tail = "#{ tail }@#{ fstr px }:#{ fstr py }"
        else if name != 'audio'
            # Get the text offset
            # We use a custom function instead of caretRangeFromPoint as
@ -579,11 +581,12 @@ class CanonicalFragmentIdentifier

        get_cfi = (ox, oy) ->
            try
-                cfi = this.at(ox, oy)
-                point = this.point(cfi)
+                cfi = window.cfi.at(ox, oy)
+                point = window.cfi.point(cfi)
            catch err
                cfi = null

+            if cfi
                if point.range != null
                    r = point.range
                    rect = r.getClientRects()[0]
@ -625,8 +628,16 @@ class CanonicalFragmentIdentifier
                    return cfi
                cury += delta

-        # TODO: Return the CFI corresponding to the <body> tag
-        null
+        # Use a spatial offset on the html element, since we could not find a
+        # normal CFI
+        [x, y] = window_scroll_pos()
+        de = document.documentElement
+        rect = de.getBoundingClientRect()
+        px = (x*100)/rect.width
+        py = (y*100)/rect.height
+        cfi = "/2@#{ fstr px }:#{ fstr py }"
+
+        return cfi

    # }}}

--- a/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee
+++ b/src/calibre/ebooks/oeb/display/test-cfi/cfi-test.coffee
@ -30,18 +30,23 @@ window_ypos = (pos=null) ->
    window.scrollTo(0, pos)

 mark_and_reload = (evt) ->
-    # Remove image in case the click was on the image itself, we want the cfi to
-    # be on the underlying element
    x = evt.clientX
    y = evt.clientY
    if evt.button == 2
        return # Right mouse click, generated only in firefox
-    reset = document.getElementById('reset')
-    if document.elementFromPoint(x, y) == reset
+
+    if document.elementFromPoint(x, y)?.getAttribute('id') in ['reset', 'viewport_mode']
        return
+
+    # Remove image in case the click was on the image itself, we want the cfi to
+    # be on the underlying element
    ms = document.getElementById("marker")
-    if ms
-        ms.parentNode?.removeChild(ms)
+    ms.style.display = 'none'
+
+    if document.getElementById('viewport_mode').checked
+        cfi = window.cfi.at_current()
+        window.cfi.scroll_to(cfi)
+        return

    fn = () ->
        try
--- a/src/calibre/ebooks/oeb/display/test-cfi/index.html
+++ b/src/calibre/ebooks/oeb/display/test-cfi/index.html
@ -8,6 +8,7 @@
            body { 
                font-family: sans-serif;
                background-color: white;
+                padding-bottom: 500px;
            }
            
            h1, h2 { color: #005a9c }
@ -48,7 +49,13 @@
        <div id="container">
            <h1 id="first-h1">Testing cfi.coffee</h1>
            <p>Click anywhere and the location will be marked with a marker, whose position is set via a CFI.</p>
-            <p><a id="reset" href="/">Reset CFI to None</a></p>
+            <p>
+                <a id="reset" href="/">Reset CFI to None</a>
+                &nbsp;
+                Test viewport location calculation:
+                <input type="checkbox" id="viewport_mode" title=
+                "Checking this will cause the window to scroll to a position based on a CFI calculated for the windows current position."/>
+            </p>
            <h2>A div with scrollbars</h2>
            <p>Scroll down and click on some elements. Make sure to hit both
            bold and not bold text as well as different points on the image</p>
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -327,7 +327,7 @@ class OEBReader(object):
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            href = elem.get('href')
-            path = urldefrag(href)[0]
+            path = urlnormalize(urldefrag(href)[0])
            if path not in manifest.hrefs:
                self.logger.warn(u'Guide reference %r not found' % href)
                continue
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -37,6 +37,7 @@ def get_filters():
            (_('SNB Books'), ['snb']),
            (_('Comics'), ['cbz', 'cbr', 'cbc']),
            (_('Archives'), ['zip', 'rar']),
+            (_('Wordprocessor files'), ['odt', 'doc', 'docx']),
    ]


--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -206,6 +206,12 @@ class DeviceManager(Thread): # {{{
                self.scanner.is_device_connected(self.connected_device,
                        only_presence=True)
            if not connected:
+                if DEBUG:
+                    # Allow the device subsystem to output debugging info about
+                    # why it thinks the device is not connected. Used, e.g.,
+                    # in the can_handle() method of the T1 driver.
+                    self.scanner.is_device_connected(self.connected_device,
+                            only_presence=True, debug=True)
                self.connected_device_removed()
        else:
            possibly_connected_devices = []
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -363,14 +363,15 @@ class BooksView(QTableView): # {{{
                    history.append([col, order])
        return history

-    def apply_sort_history(self, saved_history):
+    def apply_sort_history(self, saved_history, max_sort_levels=3):
        if not saved_history:
            return
-        for col, order in reversed(self.cleanup_sort_history(saved_history)[:3]):
+        for col, order in reversed(self.cleanup_sort_history(
+                saved_history)[:max_sort_levels]):
            self.sortByColumn(self.column_map.index(col),
                              Qt.AscendingOrder if order else Qt.DescendingOrder)

-    def apply_state(self, state):
+    def apply_state(self, state, max_sort_levels=3):
        h = self.column_header
        cmap = {}
        hidden = state.get('hidden_columns', [])
@ -399,7 +400,8 @@ class BooksView(QTableView): # {{{
                    sz = h.sectionSizeHint(cmap[col])
                h.resizeSection(cmap[col], sz)

-        self.apply_sort_history(state.get('sort_history', None))
+        self.apply_sort_history(state.get('sort_history', None),
+                max_sort_levels=max_sort_levels)

        for col, alignment in state.get('column_alignment', {}).items():
            self._model.change_alignment(col, alignment)
@ -474,6 +476,7 @@ class BooksView(QTableView): # {{{
        old_state = self.get_old_state()
        if old_state is None:
            old_state = self.get_default_state()
+        max_levels = 3

        if tweaks['sort_columns_at_startup'] is not None:
            sh = []
@ -488,9 +491,10 @@ class BooksView(QTableView): # {{{
                import traceback
                traceback.print_exc()
            old_state['sort_history'] = sh
+            max_levels = max(3, len(sh))

        self.column_header.blockSignals(True)
-        self.apply_state(old_state)
+        self.apply_state(old_state, max_sort_levels=max_levels)
        self.column_header.blockSignals(False)

        # Resize all rows to have the correct height
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -10,7 +10,7 @@ from PyQt4.Qt import (QCoreApplication, QIcon, QObject, QTimer,
 from calibre import prints, plugins, force_unicode
 from calibre.constants import (iswindows, __appname__, isosx, DEBUG,
        filesystem_encoding)
-from calibre.utils.ipc import ADDRESS, RC
+from calibre.utils.ipc import gui_socket_address, RC
 from calibre.gui2 import (ORG_NAME, APP_UID, initialize_file_icon_provider,
    Application, choose_dir, error_dialog, question_dialog, gprefs)
 from calibre.gui2.main_window import option_parser as _option_parser
@ -304,7 +304,7 @@ def cant_start(msg=_('If you are sure it is not running')+', ',
        if iswindows:
            what = _('try rebooting your computer.')
        else:
-            what = _('try deleting the file')+': '+ADDRESS
+            what = _('try deleting the file')+': '+ gui_socket_address()

    info = base%(where, msg, what)
    error_dialog(None, _('Cannot Start ')+__appname__,
@ -345,14 +345,14 @@ def main(args=sys.argv):
        return 0
    if si:
        try:
-            listener = Listener(address=ADDRESS)
+            listener = Listener(address=gui_socket_address())
        except socket.error:
            if iswindows:
                cant_start()
-            if os.path.exists(ADDRESS):
-                os.remove(ADDRESS)
+            if os.path.exists(gui_socket_address()):
+                os.remove(gui_socket_address())
            try:
-                listener = Listener(address=ADDRESS)
+                listener = Listener(address=gui_socket_address())
            except socket.error:
                cant_start()
            else:
@ -363,7 +363,7 @@ def main(args=sys.argv):
                    gui_debug=gui_debug)
    otherinstance = False
    try:
-        listener = Listener(address=ADDRESS)
+        listener = Listener(address=gui_socket_address())
    except socket.error: # Good si is correct (on UNIX)
        otherinstance = True
    else:
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@ -169,7 +169,10 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.basic_metadata_widgets.extend([self.series, self.series_index])

        self.formats_manager = FormatsManager(self, self.copy_fmt)
-        self.basic_metadata_widgets.append(self.formats_manager)
+        # We want format changes to be committed before title/author, as
+        # otherwise we could have data loss if the title/author changed and the
+        # user was trying to add an extra file from the old book's directory.
+        self.basic_metadata_widgets.insert(0, self.formats_manager)
        self.formats_manager.metadata_from_format_button.clicked.connect(
                self.metadata_from_format)
        self.formats_manager.cover_from_format_button.clicked.connect(
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@ -24,18 +24,27 @@ from calibre.constants import iswindows

 class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

-    def __init__(self, *args):
-        QAbstractItemModel.__init__(self, *args)
+    def __init__(self, show_only_user_plugins=False):
+        QAbstractItemModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])
+        self.show_only_user_plugins = show_only_user_plugins
        self.icon = QVariant(QIcon(I('plugins.png')))
        p = QIcon(self.icon).pixmap(32, 32, QIcon.Disabled, QIcon.On)
        self.disabled_icon = QVariant(QIcon(p))
        self._p = p
        self.populate()

+    def toggle_shown_plugins(self, show_only_user_plugins):
+        self.show_only_user_plugins = show_only_user_plugins
+        self.populate()
+        self.reset()
+
    def populate(self):
        self._data = {}
        for plugin in initialized_plugins():
+            if (getattr(plugin, 'plugin_path', None) is None
+                    and self.show_only_user_plugins):
+                continue
            if plugin.type not in self._data:
                self._data[plugin.type] = [plugin]
            else:
@ -64,6 +73,7 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
            if p < 0:
                if query in lower(self.categories[c]):
                    ans.add((c, p))
+                continue
            else:
                try:
                    plugin = self._data[self.categories[c]][p]
@ -209,7 +219,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def genesis(self, gui):
        self.gui = gui
-        self._plugin_model = PluginModel()
+        self._plugin_model = PluginModel(self.user_installed_plugins.isChecked())
        self.plugin_view.setModel(self._plugin_model)
        self.plugin_view.setStyleSheet(
                "QTreeView::item { padding-bottom: 10px;}")
@ -226,6 +236,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.next_button.clicked.connect(self.find_next)
        self.previous_button.clicked.connect(self.find_previous)
        self.changed_signal.connect(self.reload_store_plugins)
+        self.user_installed_plugins.stateChanged.connect(self.show_user_installed_plugins)
+
+    def show_user_installed_plugins(self, state):
+        self._plugin_model.toggle_shown_plugins(self.user_installed_plugins.isChecked())

    def find(self, query):
        idx = self._plugin_model.find(query)
--- a/src/calibre/gui2/preferences/plugins.ui
+++ b/src/calibre/gui2/preferences/plugins.ui
@ -65,6 +65,16 @@
     </item>
    </layout>
   </item>
+   <item>
+    <widget class="QCheckBox" name="user_installed_plugins">
+     <property name="toolTip">
+      <string>Show only those plugins that have been installed by you</string>
+     </property>
+     <property name="text">
+      <string>Show only &amp;user installed plugins</string>
+     </property>
+    </widget>
+   </item>
   <item>
    <widget class="QTreeView" name="plugin_view">
     <property name="alternatingRowColors">
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -292,6 +292,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
            self.library_view.model().books_added(1)
            if hasattr(self, 'db_images'):
                self.db_images.reset()
+            if self.library_view.model().rowCount(None) < 3:
+                self.library_view.resizeColumnsToContents()

        self.library_view.model().count_changed()
        self.bars_manager.database_changed(self.library_view.model().db)
@ -464,6 +466,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
            self.library_view.model().refresh()
            self.library_view.model().research()
            self.tags_view.recount()
+            self.library_view.model().db.refresh_format_cache()
        elif msg.startswith('shutdown:'):
            self.quit(confirm_quit=False)
        else:
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -24,6 +24,7 @@ from calibre.constants import iswindows
 from calibre import prints, guess_type
 from calibre.gui2.viewer.keys import SHORTCUTS
 from calibre.gui2.viewer.javascript import JavaScriptLoader
+from calibre.gui2.viewer.position import PagePosition

 # }}}

@ -170,10 +171,12 @@ class Document(QWebPage): # {{{
        settings.setFontFamily(QWebSettings.SerifFont, opts.serif_family)
        settings.setFontFamily(QWebSettings.SansSerifFont, opts.sans_family)
        settings.setFontFamily(QWebSettings.FixedFont, opts.mono_family)
+        settings.setAttribute(QWebSettings.ZoomTextOnly, True)

    def do_config(self, parent=None):
        d = ConfigDialog(self.shortcuts, parent)
        if d.exec_() == QDialog.Accepted:
+            with self.page_position:
                self.set_font_settings()
                self.set_user_stylesheet()
                self.misc_config()
@ -196,6 +199,7 @@ class Document(QWebPage): # {{{
        pal = self.palette()
        pal.setBrush(QPalette.Background, QColor(0xee, 0xee, 0xee))
        self.setPalette(pal)
+        self.page_position = PagePosition(self)

        settings = self.settings()

@ -895,15 +899,16 @@ class DocumentView(QWebView): # {{{
    @dynamic_property
    def multiplier(self):
        def fget(self):
-            return self.document.mainFrame().textSizeMultiplier()
+            return self.zoomFactor()
        def fset(self, val):
-            self.document.mainFrame().setTextSizeMultiplier(val)
+            self.setZoomFactor(val)
            self.magnification_changed.emit(val)
        return property(fget=fget, fset=fset)

    def magnify_fonts(self, amount=None):
        if amount is None:
            amount = self.document.font_magnification_step
+        with self.document.page_position:
            self.multiplier += amount
        return self.document.scroll_fraction

@ -911,6 +916,7 @@ class DocumentView(QWebView): # {{{
        if amount is None:
            amount = self.document.font_magnification_step
        if self.multiplier >= amount:
+            with self.document.page_position:
                self.multiplier -= amount
        return self.document.scroll_fraction

--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -481,16 +481,14 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
        self.load_ebook(action.path)

    def font_size_larger(self):
-        frac = self.view.magnify_fonts()
+        self.view.magnify_fonts()
        self.action_font_size_larger.setEnabled(self.view.multiplier < 3)
        self.action_font_size_smaller.setEnabled(self.view.multiplier > 0.2)
-        self.set_page_number(frac)

    def font_size_smaller(self):
-        frac = self.view.shrink_fonts()
+        self.view.shrink_fonts()
        self.action_font_size_larger.setEnabled(self.view.multiplier < 3)
        self.action_font_size_smaller.setEnabled(self.view.multiplier > 0.2)
-        self.set_page_number(frac)

    def magnification_changed(self, val):
        tt = _('Make font size %(which)s\nCurrent magnification: %(mag).1f')
--- a/src/calibre/gui2/viewer/position.py
+++ b/src/calibre/gui2/viewer/position.py
@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import json
+
+class PagePosition(object):
+
+    def __init__(self, document):
+        self.document = document
+
+    @property
+    def viewport_cfi(self):
+        ans = None
+        res = self.document.mainFrame().evaluateJavaScript('''
+            ans = 'undefined';
+            try {
+                ans = window.cfi.at_current();
+                if (!ans) ans = 'undefined';
+            } catch (err) {
+                window.console.log(err);
+            }
+            window.console.log("Viewport cfi: " + ans);
+            ans;
+        ''')
+        if res.isValid() and not res.isNull() and res.type() == res.String:
+            c = unicode(res.toString())
+            if c != 'undefined':
+                ans = c
+        return ans
+
+    def scroll_to_cfi(self, cfi):
+        if cfi:
+            cfi = json.dumps(cfi)
+            self.document.mainFrame().evaluateJavaScript('''
+                    function fix_scroll() {
+                        /* cfi.scroll_to() uses scrollIntoView() which can result
+                           in scrolling along the x-axis. So we
+                           explicitly scroll to x=0.
+                        */
+                       scrollTo(0, window.pageYOffset)
+                    }
+
+                    window.cfi.scroll_to(%s, fix_scroll);
+                '''%cfi)
+
+    @property
+    def current_pos(self):
+        ans = self.viewport_cfi
+        if not ans:
+            ans = self.document.scroll_fraction
+        return ans
+
+    def __enter__(self):
+        self._cpos = self.current_pos
+
+    def __exit__(self, *args):
+        if isinstance(self._cpos, (int, float)):
+            self.document.scroll_fraction = self._cpos
+        else:
+            self.scroll_to_cfi(self._cpos)
+        self._cpos = None
+
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -312,10 +312,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
            load_user_template_functions(self.prefs.get('user_template_functions', []))

        # Load the format filename cache
-        self.format_filename_cache = defaultdict(dict)
-        for book_id, fmt, name in self.conn.get(
-                'SELECT book,format,name FROM data'):
-            self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
+        self.refresh_format_cache()

        self.conn.executescript('''
        DROP TRIGGER IF EXISTS author_insert_trg;
@ -509,7 +506,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
        self.refresh()
        self.last_update_check = self.last_modified()
-        self.format_metadata_cache = defaultdict(dict)

    def break_cycles(self):
        self.data.break_cycles()
@ -528,6 +524,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        ''' Return last modified time as a UTC datetime object'''
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)

+    def refresh_format_cache(self):
+        self.format_filename_cache = defaultdict(dict)
+        for book_id, fmt, name in self.conn.get(
+                'SELECT book,format,name FROM data'):
+            self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
+        self.format_metadata_cache = defaultdict(dict)

    def check_if_modified(self):
        if self.last_modified() > self.last_update_check:
@ -1401,7 +1403,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        id = index if index_is_id else self.id(index)
        if not format: format = ''
        self.format_metadata_cache[id].pop(format.upper(), None)
-        name = self.format_filename_cache[id].pop(format.upper(), None)
+        name = self.format_filename_cache[id].get(format.upper(), None)
        if name:
            if not db_only:
                try:
@ -1410,6 +1412,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                        delete_file(path)
                except:
                    traceback.print_exc()
+            self.format_filename_cache[id].pop(format.upper(), None)
            self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
            if commit:
                self.conn.commit()
--- a/src/calibre/library/server/main.py
+++ b/src/calibre/library/server/main.py
@ -111,6 +111,10 @@ def main(args=sys.argv):
    from calibre.utils.config import prefs
    if opts.with_library is None:
        opts.with_library = prefs['library_path']
+    if not opts.with_library:
+        print('No saved library path. Use the --with-library option'
+                ' to specify the path to the library you want to use.')
+        return 1
    db = LibraryDatabase2(opts.with_library)
    server = LibraryServer(db, opts, show_tracebacks=opts.develop)
    server.start()
--- a/src/calibre/ptempfile.py
+++ b/src/calibre/ptempfile.py
@ -191,8 +191,14 @@ class SpooledTemporaryFile(tempfile.SpooledTemporaryFile):
            suffix = ''
        if dir is None:
            dir = base_dir()
-        tempfile.SpooledTemporaryFile.__init__(self, max_size=max_size, suffix=suffix,
-                prefix=prefix, dir=dir, mode=mode, bufsize=bufsize)
+        tempfile.SpooledTemporaryFile.__init__(self, max_size=max_size,
+                suffix=suffix, prefix=prefix, dir=dir, mode=mode,
+                bufsize=bufsize)
+
+    def truncate(self, *args):
+        # The stdlib SpooledTemporaryFile implementation of truncate() doesn't
+        # allow specifying a size.
+        self._file.truncate(*args)

 def better_mktemp(*args, **kwargs):
    fd, path = tempfile.mkstemp(*args, **kwargs)
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/da.po
+++ b/src/calibre/translations/da.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/en_AU.po
+++ b/src/calibre/translations/en_AU.po
--- a/src/calibre/translations/en_CA.po
+++ b/src/calibre/translations/en_CA.po
--- a/src/calibre/translations/en_GB.po
+++ b/src/calibre/translations/en_GB.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/et.po
+++ b/src/calibre/translations/et.po
--- a/src/calibre/translations/eu.po
+++ b/src/calibre/translations/eu.po
--- a/src/calibre/translations/fa.po
+++ b/src/calibre/translations/fa.po
--- a/src/calibre/translations/fi.po
+++ b/src/calibre/translations/fi.po
--- a/src/calibre/translations/fo.po
+++ b/src/calibre/translations/fo.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/gu.po
+++ b/src/calibre/translations/gu.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hi.po
+++ b/src/calibre/translations/hi.po
--- a/src/calibre/translations/hr.po
+++ b/src/calibre/translations/hr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/id.po
+++ b/src/calibre/translations/id.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/ja.po
+++ b/src/calibre/translations/ja.po
--- a/src/calibre/translations/kn.po
+++ b/src/calibre/translations/kn.po
--- a/src/calibre/translations/ko.po
+++ b/src/calibre/translations/ko.po
--- a/src/calibre/translations/ku.po
+++ b/src/calibre/translations/ku.po
--- a/src/calibre/translations/lt.po
+++ b/src/calibre/translations/lt.po
--- a/src/calibre/translations/ltg.po
+++ b/src/calibre/translations/ltg.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/mk.po
+++ b/src/calibre/translations/mk.po
--- a/src/calibre/translations/ml.po
+++ b/src/calibre/translations/ml.po
--- a/src/calibre/translations/mr.po
+++ b/src/calibre/translations/mr.po
--- a/src/calibre/translations/ms.po
+++ b/src/calibre/translations/ms.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/nn.po
+++ b/src/calibre/translations/nn.po
--- a/src/calibre/translations/oc.po
+++ b/src/calibre/translations/oc.po
--- a/src/calibre/translations/pa.po
+++ b/src/calibre/translations/pa.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/Show More
+++ b/Show More