Merge from trunk

Charles Haley 2012-01-08 09:07:49 +01:00
commit 953f61ef8d
196 changed files with 91740 additions and 33266 deletions

@@ -19,6 +19,112 @@
# new recipes:
# - title:
- version: 0.8.34
date: 2012-01-06
new features:
- title: "Apple driver: Set the year field in iTunes based on the published date in calibre."
tickets: [909050]
- title: "EPUB Input: When converting a file that has entries in the manifest that do no exist, remove them, instead of aborting the conversion."
tickets: [910933]
- title: "Kindle driver: Ensure page counts are correctly sent to the device when connecting to Kindle 4/Touch."
tickets: [910279]
- title: "Allow user to set the number of recently viewed books shown in the dropdown menu of the view button, via a tweak in Preferences->Tweaks."
tickets: [910292]
bug fixes:
- title: "Fix regression in 0.8.33 that caused calibre to crash when starting the Content Server, if the port the content server is trying to listen on is blocked/busy."
tickets: [910512]
- title: "MOBI Input: Fix regression that caused a mixup of images when the MOBI file header contains an incorrect first image index pointer."
tickets: [911243]
- title: "Do not remove leading and trailing spaces from the replace fields in the Search and Replace conversion options"
tickets: [910523]
- title: "Conversion pipeline: Fix regression in 0.8.31 that broke parsing of documents containing a self closing <title/> tag."
tickets: [910325]
improved recipes:
- Kopalnia Wiedzy
- Alternet
- Tagesspiegel
- Philadelphia Inquirer
- Seattle Times
- La Razon
new recipes:
- title: Various Italian news sources
author: faber1971
- title: money.pl
author: intromatyk
- title: Diario Rio Negro
author: Darko Miletic
- title: FHM UK
author: Dave Asbury
- version: 0.8.33
date: 2011-12-30
new features:
- title: "LIT Input: Switch to non-recursive algorithm, to allow conversion of files with deeply nested markup."
tickets: [909535]
- title: "Content server: Do not show the original_* formats in the mobile interface. Also upgrade to the latest CherryPy release."
- title: "E-book viewer: Add option in viewer preferences to control how much the font size is changed when you click the make fonts bigger/smaller buttons."
tickets: [908980]
- title: "E-book viewer: Allow changing font size via Ctrl+Mouse wheel"
tickets: [908975]
- title: "Kobo driver: Hide previews and recommendations from the book list. You can customize the Kobo plugin if you would like to see them via Preferences->Plugins"
bug fixes:
- title: "Copy to library: Fix title sort not being copied"
- title: "PDF Output: Add custom size conversion option to the GUI (it was only present on the command line before)"
- title: "Add missing --keep-ligatures option to the ebook-convert command line"
tickets: [909182]
- title: "Fix rendering of non ascii characters in generated masthead images when downloading news for the Kindle"
- title: "Linux binary: Disable qt-sp-api as it causes crashes/performance issues on various distros"
- title: "E-book viewer: Ensure that reference mode highlighting is removed from the book when reference mode is closed."
tickets: [908982]
- title: "Fix unable to load plugins from files on GNOME/XFCE desktops"
- title: "Fix regression that broke customizing toolbars on non English calibre installs"
- title: "Conversion pipeline: Disable HTML 5 parsing if it results in deeply nested trees."
tickets: [908818]
- title: "Do not loose position in book list on device connection/sync, if a search is active."
tickets: [908553]
- title: "Fix regression in 0.8.32 that broke deleting books if the path to the library contained non-ascii characters on linux"
tickets: [908068]
improved recipes:
- Echo Online
- La Razon
new recipes:
- title: NYTimes Global
author: Krittika Goyal
- title: Grantland
author: Barty
- version: 0.8.32
date: 2011-12-23

@@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
category = 'News, Magazine'
description = 'News magazine and online community'
feeds = [
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
]
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
]
remove_attributes = ['width', 'align','cellspacing']
remove_javascript = True
use_embedded_content = False
@@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
conversion_options = {'linearize_tables': True}
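For context, the hunk above ends with the tail of this recipe's get_obfuscated_article. A minimal sketch of that pattern, assuming the usual calibre recipe conventions (the '_alternet.html' suffix is illustrative; compare the IHT recipe further down in this diff):

from calibre.ptempfile import PersistentTemporaryFile

def get_obfuscated_article(self, url):
    # Sketch only: fetch the page ourselves, park the bytes in a
    # persistent temp file, and hand calibre the file name instead of the URL.
    br = self.get_browser()
    br.open(url)
    html = br.response().read()
    self.temp_files.append(PersistentTemporaryFile('_alternet.html'))
    self.temp_files[-1].write(html)
    self.temp_files[-1].close()
    return self.temp_files[-1].name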

@@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
language = 'en'
no_stylesheets = True
max_articles_per_feed = 15
html2lrf_options = ['--force-page-break-before-tag="chapter"']
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in

@@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror - UK'
__author__ = 'Dave Asbury'
# last updated 30/10/11
# last updated 26/12/11
language = 'en_GB'
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@@ -13,30 +13,22 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
oldest_article = 2
max_articles_per_feed = 30
oldest_article = 1
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
keep_only_tags = [
dict(name='div',attrs={'id' : 'body-content'})
]
remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
auto_cleanup = True
remove_tags = [
dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
dict(name='title'),
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
]
preprocess_regexps = [
(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
preprocess_regexps = [
(re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
feeds = [
@@ -53,5 +45,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
h1{ font-size:18px;}
img { display:block}
'''

@@ -6,18 +6,18 @@ Fetch echo-online.de
from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
title = u' Echo Online'
title = u'Echo Online' # 2011-12-28 AGe
description = '-Echo Online-'
publisher = 'Echo Online GmbH'
category = 'News, Germany'
__author__ = 'Armin Geller' # 2011-12-17
__author__ = 'Armin Geller' # 2011-12-28 AGe
language = 'de'
lang = 'de-DE'
encoding = 'iso-8859-1'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 2
max_articles_per_feed = 50 # 2011-12-28 AGe
no_stylesheets = True
auto_cleanup = True
remove_javascript = True
@@ -42,6 +42,5 @@ class Echo_Online(BasicNewsRecipe):
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'

recipes/fhm_uk.recipe (new file)

@@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'FHM UK'
description = 'Good News for Men'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
__author__ = 'Dave Asbury'
# last updated 27/12/11
language = 'en_GB'
oldest_article = 28
max_articles_per_feed = 12
remove_empty_feeds = True
no_stylesheets = True
#auto_cleanup = True
#articles_are_obfuscated = True
keep_only_tags = [
dict(name='h1'),
dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
dict(name='div',attrs={'id' : ['articleLeft']}),
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}),
]
feeds = [
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
]

recipes/goal.recipe (new file)

@@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
title = u'Goal'
oldest_article = 1
language = 'it'
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_after = [dict(id='article_content')]
feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
__author__ = 'faber1971'
description = 'Sports news from Italy'

@@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe
'''
Hamilton Spectator Calibre Recipe
'''
class HamiltonSpectator(BasicNewsRecipe):
title = u'Hamilton Spectator'
oldest_article = 2
max_articles_per_feed = 100
auto_cleanup = True
__author__ = u'Eric Coolman'
publisher = u'thespec.com'
description = u'Ontario Canada Newspaper'
category = u'News, Ontario, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
feeds = [
(u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
(u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
(u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
(u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
(u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
(u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
(u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
(u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
(u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
(u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
(u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
(u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
(u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
(u"What's On",u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
(u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
(u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
(u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
(u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
(u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
(u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
(u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
(u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
(u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
(u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
(u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
(u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
(u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
(u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
(u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
(u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
(u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
(u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
(u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
(u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
(u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
]

recipes/icons/rionegro.png (new binary file, 817 B)

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Almería)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Almería y el resto del mundo'
publisher = 'Ideal'
category = u'News, Politics, Spain, Almería'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/almeria/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/almeria/portada.xml' )
,(u'Local' , u'http://www.ideal.es/almeria/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/almeria/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/almeria/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/almeria/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/almeria/rss/feeds/costa.xml' )
,(u'Puerta Purchena' , u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml' )
,(u'Andalucía' , u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/almeria/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/almeria/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/almeria/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/almeria/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/almeria/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml' )
]

@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Granada)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Granada y el resto del mundo'
publisher = 'Ideal'
category = 'News, Politics, Spain, Granada'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/granada/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/granada/portada.xml' )
,(u'Local' , u'http://www.ideal.es/granada/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/granada/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/granada/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/granada/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/granada/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/granada/rss/feeds/costa.xml' )
,(u'La Carrera' , u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml' )
,(u'Puerta Real' , u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml' )
,(u'Andalucía' , u'http://www.ideal.es/granada/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/granada/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/granada/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/granada/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/granada/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/granada/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/granada/rss/feeds/contraportada.xml' )
]

recipes/ideal_jaen.recipe (new file)

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
__copyright__ = 'Josemi Liébana'
__version__ = 'v0.1'
__date__ = '5 January 2012'
'''
www.ideal.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ideal(BasicNewsRecipe):
title = u'Ideal (Edición Jaén)'
__author__ = u'Josemi Liébana'
description = u'Noticias de Jaén y el resto del mundo'
publisher = 'Ideal'
category = u'News, Politics, Spain, Jaén'
publication_type = 'Newspaper'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'title'})
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
]
remove_tags = [dict(name='ul')]
remove_attributes = ['width','height']
feeds = [
(u'Última Hora' , u'http://www.ideal.es/jaen/rss/feeds/ultima.xml' )
,(u'Portada' , u'http://www.ideal.es/jaen/portada.xml' )
,(u'Local' , u'http://www.ideal.es/jaen/rss/feeds/granada.xml' )
,(u'Deportes' , u'http://www.ideal.es/jaen/rss/feeds/deportes.xml' )
,(u'Sociedad' , u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml' )
,(u'Cultura' , u'http://www.ideal.es/jaen/rss/feeds/cultura.xml' )
,(u'Economía' , u'http://www.ideal.es/jaen/rss/feeds/economia.xml' )
,(u'Costa' , u'http://www.ideal.es/jaen/rss/feeds/costa.xml' )
,(u'Andalucía' , u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml' )
,(u'España' , u'http://www.ideal.es/jaen/rss/feeds/espana.xml' )
,(u'Mundo' , u'http://www.ideal.es/jaen/rss/feeds/internacional.xml' )
,(u'Vivir' , u'http://www.ideal.es/jaen/rss/feeds/vivir.xml' )
,(u'Opinión' , u'http://www.ideal.es/jaen/rss/feeds/opinion.xml' )
,(u'Televisión' , u'http://www.ideal.es/jaen/rss/feeds/television.xml' )
,(u'Contraportada' , u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml' )
]

@@ -1,63 +1,30 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Derry FitzGerald'
'''
iht.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class NYTimesGlobal(BasicNewsRecipe):
title = u'NY Times Global'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = False
class InternationalHeraldTribune(BasicNewsRecipe):
title = u'The International Herald Tribune'
__author__ = 'Derry FitzGerald'
language = 'en'
oldest_article = 1
max_articles_per_feed = 30
no_stylesheets = True
auto_cleanup = True
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
dict(name=['form'])]
preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL),
lambda m:'</body></html>')
]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
remove_empty_feeds = True
feeds = [
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
(u'Business', u'http://www.iht.com/rss/business.xml'),
(u'Americas', u'http://www.iht.com/rss/america.xml'),
(u'Europe', u'http://www.iht.com/rss/europe.xml'),
(u'Asia', u'http://www.iht.com/rss/asia.xml'),
(u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
(u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
(u'Technology', u'http://www.iht.com/rss/technology.xml'),
(u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
(u'Sports', u'http://www.iht.com/rss/sports.xml'),
(u'Culture', u'http://www.iht.com/rss/arts.xml'),
(u'Style and Design', u'http://www.iht.com/rss/style.xml'),
(u'Travel', u'http://www.iht.com/rss/travel.xml'),
(u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
(u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
(u'Properties', u'http://www.iht.com/rss/properties.xml')
]
temp_files = []
articles_are_obfuscated = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
html = response1.read()
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
('NYTimes',
'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
('NYTimes global',
'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
('World',
'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
('U.S.',
'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
('Business',
'http://feeds.nytimes.com/nyt/rss/Business'),
('Sports',
'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
('Technology',
'http://feeds.nytimes.com/nyt/rss/Technology'),
]

@@ -1,16 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1234144423(BasicNewsRecipe):
title = u'Indianapolis Star'
oldest_article = 5
language = 'en'
class IndianapolisStar(BasicNewsRecipe):
title = u'Indianapolis Star'
oldest_article = 10
auto_cleanup = True
language = 'en'
__author__ = 'Owen Kelly'
max_articles_per_feed = 100
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'),
(u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'),
(u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'),
(u'Politics and Government', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'),
(u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
(u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')
]
__author__ = 'Owen Kelly'
max_articles_per_feed = 100
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'),
(u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'),
(u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'),
(u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'),
(u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
(u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
def print_version(self, url):
return url + '&template=printart'
def print_version(self, url):
return url + '&template=printart'

@@ -1,79 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Attis <attis@attis.one.pl>'
__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
__version__ = 'v. 0.1'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class KopalniaWiedzy(BasicNewsRecipe):
title = u'Kopalnia Wiedzy'
publisher = u'Kopalnia Wiedzy'
description = u'Ciekawostki ze świata nauki i techniki'
encoding = 'utf-8'
__author__ = 'Attis'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
INDEX = u'http://kopalniawiedzy.pl/'
remove_javascript = True
no_stylesheets = True
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
remove_tags_after = dict(attrs={'class':'ad-square'})
keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})]
extra_css = '.topimage {margin-top: 30px}'
preprocess_regexps = [
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
(re.compile(u'<br /><br />'),
lambda match: '<br\/>')
]
feeds = [
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
]
def is_link_wanted(self, url, tag):
return tag['class'] == 'next'
def remove_beyond(self, tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
def append_page(self, soup, appendtag, position):
pager = soup.find('a',attrs={'class':'next'})
if pager:
nexturl = self.INDEX + pager['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'id':'articleContent'})
tag = texttag.find(attrs={'class':'pages'})
self.remove_beyond(tag, 'nextSibling')
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
title = u'Kopalnia Wiedzy'
publisher = u'Kopalnia Wiedzy'
description = u'Ciekawostki ze świata nauki i techniki'
encoding = 'utf-8'
__author__ = 'Attis & Tomasz Długosz'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
INDEX = u'http://kopalniawiedzy.pl/'
remove_javascript = True
no_stylesheets = True
appendtag.insert(position,texttag)
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
remove_tags_after = dict(attrs={'class':'ad-square'})
keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})]
extra_css = '.topimage {margin-top: 30px}'
preprocess_regexps = [
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
(re.compile(u'<br /><br />'),
lambda match: '<br\/>')
]
feeds = [
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
]
def is_link_wanted(self, url, tag):
return tag['class'] == 'next'
def remove_beyond(self, tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
def append_page(self, soup, appendtag, position):
pager = soup.find('a',attrs={'class':'next'})
if pager:
nexturl = self.INDEX + pager['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'id':'articleContent'})
tag = texttag.find(attrs={'class':'pages'})
self.remove_beyond(tag, 'nextSibling')
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
for item in soup.findAll('div',attrs={'class':'pages'}):
item.extract()
for item in soup.findAll('p', attrs={'class':'wykop'}):
item.extract()
return soup
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
for item in soup.findAll('div',attrs={'class':'pages'}):
item.extract()
for item in soup.findAll('p', attrs={'class':'wykop'}):
item.extract()
return soup

@@ -1,10 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.la-razon.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaRazon_Bol(BasicNewsRecipe):
@@ -16,19 +15,17 @@ class LaRazon_Bol(BasicNewsRecipe):
oldest_article = 1
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
encoding = 'utf8'
use_embedded_content = False
language = 'es_BO'
publication_type = 'newspaper'
delay = 1
remove_empty_feeds = True
cover_url = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg')
masthead_url = 'http://www.la-razon.com/imagenes/logo.jpg'
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em}
.noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif}
.lead{font-weight: bold; font-size: 0.8em}
"""
masthead_url = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png'
extra_css = """ body{font-family: Georgia,"Times New Roman",Times,serif}
img{margin-bottom: 0.4em; display: block}
.meta{font-size: small; font-family: Arial,Helvetica,sans-serif}
"""
INDEX = 'http://www.la-razon.com/'
conversion_options = {
'comment' : description
@@ -37,28 +34,37 @@ class LaRazon_Bol(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})]
remove_tags = [dict(name=['meta','link','form','iframe','embed','object'])]
keep_only_tags = [dict(name='div', attrs={'class':['pg-hd', 'pg-bd']})]
remove_tags = [
dict(name=['meta','link','form','iframe','embed','object'])
,dict(name='div', attrs={'class':'bd'})
]
remove_attributes = ['width','height']
feeds = [
(u'Editorial' , u'http://www.la-razon.com/rss_editorial.php' )
,(u'Opinión' , u'http://www.la-razon.com/rss_opinion.php' )
,(u'Nacional' , u'http://www.la-razon.com/rss_nacional.php' )
,(u'Economia' , u'http://www.la-razon.com/rss_economia.php' )
,(u'Ciudades' , u'http://www.la-razon.com/rss_ciudades.php' )
,(u'Sociedad' , u'http://www.la-razon.com/rss_sociedad.php' )
,(u'Mundo' , u'http://www.la-razon.com/rss_sociedad.php' )
,(u'La Revista' , u'http://www.la-razon.com/rss_larevista.php' )
,(u'Sociales' , u'http://www.la-razon.com/rss_sociales.php' )
,(u'Mia' , u'http://www.la-razon.com/rss_mia.php' )
,(u'Marcas' , u'http://www.la-razon.com/rss_marcas.php' )
,(u'Escape' , u'http://www.la-razon.com/rss_escape.php' )
,(u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php')
,(u'Tendencias' , u'http://www.la-razon.com/rss_tendencias.php')
(u'Editorial' , u'http://www.la-razon.com/rss/opinion/editorial/' )
,(u'Nacional' , u'http://www.la-razon.com/rss/nacional/' )
,(u'Economia' , u'http://www.la-razon.com/rss/economia/' )
,(u'Ciudades' , u'http://www.la-razon.com/rss/ciudades/' )
,(u'Sociedad' , u'http://www.la-razon.com/rss/sociedad/' )
,(u'Mundo' , u'http://www.la-razon.com/rss/mundo/' )
,(u'La Revista' , u'http://www.la-razon.com/rss/la_revista/' )
,(u'Sociales' , u'http://www.la-razon.com/rss/sociales/' )
,(u'Mia' , u'http://www.la-razon.com/rss/suplementos/mia/' )
,(u'Marcas' , u'http://www.la-razon.com/rss/marcas/' )
,(u'Escape' , u'http://www.la-razon.com/rss/suplementos/escape/' )
,(u'El Financiero' , u'http://www.la-razon.com/rss/suplementos/financiero/')
,(u'Tendencias' , u'http://www.la-razon.com/rss/suplementos/tendencias/')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX)
lightbox = soup.find('div', attrs = {'class' : 'lightbox lightbox-frontpage'})
return lightbox.img['src']

@@ -41,7 +41,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'articulo'})]
remove_tags = [
dict(name=['meta','link','form','iframe','embed','object','hr'])
,dict(attrs={'class':['caja_fonts sin_border_bot','pub']})
,dict(attrs={'class':['caja_fonts sin_border_bot','pub','twitter-share-button']})
]
remove_attributes = ['width','height']

@@ -7,6 +7,7 @@ lwn.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
class WeeklyLWN(BasicNewsRecipe):
@@ -14,8 +15,11 @@ class WeeklyLWN(BasicNewsRecipe):
description = 'Weekly summary of what has happened in the free software world.'
__author__ = 'Davide Cavalca'
language = 'en'
site_url = 'http://lwn.net'
cover_url = 'http://lwn.net/images/lcorner.png'
extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
cover_url = site_url + '/images/lcorner.png'
#masthead_url = 'http://lwn.net/images/lcorner.png'
publication_type = 'magazine'
@@ -43,32 +47,51 @@ class WeeklyLWN(BasicNewsRecipe):
br.submit()
return br
def print_version(self, url):
# Strip off anchor
url = url.split('#')[0]
# Prepend site_url
if url[0:len(self.site_url)] != self.site_url:
url = self.site_url + url
# Append printable URL parameter
print_param = '?format=printable'
if url[-len(print_param):] != print_param:
url += print_param
#import sys
#print >>sys.stderr, "*** print_version(url):", url
return url
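A worked example of the normalization above, assuming a hypothetical article number:

url = '/Articles/472852/#Comments'
url = url.split('#')[0]              # strip anchor
url = 'http://lwn.net' + url         # prepend site_url
url = url + '?format=printable'      # append printable parameter
assert url == 'http://lwn.net/Articles/472852/?format=printable'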
def parse_index(self):
if self.username is not None and self.password is not None:
index_url = 'http://lwn.net/current/bigpage?format=printable'
index_url = self.print_version('/current/bigpage')
else:
index_url = 'http://lwn.net/free/bigpage?format=printable'
index_url = self.print_version('/free/bigpage')
soup = self.index_to_soup(index_url)
body = soup.body
articles = {}
ans = []
old_section = None
url_re = re.compile('^/Articles/')
while True:
tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
tag_title = body.findNext(attrs={'class':'SummaryHL'})
if tag_title == None:
break
tag_section = tag_title.findPrevious(name='p', attrs={'class':'Cat1HL'})
tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'})
if tag_section == None:
section = 'Front Page'
else:
section = tag_section.string
tag_section2 = tag_title.findPrevious(name='p', attrs={'class':'Cat2HL'})
tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'})
if tag_section2 != None:
if tag_section2.findPrevious(name='p', attrs={'class':'Cat1HL'}) == tag_section:
if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
section = "%s: %s" %(section, tag_section2.string)
if section not in articles.keys():
@@ -82,7 +105,7 @@ class WeeklyLWN(BasicNewsRecipe):
if tag_url == None:
break
body = tag_url
if tag_url.string == None:
if tag_url.string == None:
continue
elif tag_url.string == 'Full Story':
break
@@ -93,10 +116,11 @@ class WeeklyLWN(BasicNewsRecipe):
if tag_url == None:
break
article = dict(
title=self.tag_to_string(tag_title),
url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
url=tag_url['href'],
description='', content='', date='')
articles[section].append(article)

recipes/macity.recipe (new file)

@@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325766771(BasicNewsRecipe):
title = u'Macity'
language = 'it'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self, article)
if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
# feedsportal hides the real URL in the next-to-last path component,
# with characters escaped as '0'+letter codes; the loop maps them back
a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L' , 'N' , 'S' ]
b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
for i in range(0,len(a)):
link=link.replace('0'+a[-i],b[-i])
return link
feeds = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')]
__author__ = 'faber1971'
description = 'Apple and hi-tech news'
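A standalone sketch of the decoding loop above, run on a hypothetical encoded fragment:

a = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L', 'N', 'S']
b = ['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
link = '0L0Smacitynet0Bit'  # hypothetical feedsportal-encoded path component
for i in range(0, len(a)):
    # note a[-0] is a[0], so the first pass maps '0A' -> '0'
    link = link.replace('0' + a[-i], b[-i])
print(link)  # http://www.macitynet.it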

recipes/money_pl.recipe (new file)

@@ -0,0 +1,76 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class FocusRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
language = 'pl'
version = 1
title = u'Money.pl'
category = u'News'
description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'
remove_empty_feeds= True
no_stylesheets=True
oldest_article = 1
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True
remove_javascript = True
simultaneous_downloads = 2
r = re.compile('.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'}))
remove_tags = [dict(name='ul', attrs={'class':'socialStuff'})]
extra_css = '''
body {font-family: Arial,Helvetica,sans-serif ;}
h1{text-align: left;}
h2{font-size: medium; font-weight: bold;}
p.lead {font-weight: bold; text-align: left;}
.authordate {font-size: small; color: #696969;}
.fot{font-size: x-small; color: #666666;}
'''
feeds = [
('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
]
def print_version(self, url):
if url.count ('money.pl.feedsportal.com'):
# Rebuild the mobile article URL from the feedsportal-encoded path
# ('0C' -> '/', '0E' -> '-', '0P' -> ';', '0H' -> ',', '0B' -> '.').
u = url.find('0Cartykul0C')
u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:]
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace ('0E','-')
u = u.replace ('0P',';')
u = u.replace ('0H',',')
u = u.replace ('0B','.')
u = u.replace (',0,',',-1,')
u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '')
else:
u = url.replace('/nc/1','/do-druku/1')
return u

@@ -13,7 +13,7 @@ class Moscowtimes(BasicNewsRecipe):
category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg'
publisher = 'The Moscow Times'
language = 'en'
oldest_article = 2
oldest_article = 4
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
@@ -50,7 +50,7 @@ class Moscowtimes(BasicNewsRecipe):
dict(name='div', attrs={'class':['photo_nav','phototext']})
,dict(name=['iframe','meta','base','link','embed','object'])
]
def preprocess_html(self, soup):
for lnk in soup.findAll('a'):
if lnk.string is not None:
@@ -58,13 +58,13 @@ class Moscowtimes(BasicNewsRecipe):
lnk.replaceWith(ind)
return soup
def print_version(self, url):
def print_version(self, url):
return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/')
def get_cover_url(self):
cover_url = None
href = 'http://www.themoscowtimes.com/pdf/'
soup = self.index_to_soup(href)
soup = self.index_to_soup(href)
div = soup.find('div',attrs={'class':'left'})
if div:
a = div.find('a')

@@ -325,7 +325,8 @@ class NYTimes(BasicNewsRecipe):
'''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw):
f = self.browser.open(url_or_raw)
br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw)
_raw = f.read()
f.close()
if not _raw:

@@ -364,7 +364,8 @@ class NYTimes(BasicNewsRecipe):
'''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw):
f = self.browser.open(url_or_raw)
br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw)
_raw = f.read()
f.close()
if not _raw:

recipes/opinion_bo.recipe (new file)

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, Piet van Oostrum <piet@vanoostrum.org>'
'''
www.opinion.com.bo
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Opinion_Bol(BasicNewsRecipe):
title = u'Opinión - Bolivia'
__author__ = 'Piet van Oostrum'
description = u'Opinión diario de circulación nacional, Cochabamba, Bolivia'
publisher = 'Coboce Ltda - Editora Opinión'
category = 'news, politics, Bolivia'
version = 1
oldest_article = 1
max_articles_per_feed = 20
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'es_BO'
publication_type = 'newspaper'
delay = 1
remove_empty_feeds = True
cover_url = strftime('http://www.opinion.com.bo/opinion/articulos/%Y/%m%d/fotos/portada_650.jpg')
masthead_url = 'http://opinion.com.bo/opinion/articulos/imagenes/logo_opinion.gif'
extra_css = """body{font-family: Helvetica,Arial,sans-serif}
.seccion_encabezado_nota_inte{font-size: 1.1em;
font-weight: bold;}
.autor_nota_inte{color: #999999; font-size: 0.8em;
margin-bottom: 0.5em; text-align: right;}
.pie{font-size: 0.8em;}"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':'columna_izq_nota_intererior'})]
remove_tags = [dict(name=['meta','link','form','iframe','embed','object','style']),
dict(name='div', attrs={'class':'ocultar'})]
remove_attributes = ['width','height']
feeds = [
(u'El País' , u'http://www.opinion.com.bo/opinion/rss/el_pais_rss.xml' )
,(u'Cochabamba' , u'http://www.opinion.com.bo/opinion/rss/cochabamba_rss.xml' )
,(u'Economía' , u'http://www.opinion.com.bo/opinion/rss/economia_rss.xml' )
,(u'Cultura' , u'http://www.opinion.com.bo/opinion/rss/cultura_rss.xml' )
,(u'Mundo' , u'http://www.opinion.com.bo/opinion/rss/mundo_rss.xml' )
,(u'Ciencia y Tecnología', u'http://www.opinion.com.bo/opinion/rss/ciencia_tecnologia_rss.xml' )
,(u'Policial' , u'http://www.opinion.com.bo/opinion/rss/policial_rss.xml' )
,(u'Editorial' , u'http://www.opinion.com.bo/opinion/rss/editorial_rss.xml' )
,(u'Subeditorial' , u'http://www.opinion.com.bo/opinion/rss/subeditorial_rss.xml' )
,(u'Opinión' , u'http://www.opinion.com.bo/opinion/rss/opinion_rss.xml' )
,(u'Deportes' , u'http://www.opinion.com.bo/opinion/rss/deportes_rss.xml')
,(u' Vida de hoy' , u'http://www.opinion.com.bo/opinion/rss/vidadehoy_rss.xml' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
# Keep only today's articles
# (maybe should take the timezone into account)
today = strftime('/%Y/%m%d/')
def get_article_url(self, article):
link = article.link
if self.today in link:
return link

@@ -0,0 +1,263 @@
import re
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
# BeautifulSoup makes parsing the downloaded pages easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
__author__ = 'TMcN'
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
auto_cleanup = True
encoding = 'utf8'
needs_subscription = True
no_stylesheets = True
oldest_article = 20
remove_javascript = True
remove_tags = [dict(name='img', attrs={})]
# Don't go down
recursions = 0
max_articles_per_feed = 2000
language = 'en'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
br.select_form(name='login')
br['formEmailField'] = self.username
br['formPasswordField'] = self.password
br.submit()
return br
def extractPrintURL(self, baseURL, pageURL, printString):
tagURL = pageURL
printText = None # guard: if soup is falsy, printText would otherwise be unbound below
soup = self.index_to_soup(pageURL)
if soup :
printText = soup.find('a', text=printString)
else :
print("Failed to find Print string "+printString+ " in "+pageURL)
if printText:
tag = printText.parent
tagURL = baseURL+tag['href']
return tagURL
def stripBadChars(self, inString) :
return inString.replace("\'", "")
# returns a qualifying article list
def parseNoSpinArchives(self, baseURL, soupURL, debugMessages):
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}):
a = div.find('a', href=True)
if not a:
continue
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', a['href'])
# Get print version
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("No Spin Archive Title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
return articleList
def parseTVArchives(self, baseURL, soupURL, debugMessages):
# TV Archives page has some Ajax, so look for the static only.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseTVArchives")
for div in soup.findAll('a', {'class':['showLinks','homeLinks']}):
a = div
url = baseURL
url +=a['href']
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
title = self.stripBadChars(title)
if debugMessages :
print("TV Archive "+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving TV Parse ")
return articleList
# Get Daily Briefing Archives
def parseDailyBriefs(self, baseURL, soupURL, debugMessages) :
print("Starting daily briefs")
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}):
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', div['href'])
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = div.contents[0]
if debugMessages :
print("Daily Brief - title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Leaving daily briefs")
return articleList
# Get the weekly Stratfor intelligence report
def parseStratfor(self, baseURL, soupURL, debugMessages):
# http://www.billoreilly.com/blog?categoryID=5
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseStratfor")
a = soup.find('a', {'class':['blogLinks']})
url = baseURL
url +=a['href']
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("url: "+url)
print("title:"+title)
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
stratBody = stratSoup.find('td', {'class':['blogBody']})
if debugMessages :
print("Strat content title:"+title)
print("Strat body: "+ stratBody.contents[0])
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
def parseTalkingPoints(self, baseURL, soupURL, debugMessages) :
# Look for blogDate. That's got the date... Then the next blogBody has the title. and then an anchor with class "homeBlogReadMore bold" has the URL.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("Starting Talking Points")
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
# tableParent = topDate.parent.parent
myTable = topDate.findParents('table')[0]
upOneTable = myTable.findParents('table')[0]
upTwo = upOneTable.findParents('table')[0]
# Now navigate rows of upTwo
if debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Exiting parseTalkingPoints\n")
return articleList
def parseCurrentColumn(self, baseURL, soupURL, debugMessages) :
# Only needed to get the column title. Otherwise it's all good already; there's only one column
articleList = []
soup = self.index_to_soup(soupURL)
titleSpan = soup.find('span', {'class':['defaultHeader']})
title = titleSpan.contents[0]
# Get Print URL since it's available
printURL = self.extractPrintURL(baseURL, soupURL, "Print This Article")
if printURL:
print("Found print URL")
url = printURL
if debugMessages :
print("url: "+url)
print("title:"+title)
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
# 'title' : article title,
# 'url' : URL of print version,
# 'date' : The publication date of the article as a string,
# 'description' : A summary of the article
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
# }
# this is used instead of BasicNewsRecipe.parse_feeds().
def parse_index(self):
# Parse the page into Python Soup
debugMessages = True
baseURL = "https://www.billoreilly.com"
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip()
# [] is list, {} is empty mapping.
articleList = []
ans = []
showList = self.parseTVArchives(baseURL, 'https://www.billoreilly.com/show?action=tvShowArchive', debugMessages)
articleList = self.parseNoSpinArchives(baseURL, 'https://www.billoreilly.com/blog?categoryID=7', debugMessages)
stratList = self.parseStratfor(baseURL, 'http://www.billoreilly.com/blog?categoryID=5', debugMessages)
dailyBriefs = self.parseDailyBriefs(baseURL, 'http://www.billoreilly.com/blog?categoryID=11', debugMessages)
talkingPoints = self.parseTalkingPoints(baseURL, 'https://www.billoreilly.com/blog?categoryID=12', debugMessages)
currentColumn = self.parseCurrentColumn(baseURL, 'https://www.billoreilly.com/currentcolumn', debugMessages)
# Below, { x:y, a:b } creates a dictionary. We return a tuple of a title and list of dict...
# Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,).
# Each section works individually, but only the first two sections show up in the TOC,
# whichever pair is used (Talking Points/No Spin News, TV Shows/Stratfor Weekly,
# or Daily Briefing/Current Column). No idea why.
ans = [("Talking Points Memos", talkingPoints),("No Spin News", articleList),("TV Shows", showList),("Stratfor Weekly",stratList), ("Daily Briefing", dailyBriefs),("Current Column", currentColumn)]
if debugMessages :
print ans
return ans
def preprocess_html(self, soup):
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('https://www.billoreilly.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))

@@ -33,3 +33,6 @@ class BasicUserRecipe1314970845(BasicNewsRecipe):
(u'Obituaries', u'http://www.philly.com/inquirer_obituaries.rss')
]
def print_version(self, url):
return url + '?viewAll=y'

recipes/rionegro.recipe (new file)

@@ -0,0 +1,65 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.rionegro.com.ar
'''
from calibre.web.feeds.news import BasicNewsRecipe
class RioNegro(BasicNewsRecipe):
title = 'Diario Rio Negro'
__author__ = 'Darko Miletic'
description = 'Noticias desde la Patagonia Argentina y el resto del mundo'
publisher = 'Editorial Rio Negro SA.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.rionegro.com.ar/diario/imagenes/logorn.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{display:block}
h1 {font-size: 0.89em; color: red}
h2 {font-family: Georgia,"Times New Roman",Times,serif; font-size: 1.8em}
h3 {font-family: Georgia,"Times New Roman",Times,serif; border-bottom: 2px solid gray}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
dict(name=['meta','link','iframe','object','embed'])
,dict(name='div', attrs={'class':'logo'})
]
keep_only_tags=[dict(attrs={'class':'nota'})]
remove_attributes=['lang']
feeds = [
(u'Argentina' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9532')
,(u'El Mundo' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9533')
,(u'Carta de lectores', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9538')
,(u'Columnistas' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9539')
,(u'Domingo a Domingo', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9541')
,(u'Editorial' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9542')
,(u'Deportes' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9522')
,(u'Espectaculos' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9523')
,(u'Sociedad' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9574')
,(u'Policiales' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9525')
,(u'Municipales' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9862')
,(u'Region' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9701')
]
def print_version(self, url):
idart_raw = url.rpartition('idart=')[2]
idart = idart_raw.rpartition('&')[0]
return 'http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=' + idart + '&tipo=2'
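# Worked example (hypothetical article URL):
# http://www.rionegro.com.ar/diario/nota.aspx?idart=789622&tipo=2
# rpartition('idart=')[2] -> '789622&tipo=2'; rpartition('&')[0] -> '789622'
# -> http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=789622&tipo=2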

View File

@ -20,6 +20,8 @@ class SeattleTimes(BasicNewsRecipe):
use_embedded_content = False
encoding = 'cp1252'
language = 'en'
auto_cleanup = True
auto_cleanup_keep = '//div[@id="PhotoContainer"]'
feeds = [
(u'Top Stories',
@ -69,24 +71,4 @@ class SeattleTimes(BasicNewsRecipe):
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
]
keep_only_tags = [dict(id='content')]
remove_tags = [
dict(name=['object','link','script']),
{'class':['permission', 'note', 'bottomtools',
'homedelivery']},
dict(id=["rightcolumn", 'footer', 'adbottom']),
]
def print_version(self, url):
return url
start_url, sep, rest_url = url.rpartition('_')
rurl, rsep, article_id = start_url.rpartition('/')
return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup

491
recipes/singtaohk.recipe Normal file
View File

@ -0,0 +1,491 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Eddie Lau'
# data source: normal, mobile
__Source__ = 'mobile'
# Set it to False if you don't want a periodical to be generated (Default: True)
__MakePeriodical__ = True
# Set it to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
'''
Change Log:
2011/12/29 -- first version done
TODO:
* use alternative source at http://m.singtao.com/index.php
'''
from calibre.utils.date import now as nowf
import os, datetime, re
from datetime import date
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class STHKRecipe(BasicNewsRecipe):
if __UseChineseTitle__:
title = u'\u661f\u5cf6\u65e5\u5831 (\u9999\u6e2f)'
else:
title = 'Sing Tao Daily - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
if __Source__ == 'normal':
keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
else:
keep_only_tags = [dict(name='td', attrs={'class':['stmobheadline']}),
dict(name='img', attrs={'width':['146']}),
dict(name='td', attrs={'class':['bodytextg']}),
]
if __KeepImages__:
remove_tags = [dict(name='hr')]
else:
remove_tags = [dict(name='hr'), dict(name='img')]
remove_attributes = ['align']
preprocess_regexps = [
(re.compile(r'<font class="bodytext">', re.DOTALL|re.IGNORECASE),
lambda match: '<br><br><font class="bodytext">'),
]
oldest_article = 1
max_articles_per_feed = 200
__author__ = 'Eddie Lau'
publisher = 'Sing Tao Ltd.'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
auto_cleanup = False
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at HKT 4.00am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.0/24)
return dt_local
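# Worked example: dt_utc of 2011-12-28 22:00 maps to dt_local 2011-12-29 02:00,
# so get_fetchdate() flips to the 2011-12-29 edition at 20:00 UTC, i.e. HKT 4.00am,
# when all news for the day are available.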
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
#cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
base = 2660
todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
diff = todaydate - date(2011, 12, 29)
base = base + int(diff.total_seconds()/(3600*24))
cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
br = BasicNewsRecipe.get_browser(self) # get_browser is an instance method, so pass self
try:
br.open(cover)
except:
cover = 'http://singtao.com/images/stlogo.gif'
return cover
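# Worked example (assuming the cover numbering stays one per day): on 2012-01-06
# the offset from 2011-12-29 is 8 days, so the URL tried first is
# http://singtao.com/media/a/a(2668).jpg, with the logo as fallback.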
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
if __Source__ == 'normal':
# single-item section
for title, url in [(u'\u793e\u8ad6 Editorial', 'http://singtao.com/yesterday/jou/j_index.html')]:
article = self.parse_singleitem_section(url)
if article:
feeds.append((title, article))
# multiple items
# for title, url in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://singtao.com/yesterday/loc/a_index.html'),
# (u'\u8ca1\u7d93 Finance', 'http://singtao.com/yesterday/fin/d_index.html'),
# (u'\u5730\u7522 Properties', 'http://singtao.com/yesterday/pro/h_index.html'),
# (u'\u6559\u80b2 Education', 'http://singtao.com/yesterday/edu/g_index.asp'),
# (u'\u5a1b\u6a02 Entertainment', 'http://singtao.com/yesterday/ent/f_index.html'),
# (u'\u9ad4\u80b2 Sports', 'http://singtao.com/yesterday/spo/c_index.html'),
# (u'\u99ac\u7d93 Horse Racing', 'http://singtao.com/yesterday/rac/n_index.html')
# ]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special: supplement
# for title, url, baseurl in [(u'\u526f\u520a Supplements', 'http://singtao.com/yesterday/sup/m_index.html', '/')]:
# articles = self.parse_section_withouttext(url, baseurl)
# if articles:
# feeds.append((title, articles))
# multiple-item sections
# for title, url in [(u'\u570b\u969b World', 'http://singtao.com/yesterday/int/b_index.html'),
# (u'\u4e2d\u570b China', 'http://singtao.com/yesterday/chi/e_index.html')
# ]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
for title, url, baseurl in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://singtao.com/yesterday/loc/a_index.html', '/'),
(u'\u8ca1\u7d93 Finance', 'http://singtao.com/yesterday/fin/d_index.html', '/'),
(u'\u5730\u7522 Properties', 'http://singtao.com/yesterday/pro/h_index.html', '/'),
(u'\u6559\u80b2 Education', 'http://singtao.com/yesterday/edu/g_index.asp', '/'),
(u'\u5a1b\u6a02 Entertainment', 'http://singtao.com/yesterday/ent/f_index.html', '/'),
(u'\u9ad4\u80b2 Sports', 'http://singtao.com/yesterday/spo/c_index.html', '/'),
(u'\u99ac\u7d93 Horse Racing', 'http://singtao.com/yesterday/rac/n_index.html', '/'),
(u'\u526f\u520a Supplements', 'http://singtao.com/yesterday/sup/m_index.html', '/'),
(u'\u570b\u969b World', 'http://singtao.com/yesterday/int/b_index.html', '/'),
(u'\u4e2d\u570b China', 'http://singtao.com/yesterday/chi/e_index.html', '/')]:
articles = self.parse_section_withouttext(url, baseurl)
if articles:
feeds.append((title, articles))
else: # use mobile
# single-item section
for title, url in [(u'\u793e\u8ad6 Editorial', 'http://m.singtao.com/showContent.php?main=paper&sub=0&title=0')]:
article = self.parse_singleitem_section_m(url)
if article:
feeds.append((title, article))
# multiple-item section
for title, url, baseurl in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://m.singtao.com/showTitle.php?main=paper&sub=1', 'http://m.singtao.com/'),
(u'\u8ca1\u7d93 Finance', 'http://m.singtao.com/showTitle.php?main=paper&sub=2', 'http://m.singtao.com/'),
(u'\u5730\u7522 Properties', 'http://m.singtao.com/showTitle.php?main=paper&sub=3', 'http://m.singtao.com/'),
(u'\u6559\u80b2 Education', 'http://m.singtao.com/showTitle.php?main=paper&sub=4', 'http://m.singtao.com/'),
(u'\u5a1b\u6a02 Entertainment', 'http://m.singtao.com/showTitle.php?main=paper&sub=5', 'http://m.singtao.com/'),
(u'\u99ac\u7d93 Horse Racing', 'http://m.singtao.com/showTitle.php?main=paper&sub=6', 'http://m.singtao.com/'),
(u'\u9ad4\u80b2 Sports', 'http://m.singtao.com/showTitle.php?main=paper&sub=7', 'http://m.singtao.com/'),
(u'\u526f\u520a Supplements', 'http://m.singtao.com/showTitle.php?main=paper&sub=8', 'http://m.singtao.com/'),
(u'\u570b\u969b World', 'http://m.singtao.com/showTitle.php?main=paper&sub=9', 'http://m.singtao.com/'),
(u'\u4e2d\u570b China', 'http://m.singtao.com/showTitle.php?main=paper&sub=10', 'http://m.singtao.com/')]:
articles = self.parse_multiitem_section_m(url, baseurl)
if articles:
feeds.append((title, articles))
return feeds
def parse_singleitem_section(self, url):
current_articles = []
current_articles.append({'title': '', 'url': url, 'description': '', 'date': ''})
return current_articles
def parse_singleitem_section_m(self, url):
current_articles = []
current_articles.append({'title': '', 'url': url, 'description': '', 'date': ''})
return current_articles
def parse_section(self, url):
soup = self.index_to_soup(url)
# find <table width=436 border=0 cellspacing=0 align=center cellpadding=0> tag
tables = soup.findAll(name={'table'}, attrs={'width': ['436']})
current_articles_all = []
for table in tables:
divs = table.findAll(name={'a'})
current_articles = []
included_urls = []
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = url + '/../' + urlstr
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles_all.extend(current_articles)
return current_articles_all
def parse_section_withouttext(self, url, baseurl):
soup = self.index_to_soup(url)
# find all a tags
links = soup.findAll(name={'a'})
linksexcluded = soup.findAll(name={'a'}, attrs={'class':'secondhead'})
for elink in linksexcluded:
links.remove(elink)
linksexcluded = soup.findAll(name={'a'}, attrs={'class':'second02'})
for elink in linksexcluded:
links.remove(elink)
current_articles_all = []
included_urls = []
for link in links:
title = self.tag_to_string(link)
if len(title.strip()) > 0:
urlstr = link.get('href', False)
if urlstr.rfind(baseurl) == -1 and urlstr.rfind('mailto:') == -1:
urlstr = url + '/../' + urlstr
if urlstr not in included_urls:
current_articles_all.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
return current_articles_all
def parse_multiitem_section_m(self, url, baseurl):
soup = self.index_to_soup(url)
# find the a tags inside the spans of class 'urlurl'
links = soup.findAll(name={'span'}, attrs={'class':'urlurl'})
current_articles_all = []
included_urls = []
for linkraw in links:
linkclean = linkraw.findAll(name={'a'}) # search within each span, not the whole soup
for link in linkclean:
title = self.tag_to_string(link)
if len(title.strip()) > 0:
urlstr = link.get('href', False)
urlstr = baseurl + urlstr
if urlstr not in included_urls:
current_articles_all.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
return current_articles_all
def populate_article_metadata(self, article, soup, first):
if __Source__ == 'normal':
# get title if not fetched in parse_section() function
if article.title == '' or len(article.title.strip()) == 0:
articletitle = soup.findAll('td',attrs={'class':'bodyhead'})
if articletitle:
articletitlemod = articletitle[0].find('font')
if articletitlemod:
article.title = articletitlemod.string.strip()
else:
article.title = articletitle[0].string.strip()
else:
# use the title in the text in any case
articletitle = soup.findAll('td', attrs={'class':'stmobheadline'})
if articletitle:
articletitle[0].br.extract()
article.title = articletitle[0].contents[0]
# get thumbnail image
if __IncludeThumbnails__ and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
if __Source__ == 'normal':
articlebodies = soup.findAll('font',attrs={'class':'bodytext'})
else:
articlebodies = soup.findAll('div', attrs={'class':'hkadj'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
if len(summary_candidate) > 0:
summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
if __Source__ == 'normal':
articlebodies = soup.findAll('font',attrs={'class':'bodytext'})
else:
articlebodies = soup.findAll('div', attrs={'class':'hkadj'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# overrides the version in calibre 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use a customized date instead of the current time
if not __MakePeriodical__ or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be later than 12:00 noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else ('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -33,17 +33,12 @@ class TagesspiegelRSS(BasicNewsRecipe):
no_javascript = True
remove_empty_feeds = True
encoding = 'utf-8'
remove_tags = [{'class':'hcf-header'}]
keep_only_tags = dict(name='div', attrs={'class':["hcf-article"]})
remove_tags = [
dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),dict(name='button'),
dict(name='div', attrs={'class':["hcf-jump-to-comments","hcf-clear","hcf-magnify hcf-media-control",
"hcf-socials-widgets hcf-socials-top","hcf-socials-widgets hcf-socials-bottom"] }),
dict(name='span', attrs={'class':["hcf-mainsearch",] }),
dict(name='ul', attrs={'class':["hcf-tools"]}),
dict(name='ul', attrs={'class': re.compile('hcf-services')})
]
def print_version(self, url):
url = url.split('/')
url[-1] = 'v_print,%s?p='%url[-1]
return '/'.join(url)
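# Worked example (hypothetical article URL):
# http://www.tagesspiegel.de/berlin/some-story/6160022.html
# becomes http://www.tagesspiegel.de/berlin/some-story/v_print,6160022.html?p=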
def parse_index(self):
soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
'''
Tillsonburg/Norfolk County newspapers Calibre Recipe
'''
class TillsonburgNorfolkCounty(BasicNewsRecipe):
title = u'Tillsonburg/Norfolk County'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
__author__ = u'Eric Coolman'
publisher = u'canoe.ca'
description = u'Norfolk County and Tillsonburg, Ontario Canada Newspapers'
category = u'News, Ontario, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
feeds = [
(u'Simcoe Reformer', u'http://www.simcoereformer.ca/rss/'),
(u'Delhi News-Record', u'http://www.delhinewsrecord.com/rss/'),
(u'Tillsonburg News', u'http://www.tillsonburgnews.com/rss/')
]

View File

@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class USAToday(BasicNewsRecipe):
title = 'USA Today'
__author__ = 'calibre'
__author__ = 'Kovid Goyal'
description = 'newspaper'
encoding = 'utf-8'
publisher = 'usatoday.com'
@ -47,32 +47,7 @@ class USAToday(BasicNewsRecipe):
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
]
keep_only_tags = [dict(attrs={'class':'story'})]
remove_tags = [
dict(attrs={'class':[
'share',
'reprints',
'inline-h3',
'info-extras rounded',
'inset',
'ppy-outer',
'ppy-caption',
'comments',
'jump',
'pagetools',
'post-attributes',
'tags',
'bottom-tools',
'sponsoredlinks',
'corrections'
]}),
dict(name='ul', attrs={'class':'inside-copy'}),
dict(id=['pluck']),
dict(id=['updated']),
dict(id=['post-date-updated'])
]
auto_cleanup = True
def get_masthead_url(self):
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'

12
recipes/wired_it.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325758162(BasicNewsRecipe):
title = u'Wired'
language = 'it'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_after = [dict(name='div', attrs={'class':'article_content'})]
feeds = [(u'Wired', u'http://www.wired.it/rss.xml')]
__author__ = 'faber1971'
description = 'An American magazine that reports on how new technology affects culture, the economy, and politics'

View File

@ -14,7 +14,7 @@
.button a, .button:visited a {
padding: 0.5em;
font-size: 1.25em;
font-size: larger;
border: 1px solid black;
text-color: black;
text-decoration: none;

View File

@ -291,7 +291,7 @@ auto_connect_to_folder = ''
# how the value and category are combined together to make the collection name.
# The only two fields available are {category} and {value}. The {value} field is
# never empty. The {category} field can be empty. The default is to put the
# value first, then the category enclosed in parentheses, it is isn't empty:
# value first, then the category enclosed in parentheses, if it isn't empty:
# '{value} {category:|(|)}'
# Examples: The first three examples assume that the second tweak
# has not been changed.
@ -471,3 +471,14 @@ unified_title_toolbar_on_osx = False
# this to False you can prevent calibre from saving the original file.
save_original_format = True
#: Number of recently viewed books to show
# Right-clicking the View button shows a list of recently viewed books. Control
# how many are shown here.
gui_view_history_size = 15
#: When using the 'Tweak Book' action, which format to prefer
# When tweaking a book that has multiple formats, calibre picks one
# automatically. By default EPUB is preferred to HTMLZ. If you would like to
# prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
tweak_book_prefer = 'epub'

View File

@ -4,10 +4,9 @@
* License: GNU GPL v3
*/
var reference_old_bgcol = "transparent";
var reference_prefix = "1.";
var reference_last_highlighted_para = null;
function show_reference_panel(ref) {
panel = $("#calibre_reference_panel");
@ -24,6 +23,7 @@ function toggle_reference(e) {
p = $(this);
if (e.type == "mouseenter") {
reference_old_bgcol = p.css("background-color");
reference_last_highlighted_para = p;
p.css({backgroundColor:"beige"});
var i = 0;
var paras = $("p");
@ -34,6 +34,7 @@ function toggle_reference(e) {
} else {
p.css({backgroundColor:reference_old_bgcol});
panel = $("#calibre_reference_panel").hide();
reference_last_highlighted_para = null;
}
return false;
}
@ -44,6 +45,10 @@ function enter_reference_mode() {
function leave_reference_mode() {
$("p").unbind("mouseenter mouseleave", toggle_reference);
panel = $("#calibre_reference_panel");
if (panel.length > 0) panel.hide();
if (reference_last_highlighted_para != null)
reference_last_highlighted_para.css({backgroundColor:reference_old_bgcol});
}
function goto_reference(ref) {

View File

@ -63,11 +63,14 @@ class Check(Command):
for f in x[-1]:
y = self.j(x[0], f)
mtime = os.stat(y).st_mtime
if (f.endswith('.py') and f not in ('ptempfile.py', 'feedparser.py',
if cache.get(y, 0) == mtime:
continue
if (f.endswith('.py') and f not in ('feedparser.py',
'pyparsing.py', 'markdown.py') and
'genshi' not in y and cache.get(y, 0) != mtime and
'prs500/driver.py' not in y):
yield y, mtime
if f.endswith('.coffee'):
yield y, mtime
for x in os.walk(self.j(self.d(self.SRC), 'recipes')):
for f in x[-1]:
@ -84,9 +87,20 @@ class Check(Command):
builtins = list(set_builtins(self.BUILTINS))
for f, mtime in self.get_files(cache):
self.info('\tChecking', f)
w = check_for_python_errors(open(f, 'rb').read(), f)
if w:
self.report_errors(w)
errors = False
ext = os.path.splitext(f)[1]
if ext in {'.py', '.recipe'}:
w = check_for_python_errors(open(f, 'rb').read(), f)
if w:
errors = True
self.report_errors(w)
else:
try:
subprocess.check_call(['coffee', '-c', '-p', f],
stdout=open(os.devnull, 'wb'))
except:
errors = True
if errors:
cPickle.dump(cache, open(self.CACHE, 'wb'), -1)
subprocess.call(['gvim', '-f', f])
raise SystemExit(1)

View File

@ -16,8 +16,8 @@ __all__ = [
'sdist',
'manual', 'tag_release',
'pypi_register', 'pypi_upload', 'upload_to_server',
'upload_user_manual', 'upload_to_mobileread', 'upload_demo',
'upload_to_sourceforge', 'upload_to_google_code', 'reupload',
'upload_installers',
'upload_user_manual', 'upload_demo', 'reupload',
'linux32', 'linux64', 'linux', 'linux_freeze',
'osx32_freeze', 'osx', 'rsync', 'push',
'win32_freeze', 'win32', 'win',
@ -65,14 +65,12 @@ stage4 = Stage4()
stage5 = Stage5()
publish = Publish()
from setup.upload import UploadUserManual, UploadInstallers, UploadDemo, \
UploadToServer, UploadToSourceForge, UploadToGoogleCode, ReUpload
from setup.upload import (UploadUserManual, UploadDemo, UploadInstallers,
UploadToServer, ReUpload)
upload_user_manual = UploadUserManual()
upload_to_mobileread = UploadInstallers()
upload_demo = UploadDemo()
upload_to_server = UploadToServer()
upload_to_sourceforge = UploadToSourceForge()
upload_to_google_code = UploadToGoogleCode()
upload_installers = UploadInstallers()
reupload = ReUpload()
from setup.installer import Rsync, Push

462
setup/hosting.py Normal file
View File

@ -0,0 +1,462 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib
from argparse import ArgumentParser, FileType
from subprocess import check_call
from tempfile import NamedTemporaryFile
from collections import OrderedDict
import mechanize
from lxml import html
def login_to_google(username, password):
br = mechanize.Browser()
br.addheaders = [('User-agent',
'Mozilla/5.0 (X11; Linux x86_64; rv:9.0) Gecko/20100101 Firefox/9.0')]
br.set_handle_robots(False)
br.open('https://accounts.google.com/ServiceLogin?service=code')
br.select_form(nr=0)
br.form['Email'] = username
br.form['Passwd'] = password
raw = br.submit().read()
if b'<title>Account overview - Account Settings</title>' not in raw:
raise ValueError(('Failed to login to google with credentials: %s %s'
'\nGoogle sometimes requires verification when logging in from a '
'new IP address. Use lynx to login and supply the verification.')
%(username, password))
return br
class ReadFileWithProgressReporting(file): # {{{
def __init__(self, path, mode='rb'):
file.__init__(self, path, mode)
self.seek(0, os.SEEK_END)
self._total = self.tell()
self.seek(0)
self.start_time = time.time()
def __len__(self):
return self._total
def read(self, size):
data = file.read(self, size)
if data:
self.report_progress(len(data))
return data
def report_progress(self, size):
sys.stdout.write(b'\x1b[s')
sys.stdout.write(b'\x1b[K')
frac = float(self.tell())/self._total
mb_pos = self.tell()/float(1024**2)
mb_tot = self._total/float(1024**2)
kb_pos = self.tell()/1024.0
kb_rate = kb_pos/(time.time()-self.start_time)
bit_rate = kb_rate * 1024
eta = int((self._total - self.tell())/bit_rate) + 1
eta_m, eta_s = eta / 60, eta % 60
sys.stdout.write(
' %.1f%% %.1f/%.1fMB %.1f KB/sec %d minutes, %d seconds left'%(
frac*100, mb_pos, mb_tot, kb_rate, eta_m, eta_s))
sys.stdout.write(b'\x1b[u')
if self.tell() >= self._total:
sys.stdout.write('\n')
t = int(time.time() - self.start_time) + 1
print ('Upload took %d minutes and %d seconds at %.1f KB/sec' % (
t/60, t%60, kb_rate))
sys.stdout.flush()
# }}}
class Base(object): # {{{
def __init__(self):
self.d = os.path.dirname
self.j = os.path.join
self.a = os.path.abspath
self.b = os.path.basename
self.s = os.path.splitext
self.e = os.path.exists
def info(self, *args, **kwargs):
print(*args, **kwargs)
sys.stdout.flush()
def warn(self, *args, **kwargs):
print('\n'+'_'*20, 'WARNING','_'*20)
print(*args, **kwargs)
print('_'*50)
sys.stdout.flush()
#}}}
class GoogleCode(Base):# {{{
def __init__(self,
# A mapping of filenames to file descriptions. The descriptions are
# used to populate the description field for the upload on google
# code
files,
# The unix name for the application.
appname,
# The version being uploaded
version,
# Google account username
username,
# Googlecode.com password
password,
# Google account password
gmail_password,
# The name of the google code project we are uploading to
gc_project,
# Server to which to upload the mapping of file names to google
# code URLs. If not None, upload is performed via shelling out to
# ssh, so you must have ssh-agent setup with the authenticated key
# and ssh agent forwarding enabled
gpaths_server=None,
# The path on gpaths_server to which to upload the mapping data
gpaths=None,
# If True, files are replaced, otherwise existing files are skipped
reupload=False,
# The pattern to match filenames for the files being uploaded and
# extract version information from them. Must have a named group
# named version
filename_pattern=r'{appname}-(?:portable-)?(?P<version>.+?)(?:-(?:i686|x86_64|32bit|64bit))?\.(?:zip|exe|msi|dmg|tar\.bz2|tar\.xz|txz|tbz2)'
):
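# A filename the default pattern accepts, for a hypothetical appname 'calibre':
# 'calibre-0.8.34-i686.tar.bz2' matches, with group('version') == '0.8.34'.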
self.username, self.password, = username, password
self.gmail_password, self.gc_project = gmail_password, gc_project
self.reupload, self.files, self.version = reupload, files, version
self.gpaths, self.gpaths_server = gpaths, gpaths_server
self.upload_host = '%s.googlecode.com'%gc_project
self.files_list = 'http://code.google.com/p/%s/downloads/list'%gc_project
self.delete_url = 'http://code.google.com/p/%s/downloads/delete?name=%%s'%gc_project
self.filename_pat = re.compile(filename_pattern.format(appname=appname))
for x in self.files:
if self.filename_pat.match(os.path.basename(x)) is None:
raise ValueError(('The filename %s does not match the '
'filename pattern')%os.path.basename(x))
def upload_one(self, fname, retries=2):
self.info('\nUploading', fname)
typ = 'Type-' + ('Source' if fname.endswith('.xz') else 'Archive' if
fname.endswith('.zip') else 'Installer')
ext = os.path.splitext(fname)[1][1:]
op = 'OpSys-'+{'msi':'Windows','zip':'Windows',
'dmg':'OSX','bz2':'Linux','xz':'All'}[ext]
desc = self.files[fname]
start = time.time()
for i in range(retries):
try:
path = self.upload(os.path.abspath(fname), desc,
labels=[typ, op, 'Featured'], retry=100)
except KeyboardInterrupt:
raise SystemExit(1)
except:
traceback.print_exc()
print ('\nUpload failed, trying again in 30 secs.',
'%d retries left.'%(retries-1-i))
time.sleep(30)
else:
break
self.info('Uploaded to:', path, 'in', int(time.time() - start),
'seconds')
return path
def re_upload(self):
fnames = {os.path.basename(x):x for x in self.files}
existing = self.old_files.intersection(set(fnames))
br = self.login_to_google()
for x, src in fnames.iteritems():
if not os.access(src, os.R_OK):
continue
if x in existing:
self.info('Deleting', x)
br.open(self.delete_url%x)
br.select_form(predicate=lambda y: 'delete.do' in y.action)
br.form.find_control(name='delete')
br.submit(name='delete')
self.upload_one(src)
def __call__(self):
self.paths = {}
self.old_files = self.get_old_files()
if self.reupload:
return self.re_upload()
for fname in self.files:
bname = os.path.basename(fname)
if bname in self.old_files:
path = 'http://%s.googlecode.com/files/%s'%(self.gc_project,
bname)
self.info(
'%s already uploaded, skipping. Assuming URL is: %s'%(
bname, path))
self.old_files.remove(bname)
else:
path = self.upload_one(fname)
self.paths[bname] = path
self.info('Updating path map')
for k, v in self.paths.iteritems():
self.info('\t%s => %s'%(k, v))
if self.gpaths and self.gpaths_server:
raw = subprocess.Popen(['ssh', self.gpaths_server, 'cat', self.gpaths],
stdout=subprocess.PIPE).stdout.read()
paths = eval(raw) if raw else {}
paths.update(self.paths)
rem = [x for x in paths if self.version not in x]
for x in rem: paths.pop(x)
raw = ['%r : %r,'%(k, v) for k, v in paths.items()]
raw = '{\n\n%s\n\n}\n'%('\n'.join(raw))
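# The file scp'd below is a python dict literal mapping file names to their
# google code URLs, e.g. (hypothetical entry):
# { 'calibre-0.8.34.msi' : 'http://calibre-ebook.googlecode.com/files/calibre-0.8.34.msi', }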
with NamedTemporaryFile() as t:
t.write(raw)
t.flush()
check_call(['scp', t.name, '%s:%s'%(self.gpaths_server,
self.gpaths)])
if self.old_files:
self.br = self.login_to_google()
self.delete_old_files()
def login_to_google(self):
self.info('Logging into Google')
return login_to_google(self.username, self.gmail_password)
def get_files_hosted_by_google_code(self):
self.info('Getting existing files in google code:', self.gc_project)
raw = urllib2.urlopen(self.files_list).read()
root = html.fromstring(raw)
ans = {}
for a in root.xpath('//td[@class="vt id col_0"]/a[@href]'):
ans[a.text.strip()] = a.get('href')
return ans
def get_old_files(self):
ans = set()
for fname in self.get_files_hosted_by_google_code():
m = self.filename_pat.match(fname)
if m is not None:
ans.add(fname)
return ans
def delete_old_files(self):
if not self.old_files:
return
self.info('Deleting old files from Google Code...')
for fname in self.old_files:
self.info('\tDeleting', fname)
self.br.open(self.delete_url%fname)
self.br.select_form(predicate=lambda x: 'delete.do' in x.action)
self.br.form.find_control(name='delete')
self.br.submit(name='delete')
def encode_upload_request(self, fields, file_path):
BOUNDARY = '----------Googlecode_boundary_reindeer_flotilla'
body = []
# Add the metadata about the upload first
for key, value in fields:
body.extend(
['--' + BOUNDARY,
'Content-Disposition: form-data; name="%s"' % key,
'',
value,
])
# Now add the file itself
file_name = os.path.basename(file_path)
with open(file_path, 'rb') as f:
file_content = f.read()
body.extend(
['--' + BOUNDARY,
'Content-Disposition: form-data; name="filename"; filename="%s"'
% file_name,
# The upload server determines the mime-type, no need to set it.
'Content-Type: application/octet-stream',
'',
file_content,
])
# Finalize the form body
body.extend(['--' + BOUNDARY + '--', ''])
body = [x.encode('ascii') if isinstance(x, unicode) else x for x in
body]
return ('multipart/form-data; boundary=%s' % BOUNDARY,
b'\r\n'.join(body))
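# Sketch of the multipart body built above (boundary shortened, values hypothetical):
# --...Googlecode_boundary_reindeer_flotilla
# Content-Disposition: form-data; name="summary"
#
# Windows installer
# --...Googlecode_boundary_reindeer_flotilla
# Content-Disposition: form-data; name="filename"; filename="calibre-0.8.34.msi"
# Content-Type: application/octet-stream
#
# <raw file bytes>
# --...Googlecode_boundary_reindeer_flotilla--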
def upload(self, fname, desc, labels=[], retry=0):
form_fields = [('summary', desc)]
form_fields.extend([('label', l.strip()) for l in labels])
content_type, body = self.encode_upload_request(form_fields, fname)
upload_uri = '/files'
auth_token = base64.b64encode('%s:%s'% (self.username, self.password))
headers = {
'Authorization': 'Basic %s' % auth_token,
'User-Agent': 'googlecode.com uploader v1',
'Content-Type': content_type,
}
with NamedTemporaryFile(delete=False) as f:
f.write(body)
try:
body = ReadFileWithProgressReporting(f.name)
server = httplib.HTTPSConnection(self.upload_host)
server.request('POST', upload_uri, body, headers)
resp = server.getresponse()
server.close()
finally:
os.remove(f.name)
if resp.status == 201:
return resp.getheader('Location')
print ('Failed to upload with code %d and reason: %s'%(resp.status,
resp.reason))
if retry < 1:
print ('Retrying in 5 seconds....')
time.sleep(5)
return self.upload(fname, desc, labels=labels, retry=retry+1)
raise Exception('Failed to upload '+fname)
# }}}
class SourceForge(Base): # {{{
# Note that you should manually ssh once to username,project@frs.sourceforge.net
# on the staging server so that the host key is set up
def __init__(self, files, project, version, username, replace=False):
self.username, self.project, self.version = username, project, version
self.base = '/home/frs/project/c/ca/'+project
self.rdir = self.base + '/' + version
self.files = files
def __call__(self):
for x in self.files:
start = time.time()
self.info('Uploading', x)
for i in range(5):
try:
check_call(['rsync', '-h', '-z', '--progress', '-e', 'ssh -x', x,
'%s,%s@frs.sourceforge.net:%s'%(self.username, self.project,
self.rdir+'/')])
except KeyboardInterrupt:
raise SystemExit(1)
except:
print ('\nUpload failed, trying again in 30 seconds')
time.sleep(30)
else:
break
print ('Uploaded in', int(time.time() - start), 'seconds\n\n')
# }}}
# CLI {{{
def cli_parser():
epilog='Copyright Kovid Goyal 2012'
p = ArgumentParser(
description='Upload project files to a hosting service automatically',
epilog=epilog
)
a = p.add_argument
a('appname', help='The name of the application, all files to'
' upload should begin with this name')
a('version', help='The version of the application, all files to'
' upload should contain this version')
a('file_map', type=FileType('rb'),
help='A file containing a mapping of files to be uploaded to '
'descriptions of the files. The descriptions will be visible '
'to users trying to get the file from the hosting service. '
'The format of the file is filename: description, with one per '
'line. filename can be a path to the file relative to the current '
'directory.')
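# Example file_map contents (hypothetical file names and descriptions):
# dist/calibre-0.8.34.tar.bz2: Source code
# dist/calibre-0.8.34.msi: Windows installer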
a('--replace', action='store_true', default=False,
help='If specified, existing files are replaced, otherwise '
'they are skipped.')
subparsers = p.add_subparsers(help='Where to upload to', dest='service',
title='Service', description='Hosting service to upload to')
gc = subparsers.add_parser('googlecode', help='Upload to googlecode',
epilog=epilog)
sf = subparsers.add_parser('sourceforge', help='Upload to sourceforge',
epilog=epilog)
a = gc.add_argument
a('project',
help='The name of the project on google code we are uploading to')
a('username',
help='Username to log into your google account')
a('password',
help='Password to log into your google account')
a('gc_password',
help='Password for google code hosting.'
' Get it from http://code.google.com/hosting/settings')
a('--path-map-server',
help='A server to which the mapping of filenames to googlecode '
'URLs will be uploaded. The upload happens via ssh, so you must '
'have a working ssh agent')
a('--path-map-location',
help='Path on the server where the path map is placed.')
a = sf.add_argument
a('project',
help='The name of the project on sourceforge we are uploading to')
a('username',
help='Sourceforge username')
return p
def main(args=None):
cli = cli_parser()
args = cli.parse_args(args)
files = {}
with args.file_map as f:
for line in f:
fname, _, desc = line.partition(':')
fname, desc = fname.strip(), desc.strip()
if fname and desc:
files[fname] = desc
ofiles = OrderedDict()
for x in sorted(files, key=lambda x:os.stat(x).st_size, reverse=True):
ofiles[x] = files[x]
if args.service == 'googlecode':
gc = GoogleCode(ofiles, args.appname, args.version, args.username,
args.gc_password, args.password, args.project,
gpaths_server=args.path_map_server,
gpaths=args.path_map_location, reupload=args.replace)
gc()
elif args.service == 'sourceforge':
sf = SourceForge(ofiles, args.project, args.version, args.username,
replace=args.replace)
sf()
if __name__ == '__main__':
main()
# }}}

View File

@ -48,7 +48,7 @@ class Push(Command):
threads = []
for host in (
r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
'kovid@ox:calibre',
'kovid@leopard_test:calibre',
r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
):
rcmd = BASE_RSYNC + EXCLUDES + ['.', host]

View File

@ -288,6 +288,7 @@ class LinuxFreeze(Command):
path=`readlink -f $0`
base=`dirname $path`
lib=$base/lib
export QT_ACCESSIBILITY=0 # qt-at-spi causes crashes and performance issues in various distros, so disable it
export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH
export MAGICK_HOME=$base
export MAGICK_CONFIGURE_PATH=$lib/{1}/config

File diff suppressed because it is too large

30846
setup/iso_639/en_GB.po Normal file

File diff suppressed because it is too large

View File

@ -13,14 +13,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-09-27 17:36+0000\n"
"Last-Translator: Christian Perrier <bubulle@debian.org>\n"
"PO-Revision-Date: 2012-01-01 08:56+0000\n"
"Last-Translator: sengian <Unknown>\n"
"Language-Team: French <debian-l10n-french@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-26 05:14+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2012-01-02 05:00+0000\n"
"X-Generator: Launchpad (build 14560)\n"
"Language: fr\n"
#. name for aaa
@ -17961,7 +17961,7 @@ msgstr "ndoola"
#. name for nds
msgid "German; Low"
msgstr ""
msgstr "Allemand; Bas"
#. name for ndt
msgid "Ndunga"

View File

@ -9,32 +9,32 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-08-27 05:33+0000\n"
"Last-Translator: Alastair McKinstry <Unknown>\n"
"PO-Revision-Date: 2011-12-30 20:15+0000\n"
"Last-Translator: iulargsb <Unknown>\n"
"Language-Team: Romanian <gnomero-list@lists.sourceforge.net>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-26 05:35+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2011-12-31 05:54+0000\n"
"X-Generator: Launchpad (build 14560)\n"
"Language: ro\n"
"PO-Creation-Date: 2000-09-24 15:45+0300\n"
#. name for aaa
msgid "Ghotuo"
msgstr ""
msgstr "Ghotuo"
#. name for aab
msgid "Alumu-Tesu"
msgstr ""
msgstr "Alumu-Tesu"
#. name for aac
msgid "Ari"
msgstr ""
msgstr "Ari"
#. name for aad
msgid "Amal"
msgstr ""
msgstr "Amal"
#. name for aae
msgid "Albanian; Arbëreshë"
@ -42,11 +42,11 @@ msgstr ""
#. name for aaf
msgid "Aranadan"
msgstr ""
msgstr "Aranadan"
#. name for aag
msgid "Ambrak"
msgstr ""
msgstr "Ambrak"
#. name for aah
msgid "Arapesh; Abu'"
@ -54,31 +54,31 @@ msgstr ""
#. name for aai
msgid "Arifama-Miniafia"
msgstr ""
msgstr "Arifama-Miniafia"
#. name for aak
msgid "Ankave"
msgstr ""
msgstr "Ankave"
#. name for aal
msgid "Afade"
msgstr ""
msgstr "Afade"
#. name for aam
msgid "Aramanik"
msgstr ""
msgstr "Aramanik"
#. name for aan
msgid "Anambé"
msgstr ""
msgstr "Anambé"
#. name for aao
msgid "Arabic; Algerian Saharan"
msgstr ""
msgstr "Arabă; Algeriană Sahara"
#. name for aap
msgid "Arára; Pará"
msgstr ""
msgstr "Arára; Pará"
#. name for aaq
msgid "Abnaki; Eastern"
@ -86,11 +86,11 @@ msgstr ""
#. name for aar
msgid "Afar"
msgstr ""
msgstr "Afară"
#. name for aas
msgid "Aasáx"
msgstr ""
msgstr "Aasáx"
#. name for aat
msgid "Albanian; Arvanitika"
@ -98,27 +98,27 @@ msgstr ""
#. name for aau
msgid "Abau"
msgstr ""
msgstr "Abau"
#. name for aaw
msgid "Solong"
msgstr ""
msgstr "Solong"
#. name for aax
msgid "Mandobo Atas"
msgstr ""
msgstr "Mandobo Atas"
#. name for aaz
msgid "Amarasi"
msgstr ""
msgstr "Amarasi"
#. name for aba
msgid "Abé"
msgstr ""
msgstr "Abé"
#. name for abb
msgid "Bankon"
msgstr ""
msgstr "Bankon"
#. name for abc
msgid "Ayta; Ambala"
@ -134,43 +134,43 @@ msgstr ""
#. name for abf
msgid "Abai Sungai"
msgstr ""
msgstr "Abai Sungai"
#. name for abg
msgid "Abaga"
msgstr ""
msgstr "Abaga"
#. name for abh
msgid "Arabic; Tajiki"
msgstr ""
msgstr "Arabă; Tajikistan"
#. name for abi
msgid "Abidji"
msgstr ""
msgstr "Abidji"
#. name for abj
msgid "Aka-Bea"
msgstr ""
msgstr "Aka-Bea"
#. name for abk
msgid "Abkhazian"
msgstr ""
msgstr "Abhază"
#. name for abl
msgid "Lampung Nyo"
msgstr ""
msgstr "Lampung Nyo"
#. name for abm
msgid "Abanyom"
msgstr ""
msgstr "Abanyom"
#. name for abn
msgid "Abua"
msgstr ""
msgstr "Abua"
#. name for abo
msgid "Abon"
msgstr ""
msgstr "Abon"
#. name for abp
msgid "Ayta; Abellen"
@ -178,23 +178,23 @@ msgstr ""
#. name for abq
msgid "Abaza"
msgstr ""
msgstr "Abaza"
#. name for abr
msgid "Abron"
msgstr ""
msgstr "Abron"
#. name for abs
msgid "Malay; Ambonese"
msgstr ""
msgstr "Malaezia; Amboneză"
#. name for abt
msgid "Ambulas"
msgstr ""
msgstr "Ambulas"
#. name for abu
msgid "Abure"
msgstr ""
msgstr "Abure"
#. name for abv
msgid "Arabic; Baharna"
@ -202,31 +202,31 @@ msgstr ""
#. name for abw
msgid "Pal"
msgstr ""
msgstr "Pal"
#. name for abx
msgid "Inabaknon"
msgstr ""
msgstr "Inabaknon"
#. name for aby
msgid "Aneme Wake"
msgstr ""
msgstr "Aneme Wake"
#. name for abz
msgid "Abui"
msgstr ""
msgstr "Abui"
#. name for aca
msgid "Achagua"
msgstr ""
msgstr "Achagua"
#. name for acb
msgid "Áncá"
msgstr ""
msgstr "Áncá"
#. name for acd
msgid "Gikyode"
msgstr ""
msgstr "Gikyode"
#. name for ace
msgid "Achinese"
@ -238,27 +238,27 @@ msgstr ""
#. name for ach
msgid "Acoli"
msgstr ""
msgstr "Acoli"
#. name for aci
msgid "Aka-Cari"
msgstr ""
msgstr "Aka-Cari"
#. name for ack
msgid "Aka-Kora"
msgstr ""
msgstr "Aka-Kora"
#. name for acl
msgid "Akar-Bale"
msgstr ""
msgstr "Akar-Bale"
#. name for acm
msgid "Arabic; Mesopotamian"
msgstr ""
msgstr "Arabă; Mesopotamia"
#. name for acn
msgid "Achang"
msgstr ""
msgstr "Achang"
#. name for acp
msgid "Acipa; Eastern"
@ -266,59 +266,59 @@ msgstr ""
#. name for acq
msgid "Arabic; Ta'izzi-Adeni"
msgstr ""
msgstr "Arabă; Ta'izzi-Adeni"
#. name for acr
msgid "Achi"
msgstr ""
msgstr "Achi"
#. name for acs
msgid "Acroá"
msgstr ""
msgstr "Acroá"
#. name for act
msgid "Achterhoeks"
msgstr ""
msgstr "Achterhoeks"
#. name for acu
msgid "Achuar-Shiwiar"
msgstr ""
msgstr "Achuar-Shiwiar"
#. name for acv
msgid "Achumawi"
msgstr ""
msgstr "Achumawi"
#. name for acw
msgid "Arabic; Hijazi"
msgstr ""
msgstr "Arabă; Hijazi"
#. name for acx
msgid "Arabic; Omani"
msgstr ""
msgstr "Arabă; Oman"
#. name for acy
msgid "Arabic; Cypriot"
msgstr ""
msgstr "Arabă; Cipru"
#. name for acz
msgid "Acheron"
msgstr ""
msgstr "Acheron"
#. name for ada
msgid "Adangme"
msgstr ""
msgstr "Adangme"
#. name for adb
msgid "Adabe"
msgstr ""
msgstr "Adabe"
#. name for add
msgid "Dzodinka"
msgstr ""
msgstr "Dzodinka"
#. name for ade
msgid "Adele"
msgstr ""
msgstr "Adele"
#. name for adf
msgid "Arabic; Dhofari"
@ -326,87 +326,87 @@ msgstr ""
#. name for adg
msgid "Andegerebinha"
msgstr ""
msgstr "Andegerebinha"
#. name for adh
msgid "Adhola"
msgstr ""
msgstr "Adhola"
#. name for adi
msgid "Adi"
msgstr ""
msgstr "Adi"
#. name for adj
msgid "Adioukrou"
msgstr ""
msgstr "Adioukrou"
#. name for adl
msgid "Galo"
msgstr ""
msgstr "Galo"
#. name for adn
msgid "Adang"
msgstr ""
msgstr "Adang"
#. name for ado
msgid "Abu"
msgstr ""
msgstr "Abu"
#. name for adp
msgid "Adap"
msgstr ""
msgstr "Adap"
#. name for adq
msgid "Adangbe"
msgstr ""
msgstr "Adangbe"
#. name for adr
msgid "Adonara"
msgstr ""
msgstr "Adonara"
#. name for ads
msgid "Adamorobe Sign Language"
msgstr ""
msgstr "Limbajul de semne Adamorobe"
#. name for adt
msgid "Adnyamathanha"
msgstr ""
msgstr "Adnyamathanha"
#. name for adu
msgid "Aduge"
msgstr ""
msgstr "Aduge"
#. name for adw
msgid "Amundava"
msgstr ""
msgstr "Amundava"
#. name for adx
msgid "Tibetan; Amdo"
msgstr ""
msgstr "Tibetană; Amdo"
#. name for ady
msgid "Adyghe"
msgstr ""
msgstr "Adyghe"
#. name for adz
msgid "Adzera"
msgstr ""
msgstr "Adzera"
#. name for aea
msgid "Areba"
msgstr ""
msgstr "Areba"
#. name for aeb
msgid "Arabic; Tunisian"
msgstr ""
msgstr "Arabă; Tunisia"
#. name for aec
msgid "Arabic; Saidi"
msgstr ""
msgstr "Arabă; Saidi"
#. name for aed
msgid "Argentine Sign Language"
msgstr ""
msgstr "Limbajul de semne din Argentina"
#. name for aee
msgid "Pashayi; Northeast"
@ -414,23 +414,23 @@ msgstr ""
#. name for aek
msgid "Haeke"
msgstr ""
msgstr "Haeke"
#. name for ael
msgid "Ambele"
msgstr ""
msgstr "Ambele"
#. name for aem
msgid "Arem"
msgstr ""
msgstr "Arem"
#. name for aen
msgid "Armenian Sign Language"
msgstr ""
msgstr "Limbajul de semne armenian"
#. name for aeq
msgid "Aer"
msgstr ""
msgstr "Aer"
#. name for aer
msgid "Arrernte; Eastern"
@ -438,63 +438,63 @@ msgstr ""
#. name for aes
msgid "Alsea"
msgstr ""
msgstr "Alsea"
#. name for aeu
msgid "Akeu"
msgstr ""
msgstr "Akeu"
#. name for aew
msgid "Ambakich"
msgstr ""
msgstr "Ambakich"
#. name for aey
msgid "Amele"
msgstr ""
msgstr "Amele"
#. name for aez
msgid "Aeka"
msgstr ""
msgstr "Aeka"
#. name for afb
msgid "Arabic; Gulf"
msgstr ""
msgstr "Arabă; Golf"
#. name for afd
msgid "Andai"
msgstr ""
msgstr "Andai"
#. name for afe
msgid "Putukwam"
msgstr ""
msgstr "Putukwam"
#. name for afg
msgid "Afghan Sign Language"
msgstr ""
msgstr "Limbajul de semne afgan"
#. name for afh
msgid "Afrihili"
msgstr ""
msgstr "Afrihili"
#. name for afi
msgid "Akrukay"
msgstr ""
msgstr "Akrukay"
#. name for afk
msgid "Nanubae"
msgstr ""
msgstr "Nanubae"
#. name for afn
msgid "Defaka"
msgstr ""
msgstr "Defaka"
#. name for afo
msgid "Eloyi"
msgstr ""
msgstr "Eloyi"
#. name for afp
msgid "Tapei"
msgstr ""
msgstr "Tapei"
#. name for afr
msgid "Afrikaans"
@ -506,55 +506,55 @@ msgstr ""
#. name for aft
msgid "Afitti"
msgstr ""
msgstr "Afitti"
#. name for afu
msgid "Awutu"
msgstr ""
msgstr "Awutu"
#. name for afz
msgid "Obokuitai"
msgstr ""
msgstr "Obokuitai"
#. name for aga
msgid "Aguano"
msgstr ""
msgstr "Aguano"
#. name for agb
msgid "Legbo"
msgstr ""
msgstr "Legbo"
#. name for agc
msgid "Agatu"
msgstr ""
msgstr "Agatu"
#. name for agd
msgid "Agarabi"
msgstr ""
msgstr "Agarabi"
#. name for age
msgid "Angal"
msgstr ""
msgstr "Angal"
#. name for agf
msgid "Arguni"
msgstr ""
msgstr "Arguni"
#. name for agg
msgid "Angor"
msgstr ""
msgstr "Angor"
#. name for agh
msgid "Ngelima"
msgstr ""
msgstr "Ngelima"
#. name for agi
msgid "Agariya"
msgstr ""
msgstr "Agariya"
#. name for agj
msgid "Argobba"
msgstr ""
msgstr "Argobba"
#. name for agk
msgid "Agta; Isarog"
@ -562,27 +562,27 @@ msgstr ""
#. name for agl
msgid "Fembe"
msgstr ""
msgstr "Fembe"
#. name for agm
msgid "Angaataha"
msgstr ""
msgstr "Angaataha"
#. name for agn
msgid "Agutaynen"
msgstr ""
msgstr "Agutaynen"
#. name for ago
msgid "Tainae"
msgstr ""
msgstr "Tainae"
#. name for agq
msgid "Aghem"
msgstr ""
msgstr "Aghem"
#. name for agr
msgid "Aguaruna"
msgstr ""
msgstr "Aguaruna"
#. name for ags
msgid "Esimbi"

View File

@ -45,7 +45,7 @@ class Stage3(Command):
class Stage4(Command):
description = 'Stage 4 of the publish process'
sub_commands = ['upload_to_sourceforge', 'upload_to_google_code']
sub_commands = ['upload_installers']
class Stage5(Command):

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cPickle, re, shutil, marshal, zipfile, glob, subprocess, time
import os, cPickle, re, shutil, marshal, zipfile, glob, time
from zlib import compress
from setup import Command, basenames, __appname__
@ -35,6 +35,8 @@ class Coffee(Command): # {{{
help='Display the generated javascript')
def run(self, opts):
from calibre.utils.coffeescript import compile_coffeescript
self.compiler = compile_coffeescript
self.do_coffee_compile(opts)
if opts.watch:
try:
@ -61,20 +63,24 @@ class Coffee(Command): # {{{
if self.newer(js, x):
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x)))
try:
subprocess.check_call(['coffee', '-c', '-o', dest, x])
except:
with open(x, 'rb') as f:
cs, errs = self.compiler(f.read())
for line in errs:
print (line)
if cs and not errs:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
else:
print ('\n\tCompilation of %s failed'%os.path.basename(x))
if ignore_errors:
with open(js, 'wb') as f:
f.write('# Compilation from coffeescript failed')
else:
raise SystemExit(1)
else:
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems():

View File

@ -5,12 +5,15 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \
glob, stat, sys
import os, re, subprocess, hashlib, shutil, glob, stat, sys, time
from subprocess import check_call
from tempfile import NamedTemporaryFile, mkdtemp
from zipfile import ZipFile
if __name__ == '__main__':
d = os.path.dirname
sys.path.insert(0, d(d(os.path.abspath(__file__))))
from setup import Command, __version__, installer_name, __appname__
PREFIX = "/var/www/calibre-ebook.com"
@ -19,8 +22,9 @@ BETAS = DOWNLOADS +'/betas'
USER_MANUAL = '/var/www/localhost/htdocs/'
HTML2LRF = "calibre/ebooks/lrf/html/demo"
TXT2LRF = "src/calibre/ebooks/lrf/txt/demo"
MOBILEREAD = 'ftp://dev.mobileread.com/calibre/'
STAGING_HOST = '67.207.135.179'
STAGING_USER = 'root'
STAGING_DIR = '/root/staging'
def installers():
installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
@ -47,10 +51,10 @@ class ReUpload(Command): # {{{
description = 'Re-upload any installers present in dist/'
sub_commands = ['upload_to_google_code', 'upload_to_sourceforge']
sub_commands = ['upload_installers']
def pre_sub_commands(self, opts):
opts.re_upload = True
opts.replace = True
def run(self, opts):
for x in installers():
@ -58,365 +62,98 @@ class ReUpload(Command): # {{{
os.remove(x)
# }}}
class ReadFileWithProgressReporting(file): # {{{
def __init__(self, path, mode='rb'):
file.__init__(self, path, mode)
self.seek(0, os.SEEK_END)
self._total = self.tell()
self.seek(0)
self.start_time = time.time()
def __len__(self):
return self._total
def read(self, size):
data = file.read(self, size)
if data:
self.report_progress(len(data))
return data
def report_progress(self, size):
sys.stdout.write(b'\x1b[s')
sys.stdout.write(b'\x1b[K')
frac = float(self.tell())/self._total
mb_pos = self.tell()/float(1024**2)
mb_tot = self._total/float(1024**2)
kb_pos = self.tell()/1024.0
kb_rate = kb_pos/(time.time()-self.start_time)
bit_rate = kb_rate * 1024
eta = int((self._total - self.tell())/bit_rate) + 1
eta_m, eta_s = eta / 60, eta % 60
sys.stdout.write(
' %.1f%% %.1f/%.1fMB %.1f KB/sec %d minutes, %d seconds left'%(
frac*100, mb_pos, mb_tot, kb_rate, eta_m, eta_s))
sys.stdout.write(b'\x1b[u')
if self.tell() >= self._total:
sys.stdout.write('\n')
t = int(time.time() - self.start_time) + 1
print ('Upload took %d minutes and %d seconds at %.1f KB/sec' % (
t/60, t%60, kb_rate))
sys.stdout.flush()
# }}}
class UploadToGoogleCode(Command): # {{{
USERNAME = 'kovidgoyal'
# The password can be obtained by going to
# http://code.google.com/hosting/settings
# while logged into gmail
# Data {{{
def get_google_data():
PASSWORD_FILE = os.path.expanduser('~/.googlecodecalibre')
OFFLINEIMAP = os.path.expanduser('~/work/kde/conf/offlineimap/rc')
GPATHS = '/var/www/status.calibre-ebook.com/googlepaths'
UPLOAD_HOST = 'calibre-ebook.googlecode.com'
FILES_LIST = 'http://code.google.com/p/calibre-ebook/downloads/list'
def add_options(self, parser):
parser.add_option('--re-upload', default=False, action='store_true',
help='Re-upload all installers currently in dist/')
gc_password = open(PASSWORD_FILE).read().strip()
raw = open(OFFLINEIMAP).read()
pw = re.search(r'(?s)remoteuser = .*@gmail.com.*?remotepass = (\S+)',
raw).group(1).strip()
return {
'username':'kovidgoyal@gmail.com', 'password':pw, 'gc_password':gc_password,
'path_map_server':'root@kovidgoyal.net',
'path_map_location':'/var/www/status.calibre-ebook.com/googlepaths',
# If you change this remember to change it in the
# status.calibre-ebook.com server as well
'project':'calibre-ebook'
}
def re_upload(self):
fnames = set([os.path.basename(x) for x in installers() if not
x.endswith('.tar.xz') and os.path.exists(x)])
existing = set(self.old_files.keys()).intersection(fnames)
br = self.login_to_gmail()
for x in fnames:
src = os.path.join('dist', x)
if not os.access(src, os.R_OK):
continue
if x in existing:
self.info('Deleting', x)
br.open('http://code.google.com/p/calibre-ebook/downloads/delete?name=%s'%x)
br.select_form(predicate=lambda y: 'delete.do' in y.action)
br.form.find_control(name='delete')
br.submit(name='delete')
self.upload_one(src)
def get_sourceforge_data():
return {'username':'kovidgoyal', 'project':'calibre'}
def upload_one(self, fname):
self.info('\nUploading', fname)
typ = 'Type-' + ('Source' if fname.endswith('.xz') else 'Archive' if
fname.endswith('.zip') else 'Installer')
ext = os.path.splitext(fname)[1][1:]
op = 'OpSys-'+{'msi':'Windows','zip':'Windows',
'dmg':'OSX','bz2':'Linux','xz':'All'}[ext]
desc = installer_description(fname)
start = time.time()
for i in range(5):
try:
path = self.upload(os.path.abspath(fname), desc,
labels=[typ, op, 'Featured'])
except KeyboardInterrupt:
raise SystemExit(1)
except:
import traceback
traceback.print_exc()
print ('\nUpload failed, trying again in 30 secs')
time.sleep(30)
else:
break
self.info('Uploaded to:', path, 'in', int(time.time() - start),
'seconds')
return path
def send_data(loc):
subprocess.check_call(['rsync', '--inplace', '--delete', '-r', '-z', '-h', '--progress', '-e', 'ssh -x',
loc+'/', '%s@%s:%s'%(STAGING_USER, STAGING_HOST, STAGING_DIR)])
def run(self, opts):
self.opts = opts
self.password = open(self.PASSWORD_FILE).read().strip()
self.paths = {}
self.old_files = self.get_files_hosted_by_google_code()
def gc_cmdline(ver, gdata):
return [__appname__, ver, 'fmap', 'googlecode',
gdata['project'], gdata['username'], gdata['password'],
gdata['gc_password'], '--path-map-server',
gdata['path_map_server'], '--path-map-location',
gdata['path_map_location']]
if opts.re_upload:
return self.re_upload()
def sf_cmdline(ver, sdata):
return [__appname__, ver, 'fmap', 'sourceforge', sdata['project'],
sdata['username']]
for fname in installers():
bname = os.path.basename(fname)
if bname in self.old_files:
path = 'http://calibre-ebook.googlecode.com/files/'+bname
self.info(
'%s already uploaded, skipping. Assuming URL is: %s'%(
bname, path))
self.old_files.pop(bname)
else:
path = self.upload_one(fname)
self.paths[bname] = path
self.info('Updating path map')
self.info(repr(self.paths))
raw = subprocess.Popen(['ssh', 'divok', 'cat', self.GPATHS],
stdout=subprocess.PIPE).stdout.read()
paths = eval(raw)
paths.update(self.paths)
rem = [x for x in paths if __version__ not in x]
for x in rem: paths.pop(x)
raw = ['%r : %r,'%(k, v) for k, v in paths.items()]
raw = '{\n\n%s\n\n}\n'%('\n'.join(raw))
t = NamedTemporaryFile()
t.write(raw)
t.flush()
check_call(['scp', t.name, 'divok:'+self.GPATHS])
self.br = self.login_to_gmail()
self.delete_old_files()
#if len(self.get_files_hosted_by_google_code()) > len(installers()):
# self.warn('Some old files were not deleted from Google Code')
def login_to_gmail(self):
import mechanize
self.info('Logging into Gmail')
raw = open(self.OFFLINEIMAP).read()
pw = re.search(r'(?s)remoteuser = .*@gmail.com.*?remotepass = (\S+)',
raw).group(1).strip()
br = mechanize.Browser()
br.set_handle_robots(False)
br.open('http://gmail.com')
br.select_form(nr=0)
br.form['Email'] = self.USERNAME
br.form['Passwd'] = pw
br.submit()
return br
def get_files_hosted_by_google_code(self):
import urllib2
from lxml import html
self.info('Getting existing files in google code')
raw = urllib2.urlopen(self.FILES_LIST).read()
root = html.fromstring(raw)
ans = {}
for a in root.xpath('//td[@class="vt id col_0"]/a[@href]'):
ans[a.text.strip()] = a.get('href')
return ans
def delete_old_files(self):
self.info('Deleting old files from Google Code...')
for fname in self.old_files:
ext = fname.rpartition('.')[-1]
if ext in ('flv', 'mp4', 'ogg', 'avi'):
continue
self.info('\tDeleting', fname)
self.br.open('http://code.google.com/p/calibre-ebook/downloads/delete?name=%s'%fname)
self.br.select_form(predicate=lambda x: 'delete.do' in x.action)
self.br.form.find_control(name='delete')
self.br.submit(name='delete')
def encode_upload_request(self, fields, file_path):
BOUNDARY = '----------Googlecode_boundary_reindeer_flotilla'
CRLF = '\r\n'
body = []
# Add the metadata about the upload first
for key, value in fields:
body.extend(
['--' + BOUNDARY,
'Content-Disposition: form-data; name="%s"' % key,
'',
value,
])
# Now add the file itself
file_name = os.path.basename(file_path)
with open(file_path, 'rb') as f:
file_content = f.read()
body.extend(
['--' + BOUNDARY,
'Content-Disposition: form-data; name="filename"; filename="%s"'
% file_name,
# The upload server determines the mime-type, no need to set it.
'Content-Type: application/octet-stream',
'',
file_content,
])
# Finalize the form body
body.extend(['--' + BOUNDARY + '--', ''])
return 'multipart/form-data; boundary=%s' % BOUNDARY, CRLF.join(body)
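# For reference, a sketch of the request body built above, assuming one
# 'summary' field and a hypothetical file calibre.tar.bz2 (label fields
# omitted; the names here are illustrative, not from the original source):
#
#   ------------Googlecode_boundary_reindeer_flotilla
#   Content-Disposition: form-data; name="summary"
#
#   calibre installer for Linux
#   ------------Googlecode_boundary_reindeer_flotilla
#   Content-Disposition: form-data; name="filename"; filename="calibre.tar.bz2"
#   Content-Type: application/octet-stream
#
#   <raw file bytes>
#   ------------Googlecode_boundary_reindeer_flotilla--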
def upload(self, fname, desc, labels=[], retry=0):
form_fields = [('summary', desc)]
form_fields.extend([('label', l.strip()) for l in labels])
content_type, body = self.encode_upload_request(form_fields, fname)
upload_uri = '/files'
auth_token = base64.b64encode('%s:%s'% (self.USERNAME, self.password))
headers = {
'Authorization': 'Basic %s' % auth_token,
'User-Agent': 'Calibre googlecode.com uploader v0.1.0',
'Content-Type': content_type,
}
with NamedTemporaryFile(delete=False) as f:
f.write(body)
try:
body = ReadFileWithProgressReporting(f.name)
server = httplib.HTTPSConnection(self.UPLOAD_HOST)
server.request('POST', upload_uri, body, headers)
resp = server.getresponse()
server.close()
finally:
os.remove(f.name)
if resp.status == 201:
return resp.getheader('Location')
print 'Failed to upload with code %d and reason: %s'%(resp.status,
resp.reason)
if retry < 1:
print 'Retrying in 5 seconds....'
time.sleep(5)
return self.upload(fname, desc, labels=labels, retry=retry+1)
raise Exception('Failed to upload '+fname)
# }}}
class UploadToSourceForge(Command): # {{{
description = 'Upload release files to sourceforge'
USERNAME = 'kovidgoyal'
PROJECT = 'calibre'
BASE = '/home/frs/project/c/ca/'+PROJECT
@property
def rdir(self):
return self.BASE+'/'+__version__
def upload_installers(self):
for x in installers():
if not os.path.exists(x): continue
start = time.time()
self.info('Uploading', x)
for i in range(5):
try:
check_call(['rsync', '-z', '--progress', '-e', 'ssh -x', x,
'%s,%s@frs.sourceforge.net:%s'%(self.USERNAME, self.PROJECT,
self.rdir+'/')])
except KeyboardInterrupt:
raise SystemExit(1)
except:
print ('\nUpload failed, trying again in 30 seconds')
time.sleep(30)
else:
break
print 'Uploaded in', int(time.time() - start), 'seconds'
print ('\n')
def run(self, opts):
self.opts = opts
self.upload_installers()
def run_remote_upload(args):
print 'Running remotely:', ' '.join(args)
subprocess.check_call(['ssh', '-x', '%s@%s'%(STAGING_USER, STAGING_HOST),
'cd', STAGING_DIR, '&&', 'python', 'hosting.py']+args)
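# Illustrative example (arguments are hypothetical): run_remote_upload(
# ['calibre', '0.8.34', 'fmap', 'sourceforge', 'calibre', 'kovidgoyal'])
# runs the following on the staging server:
#   ssh -x root@67.207.135.179 cd /root/staging && \
#       python hosting.py calibre 0.8.34 fmap sourceforge calibre kovidgoyal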
# }}}
class UploadInstallers(Command): # {{{
description = 'Upload any installers present in dist/ to mobileread'
def curl_list_dir(self, url=MOBILEREAD, listonly=1):
import pycurl
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(c.FTP_USE_EPSV, 1)
c.setopt(c.NETRC, c.NETRC_REQUIRED)
c.setopt(c.FTPLISTONLY, listonly)
c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
b = cStringIO.StringIO()
c.setopt(c.WRITEFUNCTION, b.write)
c.perform()
c.close()
return b.getvalue().split() if listonly else b.getvalue().splitlines()
def curl_delete_file(self, path, url=MOBILEREAD):
import pycurl
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(c.FTP_USE_EPSV, 1)
c.setopt(c.NETRC, c.NETRC_REQUIRED)
self.info('Deleting file %s on %s'%(path, url))
c.setopt(c.QUOTE, ['dele '+ path])
c.perform()
c.close()
def curl_upload_file(self, stream, url):
import pycurl
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.UPLOAD, 1)
c.setopt(c.NETRC, c.NETRC_REQUIRED)
c.setopt(pycurl.READFUNCTION, stream.read)
stream.seek(0, 2)
c.setopt(pycurl.INFILESIZE_LARGE, stream.tell())
stream.seek(0)
c.setopt(c.NOPROGRESS, 0)
c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
self.info('Uploading file %s to url %s' % (getattr(stream, 'name', ''),
url))
try:
c.perform()
c.close()
except:
pass
files = self.curl_list_dir(listonly=0)
for line in files:
line = line.split()
if url.endswith(line[-1]):
size = long(line[4])
stream.seek(0,2)
if size != stream.tell():
raise RuntimeError('curl failed to upload %s correctly'%getattr(stream, 'name', ''))
def upload_installer(self, name):
if not os.path.exists(name):
return
bname = os.path.basename(name)
pat = re.compile(bname.replace(__version__, r'\d+\.\d+\.\d+'))
for f in self.curl_list_dir():
if pat.search(f):
self.curl_delete_file('/calibre/'+f)
self.curl_upload_file(open(name, 'rb'), MOBILEREAD+os.path.basename(name))
def add_options(self, parser):
parser.add_option('--replace', default=False, action='store_true', help=
'Replace existing installers, when uploading to google')
def run(self, opts):
self.info('Uploading installers...')
installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
installers.append(installer_name('tar.bz2', is64bit=True))
map(self.upload_installer, installers)
all_possible = set(installers())
available = set(glob.glob('dist/*'))
files = {x:installer_description(x) for x in
all_possible.intersection(available)}
tdir = mkdtemp()
try:
self.upload_to_staging(tdir, files)
self.upload_to_sourceforge()
self.upload_to_google(opts.replace)
finally:
shutil.rmtree(tdir, ignore_errors=True)
def upload_to_staging(self, tdir, files):
os.mkdir(tdir+'/dist')
hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'hosting.py')
shutil.copyfile(hosting, os.path.join(tdir, 'hosting.py'))
for f in files:
shutil.copyfile(f, os.path.join(tdir, f))
with open(os.path.join(tdir, 'fmap'), 'wb') as fo:
for f, desc in files.iteritems():
fo.write('%s: %s\n'%(f, desc))
try:
send_data(tdir)
except:
print('\nUpload to staging failed, retrying in a minute')
time.sleep(60)
send_data(tdir)
def upload_to_google(self, replace):
gdata = get_google_data()
args = gc_cmdline(__version__, gdata)
if replace:
args = ['--replace'] + args
run_remote_upload(args)
def upload_to_sourceforge(self):
sdata = get_sourceforge_data()
args = sf_cmdline(__version__, sdata)
run_remote_upload(args)
# }}}
class UploadUserManual(Command): # {{{
@ -502,4 +239,61 @@ class UploadToServer(Command): # {{{
shutil.rmtree(tdir)
# }}}
# Testing {{{
def write_files(fmap):
for f in fmap:
with open(f, 'wb') as f:
f.write(os.urandom(100))
f.write(b'a'*1000000)
with open('fmap', 'wb') as fo:
for f, desc in fmap.iteritems():
fo.write('%s: %s\n'%(f, desc))
def setup_installers():
ver = '0.0.1'
files = {x.replace(__version__, ver):installer_description(x) for x in installers()}
tdir = mkdtemp()
os.chdir(tdir)
return tdir, files, ver
def test_google_uploader():
gdata = get_google_data()
gdata['project'] = 'calibre-hosting-uploader'
gdata['path_map_location'] += '-test'
hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'hosting.py')
tdir, files, ver = setup_installers()
try:
os.mkdir('dist')
write_files(files)
shutil.copyfile(hosting, 'hosting.py')
send_data(tdir)
args = gc_cmdline(ver, gdata)
print ('Doing initial upload')
run_remote_upload(args)
raw_input('Press Enter to proceed:')
print ('\nDoing re-upload')
run_remote_upload(['--replace']+args)
raw_input('Press Enter to proceed:')
nv = ver + '.1'
files = {x.replace(__version__, nv):installer_description(x) for x in installers()}
write_files(files)
send_data(tdir)
args[1] = nv
print ('\nDoing update upload')
run_remote_upload(args)
print ('\nDo not forget to delete any remaining files in the %s project'%
gdata['project'])
finally:
shutil.rmtree(tdir)
# }}}
if __name__ == '__main__':
test_google_uploader()

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 32)
numeric_version = (0, 8, 34)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -839,7 +839,7 @@ class ActionCopyToLibrary(InterfaceActionBase):
class ActionTweakEpub(InterfaceActionBase):
name = 'Tweak ePub'
actual_plugin = 'calibre.gui2.actions.tweak_epub:TweakEpubAction'
description = _('Make small tweaks to epub files in your calibre library')
description = _('Make small tweaks to epub or htmlz files in your calibre library')
class ActionNextMatch(InterfaceActionBase):
name = 'Next Match'

View File

@ -50,6 +50,7 @@ class ANDROID(USBMS):
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226],
0x4316 : [0x216],
0x42d6 : [0x216],
},
# Freescale
0x15a2 : {
@ -138,7 +139,7 @@ class ANDROID(USBMS):
0x5e3 : { 0x726 : [0x222] },
# ZTE
0x19d2 : { 0x1353 : [0x226] },
0x19d2 : { 0x1353 : [0x226], 0x1351 : [0x227] },
# Advent
0x0955 : { 0x7100 : [0x9999] }, # This is the same as the Notion Ink Adam

View File

@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, DEBUG
from calibre import fit_image, confirm_config_name
from calibre.constants import isosx, iswindows
@ -207,6 +208,10 @@ class ITUNES(DriverBase):
BACKLOADING_ERROR_MESSAGE = _(
"Cannot copy books directly from iDevice. "
"Drag from iTunes Library to desktop, then add to calibre's Library window.")
UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE = _(
"Unsupported direct connect mode. "
"See http://www.mobileread.com/forums/showthread.php?t=118559 "
"for instructions on using 'Connect to iTunes'")
# Product IDs:
# 0x1291 iPod Touch
@ -806,6 +811,7 @@ class ITUNES(DriverBase):
'''
if DEBUG:
self.log.info("ITUNES.get_file(): exporting '%s'" % path)
outfile.write(open(self.cached_books[path]['lib_book'].location().path).read())
def open(self, connected_device, library_uuid):
@ -832,7 +838,7 @@ class ITUNES(DriverBase):
raise AppleOpenFeedback(self)
else:
if DEBUG:
self.log.info(" advanced user mode, directly connecting to iDevice")
self.log.warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE)
# Confirm/create thumbs archive
if not os.path.exists(self.cache_dir):
@ -1161,6 +1167,8 @@ class ITUNES(DriverBase):
added = pl.add(appscript.mactypes.File(fpath),to=pl)
if False:
self.log.info(" '%s' added to Device|Books" % metadata.title)
self._wait_for_writable_metadata(added)
return added
elif iswindows:
@ -1322,7 +1330,6 @@ class ITUNES(DriverBase):
'''
Unsupported direct-connect mode.
'''
self.log.warning(" unsupported direct connect mode")
db_added = self._add_device_book(fpath, metadata)
lb_added = self._add_library_book(fpath, metadata)
if not lb_added and DEBUG:
@ -1390,16 +1397,17 @@ class ITUNES(DriverBase):
except:
if DEBUG:
self.log.warning(" iTunes automation interface reported an error"
" when adding artwork to '%s' in the iTunes Library" % metadata.title)
" adding artwork to '%s' in the iTunes Library" % metadata.title)
pass
if db_added:
try:
db_added.artworks[1].data_.set(cover_data)
self.log.info(" writing '%s' cover to iDevice" % metadata.title)
except:
if DEBUG:
self.log.warning(" iTunes automation interface reported an error"
" when adding artwork to '%s' on the iDevice" % metadata.title)
" adding artwork to '%s' on the iDevice" % metadata.title)
#import traceback
#traceback.print_exc()
#from calibre import ipython
@ -1945,7 +1953,7 @@ class ITUNES(DriverBase):
return thumb_data
if DEBUG:
self.log.info(" ITUNES._generate_thumbnail():")
self.log.info(" ITUNES._generate_thumbnail('%s'):" % title)
if isosx:
# Fetch the artwork from iTunes
@ -2762,6 +2770,7 @@ class ITUNES(DriverBase):
# Update metadata from plugboard
# If self.plugboard is None (no transforms), original metadata is returned intact
metadata_x = self._xform_metadata_via_plugboard(metadata, this_book.format)
if isosx:
if lb_added:
lb_added.name.set(metadata_x.title)
@ -2772,10 +2781,9 @@ class ITUNES(DriverBase):
lb_added.enabled.set(True)
lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
lb_added.sort_name.set(metadata_x.title_sort)
lb_added.year.set(metadata_x.pubdate.year)
if db_added:
self.log.warning("   waiting for db_added to become writable ")
time.sleep(1.0)
db_added.name.set(metadata_x.title)
db_added.album.set(metadata_x.title)
db_added.artist.set(authors_to_string(metadata_x.authors))
@ -2784,6 +2792,7 @@ class ITUNES(DriverBase):
db_added.enabled.set(True)
db_added.sort_artist.set(icu_title(metadata_x.author_sort))
db_added.sort_name.set(metadata_x.title_sort)
db_added.year.set(metadata_x.pubdate.year)
if metadata_x.comments:
if lb_added:
@ -2871,6 +2880,7 @@ class ITUNES(DriverBase):
lb_added.Enabled = True
lb_added.SortArtist = icu_title(metadata_x.author_sort)
lb_added.SortName = metadata_x.title_sort
lb_added.Year = metadata_x.pubdate.year
if db_added:
self.log.warning("   waiting for db_added to become writable ")
@ -2883,6 +2893,7 @@ class ITUNES(DriverBase):
db_added.Enabled = True
db_added.SortArtist = icu_title(metadata_x.author_sort)
db_added.SortName = metadata_x.title_sort
db_added.Year = metadata_x.pubdate.year
if metadata_x.comments:
if lb_added:
@ -2981,6 +2992,32 @@ class ITUNES(DriverBase):
db_added.Genre = tag
break
def _wait_for_writable_metadata(self, db_added, delay=2.0):
'''
Ensure iDevice metadata is writable. Direct connect mode only
'''
if DEBUG:
self.log.info(" ITUNES._wait_for_writable_metadata()")
self.log.warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE)
attempts = 9
while attempts:
try:
if isosx:
db_added.bpm.set(0)
elif iswindows:
db_added.BPM = 0
break
except:
attempts -= 1
time.sleep(delay)
if DEBUG:
self.log.warning(" waiting %.1f seconds for iDevice metadata to become writable (attempt #%d)" %
(delay, (10 - attempts)))
else:
if DEBUG:
self.log.error(" failed to write device metadata")
def _xform_metadata_via_plugboard(self, book, format):
''' Transform book metadata from plugboard templates '''
if DEBUG:
@ -3090,6 +3127,7 @@ class ITUNES_ASYNC(ITUNES):
for (i,book) in enumerate(library_books):
format = 'pdf' if library_books[book].kind().startswith('PDF') else 'epub'
this_book = Book(library_books[book].name(), library_books[book].artist())
#this_book.path = library_books[book].location().path
this_book.path = self.path_template % (library_books[book].name(),
library_books[book].artist(),
format)

View File

@ -124,7 +124,7 @@ class BOOX(HANLINV3):
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x322, 0x323]
BCD = [0x322, 0x323, 0x326]
MAIN_MEMORY_VOLUME_LABEL = 'BOOX Internal Memory'
STORAGE_CARD_VOLUME_LABEL = 'BOOX Storage Card'

View File

@ -50,7 +50,8 @@ class THEBOOK(N516):
BCD = [0x399]
MAIN_MEMORY_VOLUME_LABEL = 'The Book Main Memory'
EBOOK_DIR_MAIN = 'My books'
WINDOWS_CARD_A_MEM = '_FILE-STOR_GADGE'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['_FILE-STOR_GADGE',
'FILE-STOR_GADGET']
class LIBREAIR(N516):
name = 'Libre Air Driver'

View File

@ -284,11 +284,11 @@ class KINDLE(USBMS):
class KINDLE2(KINDLE):
name = 'Kindle 2/3 Device Interface'
description = _('Communicate with the Kindle 2/3 eBook reader.')
name = 'Kindle 2/3/4/Touch Device Interface'
description = _('Communicate with the Kindle 2/3/4/Touch eBook reader.')
FORMATS = KINDLE.FORMATS + ['pdf', 'azw4', 'pobi']
DELETE_EXTS = KINDLE.DELETE_EXTS
DELETE_EXTS = KINDLE.DELETE_EXTS + ['.mbp1', '.mbs', '.sdr']
PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100]
@ -347,6 +347,18 @@ class KINDLE2(KINDLE):
if h in path_map:
book.device_collections = list(sorted(path_map[h]))
# Detect if the product family needs .apnx files uploaded to sidecar folder
def post_open_callback(self):
product_id = self.device_being_opened[1]
self.sidecar_apnx = False
if product_id > 0x3:
# Check if we need to put the apnx into a sidecar dir
for _, dirnames, _ in os.walk(self._main_prefix):
for x in dirnames:
if x.endswith('.sdr'):
self.sidecar_apnx = True
return
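# Illustrative example of the layout upload_cover() below produces when
# sidecar_apnx is True (file names are hypothetical):
#   documents/My Book.azw
#   documents/My Book.sdr/My Book.apnx  <- page numbers go in the sidecar folder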
def upload_cover(self, path, filename, metadata, filepath):
'''
Hijacking this function to write the apnx file.
@ -358,6 +370,13 @@ class KINDLE2(KINDLE):
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
return
# Create the sidecar folder if necessary
if (self.sidecar_apnx):
path = os.path.join(os.path.dirname(filepath), filename+".sdr")
if not os.path.exists(path):
os.makedirs(path)
apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder()
try:

View File

@ -25,7 +25,7 @@ class KOBO(USBMS):
gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader')
author = 'Timothy Legge'
version = (1, 0, 11)
version = (1, 0, 12)
dbversion = 0
fwversion = 0
@ -64,22 +64,34 @@ class KOBO(USBMS):
_('Upload Black and White Covers'),
_('Show expired books') +
':::'+_('A bug in an earlier version left non-kepub book records'
' in the datbase. With this option Calibre will show the '
' in the database. With this option Calibre will show the '
'expired records and allow you to delete them with '
'the new delete logic.'),
_('Show Previews') +
':::'+_('Kobo previews are included on the Touch and some other versions.'
' By default they are no longer displayed, as there is no good reason to '
'see them. Enable if you wish to see/delete them.'),
_('Show Recommendations') +
':::'+_('Kobo now shows recommendations on the device. In some cases these have '
'files, but in other cases they are just pointers to the web site to buy. '
'Enable if you wish to see/delete them.'),
]
EXTRA_CUSTOMIZATION_DEFAULT = [
', '.join(['tags']),
True,
True,
True
True,
False,
False
]
OPT_COLLECTIONS = 0
OPT_UPLOAD_COVERS = 1
OPT_UPLOAD_GRAYSCALE_COVERS = 2
OPT_SHOW_EXPIRED_BOOK_RECORDS = 3
OPT_SHOW_PREVIEWS = 4
OPT_SHOW_RECOMMENDATIONS = 5
def initialize(self):
USBMS.initialize(self)
@ -161,6 +173,8 @@ class KOBO(USBMS):
# Label Previews
if accessibility == 6:
playlist_map[lpath].append('Preview')
elif accessibility == 4:
playlist_map[lpath].append('Recommendation')
path = self.normalize_path(path)
# print "Normalized FileName: " + path
@ -241,31 +255,40 @@ class KOBO(USBMS):
debug_print("Database Version: ", self.dbversion)
opts = self.settings()
if self.dbversion >= 16:
if self.dbversion >= 33:
query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, IsDownloaded from content where ' \
'BookID is Null %(previews)s %(recommendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')', \
previews=' and Accessibility <> 6' \
if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \
recommendations=' and IsDownloaded in (\'true\', 1)' \
if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else '')
elif self.dbversion >= 16 and self.dbversion < 33:
query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, "1" as IsDownloaded from content where ' \
'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
elif self.dbversion < 16 and self.dbversion >= 14:
query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility, "1" as IsDownloaded from content where ' \
'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
elif self.dbversion < 14 and self.dbversion >= 8:
query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, "1" as IsDownloaded from content where ' \
'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
else:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null'
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, "1" as IsDownloaded from content where BookID is Null'
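# Summary of the version ladder above (illustrative): columns that must be
# emulated with constant aliases because older databases lack them --
#   dbversion >= 33      : none (IsDownloaded is a real column)
#   16 <= dbversion < 33 : IsDownloaded
#   14 <= dbversion < 16 : Accessibility, IsDownloaded
#   8 <= dbversion < 14  : FavouritesIndex, Accessibility, IsDownloaded
#   dbversion < 8        : ___ExpirationStatus, FavouritesIndex,
#                          Accessibility, IsDownloaded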
try:
cursor.execute (query)
except Exception as e:
err = str(e)
if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or
'Accessibility' in err):
'Accessibility' in err or 'IsDownloaded' in err):
raise
query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, '
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as '
@ -701,6 +724,7 @@ class KOBO(USBMS):
accessibilitylist = {
"Preview":6,
"Recommendation":4,
}
# debug_print('Starting update_device_database_collections', collections_attributes)

View File

@ -239,7 +239,7 @@ class PRST1(USBMS):
if booklists[0] is not None:
self.update_device_database(booklists[0], collections, None)
if booklists[1] is not None:
if len(booklists) > 1 and booklists[1] is not None:
self.update_device_database(booklists[1], collections, 'carda')
USBMS.sync_booklists(self, booklists, end_session=end_session)
@ -286,12 +286,15 @@ class PRST1(USBMS):
query = 'SELECT file_path, _id FROM books'
cursor.execute(query)
except DatabaseError:
raise DeviceError(('The SONY database is corrupted. '
import traceback
tb = traceback.format_exc()
raise DeviceError((('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' and reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)
' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb)
db_books = {}
for i, row in enumerate(cursor):

View File

@ -10,7 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
for a particular device.
'''
import os, re, time, json, uuid, functools
import os, re, time, json, uuid, functools, shutil
from itertools import cycle
from calibre.constants import numeric_version
@ -339,10 +339,13 @@ class USBMS(CLI, Device):
filepath = os.path.splitext(path)[0]
for ext in self.DELETE_EXTS:
if os.path.exists(filepath + ext):
os.unlink(filepath + ext)
if os.path.exists(path + ext):
os.unlink(path + ext)
for x in (filepath, path):
x += ext
if os.path.exists(x):
if os.path.isdir(x):
shutil.rmtree(x, ignore_errors=True)
else:
os.unlink(x)
if self.SUPPORTS_SUB_DIRS:
try:

View File

@ -9,7 +9,7 @@ from various formats.
import traceback, os, re
from cStringIO import StringIO
from calibre import CurrentDir
from calibre import CurrentDir, force_unicode
class ConversionError(Exception):
@ -30,7 +30,8 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx']
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
'textile', 'markdown']
class HTMLRenderer(object):
@ -237,10 +238,10 @@ def generate_masthead(title, output_path=None, width=600, height=60):
img = Image.new('RGB', (width, height), 'white')
draw = ImageDraw.Draw(img)
try:
font = ImageFont.truetype(font_path, 48)
font = ImageFont.truetype(font_path, 48, encoding='unic')
except:
font = ImageFont.truetype(default_font, 48)
text = title.encode('utf-8')
font = ImageFont.truetype(default_font, 48, encoding='unic')
text = force_unicode(title)
text_width, text_height = draw.textsize(text, font=font)
left = max(int((width - text_width)/2.), 0)
top = max(int((height - text_height)/2.), 0)

View File

@ -141,7 +141,7 @@ def add_pipeline_options(parser, plumber):
'insert_blank_line', 'insert_blank_line_size',
'remove_paragraph_spacing',
'remove_paragraph_spacing_indent_size',
'asciiize',
'asciiize', 'keep_ligatures',
]
),

View File

@ -151,7 +151,7 @@ class FB2Output(OutputFormatPlugin):
'A value of "files" turns each file into a separate section; use this if your device is having trouble. '
'A value of "Table of Contents" turns the entries in the Table of Contents into titles and creates sections; '
'if it fails, adjust the "Structure Detection" and/or "Table of Contents" settings '
'(turn on "Force use of auto-generated Table of Contents).')),
'(turn on "Force use of auto-generated Table of Contents").')),
OptionRecommendation(name='fb2_genre',
recommended_value='antique', level=OptionRecommendation.LOW,
choices=FB2_GENRES,

View File

@ -270,7 +270,7 @@ class HTMLInput(InputFormatPlugin):
help=_('Normally this input plugin re-arranges all the input '
'files into a standard folder hierarchy. Only use this option '
'if you know what you are doing as it can result in various '
'nasty side effects in the rest of of the conversion pipeline.'
'nasty side effects in the rest of the conversion pipeline.'
)
),

View File

@ -7,9 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import struct, os
import functools
import re
import struct, os, functools, re
from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
@ -165,15 +163,27 @@ class UnBinary(object):
def __str__(self):
return self.raw
def binary_to_text(self, bin, buf, index=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = is_goingdown = False
state = 'text'
flags = 0
def binary_to_text(self, bin, buf):
stack = [(0, None, None, 0, 0, False, False, 'text', 0)]
self.cpos = 0
while stack:
self.binary_to_text_inner(bin, buf, stack)
del self.cpos
while index < len(bin):
c, index = read_utf8_char(bin, index)
def binary_to_text_inner(self, bin, buf, stack):
(depth, tag_name, current_map, dynamic_tag, errors,
in_censorship, is_goingdown, state, flags) = stack.pop()
if state == 'close tag':
if not tag_name:
raise LitError('Tag ends before it begins.')
buf.write(encode(u''.join(('</', tag_name, '>'))))
dynamic_tag = 0
tag_name = None
state = 'text'
while self.cpos < len(bin):
c, self.cpos = read_utf8_char(bin, self.cpos)
oc = ord(c)
if state == 'text':
@ -223,26 +233,28 @@ class UnBinary(object):
buf.write(encode(tag_name))
elif flags & FLAG_CLOSING:
if depth == 0:
raise LitError('Extra closing tag')
return index
raise LitError('Extra closing tag %s at %d'%(tag_name,
self.cpos))
break
elif state == 'get attr':
in_censorship = False
if oc == 0:
state = 'text'
if not is_goingdown:
tag_name = None
dynamic_tag = 0
buf.write(' />')
else:
buf.write('>')
index = self.binary_to_text(bin, buf, index, depth+1)
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
buf.write(encode(u''.join(('</', tag_name, '>'))))
dynamic_tag = 0
tag_name = None
state = 'text'
frame = (depth, tag_name, current_map,
dynamic_tag, errors, in_censorship, False,
'close tag', flags)
stack.append(frame)
frame = (depth+1, None, None, 0, 0,
False, False, 'text', 0)
stack.append(frame)
break
else:
if oc == 0x8000:
state = 'get attr length'
@ -278,7 +290,7 @@ class UnBinary(object):
state = 'get value'
if oc == 0xffff:
continue
if count < 0 or count > (len(bin) - index):
if count < 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
@ -303,7 +315,7 @@ class UnBinary(object):
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(bin)-index:
if count <= 0 or count > len(bin)-self.cpos:
raise LitError('Invalid character count %d' % count)
dynamic_tag += 1
state = 'get custom'
@ -318,7 +330,7 @@ class UnBinary(object):
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
if count <= 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count)
buf.write(' ')
state = 'get custom attr'
@ -332,7 +344,7 @@ class UnBinary(object):
elif state == 'get href length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
if count <= 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
@ -348,7 +360,6 @@ class UnBinary(object):
path = urlnormalize(path)
buf.write(encode(u'"%s"' % path))
state = 'get attr'
return index
class DirectoryEntry(object):
@ -896,10 +907,3 @@ class LitReader(OEBReader):
Container = LitContainer
DEFAULT_PROFILE = 'MSReader'
try:
import psyco
psyco.bind(read_utf8_char)
psyco.bind(UnBinary.binary_to_text)
except ImportError:
pass
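The binary_to_text changes above replace recursion with an explicit stack, so deeply nested markup no longer exhausts Python's recursion limit (the fix behind the LIT input change in 0.8.33). A minimal sketch of the general technique, illustrative only and using a hypothetical Node type rather than calibre's actual structures:

    class Node(object):
        def __init__(self, name, children=()):
            self.name, self.children = name, list(children)

    def names_depth_first(root):
        # Each stack entry stands in for one recursive call; the loop pops
        # a frame, does the work, and pushes child frames instead of
        # recursing, so depth is bounded by memory, not the interpreter stack.
        stack, out = [root], []
        while stack:
            node = stack.pop()
            out.append(node.name)
            stack.extend(reversed(node.children))  # keep left-to-right order
        return out

Pushing a tuple of parser state, as binary_to_text_inner does, generalizes the same idea to loops that must resume mid-frame.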

View File

@ -1410,19 +1410,22 @@ class MOBIFile(object): # {{{
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], []
image_index = 0
for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums or i in self.huffman_record_nums:
continue
image_index += 1
r = self.records[i]
fmt = None
if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n'):
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
b'AUDI', b'VIDE'}:
try:
width, height, fmt = identify_data(r.raw)
except:
pass
if fmt is not None:
self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt))
self.image_records.append(ImageRecord(image_index, r, fmt))
else:
self.binary_records.append(BinaryRecord(i, r))

View File

@ -502,7 +502,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html)
self.processed_html = re.sub(r'<\s*(/?)\s*o:p[^>]*>', r'', self.processed_html)
# Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
@ -974,12 +974,13 @@ class MobiReader(object):
continue
processed_records.append(i)
data = self.sections[i][0]
image_index += 1
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# A FLIS, FCIS, SRCS or EOF record, ignore
# This record is a known non-image type, no need to try to
# load the image
continue
buf = cStringIO.StringIO(data)
image_index += 1
try:
im = PILImage.open(buf)
im = im.convert('RGB')

View File

@ -4,15 +4,29 @@
###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
Based on code originally written by Peter Sorotkin (epubcfi.js)
Based on code originally written by Peter Sorotkin
http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js
Improvements with respect to that code:
1. Works on all browsers (WebKit, Firefox and IE >= 9)
2. Works for content in elements that are scrollable (i.e. have their own scrollbars)
3. Much more comprehensive testing/error handling
4. Properly encodes/decodes assertions
5. Handles points in the padding of elements consistently
To check if this script is compatible with the current browser, call
window.cfi.is_compatible(); it will throw an exception if not compatible.
Tested on: Firefox 9, IE 9, Chromium 16, Qt WebKit 2.1
###
#
log = (error) ->
log = (error) -> # {{{
if error
if window?.console?.log
window.console.log(error)
else if process?.stdout?.write
process.stdout.write(error + '\n')
# }}}
# CFI escaping {{{
escape_for_cfi = (raw) ->
@ -51,12 +65,137 @@ fstr = (d) -> # {{{
ans
# }}}
get_current_time = (target) -> # {{{
ans = 0
if target.currentTime != undefined
ans = target.currentTime
fstr(ans)
# }}}
window_scroll_pos = (win) -> # {{{
if typeof(win.pageXOffset) == 'number'
x = win.pageXOffset
y = win.pageYOffset
else # IE < 9
if document.body and ( document.body.scrollLeft or document.body.scrollTop )
x = document.body.scrollLeft
y = document.body.scrollTop
else if document.documentElement and ( document.documentElement.scrollLeft or document.documentElement.scrollTop)
y = document.documentElement.scrollTop
x = document.documentElement.scrollLeft
return [x, y]
# }}}
viewport_to_document = (x, y, doc) -> # {{{
win = doc.defaultView
[wx, wy] = window_scroll_pos(win)
x += wx
y += wy
if doc != window.document
# We are in a frame
node = win.frameElement
rect = node.getBoundingClientRect()
[vx, vy] = viewport_to_document(rect.left, rect.top, node.ownerDocument)
x += vx
y += vy
return [x, y]
# }}}
# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
range_has_point = (range, x, y) ->
for rect in range.getClientRects()
if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom)
return true
return false
offset_in_text_node = (node, range, x, y) ->
limits = [0, node.nodeValue.length]
while limits[0] != limits[1]
pivot = Math.floor( (limits[0] + limits[1]) / 2 )
lr = [limits[0], pivot]
rr = [pivot+1, limits[1]]
range.setStart(node, pivot)
range.setEnd(node, pivot+1)
if range_has_point(range, x, y)
return pivot
range.setStart(node, rr[0])
range.setEnd(node, rr[1])
if range_has_point(range, x, y)
limits = rr
continue
range.setStart(node, lr[0])
range.setEnd(node, lr[1])
if range_has_point(range, x, y)
limits = lr
continue
break
return limits[0]
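# Illustrative trace (hypothetical input): for a text node of length 8 with
# the point inside the sixth character, the search window narrows as
# [0, 8] -> [5, 8] -> [5, 6] and the function returns offset 5.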
find_offset_for_point = (x, y, node, cdoc) ->
range = cdoc.createRange()
child = node.firstChild
while child
if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length
range.setStart(child, 0)
range.setEnd(child, child.nodeValue.length)
if range_has_point(range, x, y)
return [child, offset_in_text_node(child, range, x, y)]
child = child.nextSibling
# The point must be after the last bit of text/in the padding/border; we don't know
# how to get a good point in this case
throw "Point (#{x}, #{y}) is in the padding/border of #{node}, so cannot calculate offset"
# }}}
class CanonicalFragmentIdentifier
# This class is a namespace to expose CFI functions via the window.cfi
# object
###
This class is a namespace to expose CFI functions via the window.cfi
object. The three most important functions are:
constructor: () ->
is_compatible(): Throws an error if the browser is not compatible with
this script
at(x, y): which maps a point to a CFI, if possible
scroll_to(cfi): which scrolls the browser to a point corresponding to the
given cfi, and returns the x and y co-ordinates of the point.
###
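# A hypothetical usage sketch (not part of the original file), e.g. from a
# click handler that received an event evt:
#
#   try
#       window.cfi.is_compatible()
#       cfi = window.cfi.at(evt.clientX, evt.clientY)
#       window.cfi.scroll_to(cfi)
#   catch error
#       alert(error)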
constructor: () -> # {{{
this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version."
this.IE_ERR = "Your browser is too old. You need Internet Explorer version 9 or newer."
div = document.createElement('div')
ver = 3
while true
div.innerHTML = "<!--[if gt IE #{ ++ver }]><i></i><![endif]-->"
if div.getElementsByTagName('i').length == 0
break
this.iever = ver
this.isie = ver > 4
# }}}
is_compatible: () -> # {{{
if not window.document.createRange
throw this.CREATE_RANGE_ERR
# Check if Internet Explorer >= 8 as getClientRects returns physical
# rather than logical pixels on older IE
if this.isie and this.iever < 9
# We have IE < 9
throw this.IE_ERR
# }}}
set_current_time: (target, val) -> # {{{
if target.currentTime == undefined
return
if target.readyState == 4 or target.readyState == "complete"
target.currentTime = val
else
fn = -> target.currentTime = val
target.addEventListener("canplay", fn, false)
#}}}
encode: (doc, node, offset, tail) -> # {{{
cfi = tail or ""
@ -64,15 +203,17 @@ class CanonicalFragmentIdentifier
# Handle the offset, if any
switch node.nodeType
when 1 # Element node
if typeoff(offset) == 'number'
if typeof(offset) == 'number'
node = node.childNodes.item(offset)
when 3, 4, 5, 6 # Text/entity/CDATA node
offset or= 0
while true
p = node.previousSibling
if (p?.nodeType not in [3, 4, 5, 6])
if not p or p.nodeType > 8
break
offset += p.nodeValue.length
# log("previous sibling:"+ p + " " + p?.nodeType + " length: " + p?.nodeValue?.length)
if p.nodeType not in [2, 8] and p.nodeValue?.length?
offset += p.nodeValue.length
node = p
cfi = ":" + offset + cfi
else # Not handled
@ -89,12 +230,12 @@ class CanonicalFragmentIdentifier
cfi = "!" + cfi
continue
break
# Increase index by the length of all previous sibling text nodes
# Find position of node in parent
index = 0
child = p.firstChild
while true
index |= 1
if child.nodeType in [1, 7]
index |= 1 # Increment index by 1 if it is even
if child.nodeType == 1
index++
if child == node
break
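# Illustrative example (hypothetical markup): in <p>foo<b>bar</b>baz</p>
# this numbering gives the text node "foo" index 1, the <b> element index 2
# and the text node "baz" index 3, matching EPUB CFI's convention of even
# indices for elements and odd indices for text runs.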
@ -117,8 +258,8 @@ class CanonicalFragmentIdentifier
error = null
node = doc
until cfi.length <= 0 or error
if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step
until cfi.length < 1 or error
if (r = cfi.match(simple_node_regex)) # Path step
target = parseInt(r[1])
assertion = r[2]
if assertion
@ -136,11 +277,18 @@ class CanonicalFragmentIdentifier
error = "No matching child found for CFI: " + cfi
break
index |= 1 # Increment index by 1 if it is even
if child.nodeType in [1, 7] # We have an element or a PI
if child.nodeType == 1
index++
if ( index == target )
if index == target
cfi = cfi.substr(r[0].length)
node = child
if assertion and node.id != assertion
# The found child does not match the id assertion,
# trust the id assertion if an element with that id
# exists
child = doc.getElementById(assertion)
if child
node = child
break
child = child.nextSibling
@ -198,9 +346,12 @@ class CanonicalFragmentIdentifier
next = false
while true
nn = node.nextSibling
if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
if not nn
break
if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata
next = nn
break
node = nn
if not next
if offset > len
error = "Offset out of range: #{ offset }"
@ -223,6 +374,7 @@ class CanonicalFragmentIdentifier
# }}}
at: (x, y, doc=window?.document) -> # {{{
# x, y are in viewport co-ordinates
cdoc = doc
target = null
cwin = cdoc.defaultView
@ -245,29 +397,31 @@ class CanonicalFragmentIdentifier
if not cd
break
x = x + cwin.pageXOffset - target.offsetLeft
y = y + cwin.pageYOffset - target.offsetTop
rect = target.getBoundingClientRect()
x = x - rect.x
y = y - rect.y
cdoc = cd
cwin = cdoc.defaultView
(if target.parentNode then target.parentNode else target).normalize()
if name in ['audio', 'video']
tail = "~" + fstr target.currentTime
tail = "~" + get_current_time(target)
if name in ['img', 'video']
px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth
py = ((y + cwin.scrollY - target.offsetTop)*100)/target.offsetHeight
rect = target.getBoundingClientRect()
px = ((x - rect.left)*100)/target.offsetWidth
py = ((y - rect.top)*100)/target.offsetHeight
tail = "#{ tail }@#{ fstr px },#{ fstr py }"
else if name != 'audio'
if cdoc.caretRangeFromPoint # WebKit
range = cdoc.caretRangeFromPoint(x, y)
if range
target = range.startContainer
offset = range.startOffset
# Get the text offset
# We use a custom function instead of caretRangeFromPoint as
# caretRangeFromPoint does weird things when the point falls in the
# padding of the element
if cdoc.createRange
[target, offset] = find_offset_for_point(x, y, target, cdoc)
else
# TODO: implement a span bisection algorithm for UAs
# without caretRangeFromPoint (Gecko, IE)
throw this.CREATE_RANGE_ERR
this.encode(doc, target, offset, tail)
# }}}
@ -285,52 +439,122 @@ class CanonicalFragmentIdentifier
nwin = ndoc.defaultView
x = null
y = null
range = null
if typeof(r.offset) == "number"
# Character offset
if not ndoc.createRange
throw this.CREATE_RANGE_ERR
range = ndoc.createRange()
if r.forward
try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
else
try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
k = 0
a = null
rects = null
node_len = node.nodeValue.length
until rects or rects.length or k >= try_list.length
t = try_list[k++]
start_offset = r.offset + t.start
end_offset = r.offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
offset = r.offset
for i in [0, 1]
# Try reducing the offset by 1 if we get no match, as an offset that refers to the position after the
# last character won't get a match with getClientRects
offset = r.offset - i
if offset < 0
offset = 0
k = 0
until rects?.length or k >= try_list.length
t = try_list[k++]
start_offset = offset + t.start
end_offset = offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
if rects?.length
break
if not rects or not rects.length
if not rects?.length
log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
return null
rect = rects[0]
x = (a*rect.left + (1-a)*rect.right)
y = (rect.top + rect.bottom)/2
else
x = node.offsetLeft - nwin.scrollX
y = node.offsetTop - nwin.scrollY
if typeof(r.x) == "number" and node.offsetWidth
x += (r.x*node.offsetWidth)/100
y += (r.y*node.offsetHeight)/100
[x, y] = [r.x, r.y]
until ndoc == doc
node = nwin.frameElement
{x:x, y:y, node:r.node, time:r.time, range:range, a:a}
# }}}
scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{
point = this.point(cfi, doc)
if not point
log("No point found for cfi: #{ cfi }")
return
if typeof point.time == 'number'
this.set_current_time(point.node, point.time)
if point.range != null
# Character offset
r = point.range
[so, eo, sc, ec] = [r.startOffset, r.endOffset, r.startContainer, r.endContainer]
node = r.startContainer
ndoc = node.ownerDocument
nwin = ndoc.defaultView
x += node.offsetLeft - nwin.scrollX
y += node.offsetTop - nwin.scrollY
span = ndoc.createElement('span')
span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0')
r.surroundContents(span)
span.scrollIntoView()
fn = ->
# Remove the span and get the new position now that scrolling
# has (hopefully) completed
#
# In WebKit, the boundingrect of the span is wrong in some
# situations, whereas in IE resetting the range causes it to
# lose bounding info. So we use the range's rects unless they
# are absent, in which case we use the span's rect
#
rect = span.getBoundingClientRect()
{x:x, y:y, node:r.node, time:r.time}
# Remove the span we inserted
p = span.parentNode
for node in span.childNodes
span.removeChild(node)
p.insertBefore(node, span)
p.removeChild(span)
p.normalize()
# Reset the range to what it was before the span was added
r.setStart(sc, so)
r.setEnd(ec, eo)
rects = r.getClientRects()
if rects.length > 0
rect = rects[0]
x = (point.a*rect.left + (1-point.a)*rect.right)
y = (rect.top + rect.bottom)/2
[x, y] = viewport_to_document(x, y, ndoc)
if callback
callback(x, y)
else
node = point.node
nwin = node.ownerDocument.defaultView
node.scrollIntoView()
fn = ->
r = node.getBoundingClientRect()
[x, y] = viewport_to_document(r.left, r.top, node.ownerDocument)
if typeof(point.x) == 'number' and node.offsetWidth
x += (point.x*node.offsetWidth)/100
if typeof(point.y) == 'number' and node.offsetHeight
y += (point.y*node.offsetHeight)/100
scrollTo(x, y)
if callback
callback(x, y)
setTimeout(fn, 10)
null
# }}}
if window?

View File

@ -0,0 +1,73 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
log = (error) ->
if error
if window?.console?.log
window.console.log(error)
else if process?.stdout?.write
process.stdout.write(error + '\n')
show_cfi = () ->
if window.current_cfi
fn = (x, y) ->
ms = document.getElementById("marker").style
ms.display = 'block'
ms.top = y - 30 + 'px'
ms.left = x - 1 + 'px'
window.cfi.scroll_to(window.current_cfi, fn)
null
window_ypos = (pos=null) ->
if pos == null
return window.pageYOffset
window.scrollTo(0, pos)
mark_and_reload = (evt) ->
# Remove the image in case the click was on the image itself; we want the cfi to
# be on the underlying element
ms = document.getElementById("marker")
if ms
ms.parentNode?.removeChild(ms)
fn = () ->
try
window.current_cfi = window.cfi.at(evt.clientX, evt.clientY)
catch err
alert("Failed to calculate cfi: #{ err }")
return
if window.current_cfi
epubcfi = "epubcfi(#{ window.current_cfi })"
ypos = window_ypos()
newloc = window.location.href.replace(/#.*$/, '') + "#" + ypos + epubcfi
window.location.replace(newloc)
window.location.reload()
setTimeout(fn, 1)
null
window.onload = ->
try
window.cfi.is_compatible()
catch error
alert(error)
return
document.onclick = mark_and_reload
r = location.hash.match(/#(\d*)epubcfi\((.+)\)$/)
if r
window.current_cfi = r[2]
ypos = if r[1] then parseInt(r[1], 10) else 0
base = document.getElementById('first-h1').innerHTML
document.title = base + ": " + window.current_cfi
fn = () ->
show_cfi()
window_ypos(ypos)
setTimeout(fn, 100)
null

View File

@ -0,0 +1,122 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing EPUB CFI</title>
<script type="text/javascript" src="cfi.coffee"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
<style type="text/css">
body {
font-family: sans-serif;
background-color: white;
}
h1, h2 { color: #005a9c }
h2 {
border-top: solid 2px #005a9c;
margin-top: 4ex;
}
#container {
max-width: 30em;
margin-right: auto;
margin-left: 2em;
position:relative;
}
#overflow {
max-height: 100px;
overflow: scroll;
border: solid 1px black;
padding: 2em;
}
#whitespace {
border: 20px solid gray;
margin: 20px;
padding: 20px;
}
#reset {
color: blue;
text-decoration: none
}
#reset:hover { color: red }
</style>
</head>
<body>
<div id="container">
<h1 id="first-h1">Testing EPUB CFI</h1>
<p><a id="reset" href="/">Reset CFI to None</a></p>
<h2>A div with scrollbars</h2>
<p>Scroll down and click on some elements. Make sure to hit both
bold and not bold text as well as different points on the image</p>
<div id="overflow">But I must explain to you how all this mistaken
idea of denouncing pleasure and praising pain was born and I
will give you a complete account of the system, and expound the
actual teachings of the great explorer of the truth, the
master-builder of human happiness. No one rejects, dislikes, or
avoids pleasure itself, because it is pleasure, but because
those who do not know how to pursue pleasure rationally
encounter consequences that are extremely painful. Nor again is
there anyone who <b>loves or pursues or desires</b> to obtain
pain of itself, because it is pain, but because occasionally
circumstances occur in which toil and pain can procure him some
great pleasure. To take a trivial example, which of us ever
undertakes laborious physical exercise, except to obtain some
advantage from it? But who has any right to find fault with a
man who chooses to enjoy a pleasure that has no annoying
consequences, or one who avoids a pain that produces no
resultant pleasure? On the other hand, we denounce with
righteous indignation and dislike men who are so beguiled and
demoralized by the charms of pleasure of the moment, so blinded
by desire, that they cannot foresee
<img src="marker.png" width="150" height="200" alt="Test Image" style="border: solid 1px black; display:block"/>
</div>
<h2>Some entities and comments</h2>
<p>Entities: &amp; &copy; &sect; &gt; some text after entities</p>
<p>An invisible Comment: <!-- aaaaaa --> followed by some text</p>
<p>An invalid (in HTML) CDATA: <![CDATA[CDATA]]> followed by some text</p>
<h2>Margins padding borders</h2>
<p>Try clicking in the margins, borders and padding. CFI
calculation should fail.</p>
<p id="whitespace">But I must explain to you how all this mistaken
idea of denouncing pleasure and praising pain was born and I will
give you a complete account of the system, and expound the actual
teachings of the great explorer of the truth, the master-builder of
human happiness. No one rejects, dislikes, or avoids pleasure
itself, because it is pleasure, but because those who do not know
how to pursue pleasure rationally encounter consequences that are
extremely painful. Nor again is there anyone who <b>loves or
pursues or desires</b> to obtain pain of itself, because it is
pain, but because occasionally circumstances occur in which toil
and pain can procure him some great pleasure. To take a trivial
example, which of us ever undertakes laborious physical exercise,
except to obtain some advantage from it? But who has any right to
find fault with a man who chooses to enjoy a pleasure that has no
annoying consequences, or one who avoids a pain that produces no
resultant pleasure? On the other hand, we denounce with righteous
indignation and dislike men who are so beguiled and demoralized by
the charms of pleasure of the moment, so blinded by desire, that
they cannot foresee</p>
<h2>Lots of collapsed whitespace</h2>
<p>Try clicking the A character after the colon: A suffix</p>
<h2>Lots of nested/sibling tags</h2>
<p>A <span>bunch of <span>nested<span> and</span> <span>sibling</span>
tags, all </span> mixed together</span>. <span>Click all</span>
over this paragraph to test<span> things.</span></p>
<h2>Images</h2>
<p>Try clicking at different points along the image. Also try changing the magnification and then hitting reload.</p>
<img src="marker.png" width="150" height="200" alt="Test Image" style="border: solid 1px black"/>
</div>
<img id="marker" style="position: absolute; display:none; z-index:10" src="marker.png" alt="Marker" />
</body>
</html>

Binary file not shown.


View File

@ -10,16 +10,15 @@ __docformat__ = 'restructuredtext en'
import os
try:
from calibre.utils.coffeescript import serve
from calibre.utils.serve_coffee import serve
except ImportError:
import init_calibre
if False: init_calibre, serve
from calibre.utils.coffeescript import serve
from calibre.utils.serve_coffee import serve
def run_devel_server():
os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve()
serve(resources={'cfi.coffee':'../cfi.coffee', '/':'index.html'})
if __name__ == '__main__':
run_devel_server()

View File

@ -1,24 +0,0 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
viewport_top = (node) ->
$(node).offset().top - window.pageYOffset
viewport_left = (node) ->
$(node).offset().left - window.pageXOffset
window.onload = ->
h1 = document.getElementsByTagName('h1')[0]
x = h1.scrollLeft + 150
y = viewport_top(h1) + h1.offsetHeight/2
e = document.elementFromPoint x, y
if e.getAttribute('id') != 'first-h1'
alert 'Failed to find top h1'
return
alert window.cfi.at x, y

View File

@ -1,14 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing CFI functionality</title>
<script type="text/javascript" src="../cfi.coffee"></script>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
</head>
<body>
<h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
</body>
</html>

View File

@ -70,9 +70,29 @@ def clone_element(elem, nsmap={}, in_context=True):
nelem.extend(elem)
return nelem
def html5_parse(data):
def node_depth(node):
ans = 0
p = node.getparent()
while p is not None:
ans += 1
p = p.getparent()
return ans
def html5_parse(data, max_nesting_depth=100):
import html5lib
# html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195
data = re.sub(r'<\s*title\s*[^>]*/\s*>', '<title></title>', data)
data = html5lib.parse(data, treebuilder='lxml').getroot()
# Check that the asinine HTML 5 algorithm did not result in a tree with
# insane nesting depths
for x in data.iterdescendants():
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
depth = node_depth(x)
if depth > max_nesting_depth:
raise ValueError('html5lib resulted in a tree with nesting'
' depth > %d'%max_nesting_depth)
# Set lang correctly
xl = data.attrib.pop('xmlU0003Alang', None)
if xl is not None and 'lang' not in data.attrib:
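
A minimal standalone sketch of the depth guard introduced above (Python 2 era to match the diff; the sample document and names here are illustrative, not calibre code):

from lxml import etree

def node_depth(node):
    # Count ancestors between this node and the root.
    ans = 0
    p = node.getparent()
    while p is not None:
        ans += 1
        p = p.getparent()
    return ans

def check_depth(root, max_nesting_depth=100):
    # Only leaf elements need checking: any over-deep branch ends in one.
    for x in root.iterdescendants():
        if isinstance(x.tag, basestring) and len(x) == 0:
            if node_depth(x) > max_nesting_depth:
                raise ValueError('tree has nesting depth > %d'
                        % max_nesting_depth)

doc = '<html><body>' + '<div>'*200 + 'deep' + '</div>'*200 + '</body></html>'
check_depth(etree.fromstring(doc))  # raises ValueError: depth 201 > 100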

View File

@ -175,13 +175,27 @@ class OEBReader(object):
manifest = self.oeb.manifest
known = set(manifest.hrefs)
unchecked = set(manifest.values())
cdoc = OEB_DOCS|OEB_STYLES
invalid = set()
while unchecked:
new = set()
for item in unchecked:
data = None
if (item.media_type in cdoc or
item.media_type[-4:] in ('/xml', '+xml')):
try:
data = item.data
except:
self.oeb.log.exception(u'Failed to read from manifest '
u'entry with id: %s, ignoring'%item.id)
invalid.add(item)
continue
if data is None:
continue
if (item.media_type in OEB_DOCS or
item.media_type[-4:] in ('/xml', '+xml')) and \
item.data is not None:
hrefs = [r[2] for r in iterlinks(item.data)]
item.media_type[-4:] in ('/xml', '+xml')):
hrefs = [r[2] for r in iterlinks(data)]
for href in hrefs:
href, _ = urldefrag(href)
if not href:
@ -197,7 +211,7 @@ class OEBReader(object):
new.add(href)
elif item.media_type in OEB_STYLES:
try:
urls = list(cssutils.getUrls(item.data))
urls = list(cssutils.getUrls(data))
except:
urls = []
for url in urls:
@ -231,6 +245,9 @@ class OEBReader(object):
added = manifest.add(id, href, media_type)
unchecked.add(added)
for item in invalid:
self.oeb.manifest.remove(item)
def _manifest_from_opf(self, opf):
manifest = self.oeb.manifest
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
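
The hunk above uses a collect-then-remove pattern: entries whose data cannot be read are gathered into an invalid set during the scan and pruned only after the loop finishes, so the manifest is never mutated mid-iteration. The same pattern reduced to plain Python (illustrative names only):

manifest = {'a.html': '<p>ok</p>', 'broken.xml': None}

invalid = set()
for name in manifest:
    data = manifest[name]
    if data is None:  # stands in for "reading item.data raised"
        invalid.add(name)
        continue
    # ... follow links in data, schedule newly discovered items, etc. ...

for name in invalid:  # safe: the scan above has finished
    del manifest[name]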

View File

@ -25,31 +25,30 @@ class PDFOutput(OutputFormatPlugin):
file_type = 'pdf'
options = set([
OptionRecommendation(name='unit', recommended_value='inch',
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
help=_('The unit of measure. Default is inch. Choices '
'are %s '
'Note: This does not override the unit for margins!') % UNITS.keys()),
OptionRecommendation(name='paper_size', recommended_value='letter',
level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
help=_('The size of the paper. This size will be overridden when an '
'output profile is used. Default is letter. Choices '
'are %s') % PAPER_SIZES.keys()),
OptionRecommendation(name='custom_size', recommended_value=None,
help=_('Custom size of the document. Use the form widthxheight '
'EG. `123x321` to specify the width and height. '
'This overrides any specified paper-size.')),
OptionRecommendation(name='orientation', recommended_value='portrait',
level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
help=_('The orientation of the page. Default is portrait. Choices '
'are %s') % ORIENTATIONS.keys()),
OptionRecommendation(name='preserve_cover_aspect_ratio',
recommended_value=False,
help=_('Preserve the aspect ratio of the cover, instead'
' of stretching it to fill the full first page of the'
' generated pdf.')
),
])
OptionRecommendation(name='unit', recommended_value='inch',
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
help=_('The unit of measure. Default is inch. Choices '
'are %s '
'Note: This does not override the unit for margins!') % UNITS.keys()),
OptionRecommendation(name='paper_size', recommended_value='letter',
level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
help=_('The size of the paper. This size will be overridden when a '
'non default output profile is used. Default is letter. Choices '
'are %s') % PAPER_SIZES.keys()),
OptionRecommendation(name='custom_size', recommended_value=None,
help=_('Custom size of the document. Use the form widthxheight '
'EG. `123x321` to specify the width and height. '
'This overrides any specified paper-size.')),
OptionRecommendation(name='orientation', recommended_value='portrait',
level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
help=_('The orientation of the page. Default is portrait. Choices '
'are %s') % ORIENTATIONS.keys()),
OptionRecommendation(name='preserve_cover_aspect_ratio',
recommended_value=False,
help=_('Preserve the aspect ratio of the cover, instead'
' of stretching it to fill the full first page of the'
' generated pdf.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.oeb = oeb_book

View File

@ -48,7 +48,7 @@ def get_pdf_printer(opts, for_comic=False):
custom_size = get_custom_size(opts)
if opts.output_profile.short_name == 'default' or \
opts.output_profile.width > 10000:
opts.output_profile.width > 9999:
if custom_size is None:
printer.setPaperSize(paper_size(opts.paper_size))
else:

View File

@ -47,7 +47,8 @@ class PMLInput(InputFormatPlugin):
self.log.debug('Converting PML to HTML...')
hizer = PML_HTMLizer()
html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
html = '<html><head><title></title></head><body>%s</body></html>'%html
html_stream.write(html.encode('utf-8', 'replace'))
if pclose:
pml_stream.close()
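
The fix above builds the whole document as unicode and encodes it once at the output boundary, instead of interpolating pre-encoded bytes into the template; it also replaces the self-closing <title /> that tripped up downstream parsing. The difference, reduced to a sketch (Python 2, as in the diff; the sample string is illustrative):

html = u'<p>caf\xe9</p>'

# Old shape: a self-closing <title /> plus bytes interpolated into the
# template, so the document is assembled from mixed pieces.
old = '<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace')

# New shape: build the full unicode document, then encode once on write.
new = u'<html><head><title></title></head><body>%s</body></html>' % html
payload = new.encode('utf-8', 'replace')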

View File

@ -80,7 +80,7 @@ class PML_HTMLizer(object):
'b': ('<span style="font-weight: bold;">', '</span>'),
'l': ('<span style="font-size: 150%;">', '</span>'),
'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><small><a href="#rfn-%s">return</a></small></div>'),
'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
}
@ -143,7 +143,7 @@ class PML_HTMLizer(object):
'd',
'b',
]
NEW_LINE_EXCHANGE_STATES = {
'h1': 'h1c',
}
@ -230,7 +230,7 @@ class PML_HTMLizer(object):
div = []
span = []
other = []
for key, val in state.items():
if key in self.NEW_LINE_EXCHANGE_STATES and val[0]:
state[self.NEW_LINE_EXCHANGE_STATES[key]] = val
@ -644,7 +644,7 @@ class PML_HTMLizer(object):
empty_count = 0
text = self.end_line()
parsed.append(text)
# Basic indent will be set if the \t starts the line or
# if we are in a continuing \t block.
if basic_indent:
@ -666,7 +666,7 @@ class PML_HTMLizer(object):
parsed.append(self.STATES_TAGS['T'][1])
indent_state['T'] = False
adv_indent_val = ''
output.append(u''.join(parsed))
line.close()
@ -677,7 +677,7 @@ class PML_HTMLizer(object):
def get_toc(self):
'''
Toc can have up to 5 levels, 0 - 4 inclusive.
This function will add items to their appropriate
depth in the TOC tree. If the specified depth is
invalid (item would not have a valid parent) add

View File

@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
name = 'TXT Input'
author = 'John Schember'
description = 'Convert TXT files to HTML'
file_types = set(['txt', 'txtz', 'text'])
file_types = set(['txt', 'txtz', 'text', 'md', 'textile', 'markdown'])
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',
@ -77,6 +77,11 @@ class TXTInput(InputFormatPlugin):
txt += tf.read() + '\n\n'
else:
txt = stream.read()
if file_ext in {'md', 'textile', 'markdown'}:
options.formatting_type = {'md': 'markdown'}.get(file_ext, file_ext)
log.info('File extension indicates particular formatting. '
'Forcing formatting type to: %s'%options.formatting_type)
options.paragraph_type = 'off'
# Get the encoding of the document.
if options.input_encoding:
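
The extension check above normalizes 'md' to 'markdown' with a one-entry dict lookup, letting every other extension pass through unchanged; a compact way to map many keys to themselves with few exceptions. In miniature:

def formatting_for(ext):
    # 'md' is the only extension whose formatting name differs.
    return {'md': 'markdown'}.get(ext, ext)

assert formatting_for('md') == 'markdown'
assert formatting_for('textile') == 'textile'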

View File

@ -132,7 +132,7 @@ def _config(): # {{{
c.add_opt('LRF_ebook_viewer_options', default=None,
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB'],
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
help=_('Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))

View File

@ -91,7 +91,7 @@ class AddAction(InterfaceAction):
_('Are you sure'),
_('Are you sure you want to add the same'
' files to all %d books? If the format'
'already exists for a book, it will be replaced.')%len(ids)):
' already exists for a book, it will be replaced.')%len(ids)):
return
books = choose_files(self.gui, 'add formats dialog dir',

View File

@ -175,7 +175,6 @@ class CopyToLibraryAction(InterfaceAction):
return error_dialog(self.gui, _('No library'),
_('No library found at %s')%loc, show=True)
self.pd = ProgressDialog(_('Copying'), min=0, max=len(ids)-1,
parent=self.gui, cancelable=False)

View File

@ -10,12 +10,13 @@ import os
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.tweak_epub import TweakEpub
from calibre.utils.config import tweaks
class TweakEpubAction(InterfaceAction):
name = 'Tweak ePub'
action_spec = (_('Tweak ePub'), 'trim.png',
_('Make small changes to ePub format books'),
action_spec = (_('Tweak Book'), 'trim.png',
_('Make small changes to ePub or HTMLZ format books'),
_('T'))
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
@ -26,33 +27,48 @@ class TweakEpubAction(InterfaceAction):
def edit_epub_in_situ(self, *args):
row = self.gui.library_view.currentIndex()
if not row.isValid():
return error_dialog(self.gui, _('Cannot tweak ePub'),
return error_dialog(self.gui, _('Cannot tweak Book'),
_('No book selected'), show=True)
# Confirm 'EPUB' in formats
book_id = self.gui.library_view.model().id(row)
# Confirm 'EPUB' in formats
try:
path_to_epub = self.gui.library_view.model().db.format(
book_id, 'EPUB', index_is_id=True, as_path=True)
except:
path_to_epub = None
if not path_to_epub:
return error_dialog(self.gui, _('Cannot tweak ePub'),
_('No ePub available. First convert the book to ePub.'),
# Confirm 'HTMLZ' in formats
try:
path_to_htmlz = self.gui.library_view.model().db.format(
book_id, 'HTMLZ', index_is_id=True, as_path=True)
except:
path_to_htmlz = None
if not path_to_epub and not path_to_htmlz:
return error_dialog(self.gui, _('Cannot tweak Book'),
_('The book must be in ePub or HTMLZ format to tweak.'
'\n\nFirst convert the book to ePub or HTMLZ.'),
show=True)
# Launch modal dialog waiting for user to tweak or cancel
dlg = TweakEpub(self.gui, path_to_epub)
if tweaks['tweak_book_prefer'] == 'htmlz':
path_to_book = path_to_htmlz or path_to_epub
else:
path_to_book = path_to_epub or path_to_htmlz
dlg = TweakEpub(self.gui, path_to_book)
if dlg.exec_() == dlg.Accepted:
self.update_db(book_id, dlg._output)
dlg.cleanup()
os.remove(path_to_epub)
os.remove(path_to_book)
def update_db(self, book_id, rebuilt):
'''
Update the calibre db with the tweaked epub
'''
self.gui.library_view.model().db.add_format(book_id, 'EPUB',
fmt = os.path.splitext(rebuilt)[1][1:].upper()
self.gui.library_view.model().db.add_format(book_id, fmt,
open(rebuilt, 'rb'), index_is_id=True)

View File

@ -14,7 +14,7 @@ from calibre.constants import isosx
from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
open_local_file, info_dialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.utils.config import prefs
from calibre.utils.config import prefs, tweaks
from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2.actions import InterfaceAction
@ -239,6 +239,7 @@ class ViewAction(InterfaceAction):
def update_history(self, views, remove=frozenset()):
db = self.gui.current_db
vh = tweaks['gui_view_history_size']
if views:
seen = set()
history = []
@ -247,12 +248,12 @@ class ViewAction(InterfaceAction):
seen.add(title)
history.append((id_, title))
db.prefs['gui_view_history'] = history[:10]
db.prefs['gui_view_history'] = history[:vh]
self.build_menus(db)
if remove:
history = db.prefs.get('gui_view_history', [])
history = [x for x in history if x[0] not in remove]
db.prefs['gui_view_history'] = history[:10]
db.prefs['gui_view_history'] = history[:vh]
self.build_menus(db)
def _view_books(self, rows):
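
With this change the history length comes from the gui_view_history_size tweak instead of the hardcoded 10, so it can be adjusted under Preferences->Tweaks with an entry like the following (the value shown is illustrative, not the shipped default):

gui_view_history_size = 15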

View File

@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation
from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
from calibre.utils.config import prefs
from calibre import prints
from calibre import prints, force_unicode, as_unicode
single_shot = partial(QTimer.singleShot, 75)
@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{
if self.canceled:
return
self.update.emit(
_('Searching in')+' '+dirpath[0])
_('Searching in')+' '+force_unicode(dirpath[0],
filesystem_encoding))
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{
except Exception as err:
import traceback
traceback.print_exc()
try:
msg = unicode(err)
except:
msg = repr(err)
msg = as_unicode(err)
self.found.emit(msg)
return

View File

@ -43,6 +43,9 @@ class Widget(QWidget):
ICON = I('config.png')
HELP = ''
COMMIT_NAME = None
# If True, leading and trailing spaces are removed from line and text edit
# fields
STRIP_TEXT_FIELDS = True
changed_signal = pyqtSignal()
set_help = pyqtSignal(object)
@ -77,7 +80,6 @@ class Widget(QWidget):
self._options, only_existing=True)
defaults.update(specifics)
self.apply_recommendations(defaults)
self.setup_help(get_help)
@ -124,7 +126,6 @@ class Widget(QWidget):
if name in getattr(recs, 'disabled_options', []):
gui_opt.setDisabled(True)
def get_value(self, g):
from calibre.gui2.convert.xpath_wizard import XPathEdit
from calibre.gui2.convert.regex_builder import RegexEdit
@ -136,7 +137,9 @@ class Widget(QWidget):
return g.value()
elif isinstance(g, (QLineEdit, QTextEdit)):
func = getattr(g, 'toPlainText', getattr(g, 'text', None))()
ans = unicode(func).strip()
ans = unicode(func)
if self.STRIP_TEXT_FIELDS:
ans = ans.strip()
if not ans:
ans = None
return ans
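
STRIP_TEXT_FIELDS turns stripping into a per-class switch: subclasses whose fields treat whitespace as significant opt out by overriding one attribute, exactly as the search and replace widget does a few files below. The mechanism in miniature (illustrative classes, not the real Widget):

class Widget(object):
    # If True, leading and trailing spaces are removed from text fields.
    STRIP_TEXT_FIELDS = True

    def get_value(self, text):
        return text.strip() if self.STRIP_TEXT_FIELDS else text

class SearchAndReplaceWidget(Widget):
    STRIP_TEXT_FIELDS = False  # spaces in replace fields are significant

assert Widget().get_value('  a  ') == 'a'
assert SearchAndReplaceWidget().get_value('  a  ') == '  a  '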

View File

@ -18,14 +18,14 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/pdf.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['paper_size',
Widget.__init__(self, parent, ['paper_size', 'custom_size',
'orientation', 'preserve_cover_aspect_ratio'])
self.db, self.book_id = db, book_id
for x in get_option('paper_size').option.choices:
self.opt_paper_size.addItem(x)
for x in get_option('orientation').option.choices:
self.opt_orientation.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -40,7 +40,7 @@
<item row="1" column="1">
<widget class="QComboBox" name="opt_orientation"/>
</item>
<item row="3" column="0">
<item row="4" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -53,13 +53,26 @@
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_preserve_cover_aspect_ratio">
<property name="text">
<string>Preserve &amp;aspect ratio of cover</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>&amp;Custom size:</string>
</property>
<property name="buddy">
<cstring>opt_custom_size</cstring>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="opt_custom_size"/>
</item>
</layout>
</widget>
<resources/>

View File

@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en'
import re
from PyQt4.Qt import QLineEdit, QTextEdit
from calibre.gui2.convert.search_and_replace_ui import Ui_Form
from calibre.gui2.convert import Widget
from calibre.gui2 import error_dialog
@ -18,6 +16,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
HELP = _('Modify the document text and structure using user defined patterns.')
COMMIT_NAME = 'search_and_replace'
ICON = I('search.png')
STRIP_TEXT_FIELDS = False
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
@ -74,13 +73,5 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
_('Invalid regular expression: %s')%err, show=True)
return False
return True
def get_vaule(self, g):
if isinstance(g, (QLineEdit, QTextEdit)):
func = getattr(g, 'toPlainText', getattr(g, 'text', None))()
ans = unicode(func)
if not ans:
ans = None
return ans
else:
return Widget.get_value(self, g)

View File

@ -751,7 +751,7 @@ class DeviceMixin(object): # {{{
error_dialog(self, _('Error talking to device'),
_('There was a temporary error talking to the '
'device. Please unplug and reconnect the device '
'and or reboot.')).show()
'or reboot.')).show()
return
except:
pass

View File

@ -847,7 +847,7 @@ Future conversion of these books will use the default settings.</string>
</sizepolicy>
</property>
<property name="toolTip">
<string>Enter the what you are looking for, either plain text or a regular expression, depending on the mode</string>
<string>Enter what you are looking for, either plain text or a regular expression, depending on the mode</string>
</property>
</widget>
</item>

View File

@ -118,7 +118,7 @@
<item row="0" column="3">
<widget class="QToolButton" name="rename_category_button">
<property name="toolTip">
<string>Rename the current category to the what is in the box</string>
<string>Rename the current category to what is in the box</string>
</property>
<property name="text">
<string>...</string>

View File

@ -77,7 +77,7 @@
<cstring>template_value</cstring>
</property>
<property name="toolTip">
<string>The value the of the template using the current book in the library view</string>
<string>The value of the template using the current book in the library view</string>
</property>
</widget>
</item>

View File

@ -7,6 +7,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, shutil
from itertools import repeat, izip
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
from PyQt4.Qt import QDialog
@ -30,9 +31,20 @@ class TweakEpub(QDialog, Ui_Dialog):
self._epub = epub
self._exploded = None
self._output = None
self.ishtmlz = epub.lower().endswith('.htmlz')
self.rebuilt_name = 'rebuilt.' + ('htmlz' if self.ishtmlz else 'epub')
# Run the dialog setup generated from tweak_epub.ui
self.setupUi(self)
for x, props in [(self, ['windowTitle']), (self.label, ['text'])]+\
list(izip([self.cancel_button, self.explode_button,
self.rebuild_button, self.preview_button],
repeat(['text', 'statusTip', 'toolTip']))):
for prop in props:
val = unicode(getattr(x, prop)())
val = val.format('HTMLZ' if self.ishtmlz else 'ePub')
prop = 'set' + prop[0].upper() + prop[1:]
getattr(x, prop)(val)
self.cancel_button.clicked.connect(self.reject)
self.explode_button.clicked.connect(self.explode)
@ -83,9 +95,11 @@ class TweakEpub(QDialog, Ui_Dialog):
def do_rebuild(self, src):
with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf:
# Write mimetype
zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED)
mt = os.path.join(self._exploded, 'mimetype')
if os.path.exists(mt):
zf.write(mt, 'mimetype', compress_type=ZIP_STORED)
# Write everything else
exclude_files = ['.DS_Store','mimetype','iTunesMetadata.plist','rebuilt.epub']
exclude_files = ['.DS_Store','mimetype','iTunesMetadata.plist',self.rebuilt_name]
for root, dirs, files in os.walk(self._exploded):
for fn in files:
if fn in exclude_files:
@ -97,11 +111,11 @@ class TweakEpub(QDialog, Ui_Dialog):
def preview(self):
if not self._exploded:
return error_dialog(self, _('Cannot preview'),
_('You must first explode the epub before previewing.'),
show=True)
msg = _('You must first explode the %s before previewing.')
msg = msg%('HTMLZ' if self.ishtmlz else 'ePub')
return error_dialog(self, _('Cannot preview'), msg, show=True)
tf = PersistentTemporaryFile('.epub')
tf = PersistentTemporaryFile('.htmlz' if self.ishtmlz else '.epub')
tf.close()
self._preview_files.append(tf.name)
@ -110,7 +124,7 @@ class TweakEpub(QDialog, Ui_Dialog):
self.gui.iactions['View']._view_file(tf.name)
def rebuild(self, *args):
self._output = os.path.join(self._exploded, 'rebuilt.epub')
self._output = os.path.join(self._exploded, self.rebuilt_name)
self.do_rebuild(self._output)
return QDialog.accept(self)
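
The loop added to __init__ above rewrites each widget property by reading its value, substituting the format name into the '{0}' placeholder, and writing it back through the setter derived from the property name. The same reflection trick against a stand-in widget (Python 2 style to match the diff; FakeButton is hypothetical):

class FakeButton(object):
    # Stand-in for a Qt widget exposing text()/setText().
    def __init__(self):
        self._text = u'&Explode {0}'
    def text(self):
        return self._text
    def setText(self, val):
        self._text = val

button, ishtmlz = FakeButton(), True
for prop in ('text',):
    val = unicode(getattr(button, prop)())
    setter = 'set' + prop[0].upper() + prop[1:]  # 'text' -> 'setText'
    getattr(button, setter)(val.format('HTMLZ' if ishtmlz else 'ePub'))
assert button.text() == u'&Explode HTMLZ'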

View File

@ -14,7 +14,7 @@
</rect>
</property>
<property name="windowTitle">
<string>Tweak ePub</string>
<string>Tweak {0}</string>
</property>
<property name="sizeGripEnabled">
<bool>false</bool>
@ -26,7 +26,7 @@
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
<string>&lt;p&gt;Explode the {0} to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the ePub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
</property>
<property name="wordWrap">
<bool>true</bool>
@ -35,11 +35,14 @@
</item>
<item row="1" column="0">
<widget class="QPushButton" name="explode_button">
<property name="toolTip">
<string>Display contents of exploded {0}</string>
</property>
<property name="statusTip">
<string>Display contents of exploded ePub</string>
<string>Display contents of exploded {0}</string>
</property>
<property name="text">
<string>&amp;Explode ePub</string>
<string>&amp;Explode {0}</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
@ -49,6 +52,9 @@
</item>
<item row="3" column="0">
<widget class="QPushButton" name="cancel_button">
<property name="toolTip">
<string>Discard changes</string>
</property>
<property name="statusTip">
<string>Discard changes</string>
</property>
@ -66,11 +72,14 @@
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>Rebuild {0} from exploded contents</string>
</property>
<property name="statusTip">
<string>Rebuild ePub from exploded contents</string>
<string>Rebuild {0} from exploded contents</string>
</property>
<property name="text">
<string>&amp;Rebuild ePub</string>
<string>&amp;Rebuild {0}</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
@ -81,7 +90,7 @@
<item row="1" column="1">
<widget class="QPushButton" name="preview_button">
<property name="text">
<string>&amp;Preview ePub</string>
<string>&amp;Preview {0}</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">

View File

@ -117,6 +117,6 @@ if __name__ == '__main__':
from PyQt4.Qt import QApplication
app = QApplication([])
#test_widget('Conversion', 'Input Options')
test_widget('Conversion', 'Common Options')
#test_widget('Conversion', 'Output Options')
#test_widget('Conversion', 'Common Options')
test_widget('Conversion', 'Output Options')

View File

@ -20,6 +20,7 @@ from calibre.gui2 import (NONE, error_dialog, info_dialog, choose_files,
question_dialog, gprefs)
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.icu import lower
from calibre.constants import iswindows
class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
@ -272,8 +273,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.modify_plugin(op='remove')
def add_plugin(self):
info = '' if iswindows else ' [.zip %s]'%_('files')
path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
filters=[(_('Plugins') + info, ['zip'])], all_files=False,
select_only_single_file=True)
if not path:
return

View File

@ -63,16 +63,21 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def start_server(self):
ConfigWidgetBase.commit(self)
self.gui.start_content_server(check_started=False)
while not self.gui.content_server.is_running and self.gui.content_server.exception is None:
time.sleep(1)
if self.gui.content_server.exception is not None:
error_dialog(self, _('Failed to start content server'),
as_unicode(self.gui.content_server.exception)).exec_()
return
self.start_button.setEnabled(False)
self.test_button.setEnabled(True)
self.stop_button.setEnabled(True)
self.setCursor(Qt.BusyCursor)
try:
self.gui.start_content_server(check_started=False)
while (not self.gui.content_server.is_running and
self.gui.content_server.exception is None):
time.sleep(0.1)
if self.gui.content_server.exception is not None:
error_dialog(self, _('Failed to start content server'),
as_unicode(self.gui.content_server.exception)).exec_()
return
self.start_button.setEnabled(False)
self.test_button.setEnabled(True)
self.stop_button.setEnabled(True)
finally:
self.unsetCursor()
def stop_server(self):
self.gui.content_server.threaded_exit()
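
The rewritten start_server polls every 0.1 seconds inside try/finally, so the busy cursor is always restored even if the dialog code raises. A sketch of the polling helper this reduces to (the timeout is an added safeguard and is not in the diff above):

import time

def wait_for_server(server, timeout=30.0, interval=0.1):
    # Poll until the server is running or reports an exception; the caller
    # wraps this call in try/finally to restore any UI state, such as a
    # busy cursor.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if server.exception is not None:
            return False
        if server.is_running:
            return True
        time.sleep(interval)
    return False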

View File

@ -17,10 +17,10 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget
class FakeAction(object):
def __init__(self, name, icon, tooltip=None,
def __init__(self, name, gui_name, icon, tooltip=None,
dont_add_to=frozenset([]), dont_remove_from=frozenset([])):
self.name = name
self.action_spec = (name, icon, tooltip, None)
self.action_spec = (gui_name, icon, tooltip, None)
self.dont_remove_from = dont_remove_from
self.dont_add_to = dont_add_to
@ -28,17 +28,18 @@ class BaseModel(QAbstractListModel):
def name_to_action(self, name, gui):
if name == 'Donate':
return FakeAction(_('Donate'), 'donate.png',
return FakeAction('Donate', _('Donate'), 'donate.png',
dont_add_to=frozenset(['context-menu',
'context-menu-device']))
if name == 'Location Manager':
return FakeAction(_('Location Manager'), None,
return FakeAction('Location Manager', _('Location Manager'), 'reader.png',
_('Switch between library and device views'),
dont_add_to=frozenset(['menubar', 'toolbar',
'toolbar-child', 'context-menu',
'context-menu-device']))
if name is None:
return FakeAction('--- '+_('Separator')+' ---', None,
return FakeAction('--- '+('Separator')+' ---',
'--- '+_('Separator')+' ---', None,
dont_add_to=frozenset(['menubar', 'menubar-device']))
try:
return gui.iactions[name]
@ -314,7 +315,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if not pref_in_toolbar and not pref_in_menubar:
self.models['menubar'][1].add(['Preferences'])
if not lm_in_toolbar and not lm_in_menubar:
self.models['menubar-device'][1].add(['Location Manager'])
m = self.models['toolbar-device'][1]
m.add(['Location Manager'])
m.move(m.index(m.rowCount(None)-1), 5-m.rowCount(None))
# Save data.
for am, cm in self.models.values():

View File

@ -368,9 +368,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
self.library_view.model().db, server_config().parse())
self.content_server.state_callback = Dispatcher(
self.iactions['Connect Share'].content_server_state_changed)
self.content_server.state_callback(True)
if check_started:
QTimer.singleShot(10000, self.test_server)
self.content_server.start_failure_callback = \
Dispatcher(self.content_server_start_failed)
def content_server_start_failed(self, msg):
error_dialog(self, _('Failed to start Content Server'),
_('Could not start the content server. Error:\n\n%s')%msg,
show=True)
def resizeEvent(self, ev):
MainWindow.resizeEvent(self, ev)

Some files were not shown because too many files have changed in this diff.