Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2010-05-04 18:00:58 -04:00 · 2010-05-04 18:00:58 -04:00 · e3aafa1789
commit e3aafa1789
parent 0779e8c3a0 7f3f216fca
83 changed files with 10751 additions and 7473 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,104 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+- version: 0.6.51
+  date: 2010-04-30
+
+  bug fixes:
+    - title: "Fix regression that broke EPUB output in 0.6.50 when converting lists"
+
+
+- version: 0.6.50
+  date: 2010-04-30
+
+  new features:
+    - title: "Add merge book feature"
+      type: major
+      desc: >
+        "You can now merge multiple books into a single book, by clicking the arrow next to the edit meta information button.
+        Meta information from the books will be merged as well as individual book files in different formats"
+
+    - title: "Support for the Samsung Galaxy Spica and the Palm Pre"
+
+    - title: "Add a 'Go to' context menu to the ebook viewer"
+      tickets: [1230]
+
+    - title: "Show an asterisk next to version number when user is using CALIBRE_DEVELOP_FROM"
+      tickets: [5417]
+
+    - title: "Import ComicBookLover metadata from CBZ files"
+      tickets: [5402]
+
+    - title: "Add keyboard shortcut for viewing a specific format"
+      tickets: [5408]
+
+    - title: "EPUB Output: Add option to not use SVG for covers. Useful if you want to generate an EPUB for devices like the iPhone or JetBook Lite that don't support SVG covers"
+      tickets: [5409]
+
+    - title: "In the book info display area, only show series and tags if there are any. Move comments to the bottom."
+      tickets: [5391]
+
+  bug fixes:
+    - title: "E-book viewer: Use the Qt API to set document padding during next page operation, instead of javascript."
+      tickets: [5343]
+
+    - title: "E-book viewer: Handle self-closing heading tags in XHTML documents correctly."
+      tickets: [5413]
+
+    - title: "Conversion pipeline: Ignore CSS pseudo selectors"
+      tickets: [5337]
+
+    - title: "MOBI Input: Ignore form tags"
+      tickets: [5378]
+
+    - title: "Handle a scheduled custom recipe being deleted gracefully"
+      tickets: [5366]
+
+    - title: "ebook-convert: Don't rename PNG covers to JPG"
+      tickets: [5379]
+
+    - title: "Conversion pipeline: Respect width and height attributes in addition to width and height in CSS"
+
+    - title: "Fix regression which broke the use of HTML files in the regexp testing wizard"
+      tickets: [5341]
+
+    - title: "Fix match highlighting for multi-line regexps in the regexp testing wizard"
+      tickets: [5414]
+
+    - title: "EPUB Output: Workaround Adobe Digital Editions bug in rendering of lists with a left margin set."
+      tickets: [5415]
+
+    - title: "PRS 505/600/700/300 driver: Don't give an error message when editing metadata on SD card and cache directory does not exist"
+      tickets: [5410]
+
+    - title: "When converting EPUB to EPUB multiple times and creating book jacket from metadata, if an existing book jacket is found,
+             replace it. This will only work with EPUBs created with the current release onwards"
+
+    - title: "Correctly handle HTML in comments"
+      tickets: [5237]
+
+    - title: "Kindle driver: When transferring files whose names start with a period, replace the period."
+      tickets: [5367]
+
+    - title: "Conversion pipeline: When decoding XML (but not XHTML) if no encoding is specified, assume utf-8. Make entity conversion more robust. When splitting html handle ids with quotes in them correctly"
+
+  new recipes:
+    - title: The Old New Thing, Berlingske, ABC, Ultima Hora, China Daily, Dani
+      author: Darko Miletic
+
+    - title: Observa Digital
+      author: yrvn
+
+    - title: "Bill O'Reilly and Sean Hannity"
+      author: Rob Lammert
+
+  improved recipes:
+    - PC Magazine
+    - Reuters
+    - Sueddeutsche Zeitung
+    - "il Sole 24 Ore"
+    - La Repubblica
+
 - version: 0.6.49
  date: 2010-04-23

--- a/resources/images/news/abc_py.png
+++ b/resources/images/news/abc_py.png
--- a/resources/images/news/berlingske_dk.png
+++ b/resources/images/news/berlingske_dk.png
--- a/resources/images/news/chinadaily.png
+++ b/resources/images/news/chinadaily.png
--- a/resources/images/news/oldnewthing.png
+++ b/resources/images/news/oldnewthing.png
--- a/resources/images/news/ultimahora.png
+++ b/resources/images/news/ultimahora.png
--- a/resources/recipes/abc_py.recipe
+++ b/resources/recipes/abc_py.recipe
@ -0,0 +1,49 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+abc.com.py
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ABC_py(BasicNewsRecipe):
+    # Recipe definition for ABC digital (abc.com.py).
+
+    # Publication metadata.
+    title = 'ABC digital'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias de Paraguay y el resto del mundo'
+    publisher = 'ABC'
+    category = 'news, politics, Paraguay'
+    language = 'es_PY'
+    publication_type = 'newspaper'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 200
+    encoding = 'cp1252'
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    # Styling.
+    no_stylesheets = True
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
+
+    # The conversion options reuse the metadata values declared above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags = [dict(attrs={'class': 'zcontent'})]
+    remove_tags_after = dict(attrs={'class': 'date'})
+    remove_tags = [
+        dict(name=['form', 'iframe', 'embed', 'object', 'link', 'base', 'table']),
+        dict(attrs={'class': 'toolbox'}),
+    ]
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Ultimo momento', u'http://www.abc.com.py/ultimo-momento.xml'),
+        (u'Nacionales', u'http://www.abc.com.py/nacionales.xml'),
+        (u'Internacionales', u'http://www.abc.com.py/internacionales.xml'),
+        (u'Deportes', u'http://www.abc.com.py/deportes.xml'),
+        (u'Espectaculos', u'http://www.abc.com.py/espectaculos.xml'),
+        (u'Ciencia y Tecnologia', u'http://www.abc.com.py/ciencia-y-tecnologia.xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Drop the inline ``style`` attribute from every element that has one.
+        for styled_tag in soup.findAll(style=True):
+            del styled_tag['style']
+        return soup
--- a/resources/recipes/adnkronos.recipe
+++ b/resources/recipes/adnkronos.recipe
@ -0,0 +1,59 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Gabriele Marini, based on Darko Miletic'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+description   = 'Italian daily newspaper - 02-05-2010'
+
+'''
+http://www.adnkronos.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Adnkronos(BasicNewsRecipe):
+    # Recipe definition for the Adnkronos news agency (adnkronos.com).
+
+    # Publication metadata.
+    __author__ = 'Gabriele Marini'
+    description = 'News agency'
+    cover_url = 'http://www.adnkronos.com/IGN6/img/popup_ign.jpg'
+    title = u'Adnkronos'
+    # Fixed typo: the original string read 'ews agency'.
+    publisher = 'Adnkronos Group - News agency'
+    category = 'News, politics, culture, economy, general interest'
+
+    language = 'it'
+    timefmt = '[%a, %d %b, %Y]'
+
+    # Download settings.
+    oldest_article = 7
+    max_articles_per_feed = 80
+    use_embedded_content = False
+    recursion = 10
+    remove_javascript = True
+
+    extra_css = ' .newsAbstract{font-style: italic} '
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags = [
+        dict(name='div', attrs={'class': ['breadCrumbs', 'newsTop', 'newsText']}),
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class': ['leogoo', 'leogoo2']}),
+    ]
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'),
+        (u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'),
+        (u'Politica', u'http://rss.adnkronos.com/RSS_Politica.xml'),
+        (u'Esteri', u'http://rss.adnkronos.com/RSS_Esteri.xml'),
+        # Fixed typo: the title was 'Cronoca' while the feed URL says Cronaca.
+        (u'Cronaca', u'http://rss.adnkronos.com/RSS_Cronaca.xml'),
+        (u'Economia', u'http://rss.adnkronos.com/RSS_Economia.xml'),
+        (u'Finanza', u'http://rss.adnkronos.com/RSS_Finanza.xml'),
+        (u'CyberNews', u'http://rss.adnkronos.com/RSS_CyberNews.xml'),
+        (u'Spettacolo', u'http://rss.adnkronos.com/RSS_Spettacolo.xml'),
+        (u'Cultura', u'http://rss.adnkronos.com/RSS_Cultura.xml'),
+        (u'Sport', u'http://rss.adnkronos.com/RSS_Sport.xml'),
+        (u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'),
+        (u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml'),
+    ]
+
+    def get_article_url(self, article):
+        # Use the entry's 'id', falling back to 'guid'; None when neither exists.
+        return article.get('id', article.get('guid', None))
+
--- a/resources/recipes/berlingske_dk.recipe
+++ b/resources/recipes/berlingske_dk.recipe
@ -0,0 +1,49 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+berlingske.dk
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Berlingske_dk(BasicNewsRecipe):
+    # Recipe definition for Berlingske Tidende (berlingske.dk).
+
+    # Publication metadata.
+    title = 'Berlingske Tidende'
+    __author__ = 'Darko Miletic'
+    description = 'News from Denmark'
+    publisher = 'berlingske.dk'
+    category = 'news, politics, Denmark'
+    language = 'da'
+    publication_type = 'newspaper'
+    masthead_url = 'http://www.berlingske.dk/sites/all/themes/bm/img/layout/masthead_bg.gif'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    encoding = 'utf8'
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    # Styling.
+    no_stylesheets = True
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1,.manchet,.byline{font-family: Cambria,Georgia,Times,"Times New Roman",serif } '
+
+    # The conversion options reuse the metadata values declared above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Breaking news', u'http://www.berlingske.dk/breaking/rss'),
+        (u'Seneste nyt', u'http://www.berlingske.dk/seneste/rss'),
+        (u'Topnyheder', u'http://www.berlingske.dk/top/rss'),
+        (u'Danmark', u'http://www.berlingske.dk/danmark/seneste/rss'),
+        (u'Verden', u'http://www.berlingske.dk/verden/seneste/rss'),
+        (u'Klima', u'http://www.berlingske.dk/klima/seneste/rss'),
+        (u'Debat', u'http://www.berlingske.dk/debat/seneste/rss'),
+        (u'Koebenhavn', u'http://www.berlingske.dk/koebenhavn/seneste/rss'),
+        (u'Politik', u'http://www.berlingske.dk/politik/seneste/rss'),
+        (u'Kultur', u'http://www.berlingske.dk/kultur/seneste/rss'),
+    ]
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags = [dict(attrs={'class': ['first', 'pt-article']})]
+    remove_tags = [dict(name=['object', 'link', 'base', 'iframe', 'embed'])]
+
--- a/resources/recipes/chinadaily.recipe
+++ b/resources/recipes/chinadaily.recipe
@ -0,0 +1,48 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.chinadaily.com.cn
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Pagina12(BasicNewsRecipe):
+    # Recipe definition for China Daily (www.chinadaily.com.cn).
+    # NOTE(review): the class name looks like a leftover from another recipe;
+    # it is kept unchanged here so the public name stays the same.
+
+    # Publication metadata.
+    title = 'China Daily'
+    __author__ = 'Darko Miletic'
+    description = 'Chinadaily.com.cn is the largest English portal in China, providing news, business information, BBS, learning materials.'
+    publisher = 'China Daily Information Co.'
+    category = 'news, politics, China'
+    language = 'en_CN'
+    publication_type = 'newsportal'
+    masthead_url = 'http://www.chinadaily.com.cn/15421.files/chinadailylogo_e_20100301.jpg'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 200
+    encoding = 'utf8'
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    # Styling.
+    no_stylesheets = True
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '
+
+    # The conversion options reuse the metadata values declared above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags = [dict(attrs={'id': ['Title_e', 'Content']})]
+    remove_tags = [dict(name=['object', 'embed', 'iframe', 'table'])]
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'China', u'http://www.chinadaily.com.cn/rss/china_rss.xml'),
+        (u'Business', u'http://www.chinadaily.com.cn/rss/bizchina_rss.xml'),
+        (u'World', u'http://www.chinadaily.com.cn/rss/world_rss.xml'),
+        (u'Sports', u'http://www.chinadaily.com.cn/rss/sports_rss.xml'),
+        (u'Opinions', u'http://www.chinadaily.com.cn/rss/opinion_rss.xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Drop the inline ``style`` attribute from every element that has one.
+        for styled_tag in soup.findAll(style=True):
+            del styled_tag['style']
+        return soup
--- a/resources/recipes/el_observador.recipe
+++ b/resources/recipes/el_observador.recipe
@ -0,0 +1,57 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Yuri Alvarez<me at yurialvarez.com>'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+'''
+observa.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ObservaDigital(BasicNewsRecipe):
+    # Recipe definition for Observa Digital (observa.com.uy).
+
+    # Publication metadata.
+    title                 = 'Observa Digital'
+    __author__            = 'yrvn'
+    description           = 'Noticias de Uruguay'
+    language              = 'es'
+    timefmt               = '[%a, %d %b, %Y]'
+
+    # Download settings.
+    use_embedded_content  = False
+    recursion             = 5
+    encoding              = 'utf8'
+    remove_javascript     = True
+    no_stylesheets        = True
+    oldest_article        = 2
+    max_articles_per_feed = 100
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags = [dict(id=['contenido'])]
+    remove_tags = [
+        dict(name='div', attrs={'id':'contenedorVinculadas'}),
+        dict(name='p', attrs={'id':'nota_firma'}),
+        dict(name=['object','link']),
+    ]
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
+        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
+        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
+        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml'),
+    ]
+
+    def get_cover_url(self):
+        '''
+        Return the cover image URL taken from the site's front page.
+
+        Looks for the first <img> whose alt text starts with
+        'Tapa El Observador'. A trailing 'b.jpg' in its src is replaced
+        with '.jpg'; any other src is returned unchanged. Returns None
+        when no matching image is found.
+        '''
+        index = 'http://www.observa.com.uy/'
+        thumb_suffix = 'b.jpg'
+        soup = self.index_to_soup(index)
+        for image in soup.findAll('img', alt=True):
+            if not image['alt'].startswith('Tapa El Observador'):
+                continue
+            src = image['src']
+            # str.rstrip('b.jpg') removes any trailing run of the characters
+            # 'b', '.', 'j', 'p', 'g' rather than the literal suffix, which
+            # can eat part of the file name. Cut the exact suffix instead.
+            if src.endswith(thumb_suffix):
+                return src[:-len(thumb_suffix)] + '.jpg'
+            return src
+        return None
+
+    def preprocess_html(self, soup):
+        '''Remove the inline style attribute from every element and return soup.'''
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/il_giornale.recipe
+++ b/resources/recipes/il_giornale.recipe
@ -0,0 +1,60 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Gabriele Marini, based on Darko Miletic'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+description   = 'Italian daily newspaper - 19-04-2010'
+
+'''
+http://www.ilgiornale.it/
+'''
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class IlGiornale(BasicNewsRecipe):
+    # Recipe definition for Il Giornale (www.ilgiornale.it).
+
+    # Publication metadata.
+    __author__        = 'Marini Gabriele'
+    description   = 'Italian daily newspaper'
+
+    cover_url      = 'http://www.ilgiornale.it/img_v1/logo.gif'
+    title          = u'Il Giornale'
+    publisher      = 'Il Giornale ON-LINE S.r.l.'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    # Download settings.
+    oldest_article = 7
+    max_articles_per_feed = 50
+    use_embedded_content  = False
+    recursion             = 100
+
+    no_stylesheets        = True
+    conversion_options = {'linearize_tables':True}
+    remove_javascript = True
+
+
+    def get_article_url(self, article):
+        '''Return the entry's 'guid', falling back to 'id'; None if neither exists.'''
+        return article.get('guid', article.get('id', None))
+
+    def print_version(self, url):
+        '''
+        Return the URL of the print version of the article at ``url``.
+
+        Downloads the article page and takes the href of the second child
+        of the div styled 'float:left; width:35%;'. Falls back to ``url``
+        itself when that div or a usable link is missing.
+        '''
+        raw = self.browser.open(url).read()
+        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+        all_print_tags = soup.find('div', {'style':'float:left; width:35%;'})
+        # The None check has to come before touching ``contents``; the
+        # original dereferenced first, so the fallback was unreachable and a
+        # missing div raised AttributeError.
+        if all_print_tags is None or len(all_print_tags.contents) < 2:
+            return url
+        print_link = all_print_tags.contents[1]
+        try:
+            return print_link['href']
+        except (KeyError, TypeError):
+            # Second child is not a tag with an href (e.g. a text node).
+            return url
+
+
+    # (section title, feed URL) pairs.
+    feeds = [
+             (u'Ultime Notizie',u'http://www.ilgiornale.it/?RSS=S'),
+             (u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
+             (u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
+             (u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
+             (u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
+             (u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
+             (u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
+             (u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
+             (u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
+             (u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
+             (u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
+             ]
--- a/resources/recipes/il_messaggero.recipe
+++ b/resources/recipes/il_messaggero.recipe
@ -0,0 +1,56 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Gabriele Marini, based on Darko Miletic'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+description   = 'Italian daily newspaper - v1.01 (04, January 2010)'
+
+'''
+http://www.messaggero.it/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class IlMessaggero(BasicNewsRecipe):
+    # Recipe definition for Il Messaggero (www.messaggero.it).
+
+    # Publication metadata.
+    __author__ = 'Gabriele Marini'
+    description = 'Italian News'
+    cover_url = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif'
+    title = u'Il Messaggero'
+    publisher = 'Caltagirone Editore'
+    category = 'News, politics, culture, economy, general interest'
+    language = 'it'
+    timefmt = '[%a, %d %b, %Y]'
+
+    # Download settings.
+    oldest_article = 5
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    recursion = 10
+    remove_javascript = True
+
+    # Tag selectors for the parts of the page to keep.
+    keep_only_tags = [
+        dict(name='h1', attrs={'class': 'titoloLettura2'}),
+        dict(name='h2', attrs={'class': 'sottotitLettura'}),
+        dict(name='span', attrs={'class': 'testoArticoloG'}),
+    ]
+
+    # (section title, feed URL) pairs.
+    # NOTE(review): 'Cinema' and 'Viaggi' share the same URL, which looks like
+    # a placeholder -- confirm the intended feed addresses.
+    feeds = [
+        (u'HomePage', u'http://www.ilmessaggero.it/rss/home.xml'),
+        (u'Primo Piano', u'http://www.ilmessaggero.it/rss/initalia_primopiano.xml'),
+        (u'Cronaca Bianca', u'http://www.ilmessaggero.it/rss/initalia_cronacabianca.xml'),
+        (u'Cronaca Nera', u'http://www.ilmessaggero.it/rss/initalia_cronacanera.xml'),
+        (u'Economia e Finanza', u'http://www.ilmessaggero.it/rss/economia.xml'),
+        (u'Politica', u'http://www.ilmessaggero.it/rss/initalia_politica.xml'),
+        (u'Scienza e Tecnologia', u'http://www.ilmessaggero.it/rss/scienza.xml'),
+        (u'Cinema', u'http://www.ilmessaggero.it/rss.php?refresh_ce#'),
+        (u'Viaggi', u'http://www.ilmessaggero.it/rss.php?refresh_ce#'),
+        (u'Roma', u'http://www.ilmessaggero.it/rss/roma.xml'),
+        (u'Cultura e Tendenze', u'http://www.ilmessaggero.it/rss/roma_culturaspet.xml'),
+        (u'Sport', u'http://www.ilmessaggero.it/rss/sport.xml'),
+        (u'Calcio', u'http://www.ilmessaggero.it/rss/sport_calcio.xml'),
+        (u'Motori', u'http://www.ilmessaggero.it/rss/sport_motori.xml'),
+    ]
+                     
--- a/resources/recipes/ilsole24ore.recipe
+++ b/resources/recipes/ilsole24ore.recipe
@ -11,12 +11,13 @@ http://www.ilsole24ore.com/
 from calibre.web.feeds.news import BasicNewsRecipe


-class ilsole(BasicNewsRecipe):
+class ilsole24Ore(BasicNewsRecipe):
    author        = 'Lorenzo Vigentini & Edwin van Maastrigt'
    description   = 'Financial news daily paper'

-    cover_url      = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
-    title          = u'il Sole 24 Ore '
+    cover_url      = 'http://www.ilsole24ore.com/img2007/print_header.gif'
+
+    title          = u'il Sole 24 Ore New'
    publisher      = 'italiaNews'
    category       = 'News, finance, economy, politics'

@ -35,12 +36,14 @@ class ilsole(BasicNewsRecipe):

    def print_version(self, url):
        link, sep, params = url.rpartition('?')
+        if link  is None:
+           return link.replace('_1.php', '_php')
        return link.replace('.shtml', '_PRN.shtml')

    keep_only_tags     = [
                            dict(name='div', attrs={'class':'txt'})
                        ]
-    remove_tags = [dict(name='br')]
+#    remove_tags = [dict(name='br')]

    feeds          = [
                       (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
@ -52,13 +55,14 @@ class ilsole(BasicNewsRecipe):
                       (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
                       (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
                       (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
-                       (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
+                       (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml'),
+                       (u'Ambiente e Sicurezza',u'http://www.ilsole24ore.com/rss/prof_as.xml')
                     ]

    extra_css = '''
-                html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
+                html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:"Georgia","Times New Roman";}
                .linkHighlight {color:#0292c6;}
-                .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
+                .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px};text-align:justify;font-family:"serif"}
                .txt p {line-height:18px;}
                .txt span {line-height:22px;}
                .title h3 {color:#7b7b7b;}
--- a/resources/recipes/jpost.recipe
+++ b/resources/recipes/jpost.recipe
@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class JerusalemPost(BasicNewsRecipe):
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
    __author__ = 'Kovid Goyal'
    max_articles_per_feed = 10
    no_stylesheets = True
-    remove_tags_before = {'class':'jp-grid-content'}
-    remove_tags_after = {'id':'body_val'}

    feeds =  [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
               ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
               ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
          ]

+    remove_tags = [
+            dict(id=lambda x: x and 'ads.' in x),
+            dict(attrs={'class':['printinfo', 'tt1']}),
+            dict(onclick='DoPrint()'),
+            dict(name='input'),
+            ]
+
+    conversion_options = {'linearize_tables':True}
+
    def preprocess_html(self, soup):
-        for x in soup.findAll(name=['form', 'input']):
-            x.name = 'div'
-        for x in soup.findAll('body', style=True):
-            del x['style']
+        for tag in soup.findAll('form'):
+            tag.name = 'div'
        return soup
+
+    def print_version(self, url):
+        m = re.search(r'(ID|id)=(\d+)', url)
+        if m is not None:
+            id_ = m.group(2)
+            return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s'%id_
+        return url
+
--- a/resources/recipes/la_republica.recipe
+++ b/resources/recipes/la_republica.recipe
@ -10,7 +10,7 @@ http://www.repubblica.it/

 from calibre.web.feeds.news import BasicNewsRecipe

-class LaRepublica(BasicNewsRecipe):
+class LaRepubblica(BasicNewsRecipe):
    author        = 'Lorenzo Vigentini, based on Darko Miletic'
    description   = 'Italian daily newspaper'

@ -54,21 +54,24 @@ class LaRepublica(BasicNewsRecipe):
                        ]

    feeds          = [
-                       (u'Repubblica Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
-                       (u'Repubblica Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
-                       (u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
-                       (u'Repubblica Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
-                       (u'Repubblica Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'),
-                       (u'Repubblica Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'),
-                       (u'Repubblica Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'),
-                       (u'Repubblica Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'),
-                       (u'Repubblica Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'),
-                       (u'Repubblica Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'),
-                       (u'Repubblica Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'),
-                       (u'Repubblica Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
-                       (u'Repubblica Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
-                       (u'Repubblica Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
-                       (u'Repubblica Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
-                       (u'Repubblica Torino', u'http://torino.repubblica.it/rss/rss2.0.xml')
+                       (u'Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
+                       (u'Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
+                       (u'Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
+                       (u'Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
+                       (u'Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'),
+                       (u'Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'),
+                       (u'Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'),
+                       (u'Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'),
+                       (u'Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'),
+                       (u'Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'),
+                       (u'Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'),
+                       (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
+                       (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
+                       (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
+                       (u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
+                       (u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
+                       (u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
+                       (u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
+                       (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
                      ]

--- a/resources/recipes/oldnewthing.recipe
+++ b/resources/recipes/oldnewthing.recipe
@ -0,0 +1,34 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+blogs.msdn.com/oldnewthing
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class OldNewThing(BasicNewsRecipe):
+    # Recipe definition for The Old New Thing (blogs.msdn.com/oldnewthing).
+
+    # Publication metadata.
+    title = 'The Old New Thing'
+    __author__ = 'Darko Miletic'
+    description = 'Famous blog by Windows guru Raymond Chen'
+    language = 'en'
+    publication_type = 'blog'
+
+    # Download settings.
+    oldest_article = 15
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    use_embedded_content = False
+
+    # Styling.
+    no_stylesheets = True
+    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} .code{font-family: "Lucida Console",monospace} '
+
+    # Comment and language reuse the values above; tags and publisher are
+    # spelled out here because the class defines no such attributes.
+    conversion_options = {
+        'comment': description,
+        'tags': 'blog, windows, microsoft, programming',
+        'publisher': 'Raymond Chen',
+        'language': language,
+    }
+
+    # Page clean-up selectors.
+    remove_attributes = ['width', 'height']
+    keep_only_tags = [dict(attrs={'class': ['postsub', 'comment']})]
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml'),
+    ]
+
--- a/resources/recipes/onionavclub.recipe
+++ b/resources/recipes/onionavclub.recipe
@ -0,0 +1,36 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+'''
+avclub.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BBC(BasicNewsRecipe):
+    # Recipe definition for The Onion AV Club (www.avclub.com).
+    # NOTE(review): the class name looks like a leftover from another recipe;
+    # it is kept unchanged here so the public name stays the same.
+
+    # Publication metadata.
+    title          = u'The Onion AV Club'
+    __author__     = 'Stephen Williams'
+    description    = 'Film, Television and Music Reviews'
+
+    # Download settings.
+    no_stylesheets = True
+    oldest_article        = 2
+    max_articles_per_feed = 100
+
+    # Tag selectors used to trim the article pages.
+    keep_only_tags     = [dict(name='div', attrs={'id':'content'})
+                          ]
+
+    remove_tags    = [dict(name='div', attrs={'class':['footer','tools_horizontal']}),
+                      dict(name='div', attrs={'id':['tool_holder','elsewhere_on_avclub']})
+                      ]
+    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
+
+    # (section title, feed URL) pairs. The 'Interviews' feed used to be
+    # listed twice (first and last); the duplicate entry has been removed.
+    feeds          = [
+                      ('Interviews', 'http://www.avclub.com/feed/interview/'),
+                      ('AV Club Daily', 'http://www.avclub.com/feed/daily'),
+                      ('Film', 'http://www.avclub.com/feed/film/'),
+                      ('Music', 'http://www.avclub.com/feed/music/'),
+                      ('DVD', 'http://www.avclub.com/feed/dvd/'),
+                      ('Books', 'http://www.avclub.com/feed/books/'),
+                      ('Games', 'http://www.avclub.com/feed/games/'),
+                    ]
--- a/resources/recipes/pc_mag.recipe
+++ b/resources/recipes/pc_mag.recipe
@ -9,8 +9,9 @@ __description__ = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comp
 '''
 http://www.pcmag.com/
 '''
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment

 class pcMag(BasicNewsRecipe):
    __author__     = 'Lorenzo Vigentini'
@ -33,9 +34,6 @@ class pcMag(BasicNewsRecipe):
    remove_javascript     = True
    no_stylesheets = True

-    keep_only_tags     = [
-                            dict(name='div', attrs={'id':'articleContent'})
-                        ]

    feeds          = [
                       (u'Tech Commentary from the Editors of PC Magazine', u'http://rssnewsapps.ziffdavis.com/PCMAG_commentary.xml'),
@ -49,8 +47,13 @@ class pcMag(BasicNewsRecipe):
                       (u'Technology News from Ziff Davis', u'http://rssnewsapps.ziffdavis.com/pcmagbreakingnews.xml')
                     ]

+    keep_only_tags = [dict(attrs={'class':'content-page'})]
    remove_tags         = [
-                            dict(name='div', attrs={'id':['microAd','intellitxt','articleDeckTalkback','inlineDigg','underArticleLinks','w_talkback']}),
-                            dict(name='span', attrs={'id':['highlights_content','yahooBuzzBadge-48558872521263350499378']})
+                            dict(attrs={'class':['control-side','comment','highlights_content','btn-holder','subscribe-panel',
+                                'grey-box comments-box']}),
+                            dict(id=['inlineDigg']),
+                            dict(text=lambda text:isinstance(text, Comment)),
+                            dict(name='img', width='1'),
                        ]
+    preprocess_regexps = [(re.compile(r"<img '[^']+?'"), lambda m : '<img ')]

--- a/resources/recipes/reuters.recipe
+++ b/resources/recipes/reuters.recipe
@ -7,12 +7,29 @@ class Reuters(BasicNewsRecipe):

    title = 'Reuters'
    description = 'Global news'
-    __author__ = 'Kovid Goyal'
+    __author__ = 'Kovid Goyal and Sujata Raman'
    use_embedded_content   = False
    language = 'en'

    max_articles_per_feed = 10
+    no_stylesheets = True
+    remove_javascript = True

+    extra_css      = '''
+                         body{font-family:arial,helvetica,sans;}
+                        h1{ font-size:larger ; font-weight:bold;  }
+                        .byline{color:#006E97;font-size:x-small; font-weight:bold;}
+                        .location{font-size:x-small; font-weight:bold;}
+                        .timestamp{font-size:x-small; }
+                        '''
+
+    keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
+
+
+    remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
+                   dict(name='p', attrs={'class':['relatedTopics']}),
+                    dict(name='a', attrs={'id':['fullSizeLink']}),
+                   dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]

    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
 [
@ -38,5 +55,3 @@ class Reuters(BasicNewsRecipe):
                  ('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
         ]

-    def print_version(self, url):
-        return ('http://www.reuters.com/article/id' + url + '?sp=true')
--- a/resources/recipes/sueddeutschezeitung.recipe
+++ b/resources/recipes/sueddeutschezeitung.recipe
@ -5,9 +5,8 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 www.sueddeutsche.de/sz/
 '''

-import urllib
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime

 class SueddeutcheZeitung(BasicNewsRecipe):
    title                  = 'Sueddeutche Zeitung'
@ -20,12 +19,13 @@ class SueddeutcheZeitung(BasicNewsRecipe):
    encoding               = 'cp1252'
    needs_subscription     = True
    remove_empty_feeds     = True
+    delay                  = 2
    PREFIX                 = 'http://www.sueddeutsche.de'
-    INDEX                  = PREFIX + strftime('/sz/%Y-%m-%d/')
-    LOGIN                  = PREFIX + '/app/lbox/index.html'
+    INDEX                  = PREFIX + '/app/epaper/textversion/'
    use_embedded_content   = False
-    masthead_url           = 'http://pix.sueddeutsche.de/img/g_.gif'
+    masthead_url           = 'http://pix.sueddeutsche.de/img/layout/header/logo.gif'
    language               = 'de'
+    publication_type       = 'newspaper'
    extra_css              = ' body{font-family: Arial,Helvetica,sans-serif} '

    conversion_options = {
@ -40,49 +40,49 @@ class SueddeutcheZeitung(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
-        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({ 'login_name':self.username
-                                     ,'login_passwort':self.password
-                                     ,'lboxaction':'doLogin'
-                                     ,'passtxt':'Passwort'
-                                     ,'referer':self.INDEX
-                                     ,'x':'22'
-                                     ,'y':'7'
-                                   })
-            br.open(self.LOGIN,data)
+            br.open(self.INDEX)
+            br.select_form(name='lbox')
+            br['login_name'    ] = self.username
+            br['login_passwort'] = self.password
+            br.submit()
        return br

    remove_tags        =[
                         dict(attrs={'class':'hidePrint'})
                        ,dict(name=['link','object','embed','base','iframe'])
                        ]
-    remove_tags_before = dict(name='h2')
+    keep_only_tags     = [dict(attrs={'class':'artikelBox'})]
+    remove_tags_before =  dict(attrs={'class':'artikelTitel'})
    remove_tags_after  =  dict(attrs={'class':'author'})

    feeds = [
-               (u'Politik'      , INDEX + 'politik/'      )
-              ,(u'Seite drei'   , INDEX + 'seitedrei/'    )
-              ,(u'Meinungsseite', INDEX + 'meinungsseite/')
-              ,(u'Wissen'       , INDEX + 'wissen/'       )
-              ,(u'Panorama'     , INDEX + 'panorama/'     )
-              ,(u'Feuilleton'   , INDEX + 'feuilleton/'   )
-              ,(u'Medien'       , INDEX + 'medien/'       )
-              ,(u'Wirtschaft'   , INDEX + 'wirtschaft/'   )
-              ,(u'Sport'        , INDEX + 'sport/'        )
-              ,(u'Bayern'       , INDEX + 'bayern/'       )
-              ,(u'Muenchen'     , INDEX + 'muenchen/'     )
-              ,(u'jetzt.de'     , INDEX + 'jetzt.de/'     )
+               (u'Politik'      , INDEX + 'Politik/'      )
+              ,(u'Seite drei'   , INDEX + 'Seite+drei/'   )
+              ,(u'Meinungsseite', INDEX + 'Meinungsseite/')
+              ,(u'Wissen'       , INDEX + 'Wissen/'       )
+              ,(u'Panorama'     , INDEX + 'Panorama/'     )
+              ,(u'Feuilleton'   , INDEX + 'Feuilleton/'   )
+              ,(u'Medien'       , INDEX + 'Medien/'       )
+              ,(u'Wirtschaft'   , INDEX + 'Wirtschaft/'   )
+              ,(u'Sport'        , INDEX + 'Sport/'        )
+              ,(u'Bayern'       , INDEX + 'Bayern/'       )
+              ,(u'Muenchen'     , INDEX + 'M%FCnchen/'    )
            ]

    def parse_index(self):
+        src = self.index_to_soup(self.INDEX)
+        id = ''
+        for itt in src.findAll('a',href=True):
+            if itt['href'].startswith('/app/epaper/textversion/inhalt/'):
+               id = itt['href'].rpartition('/inhalt/')[2]
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
-            soup = self.index_to_soup(feedurl)
+            soup = self.index_to_soup(feedurl + id)
            tbl = soup.find(attrs={'class':'szprintd'})
            for item in tbl.findAll(name='td',attrs={'class':'topthema'}):
                atag    = item.find(attrs={'class':'Titel'}).a
@ -101,7 +101,3 @@ class SueddeutcheZeitung(BasicNewsRecipe):
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
-
-    def print_version(self, url):
-        return url + 'print.html'
-
--- a/resources/recipes/ultimahora.recipe
+++ b/resources/recipes/ultimahora.recipe
@ -0,0 +1,52 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+ultimahora.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UltimaHora_py(BasicNewsRecipe):
+    # Recipe for the newspaper Ultima Hora (ultimahora.com)
+    title = 'Ultima Hora'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias de Paraguay y el resto del mundo'
+    publisher = 'EDITORIAL EL PAIS S.A.'
+    category = 'news, politics, Paraguay'
+    oldest_article = 2
+    max_articles_per_feed = 200
+    no_stylesheets = True
+    encoding = 'cp1252'
+    use_embedded_content = False
+    language = 'es_PY'
+    remove_empty_feeds = True
+    publication_type = 'newspaper'
+    masthead_url = 'http://www.ultimahora.com/imgs/uh-com.gif'
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .sub_titulo_mediano,.TituloNota{font-family: Georgia,"Times New Roman",Times,serif} .sub_titulo_mediano{font-weight: bold} '
+
+    # The conversion options reuse the attribute values defined above
+    conversion_options = dict(comment=description, tags=category,
+                              publisher=publisher, language=language)
+
+    # Tag names that are removed, and ids of the elements that are kept
+    remove_tags = [dict(name=['form','iframe','embed','object','link','base','table'])]
+    keep_only_tags = [dict(attrs={'id':['nota_titulo','nota_copete','texto']})]
+
+    # (section title, RSS url) pairs
+    feeds = [
+        (u'Arte y Espectaculos', u'http://www.ultimahora.com/adjuntos/rss/UHEspectaculos.xml'),
+        (u'Ciudad del Este', u'http://www.ultimahora.com/adjuntos/rss/UHCDE.xml'),
+        (u'Deportes', u'http://www.ultimahora.com/adjuntos/rss/UHDeportes.xml'),
+        (u'Ultimo momento', u'http://www.ultimahora.com/adjuntos/rss/UltimoMomento.xml'),
+        (u'Nacionales', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-nacionales.xml'),
+        (u'Politica', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-politica.xml'),
+        (u'Sucesos', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-sucesos.xml'),
+        (u'Economia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-economia.xml'),
+        (u'Ciencia y Tecnologia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-ciencia.xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Delete the style attribute of every element in soup and return it'''
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
--- a/setup/installer/linux/freeze.py
+++ b/setup/installer/linux/freeze.py
@ -153,7 +153,7 @@ class LinuxFreeze(Command):
        sys.resources_location = os.path.join(DIR_NAME, 'resources')
        dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
        if dfv and os.path.exists(dfv):
-            sys.path.insert(0, dfv)
+            sys.path.insert(0, os.path.abspath(dfv))

        executables = %(executables)s

--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.6.49'
+__version__   = '0.6.51'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
@ -56,9 +56,18 @@ if plugins is None:
        plugin_path = sys.extensions_location
        sys.path.insert(0, plugin_path)

-        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
-            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
-            'chm_extra'] + \
+        for plugin in [
+                'pictureflow',
+                'lzx',
+                'msdes',
+                'podofo',
+                'cPalmdoc',
+                'fontconfig',
+                'pdfreflow',
+                'progress_indicator',
+                'chmlib',
+                'chm_extra'
+            ] + \
                    (['winutil'] if iswindows else []) + \
                    (['usbobserver'] if isosx else []):
            try:
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -7,7 +7,7 @@ import os
 import glob
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
 from calibre.constants import numeric_version
-from calibre.ebooks.metadata.archive import ArchiveExtract
+from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata

 class HTML2ZIP(FileTypePlugin):
    name = 'HTML to ZIP'
@ -97,6 +97,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata import MetaInformation
        ret = extract_first(stream)
        mi = MetaInformation(None, None)
+        stream.seek(0)
+        if ftype == 'cbz':
+            try:
+                mi.smart_update(get_cbz_metadata(stream))
+            except:
+                pass
        if ret is not None:
            path, data = ret
            ext = os.path.splitext(path)[1][1:]
@ -448,7 +454,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3
 from calibre.devices.sne.driver import SNE
-from calibre.devices.misc import PALMPRE
+from calibre.devices.misc import PALMPRE, KOBO

 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
@ -530,7 +536,8 @@ plugins += [
    EDGE,
    SNE,
    ALEX,
-    PALMPRE
+    PALMPRE,
+    KOBO,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from itertools import izip
+from xml.sax.saxutils import escape

 from calibre.customize import Plugin as _Plugin

@ -238,14 +239,14 @@ class OutputProfile(Plugin):

    @classmethod
    def tags_to_string(cls, tags):
-        return ', '.join(tags)
+        return escape(', '.join(tags))

 class iPadOutput(OutputProfile):

    name = 'iPad'
    short_name = 'ipad'
-    screen_size = (1024, 768)
-    comic_screen_size = (1024, 768)
+    screen_size = (768, 1024)
+    comic_screen_size = (768, 1024)
    dpi = 132.0

 class SonyReaderOutput(OutputProfile):
@ -383,7 +384,8 @@ class KindleOutput(OutputProfile):

    @classmethod
    def tags_to_string(cls, tags):
-        return 'ttt '.join(tags)+'ttt '
+        return u'%s <br/><span style="color: white">%s</span>' % (
+                escape(', '.join(tags)), escape('ttt '.join(tags)+'ttt ')) # tags go into markup

 class KindleDXOutput(OutputProfile):

@ -399,7 +401,8 @@ class KindleDXOutput(OutputProfile):

    @classmethod
    def tags_to_string(cls, tags):
-        return 'ttt '.join(tags)+'ttt '
+        return u'%s <br/><span style="color: white">%s</span>' % (
+                escape(', '.join(tags)), escape('ttt '.join(tags)+'ttt ')) # tags go into markup

 class IlliadOutput(OutputProfile):

--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -18,7 +18,8 @@ class ANDROID(USBMS):
    FORMATS     = ['epub', 'pdf']

    VENDOR_ID   = {
-            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
+            # HTC
+            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100], 0x0ff9 : [0x0100]},

            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216]},
@ -26,7 +27,7 @@ class ANDROID(USBMS):
            0x18d1 : { 0x4e11 : [0x0100, 0x226], 0x4e12: [0x0100, 0x226]},

            # Samsung
-            0x04e8 : { 0x681d : [0x0222]},
+            0x04e8 : { 0x681d : [0x0222], 0x681c : [0x0222]},

            # Acer
            0x502 : { 0x3203 : [0x0100]},
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -62,7 +62,7 @@ class KINDLE(USBMS):

    def filename_callback(self, fname, mi):
        if fname.startswith('.'):
-            return fname[1:]
+            return 'x'+fname[1:]
        return fname

    def get_annotations(self, path_map):
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -28,3 +28,24 @@ class PALMPRE(USBMS):

    EBOOK_DIR_MAIN = 'E-books'

+class KOBO(USBMS):
+    # Device interface plugin for the Kobo Reader, based on the USBMS class
+    name = 'Kobo Reader Device Interface'
+    gui_name = 'Kobo Reader'
+    description = _('Communicate with the Kobo Reader')
+    author = 'Kovid Goyal'
+
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS     = ['epub', 'pdf']
+
+    VENDOR_ID   = [0x2237] # NOTE(review): presumably the USB ids of the device, verify
+    PRODUCT_ID  = [0x4161]
+    BCD         = [0x0110]
+
+    VENDOR_NAME = 'KOBO_INC'
+    WINDOWS_MAIN_MEM = '.KOBOEREADER'
+
+    EBOOK_DIR_MAIN = 'e-books' # NOTE(review): presumably the books folder on the device, verify
+
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -69,13 +69,15 @@ class PRS505(CLI, Device):

        def write_cache(prefix):
            try:
-                cachep = os.path.join(prefix, self.CACHE_XML)
+                cachep = os.path.join(prefix, *(self.CACHE_XML.split('/')))
                if not os.path.exists(cachep):
+                    dname = os.path.dirname(cachep)
+                    if not os.path.exists(dname):
                        try:
-                        os.makedirs(os.path.dirname(cachep), mode=0777)
+                            os.makedirs(dname, mode=0777)
                        except:
                            time.sleep(5)
-                        os.makedirs(os.path.dirname(cachep), mode=0777)
+                            os.makedirs(dname, mode=0777)
                    with open(cachep, 'wb') as f:
                        f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
                            <cache xmlns="http://www.kinoma.com/FskCache/1">
@ -202,9 +204,11 @@ class PRS505(CLI, Device):

        def write_card_prefix(prefix, listid):
            if prefix is not None and hasattr(booklists[listid], 'write'):
-                if not os.path.exists(prefix):
-                    os.makedirs(prefix)
-                with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
+                tgt  = os.path.join(prefix, *(self.CACHE_XML.split('/')))
+                base = os.path.dirname(tgt)
+                if not os.path.exists(base):
+                    os.makedirs(base)
+                with open(tgt, 'wb') as f:
                    booklists[listid].write(f)
        write_card_prefix(self._card_a_prefix, 1)
        write_card_prefix(self._card_b_prefix, 2)
--- a/src/calibre/devices/usbobserver/usbobserver.c
+++ b/src/calibre/devices/usbobserver/usbobserver.c
@ -25,6 +25,7 @@
 #include <stdio.h>

 #include <CoreFoundation/CFNumber.h>
+#include <CoreServices/CoreServices.h>
 #include <IOKit/usb/IOUSBLib.h>
 #include <IOKit/IOCFPlugIn.h>
 #include <IOKit/IOKitLib.h>
@ -52,6 +53,28 @@

 #define NUKE(x) Py_XDECREF(x); x = NULL;

+/* This function only works on 10.5 and later
+static PyObject* send2trash(PyObject *self, PyObject *args)
+{
+    UInt8 *utf8_chars;
+    FSRef fp;
+    OSStatus op_result;
+
+    if (!PyArg_ParseTuple(args, "es", "utf-8", &utf8_chars)) {
+        return NULL;
+    }
+
+    FSPathMakeRefWithOptions(utf8_chars, kFSPathMakeRefDoNotFollowLeafSymlink, &fp, NULL);
+    op_result = FSMoveObjectToTrashSync(&fp, NULL, kFSFileOperationDefaultOptions);
+    PyMem_Free(utf8_chars);
+    if (op_result != noErr) {
+        PyErr_SetString(PyExc_OSError, GetMacOSStatusCommentString(op_result));
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+*/
+
 static PyObject*
 usbobserver_get_iokit_string_property(io_service_t dev, CFStringRef prop) {
    CFTypeRef PropRef;
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -676,7 +676,10 @@ OptionRecommendation(name='timestamp',
        if mi.cover:
            if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
                mi.cover = self.download_cover(mi.cover)
-            mi.cover_data = ('', open(mi.cover, 'rb').read())
+            ext = mi.cover.rpartition('.')[-1].lower().strip()
+            if ext not in ('png', 'jpg', 'jpeg'):
+                ext = 'jpg'
+            mi.cover_data = (ext, open(mi.cover, 'rb').read())
            mi.cover = None
        self.user_metadata = mi

--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -81,12 +81,40 @@ class EPUBOutput(OutputFormatPlugin):
        OptionRecommendation(name='no_default_epub_cover', recommended_value=False,
            help=_('Normally, if the input file has no cover and you don\'t'
            ' specify one, a default cover is generated with the title, '
-            'authors, etc. This option disables the generation of this cover.')),
+            'authors, etc. This option disables the generation of this cover.')
+        ),
+
+        OptionRecommendation(name='no_svg_cover', recommended_value=False,
+            help=_('Do not use SVG for the book cover. Use this option if '
+                'your EPUB is going to be used ona  device that does not '
+                'support SVG, like the iPhone or the JetBook Lite. '
+                'Without this option, such devices will display the cover '
+                'as a blank page.')
+        ),

        ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

+    NONSVG_TITLEPAGE_COVER = '''\
+        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+            <head>
+                <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+                <meta name="calibre:cover" content="true" />
+                <title>Cover</title>
+                <style type="text/css" title="override_css">
+                    @page {padding: 0pt; margin:0pt}
+                    body { text-align: center; padding:0pt; margin: 0pt; }
+                    div { padding:0pt; margin: 0pt; }
+                </style>
+            </head>
+            <body>
+                <div>
+                    <img src="%s" alt="cover" style="height: 100%%" />
+                </div>
+            </body>
+        </html>
+    '''

    TITLEPAGE_COVER = '''\
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
@ -301,7 +329,9 @@ class EPUBOutput(OutputFormatPlugin):
            else:
                href = self.default_cover()
            if href is not None:
-                tp = self.TITLEPAGE_COVER%unquote(href)
+                templ = self.NONSVG_TITLEPAGE_COVER if self.opts.no_svg_cover \
+                        else self.TITLEPAGE_COVER
+                tp = templ%unquote(href)
                id, href = m.generate('titlepage', 'titlepage.xhtml')
                item = m.add(id, href, guess_type('t.xhtml')[0],
                        data=etree.fromstring(tp))
@ -334,6 +364,12 @@ class EPUBOutput(OutputFormatPlugin):
        '''
        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote

+        stylesheet = None
+        for item in self.oeb.manifest:
+            if item.media_type.lower() in OEB_STYLES:
+                stylesheet = item
+                break
+
        # ADE cries big wet tears when it encounters an invalid fragment
        # identifier in the NCX toc.
        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
@ -430,11 +466,17 @@ class EPUBOutput(OutputFormatPlugin):
                    elem.tail = special_chars.sub('', elem.tail)
                    elem.tail = elem.tail.replace(u'\u2011', '-')

-        stylesheet = None
-        for item in self.oeb.manifest:
-            if item.media_type.lower() in OEB_STYLES:
-                stylesheet = item
-                break
+            if stylesheet is not None:
+                # ADE doesn't render lists correctly if they have left margins
+                from cssutils.css import CSSRule
+                for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
+                    sel = '.'+lb.get('class')
+                    for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
+                        if sel == rule.selectorList.selectorText:
+                            val = rule.style.removeProperty('margin-left')
+                            pval = rule.style.getProperty('padding-left')
+                            if val and not pval:
+                                rule.style.setProperty('padding-left', val)

        if stylesheet is not None:
            stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
 Input plugin for HTML or OPF ebooks.
 '''

-import os, re, sys, uuid
+import os, re, sys, uuid, tempfile
 from urlparse import urlparse, urlunparse
 from urllib import unquote
 from functools import partial
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):

    def convert(self, stream, opts, file_ext, log,
                accelerators):
+        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
        return create_oebbook(log, stream.name, opts, self,
                encoding=opts.input_encoding)

+    def is_case_sensitive(self, path):
+        'Return True if the filesystem of path is case sensitive (cached)'
+        if self._is_case_sensitive is None:
+            if not path or not os.path.exists(path):
+                return islinux or isfreebsd # Cannot probe, guess (not cached)
+            self._is_case_sensitive = not (os.path.exists(path.lower()) and
+                os.path.exists(path.upper())) # Both exist => case is ignored
+        return self._is_case_sensitive
+
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer, \
@ -320,7 +330,6 @@ class HTMLInput(InputFormatPlugin):
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
-
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
@ -328,7 +337,6 @@ class HTMLInput(InputFormatPlugin):
                self.oeb.uid = metadata.identifier[0]
                break

-
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
@ -345,14 +353,16 @@ class HTMLInput(InputFormatPlugin):

        self.added_resources = {}
        self.log = log
+        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
-            if not (islinux or isfreebsd):
+            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

+        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
@ -417,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
-        if not (islinux or isfreebsd):
+        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@ -64,3 +64,45 @@ class ArchiveExtract(FileTypePlugin):
                of.write(zf.read(fname))
        return of.name

+def get_comic_book_info(d, mi):
+    '''Copy the ComicBookInfo fields found in the dict d onto mi. Missing,
+    empty or null (None) text fields and credits are skipped, not raised on.'''
+    series = (d.get('series') or '').strip()
+    if series:
+        mi.series = series
+        if d.get('volume', -1) > -1:
+            mi.series_index = float(d['volume'])
+    if d.get('rating', -1) > -1:
+        mi.rating = d['rating']
+    for x in ('title', 'publisher'):
+        y = (d.get(x) or '').strip()
+        if y:
+            setattr(mi, x, y)
+    tags = d.get('tags', [])
+    if tags:
+        mi.tags = tags
+    authors = []
+    for credit in d.get('credits') or []:
+        if credit.get('role') in ('Writer', 'Artist', 'Cartoonist', 'Creator'):
+            x = credit.get('person') or ''
+            if x:
+                # "Last, First" -> "First Last"
+                authors.append(' '.join(reversed(x.split(', '))))
+    if authors:
+        mi.authors = authors
+
+
+def get_cbz_metadata(stream):
+    '''Return a MetaInformation filled from the JSON comment of the zip stream'''
+    import json
+    from calibre.ebooks.metadata import MetaInformation
+    from calibre.utils.zipfile import ZipFile
+
+    comment = ZipFile(stream).comment
+    mi = MetaInformation(None, None)
+    info = json.loads(comment) if comment else None
+    # Anything that is not a JSON object (no keys) is ignored
+    for name in (info.keys() if hasattr(info, 'keys') else ()):
+        if name.startswith('ComicBookInfo'):
+            get_comic_book_info(info[name], mi)
+    return mi
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -61,7 +61,8 @@ class EXTHHeader(object):
                # last update time
                pass
            elif id == 503: # Long title
-                if not title or title == _('Unknown'):
+                if not title or title == _('Unknown') or \
+                        'USER_CONTENT' in title or title.startswith('dtp_'):
                    try:
                        title = content.decode(codec)
                    except:
@ -253,6 +254,8 @@ class MobiReader(object):
            stream = open(filename_or_stream, 'rb')

        raw = stream.read()
+        if raw.startswith('TPZ'):
+            raise ValueError(_('This is an Amazon Topaz book. It cannot be processed.'))

        self.header   = raw[0:72]
        self.name     = self.header[:32].replace('\x00', '')
@ -260,7 +263,7 @@ class MobiReader(object):

        self.ident = self.header[0x3C:0x3C + 8].upper()
        if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
-            raise MobiError('Unknown book type: %s' % self.ident)
+            raise MobiError('Unknown book type: %s' % repr(self.ident))

        self.sections = []
        self.section_headers = []
@ -497,8 +500,8 @@ class MobiReader(object):
                if ':' in x:
                    del tag.attrib[x]
            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
-                'state', 'city', 'street', 'address', 'content'):
-                tag.tag = 'div' if tag.tag == 'content' else 'span'
+                'state', 'city', 'street', 'address', 'content', 'form'):
+                tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
                continue
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -294,6 +294,9 @@ def xml2str(root, pretty_print=False, strip_comments=False):
 def xml2unicode(root, pretty_print=False):
    return etree.tostring(root, pretty_print=pretty_print)

+def xml2text(elem):
+    return etree.tostring(elem, method='text', encoding=unicode, with_tail=False)
+
 ASCII_CHARS   = set(chr(x) for x in xrange(128))
 UNIBYTE_CHARS = set(chr(x) for x in xrange(256))
 URL_SAFE      = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@ -1187,6 +1190,7 @@ class Manifest(object):
        if item in self.ids:
            item = self.ids[item]
        del self.ids[item.id]
+        if item.href in self.hrefs:
            del self.hrefs[item.href]
        self.items.remove(item)
        if item in self.oeb.spine:
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -184,6 +184,8 @@ class EbookIterator(object):

        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                not hasattr(self.pathtoopf, 'manifest'):
+            if hasattr(self.pathtoopf, 'manifest'):
+                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
            self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                    plumber.input_plugin)
        if hasattr(self.pathtoopf, 'manifest'):
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -96,6 +96,8 @@ class CSSSelector(etree.XPath):
            path = css_to_xpath(css)
        except UnicodeEncodeError: # Bug in css_to_xpath
            path = '/'
+        except NotImplementedError: # Probably a subselect like :hover
+            path = '/'
        path = self.LOCAL_NAME_RE.sub(r"local-name() = '", path)
        etree.XPath.__init__(self, path, namespaces=namespaces)
        self.css = css
@ -526,7 +528,7 @@ class Style(object):
                base = parent.width
            else:
                base = self._profile.width
-            if 'width' is self._element.attrib:
+            if 'width' in self._element.attrib:
                width = self._element.attrib['width']
            elif 'width' in self._style:
                width = self._style['width']
@ -534,6 +536,8 @@ class Style(object):
                result = base
            else:
                result = self._unit_convert(width, base=base)
+            if isinstance(result, (unicode, str, bytes)):
+                result = self._profile.width
            self._width = result
        return self._width

@ -547,7 +551,7 @@ class Style(object):
                base = parent.height
            else:
                base = self._profile.height
-            if 'height' is self._element.attrib:
+            if 'height' in self._element.attrib:
                height = self._element.attrib['height']
            elif 'height' in self._style:
                height = self._style['height']
@ -555,6 +559,8 @@ class Style(object):
                result = base
            else:
                result = self._unit_convert(height, base=base)
+            if isinstance(result, (unicode, str, bytes)):
+                result = self._profile.height
            self._height = result
        return self._height

--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@ -14,7 +14,7 @@ from lxml import etree

 from calibre.ebooks.oeb.base import XPath, XPNSMAP
 from calibre import guess_type
-
+from calibre.library.comments import comments_to_html
 class Jacket(object):
    '''
    Book jacket manipulation. Remove first image and insert comments at start of
@ -25,6 +25,7 @@ class Jacket(object):
    <html xmlns="%(xmlns)s">
        <head>
            <title>%(title)s</title>
+            <meta name="calibre-content" content="jacket"/>
        </head>
        <body>
            <div class="calibre_rescale_100">
@ -83,7 +84,9 @@ class Jacket(object):
                comments = ''
        if not comments.strip():
            comments = ''
-        comments = comments.replace('\r\n', '\n').replace('\n\n', '<br/><br/>')
+        orig_comments = comments
+        if comments:
+            comments = comments_to_html(comments)
        series = '<b>Series: </b>' + escape(mi.series if mi.series else '')
        if mi.series and mi.series_index is not None:
            series += escape(' [%s]'%mi.format_series_index())
@ -96,21 +99,41 @@ class Jacket(object):
            except:
                tags = []
        if tags:
-            tags = '<b>Tags: </b>' + escape(self.opts.dest.tags_to_string(tags))
+            tags = '<b>Tags: </b>' + self.opts.dest.tags_to_string(tags)
        else:
            tags = ''
        try:
            title = mi.title if mi.title else unicode(self.oeb.metadata.title[0])
        except:
            title = _('Unknown')
-        html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
-                title=escape(title), comments=escape(comments),
+
+        def generate_html(comments):
+            return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
+                title=escape(title), comments=comments,
                jacket=escape(_('Book Jacket')), series=series,
                tags=tags, rating=self.get_rating(mi.rating))
        id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
-        root = etree.fromstring(html)
+        from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath
+        try:
+            root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER)
+        except:
+            root = etree.fromstring(generate_html(escape(orig_comments)),
+                    parser=RECOVER_PARSER)
+        jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]')
+        found = None
+        for item in list(self.oeb.spine)[:4]:
+            try:
+                if jacket(item.data):
+                    found = item
+                    break
+            except:
+                continue
+        if found is None:
            item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
            self.oeb.spine.insert(0, item, True)
+        else:
+            self.log('Found existing book jacket, replacing...')
+            found.data = root


    def __call__(self, oeb, opts, metadata):
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'

 import os
 from calibre.utils.date import isoformat, now
+from calibre import guess_type

 def meta_info_to_oeb_metadata(mi, m, log):
    from calibre.ebooks.oeb.base import OPF
@ -92,15 +93,16 @@ class MergeMetadata(object):
                                    scheme='uuid')
            self.oeb.uid = self.oeb.metadata.identifier[-1]

-
-
-
    def set_cover(self, mi, prefer_metadata_cover):
-        cdata = ''
+        cdata, ext = '', 'jpg'
        if mi.cover and os.access(mi.cover, os.R_OK):
            cdata = open(mi.cover, 'rb').read()
+            ext = mi.cover.rpartition('.')[-1].lower().strip()
        elif mi.cover_data and mi.cover_data[-1]:
            cdata = mi.cover_data[1]
+            ext = mi.cover_data[0]
+        if ext not in ('png', 'jpg', 'jpeg'):
+            ext = 'jpg'
        id = old_cover = None
        if 'cover' in self.oeb.guide:
            old_cover = self.oeb.guide['cover']
@ -120,8 +122,8 @@ class MergeMetadata(object):
                self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
                return id
        if cdata:
-            id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
-            self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)
+            id, href = self.oeb.manifest.generate('cover', 'cover.'+ext)
+            self.oeb.manifest.add(id, href, guess_type('cover.'+ext)[0], data=cdata)
            self.oeb.guide.add('cover', 'Cover', href)
        return id

--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'

 '''
 Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
-forces at "likely" locations to conform to size limitations. This transform
+forced at "likely" locations to conform to size limitations. This transform
 assumes a prior call to the flatcss transform.
 '''

@ -385,12 +385,18 @@ class FlowSplitter(object):
            raise SplitError(self.item.href, root)
        self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))

-        for t in self.do_split(tree, split_point, before):
+        trees = self.do_split(tree, split_point, before)
+        sizes = [len(tostring(t.getroot())) for t in trees]
+        if min(sizes) < 5*1024:
+            self.log.debug('\t\t\tSplit tree too small')
+            self.split_to_size(tree)
+            return
+
+        for t, size in zip(trees, sizes):
            r = t.getroot()
            if self.is_page_empty(r):
                continue
-            size = len(tostring(r))
-            if size <= self.max_flow_size:
+            elif size <= self.max_flow_size:
                self.split_trees.append(t)
                self.log.debug(
                    '\t\t\tCommitted sub-tree #%d (%d KB)'%(
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@ -11,7 +11,7 @@ import re
 from lxml import etree
 from urlparse import urlparse

-from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
 from calibre.ebooks import ConversionError

 def XPath(x):
@ -79,8 +79,7 @@ class DetectStructure(object):
            page_break_before = 'display: block; page-break-before: always'
            page_break_after = 'display: block; page-break-after: always'
            for item, elem in self.detected_chapters:
-                text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
-                text = text.strip()
+                text = xml2text(elem).strip()
                self.log('\tDetected chapter:', text[:50])
                if chapter_mark == 'none':
                    continue
@ -120,8 +119,7 @@ class DetectStructure(object):
                    if frag:
                        href = '#'.join((href, frag))
                    if not self.oeb.toc.has_href(href):
-                        text = u' '.join([t.strip() for t in \
-                                a.xpath('descendant::text()')])
+                        text = xml2text(a)
                        text = text[:100].strip()
                        if not self.oeb.toc.has_text(text):
                            num += 1
@ -135,7 +133,7 @@ class DetectStructure(object):


    def elem_to_link(self, item, elem, counter):
-        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
+        text = xml2text(elem)
        text = text[:100].strip()
        id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', id)
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -461,7 +461,7 @@ class FileDialog(QObject):

    def get_files(self):
        if self.selected_files is None:
-            return tuple(os.path.abspath(qstring_to_unicode(i)) for i in self.fd.selectedFiles())
+            return tuple(os.path.abspath(unicode(i)) for i in self.fd.selectedFiles())
        return tuple(self.selected_files)


--- a/src/calibre/gui2/convert/epub_output.py
+++ b/src/calibre/gui2/convert/epub_output.py
@ -17,7 +17,8 @@ class PluginWidget(Widget, Ui_Form):

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, 'epub_output',
-                ['dont_split_on_page_breaks', 'flow_size', 'no_default_epub_cover']
+                ['dont_split_on_page_breaks', 'flow_size',
+                    'no_default_epub_cover', 'no_svg_cover']
                )
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/epub_output.ui
+++ b/src/calibre/gui2/convert/epub_output.ui
@ -21,7 +21,7 @@
     </property>
    </widget>
   </item>
-   <item row="2" column="0">
+   <item row="3" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Split files &amp;larger than:</string>
@ -31,7 +31,7 @@
     </property>
    </widget>
   </item>
-   <item row="2" column="1">
+   <item row="3" column="1">
    <widget class="QSpinBox" name="opt_flow_size">
     <property name="suffix">
      <string> KB</string>
@ -47,7 +47,7 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -67,6 +67,13 @@
     </property>
    </widget>
   </item>
+   <item row="2" column="0">
+    <widget class="QCheckBox" name="opt_no_svg_cover">
+     <property name="text">
+      <string>No &amp;SVG cover</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@ -71,7 +71,7 @@ class MetadataWidget(Widget, Ui_Form):
        self.author_sort.setText(mi.author_sort if mi.author_sort else '')
        self.tags.setText(', '.join(mi.tags if mi.tags else []))
        self.tags.update_tags_cache(self.db.all_tags())
-        self.comment.setText(mi.comments if mi.comments else '')
+        self.comment.setPlainText(mi.comments if mi.comments else '')
        if mi.series:
            self.series.setCurrentIndex(self.series.findText(mi.series))
        if mi.series_index is not None:
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@ -7,41 +7,15 @@ __docformat__ = 'restructuredtext en'
 import re

 from PyQt4.QtCore import SIGNAL, Qt
-from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, QFileDialog, \
-    QBrush, QSyntaxHighlighter, QTextCharFormat
+from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, \
+                        QBrush, QTextCursor, QTextEdit

 from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
 from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
-from calibre.gui2 import qstring_to_unicode
-from calibre.gui2 import error_dialog
+from calibre.gui2 import error_dialog, choose_files
 from calibre.ebooks.oeb.iterator import EbookIterator
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog

-class RegexHighlighter(QSyntaxHighlighter):
-
-    def __init__(self, *args):
-        QSyntaxHighlighter.__init__(self, *args)
-
-        self.regex = u''
-
-    def update_regex(self, regex):
-        self.regex = regex
-        self.rehighlight()
-
-    def highlightBlock(self, text):
-        valid_regex = True
-        text = qstring_to_unicode(text)
-        format = QTextCharFormat()
-        format.setBackground(QBrush(Qt.yellow))
-
-        if self.regex:
-            try:
-                for mo in re.finditer(self.regex, text):
-                    self.setFormat(mo.start(), mo.end() - mo.start(), format)
-            except:
-                valid_regex = False
-        self.emit(SIGNAL('regex_valid(PyQt_PyObject)'), valid_regex)
-
 class RegexBuilder(QDialog, Ui_RegexBuilder):

    def __init__(self, db, book_id, regex, *args):
@ -49,9 +23,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
        self.setupUi(self)

        self.regex.setText(regex)
-        self.regex_valid(True)
-        self.highlighter = RegexHighlighter(self.preview.document())
-        self.highlighter.update_regex(regex)
+        self.regex_valid()

        if not db or not book_id:
            self.button_box.addButton(QDialogButtonBox.Open)
@ -62,19 +34,37 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
        self.connect(self.regex, SIGNAL('textChanged(QString)'), self.regex_valid)
        self.connect(self.test, SIGNAL('clicked()'), self.do_test)

-    def regex_valid(self, valid):
-        regex = qstring_to_unicode(self.regex.text())
+    def regex_valid(self):
+        regex = unicode(self.regex.text())
        if regex:
            try:
                re.compile(regex)
                self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgba(0,255,0,20%); }')
            except:
                self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgb(255,0,0,20%); }')
+                return False
        else:
            self.regex.setStyleSheet('QLineEdit { color: black; background-color: white; }')
+        return True

    def do_test(self):
-        self.highlighter.update_regex(qstring_to_unicode(self.regex.text()))
+        selections = []
+        if self.regex_valid():
+            text = unicode(self.preview.toPlainText())
+            regex = unicode(self.regex.text())
+            cursor = QTextCursor(self.preview.document())
+            extsel = QTextEdit.ExtraSelection()
+            extsel.cursor = cursor
+            extsel.format.setBackground(QBrush(Qt.yellow))
+            try:
+                for match in re.finditer(regex, text):
+                    es = QTextEdit.ExtraSelection(extsel)
+                    es.cursor.setPosition(match.start(), QTextCursor.MoveAnchor)
+                    es.cursor.setPosition(match.end(), QTextCursor.KeepAnchor)
+                    selections.append(es)
+            except:
+                pass
+        self.preview.setExtraSelections(selections)

    def select_format(self, db, book_id):
        format = None
@ -104,9 +94,10 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):

    def button_clicked(self, button):
        if button == self.button_box.button(QDialogButtonBox.Open):
-            name = QFileDialog.getOpenFileName(self, _('Open book'), _('~'))
-            if name:
-                self.open_book(qstring_to_unicode(name))
+            files = choose_files(self, 'regexp tester dialog', _('Open book'),
+                    select_only_single_file=True)
+            if files:
+                self.open_book(files[0])
        if button == self.button_box.button(QDialogButtonBox.Ok):
            self.accept()

--- a/src/calibre/gui2/dialogs/book_info.py
+++ b/src/calibre/gui2/dialogs/book_info.py
@ -11,6 +11,7 @@ from PyQt4.QtGui import QDialog, QPixmap, QGraphicsScene, QIcon, QDesktopService
 from calibre.gui2.dialogs.book_info_ui import Ui_BookInfo
 from calibre.gui2 import dynamic
 from calibre import fit_image
+from calibre.library.comments import comments_to_html

 class BookInfo(QDialog, Ui_BookInfo):

@ -96,6 +97,8 @@ class BookInfo(QDialog, Ui_BookInfo):
        self.setWindowTitle(info[_('Title')])
        self.title.setText('<b>'+info.pop(_('Title')))
        comments = info.pop(_('Comments'), '')
+        if comments:
+            comments = comments_to_html(comments)
        if re.search(r'<[a-zA-Z]+>', comments) is None:
            lines = comments.splitlines()
            lines = [x if x.strip() else '<br><br>' for x in lines]
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -180,6 +180,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            self.formats_changed = True

    def get_selected_format_metadata(self):
+        old = prefs['read_file_metadata']
+        if not old:
+            prefs['read_file_metadata'] = True
+        try:
            row = self.formats.currentRow()
            fmt = self.formats.item(row)
            if fmt is None:
@ -201,6 +205,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                error_dialog(self, _('Could not read metadata'),
                            _('Could not read metadata from %s format')%ext).exec_()
            return None, None
+        finally:
+            if old != prefs['read_file_metadata']:
+                prefs['read_file_metadata'] = old

    def set_metadata_from_format(self):
        mi, ext = self.get_selected_format_metadata()
@ -231,7 +238,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            if mi.series_index is not None:
                self.series_index.setValue(float(mi.series_index))
        if mi.comments and mi.comments.strip():
-            self.comments.setText(mi.comments)
+            self.comments.setPlainText(mi.comments)


    def set_cover(self):
@ -555,7 +562,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        title  = qstring_to_unicode(self.title.text())
        try:
            author = string_to_authors(unicode(self.authors.text()))[0]
-        except IndexError:
+        except:
            author = ''
        publisher = qstring_to_unicode(self.publisher.currentText())
        if isbn or title or author or publisher:
@ -590,7 +597,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                            prefix = unicode(self.comments.toPlainText())
                            if prefix:
                                prefix += '\n'
-                            self.comments.setText(prefix + summ)
+                            self.comments.setPlainText(prefix + summ)
                        if book.rating is not None:
                            self.rating.setValue(int(book.rating))
                        if book.tags:
@ -654,7 +661,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            self.db.set_series(self.id,
                    unicode(self.series.currentText()).strip(), notify=False)
            self.db.set_series_index(self.id, self.series_index.value(), notify=False)
-            self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False)
+            self.db.set_comment(self.id, unicode(self.comments.toPlainText()), notify=False)
            d = self.pubdate.date()
            d = qt_to_dt(d)
            self.db.set_pubdate(self.id, d, notify=False)
--- a/src/calibre/gui2/dialogs/scheduler.py
+++ b/src/calibre/gui2/dialogs/scheduler.py
@ -220,6 +220,10 @@ class Scheduler(QObject):
        self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
        self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
        self.news_menu.addAction(self.cac)
+        self.news_menu.addSeparator()
+        self.all_action = self.news_menu.addAction(
+                _('Download all scheduled new sources'),
+                self.download_all_scheduled)

        self.timer = QTimer(self)
        self.timer.start(int(self.INTERVAL * 60000))
@ -304,7 +308,11 @@ class Scheduler(QObject):
        if urn is not None:
            return self.download(urn)
        for urn in self.recipe_model.scheduled_urns():
-            self.download(urn)
+            if not self.download(urn):
+                break
+
+    def download_all_scheduled(self):
+        self.download_clicked(None)

    def download(self, urn):
        self.lock.lock()
@ -316,12 +324,13 @@ class Scheduler(QObject):
                            'is active'))
                d.setModal(False)
                d.show()
-            return
+            return False
        self.internet_connection_failed = False
        doit = urn not in self.download_queue
        self.lock.unlock()
        if doit:
            self.do_download(urn)
+        return True

    def check(self):
        recipes = self.recipe_model.get_to_be_downloaded_recipes()
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -777,7 +777,7 @@ class BooksView(TableView):
            self.setItemDelegateForColumn(cm.index('series'), self.series_delegate)

    def set_context_menu(self, edit_metadata, send_to_device, convert, view,
-                         save, open_folder, book_details, merge, delete, similar_menu=None):
+                         save, open_folder, book_details, delete, similar_menu=None):
        self.setContextMenuPolicy(Qt.DefaultContextMenu)
        self.context_menu = QMenu(self)
        if edit_metadata is not None:
@ -790,8 +790,6 @@ class BooksView(TableView):
        self.context_menu.addAction(save)
        if open_folder is not None:
            self.context_menu.addAction(open_folder)
-        if merge is not None:
-            self.context_menu.addAction(merge)
        if delete is not None:
            self.context_menu.addAction(delete)
        if book_details is not None:
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@ -1379,5 +1379,5 @@ void PictureFlow::dataChanged() { d->dataChanged(); }
 void PictureFlow::emitcurrentChanged(int index) { emit currentChanged(index); }

 int FlowImages::count() { return 0; }
-QImage FlowImages::image(int index) { return QImage(); }
-QString FlowImages::caption(int index) {return QString(); }
+QImage FlowImages::image(int index) { index=0; return QImage(); }
+QString FlowImages::caption(int index) {index=0; return QString(); }
--- a/src/calibre/gui2/shortcuts.py
+++ b/src/calibre/gui2/shortcuts.py
@ -260,7 +260,10 @@ class ShortcutConfig(QWidget):
        self.view.setModel(model)
        self.delegate = Delegate()
        self.view.setItemDelegate(self.delegate)
-        self.delegate.sizeHintChanged.connect(self.view.scrollTo)
+        self.delegate.sizeHintChanged.connect(self.scrollTo)
+
+    def scrollTo(self, index):
+        self.view.scrollTo(index)


 if __name__ == '__main__':
--- a/src/calibre/gui2/status.py
+++ b/src/calibre/gui2/status.py
@ -11,6 +11,7 @@ from calibre.gui2.widgets import IMAGE_EXTENSIONS
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.gui2.notify import get_notifier
 from calibre.ebooks import BOOK_EXTENSIONS
+from calibre.library.comments import comments_to_html

 class BookInfoDisplay(QWidget):

@ -91,9 +92,9 @@ class BookInfoDisplay(QWidget):
    WEIGHTS = collections.defaultdict(lambda : 100)
    WEIGHTS[_('Path')] = 0
    WEIGHTS[_('Formats')] = 1
-    WEIGHTS[_('Comments')] = 2
-    WEIGHTS[_('Series')] = 3
-    WEIGHTS[_('Tags')] = 4
+    WEIGHTS[_('Comments')] = 4
+    WEIGHTS[_('Series')] = 2
+    WEIGHTS[_('Tags')] = 3

    def __init__(self, clear_message):
        QWidget.__init__(self)
@ -127,10 +128,14 @@ class BookInfoDisplay(QWidget):
        keys.sort(cmp=lambda x, y: cmp(self.WEIGHTS[x], self.WEIGHTS[y]))
        for key in keys:
            txt = data[key]
+            if not txt or not txt.strip() or txt == 'None':
+                continue
            if isinstance(key, str):
                key = key.decode(preferred_encoding, 'replace')
            if isinstance(txt, str):
                txt = txt.decode(preferred_encoding, 'replace')
+            if key == _('Comments'):
+                txt = comments_to_html(txt)
            rows += u'<tr><td><b>%s:</b></td><td>%s</td></tr>'%(key, txt)
        self.book_data.setText(u'<table>'+rows+u'</table>')

--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -242,8 +242,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        ####################### Vanity ########################
        self.vanity_template  = _('<p>For help see the: <a href="%s">User Manual</a>'
                '<br>')%'http://calibre-ebook.com/user_manual'
+        dv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
+        v = __version__
+        if getattr(sys, 'frozen', False) and dv and os.path.abspath(dv) in sys.path:
+            v += '*'
        self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal '
-            '%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__)
+            '%%(version)s</b><br>%%(device)s</p>')%(__appname__, v)
        self.latest_version = ' '
        self.vanity.setText(self.vanity_template%dict(version=' ', device=' '))
        self.device_info = ' '
@ -350,7 +354,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):

        self.view_menu = QMenu()
        self.view_menu.addAction(_('View'))
-        self.view_menu.addAction(_('View specific format'))
+        ac = self.view_menu.addAction(_('View specific format'))
+        ac.setShortcut(Qt.AltModifier+Qt.Key_V)
        self.action_view.setMenu(self.view_menu)

        self.delete_menu = QMenu()
@ -478,16 +483,15 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
                                        self.action_save,
                                        self.action_open_containing_folder,
                                        self.action_show_book_details,
-                                        self.action_merge,
                                        self.action_del,
                                        similar_menu=similar_menu)

        self.memory_view.set_context_menu(None, None, None,
-                self.action_view, self.action_save, None, None, None, self.action_del)
+                self.action_view, self.action_save, None, None, self.action_del)
        self.card_a_view.set_context_menu(None, None, None,
-                self.action_view, self.action_save, None, None, None, self.action_del)
+                self.action_view, self.action_save, None, None, self.action_del)
        self.card_b_view.set_context_menu(None, None, None,
-                self.action_view, self.action_save, None, None, None, self.action_del)
+                self.action_view, self.action_save, None, None, self.action_del)

        QObject.connect(self.library_view,
                SIGNAL('files_dropped(PyQt_PyObject)'),
@ -1669,7 +1673,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            if src_book:
                fmt = os.path.splitext(src_book)[-1].replace('.', '').upper()
                with open(src_book, 'rb') as f:
-                    self.db.add_format(dest_id, fmt, f, index_is_id=True,
+                    self.library_view.model().db.add_format(dest_id, fmt, f, index_is_id=True,
                            notify=False, replace=replace)

    def books_to_merge(self, rows):
@ -1684,7 +1688,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
                src_ids.append(id_)
                dbfmts = m.db.formats(id_, index_is_id=True)
                if dbfmts:
-                    for fmt in dbfmts:
+                    for fmt in dbfmts.split(','):
                        src_books.append(m.db.format_abspath(id_, fmt,
                            index_is_id=True))
        return [dest_id, src_books, src_ids]
@ -2092,7 +2096,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
                return
        for row in rows:
            path = self.library_view.model().db.abspath(row.row())
-            QDesktopServices.openUrl(QUrl('file:'+path))
+            QDesktopServices.openUrl(QUrl.fromLocalFile(path))


    def view_book(self, triggered):
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -7,10 +7,12 @@ __docformat__ = 'restructuredtext en'
 '''
 import os, math, re, glob, sys
 from base64 import b64encode
+from functools import partial
+
 from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
                     QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
-                     QFont, pyqtSignature, QAction, QByteArray
+                     QFont, pyqtSignature, QAction, QByteArray, QMenu
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings

 from calibre.utils.config import Config, StringConfig
@ -392,13 +394,14 @@ class Document(QWebPage):
        return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results

    def set_bottom_padding(self, amount):
-        padding = '%dpx'%amount
-        try:
-            old_padding = unicode(self.javascript('$("body").css("padding-bottom")').toString())
-        except:
-            old_padding = ''
+        body = self.mainFrame().documentElement().findFirst('body')
+        if body.isNull():
+            return
+        old_padding = unicode(body.styleProperty('padding-bottom',
+            body.ComputedStyle)).strip()
+        padding = u'%dpx'%amount
        if old_padding != padding:
-            self.javascript('$("body").css("padding-bottom", "%s")' % padding)
+            body.setStyleProperty('padding-bottom', padding + ' !important')


 class EntityDeclarationProcessor(object):
@ -421,7 +424,7 @@ class DocumentView(QWebView):
        QWebView.__init__(self, *args)
        self.debug_javascript = False
        self.shortcuts =  Shortcuts(SHORTCUTS, 'shortcuts/viewer')
-        self.self_closing_pat = re.compile(r'<([a-z]+)\s+([^>]+)/>',
+        self.self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
                re.IGNORECASE)
        self.setSizePolicy(QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding))
        self._size_hint = QSize(510, 680)
@ -449,6 +452,50 @@ class DocumentView(QWebView):
                _('&Lookup in dictionary'), self)
        self.dictionary_action.setShortcut(Qt.CTRL+Qt.Key_L)
        self.dictionary_action.triggered.connect(self.lookup)
+        self.goto_location_action = QAction(_('Go to...'), self)
+        self.goto_location_menu = m = QMenu(self)
+        self.goto_location_actions = a = {
+                'Next Page': self.next_page,
+                'Previous Page': self.previous_page,
+                'Section Top' : partial(self.scroll_to, 0),
+                'Document Top': self.goto_document_start,
+                'Section Bottom':partial(self.scroll_to, 1),
+                'Document Bottom': self.goto_document_end,
+                'Next Section': self.goto_next_section,
+                'Previous Section': self.goto_previous_section,
+        }
+        for name, key in [(_('Next Section'), 'Next Section'),
+                (_('Previous Section'), 'Previous Section'),
+                (None, None),
+                (_('Document Start'), 'Document Top'),
+                (_('Document End'), 'Document Bottom'),
+                (None, None),
+                (_('Section Start'), 'Section Top'),
+                (_('Section End'), 'Section Bottom'),
+                (None, None),
+                (_('Next Page'), 'Next Page'),
+                (_('Previous Page'), 'Previous Page')]:
+            if key is None:
+                m.addSeparator()
+            else:
+                m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0])
+        self.goto_location_action.setMenu(self.goto_location_menu)
+
+    def goto_next_section(self, *args):
+        '''
+        Ask the manager, if one is set, to go to the next section.
+        Any positional arguments are accepted and ignored.
+        '''
+        if self.manager is None:
+            return
+        self.manager.goto_next_section()
+
+    def goto_previous_section(self, *args):
+        '''
+        Ask the manager, if one is set, to go to the previous section.
+        Any positional arguments are accepted and ignored.
+        '''
+        if self.manager is None:
+            return
+        self.manager.goto_previous_section()
+
+    def goto_document_start(self, *args):
+        '''
+        Ask the manager, if one is set, to go to the start of the document.
+        Any positional arguments are accepted and ignored.
+        '''
+        if self.manager is None:
+            return
+        self.manager.goto_start()
+
+    def goto_document_end(self, *args):
+        '''
+        Ask the manager, if one is set, to go to the end of the document.
+        Any positional arguments are accepted and ignored.
+        '''
+        if self.manager is None:
+            return
+        self.manager.goto_end()

    @property
    def copy_action(self):
@ -488,6 +535,8 @@ class DocumentView(QWebView):
        text = unicode(self.selectedText())
        if text:
            menu.insertAction(list(menu.actions())[0], self.dictionary_action)
+        menu.addSeparator()
+        menu.addAction(self.goto_location_action)
        menu.exec_(ev.globalPos())

    def lookup(self, *args):
@ -763,20 +812,9 @@ class DocumentView(QWebView):

    def keyPressEvent(self, event):
        key = self.shortcuts.get_match(event)
-        if key == 'Next Page':
-            self.next_page()
-        elif key == 'Previous Page':
-            self.previous_page()
-        elif key == 'Section Top':
-            self.scroll_to(0)
-        elif key == 'Document Top':
-            if self.manager is not None:
-                self.manager.goto_start()
-        elif key == 'Section Bottom':
-            self.scroll_to(1)
-        elif key == 'Document Bottom':
-            if self.manager is not None:
-                self.manager.goto_end()
+        func = self.goto_location_actions.get(key, None)
+        if func is not None:
+            func()
        elif key == 'Down':
            self.scroll_by(y=15)
        elif key == 'Up':
@ -785,12 +823,6 @@ class DocumentView(QWebView):
            self.scroll_by(x=-15)
        elif key == 'Right':
            self.scroll_by(x=15)
-        elif key == 'Next Section':
-            if self.manager is not None:
-                self.manager.goto_next_section()
-        elif key == 'Previous Section':
-            if self.manager is not None:
-                self.manager.goto_previous_section()
        else:
            return QWebView.keyPressEvent(self, event)

--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -822,7 +822,7 @@ def do_remove_custom_column(db, label, force):
    if not force:
        q = raw_input(_('You will lose all data in the column: %r.'
            ' Are you sure (y/n)? ')%label)
-        if q.lower().strip() != 'y':
+        if q.lower().strip() != _('y'):
            return
    db.delete_custom_column(label=label)
    prints('Column %r removed.'%label)
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre.constants import preferred_encoding
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
+from calibre import prepare_string_for_xml
+
+def comments_to_html(comments):
+    r'''
+    Convert random comment text to a normalized, xml-legal block of <p>s.
+
+    'plain text' returns as
+    <p>plain text</p>
+
+    'plain text with <i>minimal</i> <b>markup</b>' returns as
+    <p>plain text with <i>minimal</i> <b>markup</b></p>
+
+    '<p>pre-formatted text</p>' returns untouched
+
+    'A line of text\n\nFollowed by a line of text' returns as
+    <p>A line of text</p>
+    <p>Followed by a line of text</p>
+
+    'A line of text.\nA second line of text.\rA third line of text' returns as
+    <p>A line of text.<br />A second line of text.<br />A third line of text.</p>
+
+    '...end of a paragraph.Somehow the break was lost...' returns as
+    <p>...end of a paragraph.</p>
+    <p>Somehow the break was lost...</p>
+
+    Deprecated HTML returns as HTML via BeautifulSoup()
+
+    :param comments: The comment text, either unicode or a byte string
+                     (byte strings are decoded using preferred_encoding,
+                     replacing undecodable bytes). May be None or empty.
+    :return: The converted markup as rendered by BeautifulSoup with
+             encoding=None. Every <p> in it gets class="description".
+             An empty string is returned for None or empty input.
+    '''
+    if not comments:
+        # Nothing to convert, this also avoids calling .decode() on None
+        return u''
+    if not isinstance(comments, unicode):
+        comments = comments.decode(preferred_encoding, 'replace')
+
+    # Treat a Windows line ending as a single line break, otherwise every
+    # \r\n becomes two <br />s and \r\n\r\n is never seen as a paragraph
+    # break.
+    comments = comments.replace(u'\r\n', u'\n')
+
+    # Hackish - ignoring sentences ending or beginning in numbers to avoid
+    # confusion with decimal points.
+
+    # Explode lost CRs to \n\n
+    comments = re.sub(r'([a-z])([.?!])([A-Z])', r'\1\2\n\n\3', comments)
+
+    # Convert \n\n to <p>s
+    if u'\n\n' in comments:
+        soup = BeautifulSoup()
+        for pos, para in enumerate(comments.split(u'\n\n')):
+            pTag = Tag(soup, 'p')
+            pTag.insert(0, para)
+            soup.insert(pos, pTag)
+        comments = soup.renderContents(None)
+
+    # Convert solo returns to <br />
+    comments = re.sub(r'[\r\n]', '<br />', comments)
+
+    # Convert two hyphens to emdash
+    comments = comments.replace('--', '&mdash;')
+
+    soup = BeautifulSoup(comments)
+    result = BeautifulSoup()
+    rtc = 0     # insertion index into result
+    pTag = None # the <p> currently being filled, None if there is none
+    ptc = 0     # insertion index into pTag
+
+    # Iterate over a copy of the top level tokens, as they are inserted
+    # into other trees inside the loop (presumably that removes them from
+    # soup.contents)
+    for token in list(soup.contents):
+        is_text = type(token) is NavigableString
+        if is_text or token.name in ('br', 'b', 'i', 'em'):
+            # Bare text and inline markup are collected into a <p>
+            if pTag is None:
+                pTag = Tag(result, 'p')
+                ptc = 0
+            if is_text:
+                token = prepare_string_for_xml(token)
+            pTag.insert(ptc, token)
+            ptc += 1
+        else:
+            # Any other element closes the open <p> (if any) and is then
+            # copied over itself
+            if pTag is not None:
+                result.insert(rtc, pTag)
+                rtc += 1
+                pTag = None
+            # Clean up NavigableStrings for xml
+            for sub_token in list(token.contents):
+                if type(sub_token) is NavigableString:
+                    sub_token.replaceWith(prepare_string_for_xml(sub_token))
+            result.insert(rtc, token)
+            rtc += 1
+
+    # Flush a <p> that was still open when the tokens ran out
+    if pTag is not None:
+        result.insert(rtc, pTag)
+
+    for p in result.findAll('p'):
+        p['class'] = 'description'
+
+    return result.renderContents(encoding=None)
+
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@ -27,7 +27,9 @@ FORMAT_ARG_DESCS = dict(
            'of the name use {author_sort[0]}'),
        tags=_('The tags'),
        series=_('The series'),
-        series_index=_('The series number. To get leading zeros use {series_index:0>3s}'),
+        series_index=_('The series number. '
+            'To get leading zeros use {series_index:0>3s} or '
+            '{series_index:>3s} for leading spaces'),
        rating=_('The rating'),
        isbn=_('The ISBN'),
        publisher=_('The publisher'),
--- a/src/calibre/manual/plugins.rst
+++ b/src/calibre/manual/plugins.rst
@ -113,7 +113,7 @@ Metadata download plugins
    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::

-        title, author, publisher, isbn, log, verbose and extra
+        title, book_author, publisher, isbn, log, verbose and extra

    Use these attributes to construct the search query. extra is reserved for
    future use.
--- a/src/calibre/startup.py
+++ b/src/calibre/startup.py
@ -19,12 +19,13 @@ __builtin__.__dict__['__'] = lambda s: s
 from calibre.constants import iswindows, preferred_encoding, plugins

 _run_once = False
+winutil = winutilerror = None
+
 if not _run_once:
    _run_once = True

    ################################################################################
    # Platform specific modules
-    winutil = winutilerror = None
    if iswindows:
        winutil, winutilerror = plugins['winutil']
        if not winutil:
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/ml.po
+++ b/src/calibre/translations/ml.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sq.po
+++ b/src/calibre/translations/sq.po
--- a/src/calibre/translations/sr.po
+++ b/src/calibre/translations/sr.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/tr.po
+++ b/src/calibre/translations/tr.po
--- a/src/calibre/translations/zh_CN.po
+++ b/src/calibre/translations/zh_CN.po
--- a/src/calibre/translations/zh_TW.po
+++ b/src/calibre/translations/zh_TW.po
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@ -106,6 +106,7 @@ _extra_lang_codes = {
        'en_SG' : _('English (Singapore)'),
        'en_YE' : _('English (Yemen)'),
        'en_IE' : _('English (Ireland)'),
+        'es_PY' : _('Spanish (Paraguay)'),
        'de_AT' : _('German (AT)'),
        'nl'    : _('Dutch (NL)'),
        'nl_BE' : _('Dutch (BE)'),
--- a/src/calibre/web/feeds/recipes/model.py
+++ b/src/calibre/web/feeds/recipes/model.py
@ -183,10 +183,6 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
        lang_map = {}
        self.all_urns = set([])
        self.showing_count = 0
-        for x in self.scheduler_config.iter_recipes():
-            urn = x.get('id')
-            if ok(urn):
-                factory(NewsItem, scheduled, urn, x.get('title'))
        for x in self.custom_recipe_collection:
            urn = x.get('id')
            self.all_urns.add(urn)
@ -202,6 +198,13 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
                    lang_map[lang] = factory(NewsCategory, new_root, lang)
                factory(NewsItem, lang_map[lang], urn, x.get('title'))
                self.showing_count += 1
+        for x in self.scheduler_config.iter_recipes():
+            urn = x.get('id')
+            if urn not in self.all_urns:
+                self.scheduler_config.un_schedule_recipe(urn)
+                continue
+            if ok(urn):
+                factory(NewsItem, scheduled, urn, x.get('title'))
        new_root.prune()
        new_root.sort()
        self.root = new_root