Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 99c57ac10d: Merge from trunk
@@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
-resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/
@@ -19,6 +19,67 @@
 # new recipes:
 # - title:

+- version: 0.8.51
+  date: 2012-05-11
+
+  new features:
+    - title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
+      tickets: [994514]
+
+    - title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
+
+    - title: "Driver for Motorola XT875 and Pandigital SuperNova"
+      tickets: [996890]
+
+    - title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
+      tickets: [994811]
+
+    - title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
+
+    - title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
+      tickets: [994838]
+
+  bug fixes:
+    - title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
+      tickets: [996102]
+
+    - title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
+
+    - title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
+      tickets: [997034]
+
+    - title: "Fix download of news in AZW3 format not working"
+      tickets: [996439]
+
+    - title: "Pocketbook driver: Update for new PB 611 firmware."
+      tickets: [903079]
+
+    - title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
+      tickets: [994939]
+
+    - title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
+      tickets: [994861]
+
+    - title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
+
+  improved recipes:
+    - Mainichi news
+    - derStandard
+    - Endgadget Japan
+
+  new recipes:
+    - title: Mainichi English
+      author: Hiroshi Miura
+
+    - title: The Grid TO
+      author: Yusuf W
+
+    - title: National Geographic (Italy)
+      author: faber1971
+
+    - title: Rebelion
+      author: Marc Busque
+
 - version: 0.8.50
   date: 2012-05-04
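The rest of this merge is recipe files, reconstructed below. All of them follow the same BasicNewsRecipe pattern, so a minimal sketch may help orient the reader; the class name and feed URL here are hypothetical, not taken from the commit:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):   # hypothetical name
        title = u'Example News'             # used as the ebook title
        language = 'en'
        oldest_article = 7                  # skip items older than a week
        max_articles_per_feed = 100
        no_stylesheets = True               # drop the site's CSS
        # each feed is a (section name, RSS URL) pair
        feeds = [(u'All stories', u'http://example.com/rss.xml')]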
recipes/ads_of_the_world.recipe (new file, 26 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    title = u'Ads of the World'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    description = 'The best international advertising campaigns'
    language = 'en'
    __author__ = 'faber1971'

    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'id':'primary'})
    ]

    remove_tags = [
        dict(name='ul', attrs={'class':'links inline'})
       ,dict(name='div', attrs={'class':'form-item'})
       ,dict(name='div', attrs={'id':['options', 'comments']})
       ,dict(name='ul', attrs={'id':'nodePager'})
    ]

    reverse_article_order = True
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
    feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
recipes/air_force_times.recipe (new file, 43 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AirForceTimes(BasicNewsRecipe):
    title = 'Air Force Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Air Force'
    language = 'en'
    publisher = 'AirForceTimes.com'
    category = 'news, U.S. Air Force'
    tags = 'news, U.S. Air Force'
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
    ]
recipes/army_times.recipe (new file, 42 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class ArmyTimes(BasicNewsRecipe):
    title = 'Army Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Army'
    language = 'en'
    publisher = 'ArmyTimes.com'
    category = 'news, U.S. Army'
    tags = 'news, U.S. Army'
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
    ]
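air_force_times.recipe and army_times.recipe above (and the Marine Corps Times and Military Times recipes later in this commit) are identical except for the title, publisher, and host name. A hypothetical refactoring, not part of the commit and untested against calibre's recipe loader, that factors out the duplication:

    from calibre.web.feeds.news import BasicNewsRecipe

    # Shared settings of the four *Times recipes (hypothetical base class).
    class _TimesBase(BasicNewsRecipe):
        __author__ = 'jde'
        oldest_article = 7  # days
        max_articles_per_feed = 25
        publication_type = 'newspaper'
        no_stylesheets = True
        use_embedded_content = False
        remove_javascript = True
        remove_empty_feeds = True
        auto_cleanup = True

    def make_times_recipe(title, host):
        # Every branch exposes the same rss_<section>.php endpoints.
        sections = [('News', 'news'), ('Benefits', 'benefits'),
                    ('Money', 'money'), ('Careers & Education', 'careers'),
                    ('Community', 'community'), ('Off Duty', 'off_duty'),
                    ('Entertainment', 'entertainment'),
                    ('Guard & Reserve', 'guard')]
        feeds = [(name, 'http://www.%s/rss_%s.php' % (host, slug))
                 for name, slug in sections]
        return type(title.replace(' ', ''), (_TimesBase,),
                    {'title': title, 'feeds': feeds})

    AirForceTimes = make_times_recipe('Air Force Times', 'airforcetimes.com')
    ArmyTimes = make_times_recipe('Army Times', 'armytimes.com')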
Modified: Ars Technica recipe (arstechnica.com)

@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''

@@ -12,22 +12,24 @@ class ArsTechnica(BasicNewsRecipe):
     title = u'Ars Technica'
     language = 'en'
     __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
-    description = 'The art of technology'
-    publisher = 'Ars Technica'
+    description = 'Ars Technica: Serving the technologist for 1.2 decades'
+    publisher = 'Conde Nast Publications'
     category = 'news, IT, technology'
     oldest_article = 5
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
     use_embedded_content = False
-    extra_css = '''
-        body {font-family: Arial,Helvetica,sans-serif}
-        .title{text-align: left}
-        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
-        .news-item-figure-caption-text{font-size:small; font-style:italic}
-        .news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
-        '''
-    ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
+    remove_empty_feeds = True
+    publication_type = 'newsportal'
+    extra_css = '''
+        body {font-family: Arial,sans-serif}
+        .heading{font-family: "Times New Roman",serif}
+        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
+        img{display: block}
+        .caption-text{font-size:small; font-style:italic}
+        .caption-byline{font-size:small; font-style:italic; font-weight:bold}
+        '''

     conversion_options = {
         'comments' : description

@@ -36,93 +38,64 @@ class ArsTechnica(BasicNewsRecipe):
        ,'publisher' : publisher
     }

-    #preprocess_regexps = [
-    #    (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
-    #   ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-    #    ]
-
-    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
+    keep_only_tags = [
+        dict(attrs={'class':'standalone'})
+       ,dict(attrs={'id':'article-guts'})
+    ]

     remove_tags = [
-        dict(name=['object','link','embed'])
-       ,dict(name='div', attrs={'class':'read-more-link'})
+        dict(name=['object','link','embed','iframe','meta'])
+       ,dict(attrs={'class':'corner-info'})
     ]
-    #remove_attributes=['width','height']
+    remove_attributes = ['lang']

     feeds = [
         (u'Infinite Loop (Apple content)', u'http://feeds.arstechnica.com/arstechnica/apple/')
        ,(u'Opposable Thumbs (Gaming content)', u'http://feeds.arstechnica.com/arstechnica/gaming/')
        ,(u'Gear and Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets/')
-       ,(u'Chipster (Hardware content)', u'http://feeds.arstechnica.com/arstechnica/hardware/')
        ,(u'Uptime (IT content)', u'http://feeds.arstechnica.com/arstechnica/business/')
        ,(u'Open Ended (Open Source content)', u'http://feeds.arstechnica.com/arstechnica/open-source/')
        ,(u'One Microsoft Way', u'http://feeds.arstechnica.com/arstechnica/microsoft/')
-       ,(u'Nobel Intent (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
+       ,(u'Scientific method (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
        ,(u'Law & Disorder (Tech policy content)', u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
     ]

-    # This deals with multi-page stories
     def append_page(self, soup, appendtag, position):
-        pager = soup.find('div',attrs={'class':'pager'})
+        pager = soup.find(attrs={'class':'numbers'})
         if pager:
-            for atag in pager.findAll('a',href=True):
-                str = self.tag_to_string(atag)
-                if str.startswith('Next'):
-                    nurl = 'http://arstechnica.com' + atag['href']
-                    rawc = self.index_to_soup(nurl,True)
-                    soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
-
-                    readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
-                    if readmoretag:
-                        readmoretag.extract()
-                    texttag = soup2.find('div', attrs={'class':'body'})
-                    for it in texttag.findAll(style=True):
-                        del it['style']
-
-                    newpos = len(texttag.contents)
-                    self.append_page(soup2,texttag,newpos)
-                    texttag.extract()
-                    pager.extract()
-                    appendtag.insert(position,texttag)
+            nexttag = pager.find(attrs={'class':'next'})
+            if nexttag:
+                nurl = nexttag.parent['href']
+                rawc = self.index_to_soup(nurl,True)
+                soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
+                texttag = soup2.find(attrs={'id':'article-guts'})
+                newpos = len(texttag.contents)
+                self.append_page(soup2,texttag,newpos)
+                texttag.extract()
+                pager.extract()
+                appendtag.insert(position,texttag)

     def preprocess_html(self, soup):
-        # Adds line breaks near the byline (not sure why this is needed)
-        ftag = soup.find('div', attrs={'class':'byline'})
-        if ftag:
-            brtag = Tag(soup,'br')
-            brtag2 = Tag(soup,'br')
-            ftag.insert(4,brtag)
-            ftag.insert(5,brtag2)
-
-        # Remove style items
-        for item in soup.findAll(style=True):
-            del item['style']
-
-        # Remove id
-        for item in soup.findAll(id=True):
-            del item['id']
-
-        # For some reason, links to authors don't have the domainname
-        a_author = soup.find('a',{'href':re.compile("^/author")})
-        if a_author:
-            a_author['href'] = 'http://arstechnica.com'+a_author['href']
-
-        # within div class news-item-figure, we need to grab images
-
-        # Deal with multi-page stories
         self.append_page(soup, soup.body, 3)
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
         return soup

-    def get_article_url(self, article):
-        # If the article title starts with Etc:, don't return it
-        if self.ignoreEtcArticles:
-            article_title = article.get('title',None)
-            if re.match('Etc: ',article_title) is not None:
-                return None
-
-        # The actual article is in a guid tag
-        return article.get('guid', None).rpartition('?')[0]
+    def preprocess_raw_html(self, raw, url):
+        return '<html><head>'+raw[raw.find('</head>'):]
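The new preprocess_raw_html keeps only the closing </head> onward, discarding the site's original head (scripts, styles, meta) before calibre parses the page. A quick illustration of the string surgery on a hypothetical raw page:

    raw = '<html><head><script>track()</script><style>.ad{}</style></head><body>story</body></html>'
    # everything before '</head>' is replaced by a bare '<html><head>' prefix
    cleaned = '<html><head>' + raw[raw.find('</head>'):]
    print(cleaned)  # <html><head></head><body>story</body></html>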
Modified: derStandard recipe

@@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from time import strftime

 class DerStandardRecipe(BasicNewsRecipe):
     title = u'derStandard'
-    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
+    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
     description = u'Nachrichten aus Österreich'
     publisher = 'derStandard.at'
     category = 'news, politics, nachrichten, Austria'

@@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
         for t in soup.findAll(['ul', 'li']):
             t.name = 'div'
         return soup
+
+    def get_cover_url(self):
+        highResolution = True
+
+        date = strftime("%Y/%Y%m%d")
+        # it is also possible for the past
+        #date = '2012/20120503'
+
+        urlP1 = 'http://epaper.derstandarddigital.at/'
+        urlP2 = 'data_ep/STAN/' + date
+        urlP3 = '/V.B1/'
+        urlP4 = 'paper.htm'
+        urlHTML = urlP1 + urlP2 + urlP3 + urlP4
+
+        br = self.clone_browser(self.browser)
+        htmlF = br.open_novisit(urlHTML)
+        htmlC = htmlF.read()
+
+        # URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
+        # consists of part2 + part3 + 'pages/' + code
+        # 'pages/' has length 6, code has length 36
+        index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
+        code = htmlC[index:index + 36]
+
+        # URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
+        # URL EXAMPLE LOW RESOLUTION:  http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
+        urlPic = urlP1 + urlP2 + '/pagejpg/' + code
+
+        if highResolution:
+            urlPic = urlPic + '_b'
+
+        urlPic = urlPic + '.png'
+
+        return urlPic
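The index arithmetic in the new get_cover_url can be checked against the example URL quoted in its comments; a worked illustration on a hypothetical fragment of paper.htm:

    urlP2 = 'data_ep/STAN/2012/20120504'
    urlP3 = '/V.B1/'
    # hypothetical fragment of the fetched paper.htm
    htmlC = 'href="data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm"'
    index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6  # the +6 skips 'pages/'
    code = htmlC[index:index + 36]                      # 36-character page id
    print(code)  # A3B6798F-2751-4D8D-A103-C5EF22F7ACBE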
recipes/economico.recipe (new file, 30 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class Economico(BasicNewsRecipe):
    title = u'Economico'
    language = 'pt'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    encoding = 'utf-8'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
        ('Economia', 'http://economico.sapo.pt/rss/economia'),
        ('Politica', 'http://economico.sapo.pt/rss/politica'),
    ]
Modified: Engadget Japan recipe

@@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
     no_stylesheets = True
     language = 'ja'
     encoding = 'utf-8'
-    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
+    index = 'http://japanese.engadget.com/'
+    remove_javascript = True
+
+    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
+    remove_tags_after = dict(name='div', attrs={'class':'post_body'})
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        soup = self.index_to_soup(self.index)
+        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
+            itt = topstories.find('h4')
+            itema = itt.find('a',href=True)
+            newsarticles.append({
+                'title'       :itema.string
+               ,'date'        :''
+               ,'url'         :itema['href']
+               ,'description' :''
+            })
+        feeds.append(('Latest Posts', newsarticles))
+        return feeds
-    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
-    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})
recipes/folha.recipe (new file, 82 lines)

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe

class Folha_de_s_paulo(BasicNewsRecipe):
    title = u'Folha de São Paulo - portal'
    __author__ = 'Darko Miletic'
    description = 'Um Jornala a servicao do Brasil'
    publisher = 'Folhapress'
    category = 'news, politics, Brasil'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'pt_BR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
    keep_only_tags = [dict(attrs={'id':'articleNew'})]

    feeds = [
        (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
       ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
       ,(u'Mercado', u'http://feeds.folha.uol.com.br/mercado/rss091.xml')
       ,(u'Cotidiano', u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml')
       ,(u'Esporte', u'http://feeds.folha.uol.com.br/esporte/rss091.xml')
       ,(u'Ilustrada', u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml')
       ,(u'F5', u'http://feeds.folha.uol.com.br/f5/rss091.xml')
       ,(u'Ciência', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
       ,(u'Tec', u'http://feeds.folha.uol.com.br/tec/rss091.xml')
       ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
       ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
       ,(u'Celebridades', u'http://feeds.folha.uol.com.br/celebridades/rss091.xml')
       ,(u'Comida', u'http://feeds.folha.uol.com.br/comida/rss091.xml')
       ,(u'Equilibrio', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
       ,(u'Folhateen', u'http://feeds.folha.uol.com.br/folhateen/rss091.xml')
       ,(u'Folhinha', u'http://feeds.folha.uol.com.br/folhinha/rss091.xml')
       ,(u'Ilustrissima', u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml')
       ,(u'Saber', u'http://feeds.folha.uol.com.br/saber/rss091.xml')
       ,(u'Turismo', u'http://feeds.folha.uol.com.br/turismo/rss091.xml')
       ,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
       ,(u'Publifolha', u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml')
       ,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
    ]

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        curl = url.partition('/*')[2]
        return curl

    def print_version(self, url):
        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
        cont = soup.find('div', attrs={'id':'newspaper'})
        if cont:
            ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
            if ai:
                return ai.img['src']
        return None
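Folha feed entries wrap the real article URL behind a redirector of the form <redirector>/*<real url>; get_article_url strips the wrapper and print_version URL-encodes the result. An illustration with a hypothetical feed URL (the encoding behaviour itself is standard urllib):

    import urllib

    feed_url = 'http://feeds.folha.uol.com.br/~r/poder/~3/abc/*http://www1.folha.uol.com.br/poder/123.shtml'
    real_url = feed_url.partition('/*')[2]
    # -> http://www1.folha.uol.com.br/poder/123.shtml
    print('http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(real_url))
    # -> http://tools.folha.com.br/print?site=emcimadahora&url=http%3A%2F%2Fwww1.folha.uol.com.br%2Fpoder%2F123.shtml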
Modified: FolhaOnline recipe

@@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
 class FolhaOnline(BasicNewsRecipe):
     THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
     LANGUAGE = 'pt_br'
-    language = 'pt'
+    language = 'pt_BR'
     LANGHTM = 'pt-br'
     ENCODING = 'cp1252'
     ENCHTM = 'iso-8859-1'
Modified: FSP recipe

@@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
     HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
     masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

-    language = 'pt'
+    language = 'pt_BR'
     no_stylesheets = True
     max_articles_per_feed = 40
     remove_javascript = True
Modified: Gość niedzielny recipe (class GN)

@@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
+from datetime import date
 import re

 class GN(BasicNewsRecipe):
     EDITION = 0

     __author__ = 'Piotr Kontek'
+    title = u'Gość niedzielny'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
     temp_files = []
-    simultaneous_downloads = 1
-    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
-    title = u'Gość niedzielny'

     articles_are_obfuscated = True

@@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
             self.temp_files[-1].close()
         return self.temp_files[-1].name

-    def find_last_issue(self):
-        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
-        #szukam zdjęcia i linka do porzedniego pełnego numeru
+    def find_last_issue(self, year):
+        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
+
+        #szukam zdjęcia i linka do poprzedniego pełnego numeru
         first = True
         for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
             img = d.find('img')
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
+                self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
-                if not first:
+                if year != date.today().year or not first:
                     break
                 first = False

     def parse_index(self):
-        self.find_last_issue()
+        year = date.today().year
+        self.find_last_issue(year)
+        ##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
+        if self.EDITION == 0:
+            self.find_last_issue(year-1)
         soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
         feeds = []
         #wstepniak
recipes/grid_to.recipe (new file, 79 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class TheGridTO(BasicNewsRecipe):
    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                    'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class':'right-content pull-right'}),
        dict(name='div', attrs={'class':'right-content'}),
        dict(name='div', attrs={'class':'ftr-line'}),
        dict(name='div', attrs={'class':'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']

        return cover_url

    def parse_index(self):

        # Get the latest issue
        soup = self.index_to_soup(self.INDEX)
        a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})

            articles = []
            for tag in div.findAllNext(attrs={'class':'search-block'}):
                a = tag.findAll('a', href=True)[1]

                title = self.tag_to_string(a)
                url = a['href']

                articles.append({'title': title, 'url': url, 'description':'', 'date':''})

            feeds.append((section, articles))
        return feeds
recipes/heavy_metal_it.recipe (new file, 22 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    title = u'Heavy Metal'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = False
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    keep_only_tags = [
        dict(name='div', attrs={'class':'entry'})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class':'sociable'})
    ]
    description = 'An Heavy metal Italian magazine'
    __author__ = 'faber1971'
    language = 'it'

    __version__ = 'v1.0'
    __date__ = '6, May 2012'
recipes/icons/folha.png (new binary file, 1.6 KiB; not shown)
recipes/icons/strategic_culture.png (new binary file, 648 B; not shown)
Modified: Jiji.com recipe

@@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
     top_url = 'http://www.jiji.com/'

     feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]

+    remove_tags_before = dict(id="article-area")
     remove_tags_after = dict(id="ad_google")

     def get_cover_url(self):
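For context on the added line: in BasicNewsRecipe, remove_tags_before strips everything preceding the first tag matching the spec, and remove_tags_after strips everything following its match, so the pair brackets the article body. A sketch of the intended effect on a hypothetical Jiji page:

    # hypothetical page layout and what survives the two specs above:
    #   <div id="header">...</div>          dropped: precedes id="article-area"
    #   <div id="article-area">...</div>    kept: from the first match onward...
    #   <div id="ad_google">...</div>       ...down to this match
    #   <div id="footer">...</div>          dropped: follows id="ad_google"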
24
recipes/juve_la_stampa.recipe
Normal file
24
recipes/juve_la_stampa.recipe
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
|
||||||
|
title = u'Juve - La Stampa'
|
||||||
|
oldest_article = 1
|
||||||
|
language = 'it'
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
|
masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'
|
||||||
|
feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
|
||||||
|
remove_tags = [dict(name='div',attrs={'class':['article-toolbar', 'sezione sezione-news', 'intestazione']})]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
.sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
'''
|
||||||
|
description = 'News about Juventus from La Stampa'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
|
||||||
|
__version__ = 'v1.0'
|
||||||
|
__date__ = '8, May 2012'
|
Modified: La Repubblica recipe

@@ -1,7 +1,7 @@
 __license__ = 'GPL v3'
-__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
-__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
+__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
+description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'

 '''
 http://www.repubblica.it/

@@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class LaRepubblica(BasicNewsRecipe):
     title = 'La Repubblica'
-    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
+    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
     description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
     masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
     publisher = 'Gruppo editoriale L\'Espresso'
     category = 'News, politics, culture, economy, general interest'
     language = 'it'
     timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 5
+    oldest_article = 1
     encoding = 'utf8'
     use_embedded_content = False
     no_stylesheets = True

@@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(attrs={'class':'articolo'}),
         dict(attrs={'class':'body-text'}),
         dict(name='p', attrs={'class':'disclaimer clearfix'}),
+        dict(name='div', attrs={'id':'main'}),
         dict(attrs={'id':'contA'})
     ]

@@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(name=['object','link','meta','iframe','embed']),
         dict(name='span',attrs={'class':'linkindice'}),
         dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
-        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
+        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
         dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
         dict(name='div', attrs={'class':'generalbox'}),
         dict(name='ul', attrs={'id':'hystory'})

@@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
         (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
         (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
         (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
-        (u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
+        (u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
+        (u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
+        (u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
+        (u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
+        (u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
+        (u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
     ]

     def preprocess_html(self, soup):
Modified: Mainichi Daily News recipe

@@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
     publisher = 'Mainichi Daily News'
     category = 'news, japan'
     language = 'ja'
-    feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
+    index = 'http://mainichi.jp/select/'
+    remove_javascript = True
+    masthead_title = u'MAINICHI DAILY NEWS'

     remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
+    remove_tags_after = {'class':"NewsBody clr"}

     def parse_feeds(self):

@@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
         for a,curarticle in enumerate(curfeed.articles):
             if re.search(r'pheedo.jp', curarticle.url):
                 delList.append(curarticle)
+            if re.search(r'rssad.jp', curarticle.url):
+                delList.append(curarticle)
         if len(delList)>0:
             for d in delList:
                 index = curfeed.articles.index(d)
                 curfeed.articles[index:index+1] = []

         return feeds
+
+    def parse_index(self):
+        feeds = []
+        soup = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'MaiLink'})
+        if topstories:
+            newsarticles = []
+            for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    newsarticles.append({
+                        'title'       :itema.string
+                       ,'date'        :''
+                       ,'url'         :itema['href']
+                       ,'description' :''
+                    })
+            feeds.append(('latest', newsarticles))
+        return feeds
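The delList pattern in parse_feeds (collect matching articles, then splice each one out by index) recurs verbatim in the Mainichi recipes below. A more compact equivalent, offered only as a sketch and assuming Feed.articles can be rebound:

    import re

    AD_HOSTS = re.compile(r'pheedo\.jp|rssad\.jp')

    def drop_ad_articles(feeds):
        # keep only articles whose URL is not an ad-redirector link
        for curfeed in feeds:
            curfeed.articles = [a for a in curfeed.articles
                                if not AD_HOSTS.search(a.url)]
        return feeds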
recipes/mainichi_en.recipe (new file, 67 lines)

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiEnglishNews(BasicNewsRecipe):
    title = u'The Mainichi'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 40
    description = 'Japanese traditional newspaper Mainichi news in English'
    publisher = 'Mainichi News'
    category = 'news, japan'
    language = 'en_JP'
    index = 'http://mainichi.jp/english/english/index.html'
    remove_javascript = True
    masthead_url = 'http://mainichi.jp/english/images/themainichi.png'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        for section in soup.findAll('section'):
            newsarticles = []
            section_name = 'news'
            hds = section.find('div', attrs={'class':'CategoryHead clr'})
            if hds:
                section_item = hds.find('h1')
                if section_item:
                    section_name = section_item.find('a').string
            items = section.find('ul', attrs={'class':'MaiLink'})
            for item in items.findAll('li'):
                if item:
                    itema = item.find('a')
                    newsarticles.append({
                        'title'       :itema.string
                       ,'date'        :''
                       ,'url'         :itema['href']
                       ,'description' :''
                    })
            feeds.append((section_name, newsarticles))
        return feeds
Deleted file (class MainichiDailyITNews, 34 lines removed)

@@ -1,34 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-class MainichiDailyITNews(BasicNewsRecipe):
-    title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
-    __author__ = 'Hiroshi Miura'
-    oldest_article = 2
-    max_articles_per_feed = 100
-    description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
-    publisher = 'Mainichi Daily News'
-    category = 'news, Japan, IT, Electronics'
-    language = 'ja'
-
-    feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
-
-    remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
-
-    def parse_feeds(self):
-
-        feeds = BasicNewsRecipe.parse_feeds(self)
-
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if re.search(r'pheedo.jp', curarticle.url):
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-
-        return feeds
recipes/mainichi_science_news.recipe (new file, 59 lines)

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiDailyScienceNews(BasicNewsRecipe):
    title = u'\u6bce\u65e5\u65b0\u805e(Science)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 20
    description = 'Japanese traditional newspaper Mainichi Daily News - science'
    publisher = 'Mainichi Daily News'
    category = 'news, japan'
    language = 'ja'
    index = 'http://mainichi.jp/select/science'
    remove_javascript = True
    masthead_title = u'MAINICHI DAILY NEWS'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'MaiLink'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    newsarticles.append({
                        'title'       :itema.string
                       ,'date'        :''
                       ,'url'         :itema['href']
                       ,'description' :''
                    })
            feeds.append(('Science', newsarticles))
        return feeds
recipes/marine_corps_times.recipe (new file, 42 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class MarineCorpsTimes(BasicNewsRecipe):
    title = 'Marine Corps Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Marine Corps'
    language = 'en'
    publisher = 'MarineCorpsTimes.com'
    category = 'news, U.S. Marine Corps'
    tags = 'news, U.S. Marine Corps'
    cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
        ('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
        ('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
        ('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
        ('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
    ]
recipes/military_times.recipe (new file, 41 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class MilitaryTimes(BasicNewsRecipe):
    title = 'Military Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Military'
    language = 'en'
    publisher = 'MilitaryTimes.com'
    category = 'news, U.S. Military'
    tags = 'news, U.S. Military'
    cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.militarytimes.com/rss_news.php'),
        ('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
        ('Money', 'http://www.militarytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
        ('Community', 'http://www.militarytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
    ]
@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-

 __license__ = 'GPL v3'

@ -7,77 +6,21 @@ __license__ = 'GPL v3'
 www.canada.com
 '''

-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
+from calibre.web.feeds.recipes import BasicNewsRecipe

 class CanWestPaper(BasicNewsRecipe):

-    # un-comment the following four lines for the Victoria Times Colonist
-    ## title = u'Victoria Times Colonist'
-    ## url_prefix = 'http://www.timescolonist.com'
-    ## description = u'News from Victoria, BC'
-    ## fp_tag = 'CAN_TC'
-
-    # un-comment the following four lines for the Vancouver Province
-    ## title = u'Vancouver Province'
-    ## url_prefix = 'http://www.theprovince.com'
-    ## description = u'News from Vancouver, BC'
-    ## fp_tag = 'CAN_VP'
-
-    # un-comment the following four lines for the Vancouver Sun
-    ## title = u'Vancouver Sun'
-    ## url_prefix = 'http://www.vancouversun.com'
-    ## description = u'News from Vancouver, BC'
-    ## fp_tag = 'CAN_VS'
-
-    # un-comment the following four lines for the Edmonton Journal
-    ## title = u'Edmonton Journal'
-    ## url_prefix = 'http://www.edmontonjournal.com'
-    ## description = u'News from Edmonton, AB'
-    ## fp_tag = 'CAN_EJ'
-
-    # un-comment the following four lines for the Calgary Herald
-    ## title = u'Calgary Herald'
-    ## url_prefix = 'http://www.calgaryherald.com'
-    ## description = u'News from Calgary, AB'
-    ## fp_tag = 'CAN_CH'
-
-    # un-comment the following four lines for the Regina Leader-Post
-    ## title = u'Regina Leader-Post'
-    ## url_prefix = 'http://www.leaderpost.com'
-    ## description = u'News from Regina, SK'
-    ## fp_tag = ''
-
-    # un-comment the following four lines for the Saskatoon Star-Phoenix
-    ## title = u'Saskatoon Star-Phoenix'
-    ## url_prefix = 'http://www.thestarphoenix.com'
-    ## description = u'News from Saskatoon, SK'
-    ## fp_tag = ''
-
-    # un-comment the following four lines for the Windsor Star
-    ## title = u'Windsor Star'
-    ## url_prefix = 'http://www.windsorstar.com'
-    ## description = u'News from Windsor, ON'
-    ## fp_tag = 'CAN_'
-
-    # un-comment the following four lines for the Ottawa Citizen
-    ## title = u'Ottawa Citizen'
-    ## url_prefix = 'http://www.ottawacitizen.com'
-    ## description = u'News from Ottawa, ON'
-    ## fp_tag = 'CAN_OC'
-
-    # un-comment the following four lines for the Montreal Gazette
+    # un-comment the following three lines for the Montreal Gazette
     title = u'Montreal Gazette'
-    url_prefix = 'http://www.montrealgazette.com'
     description = u'News from Montreal, QC'
-    fp_tag = 'CAN_MG'

     language = 'en_CA'
     __author__ = 'Nick Redding'
     no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//*[@id="imageBox"]'
     timefmt = ' [%b %d]'
     extra_css = '''
        .timestamp { font-size:xx-small; display: block; }
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
        .byline { font-size:xx-small; }
        #photocaption { font-size: small; font-style: italic }
        #photocredit { font-size: xx-small; }'''
-    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
-    remove_tags = [{'class':'comments'},
-        dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
-        dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
-        dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
-        dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
-        dict(name='div', attrs={'class':'rule_grey_solid'}),
-        dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-
-    def get_cover_url(self):
-        from datetime import timedelta, date
-        if self.fp_tag=='':
-            return None
-        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-        br = BasicNewsRecipe.get_browser()
-        daysback=1
-        try:
-            br.open(cover)
-        except:
-            while daysback<7:
-                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
-                br = BasicNewsRecipe.get_browser()
-                try:
-                    br.open(cover)
-                except:
-                    daysback = daysback+1
-                    continue
-                break
-        if daysback==7:
-            self.log("\nCover unavailable")
-            cover = None
-        return cover
-
-    def fixChars(self,string):
-        # Replace lsquo (\x91)
-        fixed = re.sub("\x91","‘",string)
-        # Replace rsquo (\x92)
-        fixed = re.sub("\x92","’",fixed)
-        # Replace ldquo (\x93)
-        fixed = re.sub("\x93","“",fixed)
-        # Replace rdquo (\x94)
-        fixed = re.sub("\x94","”",fixed)
-        # Replace ndash (\x96)
-        fixed = re.sub("\x96","–",fixed)
-        # Replace mdash (\x97)
-        fixed = re.sub("\x97","—",fixed)
-        fixed = re.sub("&#x2019;","’",fixed)
-        return fixed
-
-    def massageNCXText(self, description):
-        # Kindle TOC descriptions won't render certain characters
-        if description:
-            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
-            # Replace '&amp;' with '&'
-            massaged = re.sub("&amp;","&", massaged)
-            return self.fixChars(massaged)
-        else:
-            return description
-
-    def populate_article_metadata(self, article, soup, first):
-        if first:
-            picdiv = soup.find('body').find('img')
-            if picdiv is not None:
-                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
-        xtitle = article.text_summary.strip()
-        if len(xtitle) == 0:
-            desc = soup.find('meta',attrs={'property':'og:description'})
-            if desc is not None:
-                article.summary = article.text_summary = desc['content']
-
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
-
-    def preprocess_html(self, soup):
-        return self.strip_anchors(soup)
-
-    def parse_index(self):
-        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
-
-        articles = {}
-        key = 'News'
-        ans = ['News']
-
-        # Find each instance of class="sectiontitle", class="featurecontent"
-        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
-            #self.log(" div class = %s" % divtag['class'])
-            if divtag['class'].startswith('section_title'):
-                # div contains section title
-                if not divtag.h3:
-                    continue
-                key = self.tag_to_string(divtag.h3,False)
-                ans.append(key)
-                self.log("Section name %s" % key)
-                continue
-            # div contains article data
-            h1tag = divtag.find('h1')
-            if not h1tag:
-                continue
-            atag = h1tag.find('a',href=True)
-            if not atag:
-                continue
-            url = self.url_prefix+'/news/todays-paper/'+atag['href']
-            #self.log("Section %s" % key)
-            #self.log("url %s" % url)
-            title = self.tag_to_string(atag,False)
-            #self.log("title %s" % title)
-            pubdate = ''
-            description = ''
-            ptag = divtag.find('p');
-            if ptag:
-                description = self.tag_to_string(ptag,False)
-                #self.log("description %s" % description)
-            author = ''
-            autag = divtag.find('h4')
-            if autag:
-                author = self.tag_to_string(autag,False)
-                #self.log("author %s" % author)
-            if not articles.has_key(key):
-                articles[key] = []
-            articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
-
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        return ans
+    feeds = [
+        ('News', 'http://rss.canada.com/get/?F297'),
+        ('Sports', 'http://rss.canada.com/get/?F299'),
+        ('Entertainment', 'http://rss.canada.com/get/?F7366'),
+        ('Business', 'http://rss.canada.com/get/?F6939'),
+    ]
22 recipes/nachdenkseiten.recipe Normal file
@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Nachdenkseiten(BasicNewsRecipe):
    title = u'Nachdenkseiten'
    __author__ = 'jrda'
    publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
    description = 'NachDenkSeiten - Die kritische Website'
    category = 'news'
    oldest_article = 7
    use_embedded_content = False
    language = 'de'
    timefmt = ''
    max_articles_per_feed = 6
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    keep_only_tags = [
        {'id': 'content'}]

    feeds = [
        ('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
    ]
16 recipes/national_geographic_it.recipe Normal file
@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    title = u'National Geographic'
    __author__ = 'faber1971'
    description = 'Science magazine'
    language = 'it'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'banner-abbonamenti'})]
    feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]
42 recipes/navy_times.recipe Normal file
@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NavyTimes(BasicNewsRecipe):
    title = 'Navy Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Navy'
    language = 'en'
    publisher = 'NavyTimes.com'
    category = 'news, U.S. Navy'
    tags = 'news, U.S. Navy'
    cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.navytimes.com/rss_news.php'),
        ('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
        ('Money', 'http://www.navytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
        ('Community', 'http://www.navytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
    ]
20 recipes/news_busters.recipe Normal file
@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NewsBusters(BasicNewsRecipe):
    title = u'News Busters'
    description = 'Exposing and Combating Liberal Media Bias'
    __author__ = 'jde'
    oldest_article = 1  # day
    max_articles_per_feed = 100
    cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"
    language = 'en'
    encoding = 'utf8'
    needs_subscription = False
    remove_javascript = True
    recursions = 0
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Blog', u'http://www.newsbusters.org/rss.xml')]
@ -9,10 +9,10 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class Pescanik(BasicNewsRecipe):
-    title = 'Peščanik'
+    title = u'Peščanik'
     __author__ = 'Darko Miletic'
-    description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
+    description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
-    publisher = 'Peščanik'
+    publisher = u'Peščanik'
     category = 'news, politics, Serbia'
     oldest_article = 10
     max_articles_per_feed = 100
@ -45,4 +45,4 @@ class Pescanik(BasicNewsRecipe):
     ]

     def print_version(self, url):
         return url + 'print/'
@ -1,5 +1,5 @@
 """
-Pocket Calibre Recipe v1.0
+Pocket Calibre Recipe v1.2
 """
 __license__ = 'GPL v3'
 __copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
             articles = []
             soup = self.index_to_soup(feedurl)
             ritem = soup.find('ul', attrs={'id':'list'})
+            if ritem is None:
+                self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
+                continue
             for item in reversed(ritem.findAll('li')):
                 if articlesToGrab < 1:
                     break
@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
                     self.readList.append(readLink)
             totalfeeds.append((feedtitle, articles))
         if len(self.readList) < self.minimum_articles:
-            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
+            self.mark_as_read_after_dl = False
+            if hasattr(self, 'abort_recipe_processing'):
+                self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
+            else:
+                self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
+                return []
         return totalfeeds

     def mark_as_read(self, markList):
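The Pocket hunk above replaces a hard failure with a graceful abort: when fewer than minimum_articles items are found, the recipe now calls abort_recipe_processing() if the running calibre provides it, and only logs otherwise. A minimal sketch of that feature-detection pattern, standalone and with a hypothetical Recipe class standing in for BasicNewsRecipe:

    class Recipe(object):
        minimum_articles = 5

        def ensure_enough(self, found):
            # Mirror of the fallback in the hunk above (names are illustrative)
            if found >= self.minimum_articles:
                return
            msg = "Only %d articles retrieved, minimum_articles not reached" % found
            if hasattr(self, 'abort_recipe_processing'):
                # newer calibre: cancel the fetch cleanly
                self.abort_recipe_processing(msg)
            else:
                # older calibre: report and let parse_index() return no feeds
                print(msg)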
22 recipes/revista_summa.recipe Normal file
@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):

    title = u'Revista Summa'
    publisher = u'Summa'
    __author__ = 'Vakya'
    description = 'Informacion regional sobre economia y negocios'
    language = 'es'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='label')
    feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')]
@ -1,3 +1,4 @@
+
 __license__ = 'GPL v3'
 __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
     language = 'en_DE'
     no_stylesheets = True
     use_embedded_content = False
+    auto_cleanup = True
+    auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
     encoding = 'cp1252'
     publisher = 'SPIEGEL ONLINE GmbH'
     category = 'news, politics, Germany'
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
        .spPhotoGallery{font-size:x-small; color:#990000 ;}
        '''
-    keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
+    #keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
-    remove_tags_after = dict(attrs={'id':'spArticleBody'})
+    #remove_tags_after = dict(attrs={'id':'spArticleBody'})
-    remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
+    #remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
-    remove_attributes = ['clear']
+    #remove_attributes = ['clear']
     feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]

-    def print_version(self, url):
+    #def print_version(self, url):
-        main, sep, rest = url.rpartition(',')
+    #    main, sep, rest = url.rpartition(',')
-        rmain, rsep, rrest = main.rpartition(',')
+    #    rmain, rsep, rrest = main.rpartition(',')
-        return rmain + ',druck-' + rrest + ',' + rest
+    #    return rmain + ',druck-' + rrest + ',' + rest

-    def preprocess_html(self, soup):
+    #def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
+    #    for item in soup.findAll(style=True):
-            del item['style']
+    #        del item['style']
-        for item in soup.findAll('a'):
+    #    for item in soup.findAll('a'):
-            if item.string is not None:
+    #        if item.string is not None:
-                str = item.string
+    #            str = item.string
-                item.replaceWith(str)
+    #            item.replaceWith(str)
-            else:
+    #        else:
-                str = self.tag_to_string(item)
+    #            str = self.tag_to_string(item)
-                item.replaceWith(str)
+    #            item.replaceWith(str)
-        return soup
+    #    return soup
39 recipes/stars_and_stripes.recipe Normal file
@ -0,0 +1,39 @@
''' Stars and Stripes
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe


class AdvancedUserRecipe1308791026(BasicNewsRecipe):
    title = u'Stars and Stripes'
    oldest_article = 3
    max_articles_per_feed = 100
    __author__ = 'adoucette'
    description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.'
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'stripes.com'
    category = 'news, US, world'
    language = 'en_US'
    publication_type = 'newsportal'
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
    remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
    feeds = [
        (u'News', u'http://feeds.stripes.com/starsandstripes/news'),
        (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
        (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
        (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
        (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
    ]
92 recipes/strategic_culture.recipe Normal file
@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'

'''
www.strategic-culture.org
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class StrategicCulture(BasicNewsRecipe):
    title = 'Strategic Culture Foundation'
    __author__ = 'Darko Miletic'
    description = 'Online Journal'
    publisher = 'Strategic Culture Foundation'
    category = 'news, politics'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css = '''
        body{font-family: Arial, sans-serif}
        h1{font-family: "Times New Roman",Times,serif}
        img{margin-bottom: 0.8em}
    '''

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    keep_only_tags = [
        dict(name=['h1','p'])
       ,dict(name='div', attrs={'id':'cke_pastebin'})
    ]

    remove_tags = [dict(name=['object','link','base','meta','iframe'])]

    feeds = [
        (u'News'             , u'http://www.strategic-culture.org/blocks/news.html')
       ,(u'Politics'         , u'http://www.strategic-culture.org/rubrics/politics.html')
       ,(u'Economics'        , u'http://www.strategic-culture.org/rubrics/economics.html')
       ,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
       ,(u'Columnists'       , u'http://www.strategic-culture.org/rubrics/columnists.html')
    ]

    def print_version(self, url):
        return url.replace('-culture.org/news/', '-culture.org/pview/')

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            if feedurl.endswith('news.html'):
                clname = 'sini14'
            else:
                clname = 'h22'
            checker = []
            for item in soup.findAll('a', attrs={'class':clname}):
                atag = item
                url = atag['href']
                title = self.tag_to_string(atag)
                description = ''
                daypart = url.rpartition('/')[0]
                mpart,sep,day = daypart.rpartition('/')
                ypart,sep,month = mpart.rpartition('/')
                year = ypart.rpartition('/')[2]
                date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
                if url not in checker:
                    checker.append(url)
                    articles.append({
                        'title'       : title
                       ,'date'        : date
                       ,'url'         : url
                       ,'description' : description
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
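For orientation, parse_index() above derives each article's date from its URL path rather than from the page itself. Worked through on a hypothetical URL of the shape the rpartition() chain implies:

    url = 'http://www.strategic-culture.org/news/2012/05/11/some-article.html'
    daypart = url.rpartition('/')[0]           # '.../news/2012/05/11'
    mpart, sep, day = daypart.rpartition('/')  # day = '11'
    ypart, sep, month = mpart.rpartition('/')  # month = '05'
    year = ypart.rpartition('/')[2]            # year = '2012'
    # time.strptime/strftime then rebuild this as an RFC 2822 style date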
BIN resources/compiled_coffeescript.zip Normal file
Binary file not shown.
@ -506,3 +506,17 @@ change_book_details_font_size_by = 0
 # No compile: compile_gpm_templates = False
 compile_gpm_templates = True
+
+#: What format to default to when using the Tweak feature
+# The Tweak feature of calibre allows direct editing of a book format.
+# If multiple formats are available, calibre will offer you a choice
+# of formats, defaulting to your preferred output format if it is available.
+# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
+# to that format rather than your output format preference.
+# Set to a value of 'remember' to use whichever format you chose last time you
+# used the Tweak feature.
+# Examples:
+#   default_tweak_format = None (Use output format)
+#   default_tweak_format = 'EPUB'
+#   default_tweak_format = 'remember'
+default_tweak_format = None
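For context, and not part of the diff: values from this tweaks file are read elsewhere in calibre through the global tweaks mapping, so a consumer of the new setting would look roughly like the following sketch (assuming the usual calibre.utils.config import path):

    from calibre.utils.config import tweaks

    fmt = tweaks.get('default_tweak_format', None)
    # fmt is None (follow the output format preference), 'remember',
    # or an explicit format name such as 'EPUB' or 'AZW3'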
@ -20,7 +20,11 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
         project_dir=project_dir, base_dir=project_dir)

 def recipe_title_callback(raw):
-    return eval(raw.decode('utf-8')).replace(' ', '_')
+    try:
+        return eval(raw.decode('utf-8')).replace(u' ', u'_')
+    except:
+        print ('Failed to decode recipe title: %r'%raw)
+        raise

 vipy.session.add_content_browser('<leader>r', 'Recipe',
         vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
 import sys, os

 path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
-sys.path.insert(0, path)
+if path not in sys.path:
+    sys.path.insert(0, path)

 sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
 sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})
@ -41,8 +41,8 @@ binary_includes = [
     '/usr/lib/libgthread-2.0.so.0',
     '/usr/lib/libpng14.so.14',
     '/usr/lib/libexslt.so.0',
-    MAGICK_PREFIX+'/lib/libMagickWand.so.4',
-    MAGICK_PREFIX+'/lib/libMagickCore.so.4',
+    MAGICK_PREFIX+'/lib/libMagickWand.so.5',
+    MAGICK_PREFIX+'/lib/libMagickCore.so.5',
     '/usr/lib/libgcrypt.so.11',
     '/usr/lib/libgpg-error.so.0',
     '/usr/lib/libphonon.so.4',
@ -429,7 +429,7 @@ class Py2App(object):
     def add_imagemagick(self):
         info('\nAdding ImageMagick')
         for x in ('Wand', 'Core'):
-            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
+            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
         idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
         dest = os.path.join(self.frameworks_dir, 'ImageMagick')
         if os.path.exists(dest):
@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
 SW = r'C:\cygwin\home\kovid\sw'
-IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
+IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
         'VisualMagick', 'bin')
 CRT = r'C:\Microsoft.VC90.CRT'
@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
 ImageMagick
 --------------

+Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
+
 Edit VisualMagick/configure/configure.cpp to set

 int projectType = MULTITHREADEDDLL;
@ -349,7 +351,10 @@ Edit magick/magick-config.h
 Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE

 Now open VisualMagick/VisualDynamicMT.sln set to Release
-Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
+Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
+
+F7 for build project, you will get one error due to the removal of xlib, ignore
+it.

 calibre
 ---------
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-04-28 10:42+0000\n"
+"PO-Revision-Date: 2012-05-03 16:09+0000\n"
-"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
+"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n"
+"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
-"X-Generator: Launchpad (build 15149)\n"
+"X-Generator: Launchpad (build 15195)\n"
 "Language: ca\n"

 #. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"

 #. name for ica
 msgid "Ede Ica"
-msgstr ""
+msgstr "Ede Ica"

 #. name for ich
 msgid "Etkywan"
-msgstr ""
+msgstr "Etkywan"

 #. name for icl
 msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"

 #. name for ida
 msgid "Idakho-Isukha-Tiriki"
-msgstr ""
+msgstr "Idakho-Isukha-Tiriki"

 #. name for idb
 msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"

 #. name for idc
 msgid "Idon"
-msgstr ""
+msgstr "Idon"

 #. name for idd
 msgid "Ede Idaca"
-msgstr ""
+msgstr "Ede Idaca"

 #. name for ide
 msgid "Idere"
-msgstr ""
+msgstr "Idere"

 #. name for idi
 msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""

 #. name for ido
 msgid "Ido"
-msgstr ""
+msgstr "ido"

 #. name for idr
 msgid "Indri"
-msgstr ""
+msgstr "Indri"

 #. name for ids
 msgid "Idesa"
-msgstr ""
+msgstr "Idesa"

 #. name for idt
 msgid "Idaté"
-msgstr ""
+msgstr "Idaté"

 #. name for idu
 msgid "Idoma"
-msgstr ""
+msgstr "Idoma"

 #. name for ifa
 msgid "Ifugao; Amganad"
-msgstr ""
+msgstr "Ifugao; Amganad"

 #. name for ifb
 msgid "Ifugao; Batad"
-msgstr ""
+msgstr "Ifugao; Batad"

 #. name for ife
 msgid "Ifè"
-msgstr ""
+msgstr "Ifè"

 #. name for iff
 msgid "Ifo"
-msgstr ""
+msgstr "Ifo"

 #. name for ifk
 msgid "Ifugao; Tuwali"
-msgstr ""
+msgstr "Ifugao; Tuwali"

 #. name for ifm
 msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"

 #. name for ifu
 msgid "Ifugao; Mayoyao"
-msgstr ""
+msgstr "Ifugao; Mayoyao"

 #. name for ify
 msgid "Kallahan; Keley-I"
-msgstr ""
+msgstr "Kallahan; Keley-I"

 #. name for igb
 msgid "Ebira"
-msgstr ""
+msgstr "Ebira"

 #. name for ige
 msgid "Igede"
@ -8,14 +8,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-03-25 12:19+0000\n"
+"PO-Revision-Date: 2012-05-03 14:49+0000\n"
-"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
+"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
 "Language-Team: Serbian <gnu@prevod.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
+"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
-"X-Generator: Launchpad (build 15008)\n"
+"X-Generator: Launchpad (build 15195)\n"
 "Language: sr\n"

 #. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""

 #. name for deu
 msgid "German"
-msgstr "немачки"
+msgstr "Немачки"

 #. name for dev
 msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"

 #. name for glg
 msgid "Galician"
-msgstr ""
+msgstr "Галицијски"

 #. name for glh
 msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""

 #. name for gmh
 msgid "German; Middle High (ca. 1050-1500)"
-msgstr ""
+msgstr "Немачки; средње високи (ca. 1050-1500)"

 #. name for gml
 msgid "German; Middle Low"
-msgstr ""
+msgstr "Немачки; средње низак"

 #. name for gmm
 msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""

 #. name for gsg
 msgid "German Sign Language"
-msgstr ""
+msgstr "Немачки језик"

 #. name for gsl
 msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""

 #. name for gsw
 msgid "German; Swiss"
-msgstr ""
+msgstr "Немачки ; Швајцарска"

 #. name for gta
 msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""

 #. name for nds
 msgid "German; Low"
-msgstr ""
+msgstr "Немачки; низак"

 #. name for ndt
 msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""

 #. name for nno
 msgid "Norwegian Nynorsk"
-msgstr "норвешки модерни"
+msgstr "Норвешки модерни"

 #. name for nnp
 msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""

 #. name for nob
 msgid "Norwegian Bokmål"
-msgstr ""
+msgstr "Норвешки (књижевни)"

 #. name for noc
 msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""

 #. name for nor
 msgid "Norwegian"
-msgstr "норвешки"
+msgstr "Норвешки"

 #. name for nos
 msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""

 #. name for nsl
 msgid "Norwegian Sign Language"
-msgstr ""
+msgstr "Норвешки језик"

 #. name for nsm
 msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""

 #. name for pdc
 msgid "German; Pennsylvania"
-msgstr ""
+msgstr "Немачки ; Пенсилванија"

 #. name for pdi
 msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""

 #. name for rmg
 msgid "Norwegian; Traveller"
-msgstr ""
+msgstr "Норвешки; путнички"

 #. name for rmh
 msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""

 #. name for sgg
 msgid "Swiss-German Sign Language"
-msgstr ""
+msgstr "Швајцарско-Немачки језик"

 #. name for sgh
 msgid "Shughni"
@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
 class Coffee(Command): # {{{

     description = 'Compile coffeescript files into javascript'
-    COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
+    COFFEE_DIRS = ('ebooks/oeb/display',)

     def add_options(self, parser):
         parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,49 +47,69 @@ class Coffee(Command): # {{{
         except KeyboardInterrupt:
             pass

-    def show_js(self, jsfile):
+    def show_js(self, raw):
         from pygments.lexers import JavascriptLexer
         from pygments.formatters import TerminalFormatter
         from pygments import highlight
-        with open(jsfile, 'rb') as f:
-            raw = f.read()
         print highlight(raw, JavascriptLexer(), TerminalFormatter())

     def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
-        for toplevel, dest in self.COFFEE_DIRS.iteritems():
-            dest = self.j(self.RESOURCES, dest)
-            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
-                js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
-                if self.newer(js, x):
-                    print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
-                        timestamp else '', os.path.basename(x)))
-                    try:
-                        cs = subprocess.check_output(self.compiler +
-                                [x]).decode('utf-8')
-                    except Exception as e:
-                        print ('\n\tCompilation of %s failed'%os.path.basename(x))
-                        print (e)
-                        if ignore_errors:
-                            with open(js, 'wb') as f:
-                                f.write('# Compilation from coffeescript failed')
-                        else:
-                            raise SystemExit(1)
-                    else:
-                        with open(js, 'wb') as f:
-                            f.write(cs.encode('utf-8'))
-                        if opts.show_js:
-                            self.show_js(js)
-                            print ('#'*80)
-                            print ('#'*80)
+        src_files = {}
+        for src in self.COFFEE_DIRS:
+            for f in glob.glob(self.j(self.SRC, __appname__, src,
+                '*.coffee')):
+                bn = os.path.basename(f).rpartition('.')[0]
+                arcname = src.replace('/', '.') + '.' + bn + '.js'
+                src_files[arcname] = (f, os.stat(f).st_mtime)
+
+        existing = {}
+        dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
+        if os.path.exists(dest):
+            with zipfile.ZipFile(dest, 'r') as zf:
+                for info in zf.infolist():
+                    mtime = time.mktime(info.date_time + (0, 0, -1))
+                    arcname = info.filename
+                    if (arcname in src_files and src_files[arcname][1] <
+                            mtime):
+                        existing[arcname] = (zf.read(info), info)
+
+        todo = set(src_files) - set(existing)
+        updated = {}
+        for arcname in todo:
+            name = arcname.rpartition('.')[0]
+            print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
+                timestamp else '', name))
+            src = src_files[arcname][0]
+            try:
+                js = subprocess.check_output(self.compiler +
+                        [src]).decode('utf-8')
+            except Exception as e:
+                print ('\n\tCompilation of %s failed'%name)
+                print (e)
+                if ignore_errors:
+                    js = u'# Compilation from coffeescript failed'
+                else:
+                    raise SystemExit(1)
+            else:
+                if opts.show_js:
+                    self.show_js(js)
+                    print ('#'*80)
+                    print ('#'*80)
+            zi = zipfile.ZipInfo()
+            zi.filename = arcname
+            zi.date_time = time.localtime()[:6]
+            updated[arcname] = (js.encode('utf-8'), zi)
+        if updated:
+            with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
+                for raw, zi in updated.itervalues():
+                    zf.writestr(zi, raw)
+                for raw, zi in existing.itervalues():
+                    zf.writestr(zi, raw)

     def clean(self):
-        for toplevel, dest in self.COFFEE_DIRS.iteritems():
-            dest = self.j(self.RESOURCES, dest)
-            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
-                x = x.rpartition('.')[0] + '.js'
-                x = self.j(dest, os.path.basename(x))
-                if os.path.exists(x):
-                    os.remove(x)
+        x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
+        if os.path.exists(x):
+            os.remove(x)
 # }}}

 class Kakasi(Command): # {{{
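One subtle line in the new do_coffee_compile() is the freshness test time.mktime(info.date_time + (0, 0, -1)): zipfile.ZipInfo.date_time is a 6-tuple, while time.mktime() expects nine struct_time fields, so the padding supplies dummy weekday/yearday values and -1 for "DST unknown". Illustrated standalone:

    import time

    date_time = (2012, 5, 11, 10, 30, 0)  # what zipfile.ZipInfo.date_time holds
    mtime = time.mktime(date_time + (0, 0, -1))  # pad to 9 fields; -1 lets mktime infer DST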
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = u'calibre'
-numeric_version = (0, 8, 50)
+numeric_version = (0, 8, 51)
 __version__ = u'.'.join(map(unicode, numeric_version))
 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
@ -302,7 +302,9 @@ class OutputFormatPlugin(Plugin):
|
|||||||
|
|
||||||
:param item: The item (HTML file) being processed
|
:param item: The item (HTML file) being processed
|
||||||
:param stylizer: A Stylizer object containing the flattened styles for
|
:param stylizer: A Stylizer object containing the flattened styles for
|
||||||
item. You can get the style for any element by stylizer.style(element).
|
item. You can get the style for any element by
|
||||||
|
stylizer.style(element).
|
||||||
|
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -57,6 +57,7 @@ class ANDROID(USBMS):
             0x4316 : [0x216],
             0x42d6 : [0x216],
             0x42d7 : [0x216],
+            0x42f7 : [0x216],
         },
         # Freescale
         0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
             'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
             'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
             'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
-            'GT-S5830L_CARD', 'UNIVERSE']
+            'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
             'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
             'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
             'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
-            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
+            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
+            'UMS_COMPOSITE']

     OSX_MAIN_MEM = 'Android Device Main Memory'
@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
     name = 'PocketBook 360 Device Interface'

     gui_name = 'PocketBook 360'
+    VENDOR_ID = [0x1f85, 0x525]
+    PRODUCT_ID = [0x1688, 0xa4a5]
+    BCD = [0x110]
+
     FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']
@ -1,4 +1,25 @@
-from __future__ import with_statement
-__license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
+
+
+class ConversionUserFeedBack(Exception):
+
+    def __init__(self, title, msg, level='info', det_msg=''):
+        ''' Show a simple message to the user
+
+        :param title: The title (very short description)
+        :param msg: The message to show the user
+        :param level: Must be one of 'info', 'warn' or 'error'
+        :param det_msg: Optional detailed message to show the user
+        '''
+        import json
+        Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
+            'det_msg':det_msg, 'title':title}))
+        self.title, self.msg, self.det_msg = title, msg, det_msg
+        self.level = level
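The new exception packs its payload into the exception message as JSON, presumably so it survives intact when re-raised across a worker-process boundary, while the plain attributes serve in-process handlers such as the ebook-convert change further down. A hedged usage sketch; the surrounding plugin code is hypothetical:

    from calibre.ebooks.conversion import ConversionUserFeedBack

    # somewhere inside a conversion pipeline step:
    raise ConversionUserFeedBack(
        'Download failed',                     # title: very short description
        'Not enough articles were retrieved',  # msg shown to the user
        level='warn',                          # one of 'info', 'warn', 'error'
        det_msg='Only 2 of 5 requested articles could be fetched')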
@ -15,6 +15,7 @@ from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from calibre import patheq
+from calibre.ebooks.conversion import ConversionUserFeedBack

 USAGE = '%prog ' + _('''\
 input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
 def main(args=sys.argv):
     log = Log()
     parser, plumber = create_option_parser(args, log)
-    opts = parser.parse_args(args)[0]
+    opts, leftover_args = parser.parse_args(args)
+    if len(leftover_args) > 3:
+        log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
+        return 1
     for x in ('read_metadata_from_opf', 'cover'):
         if getattr(opts, x, None) is not None:
             setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
             if n.dest]
     plumber.merge_ui_recommendations(recommendations)

-    plumber.run()
+    try:
+        plumber.run()
+    except ConversionUserFeedBack as e:
+        ll = {'info': log.info, 'warn': log.warn,
+                'error':log.error}.get(e.level, log.info)
+        ll(e.title)
+        if e.det_msg:
+            log.debug(e.det_msg)
+        ll(e.msg)
+        raise SystemExit(1)

     log(_('Output saved to'), ' ', plumber.output)
@ -207,7 +207,7 @@ class EPUBInput(InputFormatPlugin):
             if rc:
                 cover_toc_item = None
                 for item in oeb.toc.iterdescendants():
-                    if item.href == rc:
+                    if item.href and item.href.partition('#')[0] == rc:
                         cover_toc_item = item
                         break
                 spine = {x.href for x in oeb.spine}
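The new comparison tolerates TOC hrefs that carry a fragment identifier, which the old equality test missed. A one-line illustration with made-up values:

    href = 'titlepage.xhtml#cover'
    rc = 'titlepage.xhtml'
    assert href.partition('#')[0] == rc  # the old test href == rc would be False
    # str.partition never raises; an href without '#' compares unchanged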
@ -393,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
         for tag in XPath('//h:body/descendant::h:script')(root):
             tag.getparent().remove(tag)

+        formchildren = XPath('./h:input|./h:button|./h:textarea|'
+                './h:label|./h:fieldset|./h:legend')
         for tag in XPath('//h:form')(root):
-            tag.getparent().remove(tag)
+            if formchildren(tag):
+                tag.getparent().remove(tag)
+            else:
+                # Not a real form
+                tag.tag = XHTML('div')

         for tag in XPath('//h:center')(root):
             tag.tag = XHTML('div')
@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
     name = 'MOBI Input'
     author = 'Kovid Goyal'
     description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
-    file_types = set(['mobi', 'prc', 'azw', 'azw3'])
+    file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])

     def convert(self, stream, options, file_ext, log,
         accelerators):
@ -343,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
|
|||||||
OptionRecommendation(name='margin_top',
|
OptionRecommendation(name='margin_top',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the top margin in pts. Default is %default. '
|
help=_('Set the top margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_bottom',
|
OptionRecommendation(name='margin_bottom',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the bottom margin in pts. Default is %default. '
|
help=_('Set the bottom margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_left',
|
OptionRecommendation(name='margin_left',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the left margin in pts. Default is %default. '
|
help=_('Set the left margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_right',
|
OptionRecommendation(name='margin_right',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the right margin in pts. Default is %default. '
|
help=_('Set the right margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='change_justification',
|
OptionRecommendation(name='change_justification',
|
||||||
@@ -885,7 +889,10 @@ OptionRecommendation(name='search_replace',
         self.log.debug('Resolved conversion options')
         try:
             self.log.debug('calibre version:', __version__)
-            self.log.debug(pprint.pformat(self.opts.__dict__))
+            odict = dict(self.opts.__dict__)
+            for x in ('username', 'password'):
+                odict.pop(x, None)
+            self.log.debug(pprint.pformat(odict))
         except:
             self.log.exception('Failed to get resolved conversion options')
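The hunk above keeps user credentials out of the conversion debug log. A minimal standalone sketch of the same scrub-before-logging pattern; the option dict below is fabricated for illustration and is not calibre's real option set:

    import pprint

    def scrubbed(options, secret_keys=('username', 'password')):
        # Copy first so the live options object is left untouched
        safe = dict(options)
        for key in secret_keys:
            safe.pop(key, None)  # pop with a default never raises
        return pprint.pformat(safe)

    print(scrubbed({'username': 'me', 'password': 's3cret', 'margin_top': 5.0}))
    # {'margin_top': 5.0}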
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, error as re_error
+import re
 from math import ceil
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.utils.logging import default_log
@@ -184,7 +184,7 @@ class HeuristicProcessor(object):
             except OverflowError:
                 # match.group(0) was too large to be compiled into a regex
                 continue
-            except re_error:
+            except re.error:
                 # the match was not a valid regular expression
                 continue
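For context on the fix above: text captured from a document is not guaranteed to be a well-formed pattern when handed back to the regex engine, so the compile step needs the re.error guard. A small illustration in plain Python, not calibre code:

    import re

    for candidate in ('valid.*pattern', 'broken[', '(unclosed'):
        try:
            re.compile(candidate)
            print('compiled: ' + candidate)
        except re.error as err:
            # Arbitrary matched text is often not a valid regex
            print('invalid regex %r: %s' % (candidate, err))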
@@ -113,6 +113,11 @@ class HTMLFile(object):
                 raise IOError(msg)
             raise IgnoreFile(msg, err.errno)

+        if not src:
+            if level == 0:
+                raise ValueError('The file %s is empty'%self.path)
+            self.is_binary = True
+
         if not self.is_binary:
             if not encoding:
                 encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]
@@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
         fixauthors)
 from calibre.ebooks.metadata.book.base import Metadata
-from calibre.utils.date import parse_date
+from calibre.utils.date import parse_only_date
 from calibre.utils.localization import canonicalize_lang

 class Worker(Thread): # Get details {{{
@@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
             ans = x.tail
             date = ans.rpartition('(')[-1].replace(')', '').strip()
             date = self.delocalize_datestr(date)
-            return parse_date(date, assume_utc=True)
+            return parse_only_date(date, assume_utc=True)

     def parse_language(self, pd):
         for x in reversed(pd.xpath(self.language_xpath)):
@@ -306,10 +306,15 @@ class MOBIHeader(object): # {{{
             self.extra_data_flags = 0
         if self.has_extra_data_flags:
             self.unknown4 = self.raw[184:192]
-            self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
-                    self.raw, 192)
-            if self.fdst_count <= 1:
-                self.fdst_idx = NULL_INDEX
+            if self.file_version < 8:
+                self.first_text_record, self.last_text_record = \
+                        struct.unpack_from(b'>HH', self.raw, 192)
+                self.fdst_count = struct.unpack_from(b'>L', self.raw, 196)
+            else:
+                self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
+                        self.raw, 192)
+                if self.fdst_count <= 1:
+                    self.fdst_idx = NULL_INDEX
         (self.fcis_number, self.fcis_count, self.flis_number,
                 self.flis_count) = struct.unpack(b'>IIII',
                 self.raw[200:216])
@@ -409,7 +414,11 @@ class MOBIHeader(object): # {{{
         a('DRM Flags: %r'%self.drm_flags)
         if self.has_extra_data_flags:
             a('Unknown4: %r'%self.unknown4)
-            r('FDST Index', 'fdst_idx')
+            if hasattr(self, 'first_text_record'):
+                a('First content record: %d'%self.first_text_record)
+                a('Last content record: %d'%self.last_text_record)
+            else:
+                r('FDST Index', 'fdst_idx')
             a('FDST Count: %d'% self.fdst_count)
             r('FCIS number', 'fcis_number')
             a('FCIS count: %d'% self.fcis_count)
@@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
             continue

         if not isinstance(flow, unicode):
-            flow = flow.decode(mr.header.codec)
+            try:
+                flow = flow.decode(mr.header.codec)
+            except UnicodeDecodeError:
+                log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
+                flow = flow.decode(mr.header.codec, 'replace')

         # links to raster image files from image tags
         # image_pattern
@@ -207,9 +207,9 @@ class Mobi8Reader(object):
                 fname = 'svgimg' + nstr + '.svg'
             else:
                 # search for CDATA and if exists inline it
-                if flowpart.find('[CDATA[') >= 0:
+                if flowpart.find(b'[CDATA[') >= 0:
                     typ = 'css'
-                    flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
+                    flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
                     format = 'inline'
                     dir = None
                     fname = None
@@ -382,6 +382,7 @@ class MobiWriter(object):
         first_image_record = len(self.records)
         self.resources.serialize(self.records, used_images)
         resource_record_count = len(self.records) - old
+        last_content_record = len(self.records) - 1

         # FCIS/FLIS (Seems to serve no purpose)
         flis_number = len(self.records)
@@ -406,7 +407,7 @@ class MobiWriter(object):
         # header
         header_fields['first_resource_record'] = first_image_record
         header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
-        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
         header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
         header_fields['flis_record'] = flis_number
         header_fields['fcis_record'] = fcis_number
@@ -314,9 +314,9 @@ class KF8Writer(object):
             return

         # Flatten the ToC into a depth first list
-        fl = toc.iter() if is_periodical else toc.iterdescendants()
+        fl = toc.iterdescendants()
         for i, item in enumerate(fl):
-            entry = {'id': id(item), 'index': i, 'href':item.href,
+            entry = {'id': id(item), 'index': i, 'href':item.href or '',
                 'label':(item.title or _('Unknown')),
                 'children':[]}
             entry['depth'] = getattr(item, 'ncx_hlvl', 0)
@@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
     unknown2 = zeroes(8)

     # 192: FDST
+    # In MOBI 6 the fdst record is instead two two byte fields storing the
+    # index of the first and last content records
     fdst_record = DYN
     fdst_count = DYN
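Taken together, the MOBI header hunks above say that bytes 192-199 of the header are overloaded: MOBI 6 files store two 16-bit content-record indices followed by a 32-bit count, while KF8 files store a 32-bit FDST index and a 32-bit count. A hedged sketch of that dual layout; only the offsets and struct formats come from the diff, while the NULL_INDEX value and the fabricated byte string are assumptions:

    import struct

    NULL_INDEX = 0xffffffff  # assumed all-ones sentinel

    def parse_fdst(raw, file_version):
        if file_version < 8:  # MOBI 6 layout
            first_rec, last_rec = struct.unpack_from(b'>HH', raw, 192)
            fdst_count = struct.unpack_from(b'>L', raw, 196)[0]
            return {'first': first_rec, 'last': last_rec, 'count': fdst_count}
        fdst_idx, fdst_count = struct.unpack_from(b'>LL', raw, 192)  # KF8 layout
        if fdst_count <= 1:
            fdst_idx = NULL_INDEX
        return {'fdst_idx': fdst_idx, 'count': fdst_count}

    fake = b'\x00' * 192 + struct.pack(b'>HHL', 1, 12, 1)  # fabricated header tail
    print(parse_fdst(fake, 6))
    print(parse_fdst(fake, 8))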
@@ -966,7 +966,7 @@ class Manifest(object):
                 data = data.cssText
                 if isinstance(data, unicode):
                     data = data.encode('utf-8')
-                return data
+                return data + b'\n'
             return str(data)

         def __unicode__(self):
@@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
     # Drill down into iframes, etc.
     while true
         target = cdoc.elementFromPoint x, y
-        if not target or target.localName == 'html'
-            log("No element at (#{ x }, #{ y })")
+        if not target or target.localName in ['html', 'body']
+            # We ignore both html and body even though body could
+            # have text nodes under it as performance is very poor if body
+            # has large margins/padding (for e.g. in fullscreen mode)
+            # A possible solution for this is to wrap all text node
+            # children of body in <span> but that is seriously ugly and
+            # might have side effects. Lets do this only if there are lots of
+            # books in the wild that actually have text children of body,
+            # and even in this case it might be better to change the input
+            # plugin to prevent this from happening.
+            # log("No element at (#{ x }, #{ y })")
             return null

         name = target.localName
src/calibre/ebooks/oeb/display/indexing.coffee (new file, 76 lines)
@@ -0,0 +1,76 @@
+#!/usr/bin/env coffee
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+###
+ Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
+ Released under the GPLv3 License
+###
+
+body_height = () ->
+    db = document.body
+    dde = document.documentElement
+    if db? and dde?
+        return Math.max(db.scrollHeight, dde.scrollHeight, db.offsetHeight,
+            dde.offsetHeight, db.clientHeight, dde.clientHeight)
+    return 0
+
+abstop = (elem) ->
+    ans = elem.offsetTop
+    while elem.offsetParent
+        elem = elem.offsetParent
+        ans += elem.offsetTop
+    return ans
+
+class BookIndexing
+    ###
+    This class is a namespace to expose indexing functions via the
+    window.book_indexing object. The most important functions are:
+
+    anchor_positions(): Get the absolute (document co-ordinate system) position
+    for elements with the specified id/name attributes.
+
+    ###
+
+    constructor: () ->
+        this.cache = {}
+        this.body_height_at_last_check = null
+
+    cache_valid: (anchors) ->
+        for a in anchors
+            if not Object.prototype.hasOwnProperty.call(this.cache, a)
+                return false
+        for p of this.cache
+            if Object.prototype.hasOwnProperty.call(this.cache, p) and p not in anchors
+                return false
+        return true
+
+    anchor_positions: (anchors, use_cache=false) ->
+        if use_cache and body_height() == this.body_height_at_last_check and this.cache_valid(anchors)
+            return this.cache
+
+        ans = {}
+        for anchor in anchors
+            elem = document.getElementById(anchor)
+            if elem == null
+                # Look for an <a name="anchor"> element
+                try
+                    result = document.evaluate(
+                        ".//*[local-name() = 'a' and @name='#{ anchor }']",
+                        document.body, null,
+                        XPathResult.FIRST_ORDERED_NODE_TYPE, null)
+                    elem = result.singleNodeValue
+                catch error
+                    # The anchor had a ' or other invalid char
+                    elem = null
+            if elem == null
+                pos = body_height() + 10000
+            else
+                pos = abstop(elem)
+            ans[anchor] = pos
+        this.cache = ans
+        this.body_height_at_last_check = body_height()
+        return ans
+
+if window?
+    window.book_indexing = new BookIndexing()
@@ -1,383 +0,0 @@
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
-
-'''
-Iterate over the HTML files in an ebook. Useful for writing viewers.
-'''
-
-import re, os, math
-from cStringIO import StringIO
-
-from PyQt4.Qt import QFontDatabase
-
-from calibre.customize.ui import available_input_formats
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.utils.zipfile import safe_replace
-from calibre.utils.config import DynamicConfig
-from calibre.utils.logging import Log
-from calibre import (guess_type, prints, prepare_string_for_xml,
-        xml_replace_entities)
-from calibre.ebooks.oeb.transforms.cover import CoverManager
-from calibre.constants import filesystem_encoding
-
-TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
-        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
-        ).replace('__width__', '600').replace('__height__', '800')
-BM_FIELD_SEP = u'*|!|?|*'
-BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
-
-def character_count(html):
-    '''
-    Return the number of "significant" text characters in a HTML string.
-    '''
-    count = 0
-    strip_space = re.compile(r'\s+')
-    for match in re.finditer(r'>[^<]+<', html):
-        count += len(strip_space.sub(' ', match.group()))-2
-    return count
-
-class UnsupportedFormatError(Exception):
-
-    def __init__(self, fmt):
-        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
-
-class SpineItem(unicode):
-
-    def __new__(cls, path, mime_type=None):
-        ppath = path.partition('#')[0]
-        if not os.path.exists(path) and os.path.exists(ppath):
-            path = ppath
-        obj = super(SpineItem, cls).__new__(cls, path)
-        raw = open(path, 'rb').read()
-        raw, obj.encoding = xml_to_unicode(raw)
-        obj.character_count = character_count(raw)
-        obj.start_page = -1
-        obj.pages = -1
-        obj.max_page = -1
-        if mime_type is None:
-            mime_type = guess_type(obj)[0]
-        obj.mime_type = mime_type
-        return obj
-
-class FakeOpts(object):
-    verbose = 0
-    breadth_first = False
-    max_levels = 5
-    input_encoding = None
-
-def is_supported(path):
-    ext = os.path.splitext(path)[1].replace('.', '').lower()
-    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
-    return ext in available_input_formats()
-
-
-def write_oebbook(oeb, path):
-    from calibre.ebooks.oeb.writer import OEBWriter
-    from calibre import walk
-    w = OEBWriter()
-    w(oeb, path)
-    for f in walk(path):
-        if f.endswith('.opf'):
-            return f
-
-class EbookIterator(object):
-
-    CHARACTERS_PER_PAGE = 1000
-
-    def __init__(self, pathtoebook, log=None):
-        self.log = log
-        if log is None:
-            self.log = Log()
-        pathtoebook = pathtoebook.strip()
-        self.pathtoebook = os.path.abspath(pathtoebook)
-        self.config = DynamicConfig(name='iterator')
-        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
-        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
-        self.ebook_ext = ext.replace('original_', '')
-
-    def search(self, text, index, backwards=False):
-        text = prepare_string_for_xml(text.lower())
-        pmap = [(i, path) for i, path in enumerate(self.spine)]
-        if backwards:
-            pmap.reverse()
-        for i, path in pmap:
-            if (backwards and i < index) or (not backwards and i > index):
-                with open(path, 'rb') as f:
-                    raw = f.read().decode(path.encoding)
-                try:
-                    raw = xml_replace_entities(raw)
-                except:
-                    pass
-                if text in raw.lower():
-                    return i
-
-    def find_missing_css_files(self):
-        for x in os.walk(os.path.dirname(self.pathtoopf)):
-            for f in x[-1]:
-                if f.endswith('.css'):
-                    yield os.path.join(x[0], f)
-
-    def find_declared_css_files(self):
-        for item in self.opf.manifest:
-            if item.mime_type and 'css' in item.mime_type.lower():
-                yield item.path
-
-    def find_embedded_fonts(self):
-        '''
-        This will become unnecessary once Qt WebKit supports the @font-face rule.
-        '''
-        css_files = set(self.find_declared_css_files())
-        if not css_files:
-            css_files = set(self.find_missing_css_files())
-        bad_map = {}
-        font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
-        for csspath in css_files:
-            try:
-                css = open(csspath, 'rb').read().decode('utf-8', 'replace')
-            except:
-                continue
-            for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
-                block = match.group(1)
-                family = font_family_pat.search(block)
-                url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
-                if url:
-                    path = url.group(1).split('/')
-                    path = os.path.join(os.path.dirname(csspath), *path)
-                    if not os.access(path, os.R_OK):
-                        continue
-                    id = QFontDatabase.addApplicationFont(path)
-                    if id != -1:
-                        families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
-                        if family:
-                            family = family.group(1)
-                            specified_families = [x.strip().replace('"',
-                                '').replace("'", '') for x in family.split(',')]
-                            aliasing_ok = False
-                            for f in specified_families:
-                                bad_map[f] = families[0]
-                                if not aliasing_ok and f in families:
-                                    aliasing_ok = True
-
-                            if not aliasing_ok:
-                                prints('WARNING: Family aliasing not fully supported.')
-                                prints('\tDeclared family: %r not in actual families: %r'
-                                        % (family, families))
-                            else:
-                                prints('Loaded embedded font:', repr(family))
-        if bad_map:
-            def prepend_embedded_font(match):
-                for bad, good in bad_map.items():
-                    if bad in match.group(1):
-                        prints('Substituting font family: %s -> %s'%(bad, good))
-                        return match.group().replace(bad, '"%s"'%good)
-
-            from calibre.ebooks.chardet import force_encoding
-            for csspath in css_files:
-                with open(csspath, 'r+b') as f:
-                    css = f.read()
-                    enc = force_encoding(css, False)
-                    css = css.decode(enc, 'replace')
-                    ncss = font_family_pat.sub(prepend_embedded_font, css)
-                    if ncss != css:
-                        f.seek(0)
-                        f.truncate()
-                        f.write(ncss.encode(enc))
-
-    def __enter__(self, processed=False, only_input_plugin=False):
-        self.delete_on_exit = []
-        self._tdir = TemporaryDirectory('_ebook_iter')
-        self.base = self._tdir.__enter__()
-        if not isinstance(self.base, unicode):
-            self.base = self.base.decode(filesystem_encoding)
-        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
-        plumber = Plumber(self.pathtoebook, self.base, self.log)
-        plumber.setup_options()
-        if self.pathtoebook.lower().endswith('.opf'):
-            plumber.opts.dont_package = True
-        if hasattr(plumber.opts, 'no_process'):
-            plumber.opts.no_process = True
-
-        plumber.input_plugin.for_viewer = True
-        with plumber.input_plugin:
-            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-                plumber.opts, plumber.input_fmt, self.log,
-                {}, self.base)
-
-        if not only_input_plugin:
-            if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
-                    not hasattr(self.pathtoopf, 'manifest'):
-                if hasattr(self.pathtoopf, 'manifest'):
-                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
-                        plumber.opts)
-
-        if hasattr(self.pathtoopf, 'manifest'):
-            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-
-        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
-        if getattr(plumber.input_plugin, 'is_kf8', False):
-            self.book_format = 'KF8'
-
-        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
-        if self.opf is None:
-            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
-        self.language = self.opf.language
-        if self.language:
-            self.language = self.language.lower()
-        ordered = [i for i in self.opf.spine if i.is_linear] + \
-                  [i for i in self.opf.spine if not i.is_linear]
-        self.spine = []
-        for i in ordered:
-            spath = i.path
-            mt = None
-            if i.idref is not None:
-                mt = self.opf.manifest.type_for_id(i.idref)
-            if mt is None:
-                mt = guess_type(spath)[0]
-            try:
-                self.spine.append(SpineItem(spath, mime_type=mt))
-            except:
-                self.log.warn('Missing spine item:', repr(spath))
-
-        cover = self.opf.cover
-        if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
-            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
-            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
-            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
-            open(cfile, 'wb').write(chtml)
-            self.spine[0:0] = [SpineItem(cfile,
-                mime_type='application/xhtml+xml')]
-            self.delete_on_exit.append(cfile)
-
-        if self.opf.path_to_html_toc is not None and \
-           self.opf.path_to_html_toc not in self.spine:
-            try:
-                self.spine.append(SpineItem(self.opf.path_to_html_toc))
-            except:
-                import traceback
-                traceback.print_exc()
-
-
-        sizes = [i.character_count for i in self.spine]
-        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
-        for p, s in zip(self.pages, self.spine):
-            s.pages = p
-        start = 1
-
-        for s in self.spine:
-            s.start_page = start
-            start += s.pages
-            s.max_page = s.start_page + s.pages - 1
-        self.toc = self.opf.toc
-
-        self.read_bookmarks()
-
-        return self
-
-    def parse_bookmarks(self, raw):
-        for line in raw.splitlines():
-            bm = None
-            if line.count('^') > 0:
-                tokens = line.rpartition('^')
-                title, ref = tokens[0], tokens[2]
-                try:
-                    spine, _, pos = ref.partition('#')
-                    spine = int(spine.strip())
-                except:
-                    continue
-                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
-            elif BM_FIELD_SEP in line:
-                try:
-                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
-                    spine = int(spine)
-                except:
-                    continue
-                # Unescape from serialization
-                pos = pos.replace(BM_LEGACY_ESC, u'^')
-                # Check for pos being a scroll fraction
-                try:
-                    pos = float(pos)
-                except:
-                    pass
-                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
-
-            if bm:
-                self.bookmarks.append(bm)
-
-    def serialize_bookmarks(self, bookmarks):
-        dat = []
-        for bm in bookmarks:
-            if bm['type'] == 'legacy':
-                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
-            else:
-                pos = bm['pos']
-                if isinstance(pos, (int, float)):
-                    pos = unicode(pos)
-                else:
-                    pos = pos.replace(u'^', BM_LEGACY_ESC)
-                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
-            dat.append(rec)
-        return (u'\n'.join(dat) +u'\n')
-
-    def read_bookmarks(self):
-        self.bookmarks = []
-        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
-        raw = ''
-        if os.path.exists(bmfile):
-            with open(bmfile, 'rb') as f:
-                raw = f.read()
-        else:
-            saved = self.config['bookmarks_'+self.pathtoebook]
-            if saved:
-                raw = saved
-        if not isinstance(raw, unicode):
-            raw = raw.decode('utf-8')
-        self.parse_bookmarks(raw)
-
-    def save_bookmarks(self, bookmarks=None):
-        if bookmarks is None:
-            bookmarks = self.bookmarks
-        dat = self.serialize_bookmarks(bookmarks)
-        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
-            os.access(self.pathtoebook, os.R_OK):
-            try:
-                zf = open(self.pathtoebook, 'r+b')
-            except IOError:
-                return
-            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
-                    StringIO(dat.encode('utf-8')),
-                    add_missing=True)
-        else:
-            self.config['bookmarks_'+self.pathtoebook] = dat
-
-    def add_bookmark(self, bm):
-        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
-                bm['title']]
-        self.bookmarks.append(bm)
-        self.save_bookmarks()
-
-    def set_bookmarks(self, bookmarks):
-        self.bookmarks = bookmarks
-
-    def __exit__(self, *args):
-        self._tdir.__exit__(*args)
-        for x in self.delete_on_exit:
-            if os.path.exists(x):
-                os.remove(x)
-
-def get_preprocess_html(path_to_ebook, output):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
-    iterator = EbookIterator(path_to_ebook)
-    iterator.__enter__(only_input_plugin=True)
-    preprocessor = HTMLPreProcessor(None, False)
-    with open(output, 'wb') as out:
-        for path in iterator.spine:
-            with open(path, 'rb') as f:
-                html = f.read().decode('utf-8', 'replace')
-            html = preprocessor(html, get_preprocess_html=True)
-            out.write(html.encode('utf-8'))
-            out.write(b'\n\n' + b'-'*80 + b'\n\n')
src/calibre/ebooks/oeb/iterator/__init__.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re
+
+from calibre.customize.ui import available_input_formats
+
+def is_supported(path):
+    ext = os.path.splitext(path)[1].replace('.', '').lower()
+    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
+    return ext in available_input_formats()
+
+class UnsupportedFormatError(Exception):
+
+    def __init__(self, fmt):
+        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
+
+def EbookIterator(*args, **kwargs):
+    'For backwards compatibility'
+    from calibre.ebooks.oeb.iterator.book import EbookIterator
+    return EbookIterator(*args, **kwargs)
+
+def get_preprocess_html(path_to_ebook, output):
+    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+    iterator = EbookIterator(path_to_ebook)
+    iterator.__enter__(only_input_plugin=True, run_char_count=False,
+            read_anchor_map=False)
+    preprocessor = HTMLPreProcessor(None, False)
+    with open(output, 'wb') as out:
+        for path in iterator.spine:
+            with open(path, 'rb') as f:
+                html = f.read().decode('utf-8', 'replace')
+            html = preprocessor(html, get_preprocess_html=True)
+            out.write(html.encode('utf-8'))
+            out.write(b'\n\n' + b'-'*80 + b'\n\n')
src/calibre/ebooks/oeb/iterator/book.py (new file, 187 lines)
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Iterate over the HTML files in an ebook. Useful for writing viewers.
+'''
+
+import re, os, math
+from functools import partial
+
+from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.config import DynamicConfig
+from calibre.utils.logging import default_log
+from calibre import (guess_type, prepare_string_for_xml,
+        xml_replace_entities)
+from calibre.ebooks.oeb.transforms.cover import CoverManager
+
+from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
+from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
+
+TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
+        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
+        ).replace('__width__', '600').replace('__height__', '800')
+
+class FakeOpts(object):
+    verbose = 0
+    breadth_first = False
+    max_levels = 5
+    input_encoding = None
+
+
+def write_oebbook(oeb, path):
+    from calibre.ebooks.oeb.writer import OEBWriter
+    from calibre import walk
+    w = OEBWriter()
+    w(oeb, path)
+    for f in walk(path):
+        if f.endswith('.opf'):
+            return f
+
+class EbookIterator(BookmarksMixin):
+
+    CHARACTERS_PER_PAGE = 1000
+
+    def __init__(self, pathtoebook, log=None):
+        self.log = log or default_log
+        pathtoebook = pathtoebook.strip()
+        self.pathtoebook = os.path.abspath(pathtoebook)
+        self.config = DynamicConfig(name='iterator')
+        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
+        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
+        self.ebook_ext = ext.replace('original_', '')
+
+    def search(self, text, index, backwards=False):
+        text = prepare_string_for_xml(text.lower())
+        pmap = [(i, path) for i, path in enumerate(self.spine)]
+        if backwards:
+            pmap.reverse()
+        for i, path in pmap:
+            if (backwards and i < index) or (not backwards and i > index):
+                with open(path, 'rb') as f:
+                    raw = f.read().decode(path.encoding)
+                try:
+                    raw = xml_replace_entities(raw)
+                except:
+                    pass
+                if text in raw.lower():
+                    return i
+
+    def __enter__(self, processed=False, only_input_plugin=False,
+            run_char_count=True, read_anchor_map=True):
+        ''' Convert an ebook file into an exploded OEB book suitable for
+        display in viewers/preprocessing etc. '''
+
+        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
+
+        self.delete_on_exit = []
+        self._tdir = TemporaryDirectory('_ebook_iter')
+        self.base = self._tdir.__enter__()
+        plumber = Plumber(self.pathtoebook, self.base, self.log)
+        plumber.setup_options()
+        if self.pathtoebook.lower().endswith('.opf'):
+            plumber.opts.dont_package = True
+        if hasattr(plumber.opts, 'no_process'):
+            plumber.opts.no_process = True
+
+        plumber.input_plugin.for_viewer = True
+        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
+            self.pathtoopf = plumber.input_plugin(inf,
+                plumber.opts, plumber.input_fmt, self.log,
+                {}, self.base)
+
+        if not only_input_plugin:
+            # Run the HTML preprocess/parsing from the conversion pipeline as
+            # well
+            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
+                    and not hasattr(self.pathtoopf, 'manifest')):
+                if hasattr(self.pathtoopf, 'manifest'):
+                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
+                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
+                        plumber.opts)
+
+        if hasattr(self.pathtoopf, 'manifest'):
+            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
+
+        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
+        if getattr(plumber.input_plugin, 'is_kf8', False):
+            self.book_format = 'KF8'
+
+        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
+        if self.opf is None:
+            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
+        self.language = self.opf.language
+        if self.language:
+            self.language = self.language.lower()
+        ordered = [i for i in self.opf.spine if i.is_linear] + \
+                  [i for i in self.opf.spine if not i.is_linear]
+        self.spine = []
+        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
+                run_char_count=run_char_count)
+        for i in ordered:
+            spath = i.path
+            mt = None
+            if i.idref is not None:
+                mt = self.opf.manifest.type_for_id(i.idref)
+            if mt is None:
+                mt = guess_type(spath)[0]
+            try:
+                self.spine.append(Spiny(spath, mime_type=mt))
+            except:
+                self.log.warn('Missing spine item:', repr(spath))
+
+        cover = self.opf.cover
+        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
+                'azw', 'azw3'}:
+            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
+            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
+            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
+            with open(cfile, 'wb') as f:
+                f.write(chtml)
+            self.spine[0:0] = [Spiny(cfile,
+                mime_type='application/xhtml+xml')]
+            self.delete_on_exit.append(cfile)
+
+        if self.opf.path_to_html_toc is not None and \
+           self.opf.path_to_html_toc not in self.spine:
+            try:
+                self.spine.append(Spiny(self.opf.path_to_html_toc))
+            except:
+                import traceback
+                traceback.print_exc()
+
+        sizes = [i.character_count for i in self.spine]
+        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
+        for p, s in zip(self.pages, self.spine):
+            s.pages = p
+        start = 1
+
+        for s in self.spine:
+            s.start_page = start
+            start += s.pages
+            s.max_page = s.start_page + s.pages - 1
+        self.toc = self.opf.toc
+        if read_anchor_map:
+            create_indexing_data(self.spine, self.toc)
+
+        self.read_bookmarks()
+
+        return self
+
+    def __exit__(self, *args):
+        self._tdir.__exit__(*args)
+        for x in self.delete_on_exit:
+            try:
+                os.remove(x)
+            except:
+                pass
src/calibre/ebooks/oeb/iterator/bookmarks.py (new file, 105 lines)
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+from io import BytesIO
+
+from calibre.utils.zipfile import safe_replace
+
+BM_FIELD_SEP = u'*|!|?|*'
+BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
+
+class BookmarksMixin(object):
+
+    def parse_bookmarks(self, raw):
+        for line in raw.splitlines():
+            bm = None
+            if line.count('^') > 0:
+                tokens = line.rpartition('^')
+                title, ref = tokens[0], tokens[2]
+                try:
+                    spine, _, pos = ref.partition('#')
+                    spine = int(spine.strip())
+                except:
+                    continue
+                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
+            elif BM_FIELD_SEP in line:
+                try:
+                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
+                    spine = int(spine)
+                except:
+                    continue
+                # Unescape from serialization
+                pos = pos.replace(BM_LEGACY_ESC, u'^')
+                # Check for pos being a scroll fraction
+                try:
+                    pos = float(pos)
+                except:
+                    pass
+                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
+
+            if bm:
+                self.bookmarks.append(bm)
+
+    def serialize_bookmarks(self, bookmarks):
+        dat = []
+        for bm in bookmarks:
+            if bm['type'] == 'legacy':
+                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
+            else:
+                pos = bm['pos']
+                if isinstance(pos, (int, float)):
+                    pos = unicode(pos)
+                else:
+                    pos = pos.replace(u'^', BM_LEGACY_ESC)
+                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
+            dat.append(rec)
+        return (u'\n'.join(dat) +u'\n')
+
+    def read_bookmarks(self):
+        self.bookmarks = []
+        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
+        raw = ''
+        if os.path.exists(bmfile):
+            with open(bmfile, 'rb') as f:
+                raw = f.read()
+        else:
+            saved = self.config['bookmarks_'+self.pathtoebook]
+            if saved:
+                raw = saved
+        if not isinstance(raw, unicode):
+            raw = raw.decode('utf-8')
+        self.parse_bookmarks(raw)
+
+    def save_bookmarks(self, bookmarks=None):
+        if bookmarks is None:
+            bookmarks = self.bookmarks
+        dat = self.serialize_bookmarks(bookmarks)
+        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
+            os.access(self.pathtoebook, os.R_OK):
+            try:
+                zf = open(self.pathtoebook, 'r+b')
+            except IOError:
+                return
+            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
+                    BytesIO(dat.encode('utf-8')),
+                    add_missing=True)
+        else:
+            self.config['bookmarks_'+self.pathtoebook] = dat
+
+    def add_bookmark(self, bm):
+        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
+                bm['title']]
+        self.bookmarks.append(bm)
+        self.save_bookmarks()
+
+    def set_bookmarks(self, bookmarks):
+        self.bookmarks = bookmarks
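The two constants above define a one-bookmark-per-line text format: fields joined by BM_FIELD_SEP, with any '^' in the position escaped so the line cannot be mistaken for the legacy 'title^spine#pos' form. A self-contained round-trip sketch mirroring (not importing) the mixin's logic, with made-up values, written Python 2 style to match the file:

    BM_FIELD_SEP = u'*|!|?|*'
    BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'

    def serialize(title, spine, pos):
        # Escape '^' so old parsers cannot misread the record
        return BM_FIELD_SEP.join([title, unicode(spine),
                pos.replace(u'^', BM_LEGACY_ESC)])

    def parse(line):
        title, spine, pos = line.strip().split(BM_FIELD_SEP)
        return title, int(spine), pos.replace(BM_LEGACY_ESC, u'^')

    line = serialize(u'Chapter 3', 2, u'/2/4^^1:0')
    print(parse(line) == (u'Chapter 3', 2, u'/2/4^^1:0'))  # True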
src/calibre/ebooks/oeb/iterator/spine.py (new file, 120 lines)
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+from future_builtins import map
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, os
+from functools import partial
+from operator import attrgetter
+from collections import namedtuple
+
+from calibre import guess_type
+from calibre.ebooks.chardet import xml_to_unicode
+
+def character_count(html):
+    ''' Return the number of "significant" text characters in a HTML string. '''
+    count = 0
+    strip_space = re.compile(r'\s+')
+    for match in re.finditer(r'>[^<]+<', html):
+        count += len(strip_space.sub(' ', match.group()))-2
+    return count
+
+def anchor_map(html):
+    ''' Return map of all anchor names to their offsets in the html '''
+    ans = {}
+    for match in re.finditer(
+        r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
+        anchor = match.group(0)
+        ans[anchor] = ans.get(anchor, match.start())
+    return ans
+
+class SpineItem(unicode):
+
+    def __new__(cls, path, mime_type=None, read_anchor_map=True,
+            run_char_count=True):
+        ppath = path.partition('#')[0]
+        if not os.path.exists(path) and os.path.exists(ppath):
+            path = ppath
+        obj = super(SpineItem, cls).__new__(cls, path)
+        with open(path, 'rb') as f:
+            raw = f.read()
+        raw, obj.encoding = xml_to_unicode(raw)
+        obj.character_count = character_count(raw) if run_char_count else 10000
+        obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
+        obj.start_page = -1
+        obj.pages = -1
+        obj.max_page = -1
+        obj.index_entries = []
+        if mime_type is None:
+            mime_type = guess_type(obj)[0]
+        obj.mime_type = mime_type
+        return obj
+
+class IndexEntry(object):
+
+    def __init__(self, spine, toc_entry, num):
+        self.num = num
+        self.text = toc_entry.text or _('Unknown')
+        self.key = toc_entry.abspath
+        self.anchor = self.start_anchor = toc_entry.fragment or None
+        try:
+            self.spine_pos = spine.index(self.key)
+        except ValueError:
+            self.spine_pos = -1
+        self.anchor_pos = 0
+        if self.spine_pos > -1:
+            self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
+                    0)
+
+        self.depth = 0
+        p = toc_entry.parent
+        while p is not None:
+            self.depth += 1
+            p = p.parent
+
+        self.sort_key = (self.spine_pos, self.anchor_pos)
+        self.spine_count = len(spine)
+
+    def find_end(self, all_entries):
+        potential_enders = [i for i in all_entries if
+                i.depth <= self.depth and
+                (
+                    (i.spine_pos == self.spine_pos and i.anchor_pos >
+                        self.anchor_pos)
+                    or
+                    i.spine_pos > self.spine_pos
+                )]
+        if potential_enders:
+            # potential_enders is sorted by (spine_pos, anchor_pos)
+            end = potential_enders[0]
+            self.end_spine_pos = end.spine_pos
+            self.end_anchor = end.anchor
+        else:
+            self.end_spine_pos = self.spine_count - 1
+            self.end_anchor = None
+
+def create_indexing_data(spine, toc):
+    if not toc: return
+    f = partial(IndexEntry, spine)
+    index_entries = list(map(f,
+        (t for t in toc.flat() if t is not toc),
+        (i-1 for i, t in enumerate(toc.flat()) if t is not toc)
+        ))
+    index_entries.sort(key=attrgetter('sort_key'))
+    [ i.find_end(index_entries) for i in index_entries ]
+
+    ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
+
+    for spine_pos, spine_item in enumerate(spine):
+        for i in index_entries:
+            if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
+                continue # Does not touch this file
+            start = i.anchor if i.spine_pos == spine_pos else None
+            end = i.end_anchor if i.spine_pos == spine_pos else None
+            spine_item.index_entries.append(ie(i, start, end))
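A quick demonstration of the anchor_map() scan defined above on a toy document. The sketch prints the captured group(1), the bare anchor name, to show what the regex extracts; note the file itself stores match.group(0), the whole attribute text:

    import re

    html = u'<h1 id="c1">One</h1><a name="fn1"></a>'
    ans = {}
    for match in re.finditer(r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
        # Keep only the offset of the first occurrence of each anchor
        ans[match.group(1)] = ans.get(match.group(1), match.start())
    print(ans)  # {u'c1': 4, u'fn1': 23}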
@@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
     # Remove any encoding-specifying <meta/> elements
     for meta in META_XP(data):
         meta.getparent().remove(meta)
-    etree.SubElement(head, XHTML('meta'),
-            attrib={'http-equiv': 'Content-Type',
-                'content': '%s; charset=utf-8' % XHTML_NS})
+    meta = etree.SubElement(head, XHTML('meta'),
+            attrib={'http-equiv': 'Content-Type'})
+    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
+                                                    # attribute

     # Ensure has a <body/>
     if not xpath(data, '/h:html/h:body'):
         body = xpath(data, '//h:body')
@@ -347,7 +347,11 @@ class Stylizer(object):
                 style = self.flatten_style(rule.style)
                 self.page_rule.update(style)
             elif isinstance(rule, CSSFontFaceRule):
-                self.font_face_rules.append(rule)
+                if rule.style.length > 1:
+                    # Ignore the meaningless font face rules generated by the
+                    # benighted MS Word that contain only a font-family declaration
+                    # and nothing else
+                    self.font_face_rules.append(rule)
         return results

     def flatten_style(self, cssstyle):
@@ -157,10 +157,12 @@ class CSSFlattener(object):
         bs = body.get('style', '').split(';')
         bs.append('margin-top: 0pt')
         bs.append('margin-bottom: 0pt')
-        bs.append('margin-left : %fpt'%\
-                float(self.context.margin_left))
-        bs.append('margin-right : %fpt'%\
-                float(self.context.margin_right))
+        if float(self.context.margin_left) >= 0:
+            bs.append('margin-left : %gpt'%\
+                    float(self.context.margin_left))
+        if float(self.context.margin_right) >= 0:
+            bs.append('margin-right : %gpt'%\
+                    float(self.context.margin_right))
         bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
         if self.page_break_on_body:
             bs.extend(['page-break-before: always'])
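Besides skipping negative margins, the hunk above quietly switches the format from %f to %g, which drops trailing zeros in the emitted CSS. A quick comparison, plain Python rather than calibre code:

    for margin in (5.0, 2.5, 0.0):
        print('%fpt vs %gpt' % (margin, margin))
    # 5.000000pt vs 5pt
    # 2.500000pt vs 2.5pt
    # 0.000000pt vs 0pt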
@@ -393,10 +395,11 @@ class CSSFlattener(object):
             l = etree.SubElement(head, XHTML('link'),
                 rel='stylesheet', type=CSS_MIME, href=href)
             l.tail='\n'
-        href = item.relhref(global_href)
-        l = etree.SubElement(head, XHTML('link'),
-            rel='stylesheet', type=CSS_MIME, href=href)
-        l.tail = '\n'
+        if global_href:
+            href = item.relhref(global_href)
+            l = etree.SubElement(head, XHTML('link'),
+                rel='stylesheet', type=CSS_MIME, href=href)
+            l.tail = '\n'

     def replace_css(self, css):
         manifest = self.oeb.manifest
@@ -413,14 +416,16 @@ class CSSFlattener(object):
         global_css = defaultdict(list)
         for item in self.oeb.spine:
             stylizer = self.stylizers[item]
-            stylizer.page_rule['margin-top'] = '%gpt'%\
-                    float(self.context.margin_top)
-            stylizer.page_rule['margin-bottom'] = '%gpt'%\
-                    float(self.context.margin_bottom)
+            if float(self.context.margin_top) >= 0:
+                stylizer.page_rule['margin-top'] = '%gpt'%\
+                        float(self.context.margin_top)
+            if float(self.context.margin_bottom) >= 0:
+                stylizer.page_rule['margin-bottom'] = '%gpt'%\
+                        float(self.context.margin_bottom)
             items = stylizer.page_rule.items()
             items.sort()
             css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
-            css = '@page {\n%s\n}\n'%css
+            css = ('@page {\n%s\n}\n'%css) if items else ''
             rules = [r.cssText for r in stylizer.font_face_rules]
             raw = '\n\n'.join(rules)
             css += '\n\n' + raw
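Following the string building in the hunk above, the default 5pt margins would produce the @page block below. This sketch mirrors the diff's logic on a made-up items dict:

    items = sorted({'margin-top': '5pt', 'margin-bottom': '5pt'}.items())
    css = ';\n'.join('%s: %s' % (key, val) for key, val in items)
    print(('@page {\n%s\n}\n' % css) if items else '')
    # @page {
    # margin-bottom: 5pt;
    # margin-top: 5pt
    # }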
@@ -429,9 +434,11 @@ class CSSFlattener(object):
         gc_map = {}
         manifest = self.oeb.manifest
         for css in global_css:
-            id_, href = manifest.generate('page_css', 'page_styles.css')
-            manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
-                validate=False))
+            href = None
+            if css.strip():
+                id_, href = manifest.generate('page_css', 'page_styles.css')
+                manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
+                    validate=False))
             gc_map[css] = href

         ans = {}
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os
+import os, re
 from calibre.utils.date import isoformat, now
 from calibre import guess_type
@@ -141,7 +141,7 @@ class MergeMetadata(object):
             item = self.oeb.manifest.hrefs[old_cover.href]
             if not cdata:
                 return item.id
-            self.oeb.manifest.remove(item)
+            self.remove_old_cover(item)
         elif not cdata:
             id = self.oeb.manifest.generate(id='cover')
             self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
@@ -152,3 +152,41 @@ class MergeMetadata(object):
         self.oeb.guide.add('cover', 'Cover', href)
         return id

+    def remove_old_cover(self, cover_item):
+        from calibre.ebooks.oeb.base import XPath
+        from lxml import etree
+
+        self.oeb.manifest.remove(cover_item)
+
+        # Remove any references to the cover in the HTML
+        affected_items = set()
+        for item in self.oeb.spine:
+            try:
+                images = XPath('//h:img[@src]')(item.data)
+            except:
+                images = []
+            removed = False
+            for img in images:
+                href = item.abshref(img.get('src'))
+                if href == cover_item.href:
+                    img.getparent().remove(img)
+                    removed = True
+            if removed:
+                affected_items.add(item)
+
+        # Check if the resulting HTML has no content, if so remove it
+        for item in affected_items:
+            body = XPath('//h:body')(item.data)
+            if body:
+                text = etree.tostring(body[0], method='text', encoding=unicode)
+            else:
+                text = ''
+            text = re.sub(r'\s+', '', text)
+            if not text and not XPath('//h:img|//svg:svg')(item.data):
+                self.log('Removing %s as it is a wrapper around'
+                        ' the cover image'%item.href)
+                self.oeb.spine.remove(item)
+                self.oeb.manifest.remove(item)
@@ -372,8 +372,8 @@ class ParseRtf:
             old_rtf = old_rtf_obj.check_if_old_rtf()
             if old_rtf:
                 if self.__run_level > 5:
-                    msg = 'Older RTF\n'
-                    msg += 'self.__run_level is "%s"\n' % self.__run_level
+                    msg = 'Older RTF\n' \
+                        'self.__run_level is "%s"\n' % self.__run_level
                     raise RtfInvalidCodeException, msg
                 if self.__run_level > 1:
                     sys.stderr.write('File could be older RTF...\n')
@@ -381,7 +381,7 @@ class ParseRtf:
                 if self.__run_level > 1:
                     sys.stderr.write(
                         'File also has newer RTF.\n'
-                        'Will do the best to convert.\n'
+                        'Will do the best to convert...\n'
                     )
             add_brackets_obj = add_brackets.AddBrackets(
                     in_file = self.__temp_file,
@@ -20,6 +20,9 @@ class AddBrackets:
     """
     Add brackets for old RTF.
     Logic:
+    When control words without their own brackets are encountered
+    and in the list of allowed words, this will add brackets
+    to facilitate the treatment of the file
     """
     def __init__(self, in_file,
             bug_handler,
@@ -41,53 +44,56 @@ class AddBrackets:
         self.__copy = copy
         self.__write_to = better_mktemp()
         self.__run_level = run_level
 
-    def __initiate_values(self):
-        """
-        """
         self.__state_dict = {
             'before_body'           : self.__before_body_func,
             'in_body'               : self.__in_body_func,
             'after_control_word'    : self.__after_control_word_func,
             'in_ignore'             : self.__ignore_func,
         }
+        self.__accept = [
+            'cw<ci<bold______',
+            'cw<ci<annotation',
+            'cw<ci<blue______',
+            # 'cw<ci<bold______',
+            'cw<ci<caps______',
+            'cw<ci<char-style',
+            'cw<ci<dbl-strike',
+            'cw<ci<emboss____',
+            'cw<ci<engrave___',
+            'cw<ci<font-color',
+            'cw<ci<font-down_',
+            'cw<ci<font-size_',
+            'cw<ci<font-style',
+            'cw<ci<font-up___',
+            'cw<ci<footnot-mk',
+            'cw<ci<green_____',
+            'cw<ci<hidden____',
+            'cw<ci<italics___',
+            'cw<ci<outline___',
+            'cw<ci<red_______',
+            'cw<ci<shadow____',
+            'cw<ci<small-caps',
+            'cw<ci<strike-thr',
+            'cw<ci<subscript_',
+            'cw<ci<superscrip',
+            'cw<ci<underlined',
+            # 'cw<ul<underlined',
+        ]
+
+    def __initiate_values(self):
+        """
+        Init temp values
+        """
         self.__state = 'before_body'
         self.__inline = {}
         self.__temp_group = []
-        self.__open_bracket = 0
-        self.__found_brackets = 0
-        self.__accept = [
-            'cw<ci<bold______',
-            'cw<ci<annotation',
-            'cw<ci<blue______',
-            'cw<ci<bold______',
-            'cw<ci<caps______',
-            'cw<ci<char-style',
-            'cw<ci<dbl-strike',
-            'cw<ci<emboss____',
-            'cw<ci<engrave___',
-            'cw<ci<font-color',
-            'cw<ci<font-down_',
-            'cw<ci<font-size_',
-            'cw<ci<font-style',
-            'cw<ci<font-up___',
-            'cw<ci<footnot-mk',
-            'cw<ci<green_____',
-            'cw<ci<hidden____',
-            'cw<ci<italics___',
-            'cw<ci<outline___',
-            'cw<ci<red_______',
-            'cw<ci<shadow____',
-            'cw<ci<small-caps',
-            'cw<ci<strike-thr',
-            'cw<ci<subscript_',
-            'cw<ci<superscrip',
-            'cw<ci<underlined',
-            # 'cw<ul<underlined',
-        ]
+        self.__open_bracket = False
+        self.__found_brackets = False
 
     def __before_body_func(self, line):
         """
+        If we are before the body, not interested in changing anything
         """
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'in_body'
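Throughout these rtf2xml passes each token occupies one line: the first 16 characters identify the token and the payload starts at offset 20, which is why the code keeps slicing line[:16] and line[20:-1]. A hypothetical token line, sliced the same way:

    line = 'cw<ci<bold______<nu<true\n'   # hypothetical tokenized RTF line
    token_info = line[:16]                # 'cw<ci<bold______'
    attribute = line[20:-1]               # 'true' (drops the trailing newline)
    assert (token_info, attribute) == ('cw<ci<bold______', 'true')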
@@ -95,6 +101,14 @@ class AddBrackets:
 
     def __in_body_func(self, line):
         """
+        Select what action to take in body:
+            1-At the end of the file close the bracket if a bracket was opened
+            This happens if there is a change
+            2-If an open bracket is found the code inside is ignored
+            (written without modifications)
+            3-If an accepted control word is found put the line
+            in a buffer then change state to after cw
+            4-Else simply write the line
         """
         if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
             self.__write_obj.write(
@@ -102,7 +116,7 @@ class AddBrackets:
             )
             self.__write_obj.write(line)
         elif self.__token_info == 'ob<nu<open-brack':
-            self.__found_brackets = 1
+            self.__found_brackets = True
             self.__state = 'in_ignore'
             self.__ignore_count = self.__ob_count
             self.__write_obj.write(line)
@@ -114,6 +128,10 @@ class AddBrackets:
 
     def __after_control_word_func(self, line):
         """
+        After a cw either add next allowed cw to temporary list or
+        change group and write it.
+        If the token leading to an exit is an open bracket go to
+        ignore otherwise go to in body
         """
         if self.__token_info in self.__accept:
             self.__temp_group.append(line)
@@ -129,82 +147,84 @@ class AddBrackets:
 
     def __write_group(self):
         """
+        Write a temporary group after accepted control words end
+        But this is mostly useless in my opinion as there is no list of rejected cw
+        This may be a way to implement future old rtf processing for cw
+        Utility: open a group to just put brackets but why be so complicated?
+        Scheme: open brackets, write cw then go to body and back with cw after
         """
         if self.__open_bracket:
             self.__write_obj.write(
                 'cb<nu<clos-brack<0003\n'
             )
-            self.__open_bracket = 0
-        inline_string = ''
-        the_keys = self.__inline.keys()
-        for the_key in the_keys:
-            value = self.__inline[the_key]
-            if value != 'false':
-                inline_string += '%s<nu<%s\n' % (the_key, value)
+            self.__open_bracket = False
+        inline_string = ''.join(['%s<nu<%s\n' % (k, v) \
+                            for k, v in self.__inline.iteritems() \
+                            if v != 'false'])
         if inline_string:
-            self.__write_obj.write('ob<nu<open-brack<0003\n')
-            self.__write_obj.write(inline_string)
-            self.__open_bracket = 1
+            self.__write_obj.write('ob<nu<open-brack<0003\n'
+                                   '%s' % inline_string)
+            self.__open_bracket = True
         self.__temp_group = []
 
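The new inline_string expression is the old accumulation loop folded into a join; on any given dict the two produce the same string. A sketch of the equivalence (using .items() so the sketch also runs on Python 3, whereas the module itself uses .iteritems()):

    inline = {'cw<ci<bold______': 'true',
              'cw<ci<italics___': 'false'}   # hypothetical inline state

    s1 = ''
    for key in inline.keys():
        if inline[key] != 'false':
            s1 += '%s<nu<%s\n' % (key, inline[key])

    s2 = ''.join(['%s<nu<%s\n' % (k, v)
                  for k, v in inline.items()
                  if v != 'false'])
    assert s1 == s2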
def __change_permanent_group(self):
|
def __change_permanent_group(self):
|
||||||
"""
|
"""
|
||||||
use temp group to change permanent group
|
Use temp group to change permanent group
|
||||||
|
If the control word is not accepted remove it
|
||||||
|
What is the interest as it is build to accept only accepted cw
|
||||||
|
in __after_control_word_func?
|
||||||
"""
|
"""
|
||||||
for line in self.__temp_group:
|
self.__inline = {line[:16] : line[20:-1]\
|
||||||
token_info = line[:16]
|
for line in self.__temp_group\
|
||||||
if token_info in self.__accept:
|
# Is this really necessary?
|
||||||
att = line[20:-1]
|
if line[:16] in self.__accept}
|
||||||
self.__inline[token_info] = att
|
|
||||||
|
|
||||||
def __ignore_func(self, line):
|
def __ignore_func(self, line):
|
||||||
"""
|
"""
|
||||||
Don't add any brackets while inside of brackets RTF has already
|
Just copy data inside of RTF brackets already here.
|
||||||
added.
|
|
||||||
"""
|
"""
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
if self.__token_info == 'cb<nu<clos-brack'and\
|
if self.__token_info == 'cb<nu<clos-brack'\
|
||||||
self.__cb_count == self.__ignore_count:
|
and self.__cb_count == self.__ignore_count:
|
||||||
self.__state = 'in_body'
|
self.__state = 'in_body'
|
||||||
|
|
||||||
def __check_brackets(self, in_file):
|
def __check_brackets(self, in_file):
|
||||||
self.__check_brack_obj = check_brackets.CheckBrackets\
|
"""
|
||||||
|
Return True if brackets match
|
||||||
|
"""
|
||||||
|
check_brack_obj = check_brackets.CheckBrackets\
|
||||||
(file = in_file)
|
(file = in_file)
|
||||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
return check_brack_obj.check_brackets()[0]
|
||||||
if not good_br:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
def add_brackets(self):
|
def add_brackets(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file, 'r')
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = 1
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
line = line_to_read
|
self.__ob_count = line[-5:-1]
|
||||||
self.__token_info = line[:16]
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
self.__cb_count = line[-5:-1]
|
||||||
self.__ob_count = line[-5:-1]
|
action = self.__state_dict.get(self.__state)
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if action is None:
|
||||||
self.__cb_count = line[-5:-1]
|
sys.stderr.write(
|
||||||
action = self.__state_dict.get(self.__state)
|
'No matching state in module add_brackets.py\n'
|
||||||
if action == None:
|
'%s\n' % self.__state)
|
||||||
sys.stderr.write('No matching state in module add_brackets.py\n')
|
action(line)
|
||||||
sys.stderr.write(self.__state + '\n')
|
#Check bad brackets
|
||||||
action(line)
|
if self.__check_brackets(self.__write_to):
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
bad_brackets = self.__check_brackets(self.__write_to)
|
|
||||||
if not bad_brackets:
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
||||||
copy_obj.rename(self.__write_to, self.__file)
|
copy_obj.rename(self.__write_to, self.__file)
|
||||||
else:
|
else:
|
||||||
if self.__run_level > 0:
|
if self.__run_level > 0:
|
||||||
sys.stderr.write(
|
sys.stderr.write(
|
||||||
'Sorry, but this files has a mix of old and new RTF.\n'
|
'Sorry, but this files has a mix of old and new RTF.\n'
|
||||||
'Some characteristics cannot be converted.\n')
|
'Some characteristics cannot be converted.\n')
|
||||||
os.remove(self.__write_to)
|
os.remove(self.__write_to)
|
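The pattern repeated across this commit is worth isolating: the readline() sentinel loop (line = 1; while line: ...) is replaced by direct iteration over the file object, and explicit close() calls by context managers. A minimal sketch of the shape every rewritten method now follows (the handle callback is an assumption of this sketch):

    def process(in_path, out_path, handle):
        # handle(line, token_info) returns the text to write for this token
        with open(in_path, 'r') as read_obj:
            with open(out_path, 'w') as write_obj:
                for line in read_obj:
                    write_obj.write(handle(line, line[:16]))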
(File diff suppressed because it is too large)
@@ -1,4 +1,5 @@
 import os, sys
+from codecs import EncodedFile
 
 from calibre.ebooks.rtf2xml import copy, check_encoding
 from calibre.ptempfile import better_mktemp
@@ -41,6 +42,7 @@ class ConvertToTags:
         self.__run_level = run_level
         self.__write_to = better_mktemp()
         self.__convert_utf = False
+        self.__bad_encoding = False
 
     def __initiate_values(self):
         """
@@ -213,13 +215,14 @@ class ConvertToTags:
 
         if not check_encoding_obj.check_encoding(self.__file, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
-        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
+        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
             self.__convert_utf = True
         else:
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
             sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
                     ' hope for the best')
+            self.__bad_encoding = True
         self.__new_line = 0
         self.__write_new_line()
         if self.__no_dtd:
@@ -247,7 +250,7 @@ class ConvertToTags:
         the appropriate function.
         The functions that are called:
             a text function for text
-            an open funciton for open tags
+            an open function for open tags
             an open with attribute function for tags with attributes
             an empty with attribute function for tags that are empty but have
             attribtes.
@@ -263,20 +266,19 @@ class ConvertToTags:
             action = self.__state_dict.get(self.__token_info)
             if action is not None:
                 action(line)
-        self.__write_obj.close()
-        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
-        if self.__convert_utf:
+        #convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
+        if self.__convert_utf or self.__bad_encoding:
             copy_obj = copy.Copy(bug_handler = self.__bug_handler)
             copy_obj.rename(self.__write_to, self.__file)
+            file_encoding = "utf-8"
+            if self.__bad_encoding:
+                file_encoding = "us-ascii"
             with open(self.__file, 'r') as read_obj:
                 with open(self.__write_to, 'w') as write_obj:
-                    file = read_obj.read()
-                    try:
-                        file = file.decode(self.__encoding)
-                        write_obj.write(file.encode('utf-8'))
-                    except:
-                        sys.stderr.write('Conversion to UTF-8 is not possible,'
-                            ' encoding should be very carefully checked')
+                    write_objenc = EncodedFile(write_obj, self.__encoding,
+                                        file_encoding, 'replace')
+                    for line in read_obj:
+                        write_objenc.write(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
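codecs.EncodedFile, newly imported at the top of convert_to_tags.py, wraps a file object so that data written in one encoding is transcoded to another, with 'replace' substituting anything unrepresentable. A standalone sketch of that recoding step, under the assumption of byte-mode files and illustrative names:

    from codecs import EncodedFile

    def recode(src, dst, data_encoding, file_encoding='utf-8'):
        with open(src, 'rb') as read_obj:
            with open(dst, 'wb') as write_obj:
                # Bytes written here are decoded as data_encoding and stored
                # as file_encoding; undecodable bytes become replacements.
                recoder = EncodedFile(write_obj, data_encoding,
                                      file_encoding, 'replace')
                for line in read_obj:
                    recoder.write(line)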
@@ -11,6 +11,7 @@
 #                                                                       #
 #########################################################################
 import sys, os
+
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp
 
@@ -31,29 +32,29 @@ class Header:
         self.__bug_handler = bug_handler
         self.__copy = copy
         self.__write_to = better_mktemp()
-        self.__found_a_header = 0
+        self.__found_a_header = False
 
     def __in_header_func(self, line):
         """
        Handle all tokens that are part of header
         """
         if self.__cb_count == self.__header_bracket_count:
-            self.__in_header = 0
+            self.__in_header = False
             self.__write_obj.write(line)
             self.__write_to_head_obj.write(
-                'mi<mk<head___clo\n')
-            self.__write_to_head_obj.write(
-                'mi<tg<close_____<header-or-footer\n')
-            self.__write_to_head_obj.write(
+                'mi<mk<head___clo\n' \
+                'mi<tg<close_____<header-or-footer\n' \
                 'mi<mk<header-clo\n')
         else:
             self.__write_to_head_obj.write(line)
 
     def __found_header(self, line):
         """
         Found a header
         """
         # but this could be header or footer
-        self.__found_a_header = 1
-        self.__in_header = 1
+        self.__found_a_header = True
+        self.__in_header = True
         self.__header_count += 1
         # temporarily set this to zero so I can enter loop
         self.__cb_count = 0
|
|||||||
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
|
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
sys.stderr.write('module is header\n')
|
sys.stderr.write(
|
||||||
sys.stderr.write('method is __found_header\n')
|
'module is header\n' \
|
||||||
sys.stderr.write('no dict entry\n')
|
'method is __found_header\n' \
|
||||||
sys.stderr.write('line is %s' % line)
|
'no dict entry\n' \
|
||||||
|
'line is %s' % line)
|
||||||
self.__write_to_head_obj.write(
|
self.__write_to_head_obj.write(
|
||||||
'mi<tg<open-att__<header-or-footer<type>none\n'
|
'mi<tg<open-att__<header-or-footer<type>none\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
def __default_sep(self, line):
|
def __default_sep(self, line):
|
||||||
"""Handle all tokens that are not header tokens"""
|
"""
|
||||||
|
Handle all tokens that are not header tokens
|
||||||
|
"""
|
||||||
if self.__token_info[3:5] == 'hf':
|
if self.__token_info[3:5] == 'hf':
|
||||||
self.__found_header(line)
|
self.__found_header(line)
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __initiate_sep_values(self):
|
def __initiate_sep_values(self):
|
||||||
"""
|
"""
|
||||||
initiate counters for separate_footnotes method.
|
initiate counters for separate_footnotes method.
|
||||||
@@ -89,7 +95,7 @@ class Header:
         self.__ob_count = 0
         self.__cb_count = 0
         self.__header_bracket_count = 0
-        self.__in_header = 0
+        self.__in_header = False
         self.__header_count = 0
         self.__head_dict = {
             'head-left_'    : ('header-left'),
@@ -101,6 +107,7 @@ class Header:
             'header____'    : ('header'),
             'footer____'    : ('footer'),
         }
+
     def separate_headers(self):
         """
         Separate all the footnotes in an RTF file and put them at the bottom,
@@ -110,53 +117,47 @@ class Header:
         bottom of the main file.
         """
         self.__initiate_sep_values()
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
         self.__header_holder = better_mktemp()
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            # keep track of opening and closing brackets
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            # In the middle of footnote text
-            if self.__in_header:
-                self.__in_header_func(line)
-            # not in the middle of footnote text
-            else:
-                self.__default_sep(line)
-        self.__write_obj.close()
-        read_obj.close()
-        self.__write_to_head_obj.close()
-        read_obj = open(self.__header_holder, 'r')
-        write_obj = open(self.__write_to, 'a')
-        write_obj.write(
-            'mi<mk<header-beg\n')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        write_obj.write(
-            'mi<mk<header-end\n')
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        # keep track of opening and closing brackets
+                        if self.__token_info == 'ob<nu<open-brack':
+                            self.__ob_count = line[-5:-1]
+                        if self.__token_info == 'cb<nu<clos-brack':
+                            self.__cb_count = line[-5:-1]
+                        # In the middle of footnote text
+                        if self.__in_header:
+                            self.__in_header_func(line)
+                        # not in the middle of footnote text
+                        else:
+                            self.__default_sep(line)
+        with open(self.__header_holder, 'r') as read_obj:
+            with open(self.__write_to, 'a') as write_obj:
+                write_obj.write(
+                    'mi<mk<header-beg\n')
+                for line in read_obj:
+                    write_obj.write(line)
+                write_obj.write(
+                    'mi<mk<header-end\n')
         os.remove(self.__header_holder)
 
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
-            copy_obj.copy_file(self.__write_to, "header_separate.info")
+            copy_obj.copy_file(self.__write_to, "header_separate.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
 
     def update_info(self, file, copy):
         """
         Unused method
         """
         self.__file = file
         self.__copy = copy
 
     def __get_head_body_func(self, line):
         """
         Process lines in main body and look for beginning of headers.
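The effect of the rewritten separate_headers() is a simple marker protocol: header and footer tokens are diverted to a side file while the body streams through, and the side file is then appended between header-beg and header-end markers. The append step, restated as a standalone sketch:

    def append_headers(main_path, header_path):
        with open(header_path, 'r') as read_obj:
            with open(main_path, 'a') as write_obj:
                write_obj.write('mi<mk<header-beg\n')
                for line in read_obj:
                    write_obj.write(line)
                write_obj.write('mi<mk<header-end\n')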
@@ -166,6 +167,7 @@ class Header:
             self.__state = 'head'
         else:
             self.__write_obj.write(line)
+
     def __get_head_head_func(self, line):
         """
         Copy headers and footers from bottom of file to a separate, temporary file.
@@ -174,6 +176,7 @@ class Header:
             self.__state = 'body'
         else:
             self.__write_to_head_obj.write(line)
+
     def __get_headers(self):
         """
         Private method to remove footnotes from main file. Read one line from
@@ -182,21 +185,16 @@ class Header:
         These two functions do the work of separating the footnotes form the
         body.
         """
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
-        # self.__write_to = "footnote_info.data"
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            self.__token_info = line[:16]
-            if self.__state == 'body':
-                self.__get_head_body_func(line)
-            elif self.__state == 'head':
-                self.__get_head_head_func(line)
-        read_obj.close()
-        self.__write_obj.close()
-        self.__write_to_head_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        if self.__state == 'body':
+                            self.__get_head_body_func(line)
+                        elif self.__state == 'head':
+                            self.__get_head_head_func(line)
 
     def __get_head_from_temp(self, num):
         """
         Private method for joining headers and footers to body. This method
@@ -205,18 +203,17 @@ class Header:
         returns them as a string.
         """
         look_for = 'mi<mk<header-ope<' + num + '\n'
-        found_head = 0
+        found_head = False
         string_to_return = ''
-        line = 1
-        while line:
-            line = self.__read_from_head_obj.readline()
+        for line in self.__read_from_head_obj:
             if found_head:
                 if line == 'mi<mk<header-clo\n':
                     return string_to_return
-                string_to_return = string_to_return + line
+                string_to_return += line
             else:
                 if line == look_for:
-                    found_head = 1
+                    found_head = True
 
     def __join_from_temp(self):
         """
         Private method for rejoining footnotes to body. Read from the
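__get_head_from_temp() is the lookup half of that protocol: scan the header file for the numbered open marker, then collect every line until the close marker. Restated as a standalone function over any iterable of token lines (the fallback return for a missing close marker is an addition of this sketch):

    def get_header(lines, num):
        look_for = 'mi<mk<header-ope<' + num + '\n'
        found_head = False
        collected = ''
        for line in lines:
            if found_head:
                if line == 'mi<mk<header-clo\n':
                    return collected
                collected += line
            elif line == look_for:
                found_head = True
        return collected  # close marker missing: return what was gathered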
@@ -227,15 +224,13 @@ class Header:
         If no footnote marker is found, simply print out the token (line).
         """
         self.__read_from_head_obj = open(self.__header_holder, 'r')
-        read_obj = open(self.__write_to, 'r')
         self.__write_obj = open(self.__write_to2, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if line[:16] == 'mi<mk<header-ind':
-                line = self.__get_head_from_temp(line[17:-1])
-            self.__write_obj.write(line)
-        read_obj.close()
+        with open(self.__write_to, 'r') as read_obj:
+            for line in read_obj:
+                if line[:16] == 'mi<mk<header-ind':
+                    line = self.__get_head_from_temp(line[17:-1])
+                self.__write_obj.write(line)
 
     def join_headers(self):
         """
         Join the footnotes from the bottom of the file and put them in their
@@ -181,7 +181,7 @@ class Hex2Utf8:
         self.__dingbats_dict.update(dingbats_base_dict)
         self.__dingbats_dict.update(ms_dingbats_dict)
         # load dictionary for caps, and make a string for the replacement
-        self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
+        self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
         # # print self.__caps_uni_dict
         # don't think I'll need this
         ##keys = self.__caps_uni_dict.keys()
@@ -11,14 +11,18 @@
 #                                                                       #
 #########################################################################
 import sys
-"""
-"""
+
 class OldRtf:
     """
     Check to see if the RTF is an older version
     Logic:
+        If allowable control word/properties happen in text without being enclosed
+        in brackets the file will be considered old rtf
     """
-    def __init__(self, in_file, bug_handler, run_level ):
+    def __init__(self, in_file,
+            bug_handler,
+            run_level,
+        ):
         """
         Required:
             'file'--file to parse
@@ -32,46 +36,46 @@ class OldRtf:
         """
         self.__file = in_file
         self.__bug_handler = bug_handler
-        self.__initiate_values()
-        self.__ob_group = 0
-    def __initiate_values(self):
-        self.__previous_token = ''
-        self.__new_found = 0
+        self.__run_level = run_level
         self.__allowable = [
             'annotation',
             'blue______',
             'bold______',
             'caps______',
             'char-style',
             'dbl-strike',
             'emboss____',
             'engrave___',
             'font-color',
             'font-down_',
             'font-size_',
             'font-style',
             'font-up___',
             'footnot-mk',
             'green_____',
             'hidden____',
             'italics___',
             'outline___',
             'red_______',
             'shadow____',
             'small-caps',
             'strike-thr',
             'subscript_',
             'superscrip',
             'underlined',
         ]
-        self.__state = 'before_body'
         self.__action_dict = {
             'before_body'   : self.__before_body_func,
             'in_body'       : self.__check_tokens_func,
             'after_pard'    : self.__after_pard_func,
         }
-        self.__is_old = 0
+
+    def __initiate_values(self):
+        self.__previous_token = ''
+        self.__state = 'before_body'
         self.__found_new = 0
+        self.__ob_group = 0
 
     def __check_tokens_func(self, line):
         if self.__inline_info in self.__allowable:
             if self.__ob_group == self.__base_ob_count:
@@ -80,48 +84,56 @@ class OldRtf:
                 self.__found_new += 1
         elif self.__token_info == 'cw<pf<par-def___':
             self.__state = 'after_pard'
 
     def __before_body_func(self, line):
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'in_body'
             self.__base_ob_count = self.__ob_group
 
     def __after_pard_func(self, line):
         if line[0:2] != 'cw':
             self.__state = 'in_body'
 
     def check_if_old_rtf(self):
         """
         Requires:
             nothing
         Returns:
-            1 if file is older RTf
-            0 if file is newer RTF
+            True if file is older RTf
+            False if file is newer RTF
         """
+        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        line = 1
         line_num = 0
-        while line:
-            line = read_obj.readline()
-            line_num += 1
-            self.__token_info = line[:16]
-            if self.__token_info == 'mi<mk<body-close':
-                return 0
-            self.__ob_group = 0
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_group += 1
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__ob_group -= 1
-                self.__cb_count = line[-5:-1]
-            self.__inline_info = line[6:16]
-            if self.__state == 'after_body':
-                return 0
-            action = self.__action_dict.get(self.__state)
-            if not action:
-                sys.stderr.write('No action for state!\n')
-            result = action(line)
-            if result == 'new_rtf':
-                return 0
-            elif result == 'old_rtf':
-                return 1
-            self.__previous_token = line[6:16]
-        return 0
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                line_num += 1
+                self.__token_info = line[:16]
+                if self.__token_info == 'mi<mk<body-close':
+                    return False
+                if self.__token_info == 'ob<nu<open-brack':
+                    self.__ob_group += 1
+                    self.__ob_count = line[-5:-1]
+                if self.__token_info == 'cb<nu<clos-brack':
+                    self.__ob_group -= 1
+                    self.__cb_count = line[-5:-1]
+                self.__inline_info = line[6:16]
+                if self.__state == 'after_body':
+                    return False
+                action = self.__action_dict.get(self.__state)
+                if action is None:
+                    try:
+                        sys.stderr.write('No action for this state!\n')
+                    except:
+                        pass
+                result = action(line)
+                if result == 'new_rtf':
+                    return False
+                elif result == 'old_rtf':
+                    if self.__run_level > 3:
+                        sys.stderr.write(
+                            'Old rtf construction %s (bracket %s, line %s)\n'
+                            % (self.__inline_info, str(self.__ob_group), line_num)
+                        )
+                    return True
+                self.__previous_token = line[6:16]
+        return False
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, codecs
|
import sys, os
|
||||||
|
# , codecs
|
||||||
|
|
||||||
class Output:
|
class Output:
|
||||||
"""
|
"""
|
||||||
Output file
|
Output file
|
||||||
@@ -19,7 +21,8 @@ class Output:
             file,
             orig_file,
             output_dir = None,
-            out_file = None
+            out_file = None,
+            no_ask = True
         ):
         """
         Required:
@@ -33,8 +36,9 @@ class Output:
         self.__file = file
         self.__orig_file = orig_file
         self.__output_dir = output_dir
-        self.__no_ask = 1
+        self.__no_ask = no_ask
         self.__out_file = out_file
 
     def output(self):
         """
         Required:
@@ -45,13 +49,14 @@ class Output:
         output the line to the screen if no output file given. Otherwise, output to
         the file.
         """
-        # self.__output_xml(self.__file, self.__out_file)
         if self.__output_dir:
             self.__output_to_dir_func()
         elif self.__out_file:
-            self.__output_xml(self.__file, self.__out_file)
+            self.__output_to_file_func()
+            # self.__output_xml(self.__file, self.__out_file)
         else:
             self.__output_to_standard_func()
 
     def __output_to_dir_func(self):
         """
         Requires:
@@ -64,32 +69,25 @@ class Output:
         """
         base_name = os.path.basename(self.__orig_file)
         base_name, ext = os.path.splitext(base_name)
-        output_file = '%s.xml' % base_name
-        output_file = os.path.join(self.__output_dir, output_file)
+        output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
         # change if user wants to output to a specific file
         if self.__out_file:
             output_file = os.path.join(self.__output_dir, self.__out_file)
         user_response = 'o'
-        if os.path.isfile(output_file):
-            if self.__no_ask:
-                user_response = 'o'
-            else:
-                msg = 'Do you want to over-write %s?\n' % output_file
-                msg += 'Type "o" to over-write.\n'
-                msg += 'Type any other key to print to standard output.\n'
-                sys.stderr.write(msg)
-                user_response = raw_input()
+        if os.path.isfile(output_file) and not self.__no_ask:
+            msg = 'Do you want to overwrite %s?\n' % output_file
+            msg += ('Type "o" to overwrite.\n'
+                'Type any other key to print to standard output.\n')
+            sys.stderr.write(msg)
+            user_response = raw_input()
         if user_response == 'o':
-            read_obj = open(self.__file, 'r')
-            write_obj = open(output_file, 'w')
-            line = 1
-            while line:
-                line = read_obj.readline()
-                write_obj.write(line)
-            read_obj.close()
-            write_obj.close()
+            with open(self.__file, 'r') as read_obj:
+                with open(output_file, 'w') as write_obj:
+                    for line in read_obj:
+                        write_obj.write(line)
         else:
             self.__output_to_standard_func()
 
     def __output_to_file_func(self):
         """
         Required:
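A hypothetical call sketch for the reworked class: batch callers keep the default no_ask=True and overwrite silently, while an interactive tool can pass no_ask=False to get the stderr prompt shown above (argument names are those of the constructor; the file paths are illustrative):

    out = Output('tokens.data', 'book.rtf',
                 output_dir='/tmp', out_file='book.xml',
                 no_ask=True)   # False would prompt before overwriting
    out.output()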
@@ -99,14 +97,11 @@ class Output:
         Logic:
             read one line at a time. Output to standard
         """
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__out_file, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__out_file, 'w') as write_obj:
+                for line in read_obj:
+                    write_obj.write(line)
 
     def __output_to_standard_func(self):
         """
         Required:
@@ -116,26 +111,24 @@ class Output:
         Logic:
             read one line at a time. Output to standard
         """
-        read_obj = open(self.__file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            sys.stdout.write(line)
-        read_obj.close()
-    def __output_xml(self, in_file, out_file):
-        """
-        output the ill-formed xml file
-        """
-        (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
-        write_obj = utf8_writer(open(out_file, 'w'))
-        write_obj = open(out_file, 'w')
-        read_obj = utf8_writer(open(in_file, 'r'))
-        read_obj = open(in_file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if isinstance(line, type(u"")):
-                line = line.encode("utf-8")
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                sys.stdout.write(line)
+
+    # def __output_xml(self, in_file, out_file):
+    #     """
+    #     output the ill-formed xml file
+    #     """
+    #     (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
+    #     write_obj = utf8_writer(open(out_file, 'w'))
+    #     write_obj = open(out_file, 'w')
+    #     read_obj = utf8_writer(open(in_file, 'r'))
+    #     read_obj = open(in_file, 'r')
+    #     line = 1
+    #     while line:
+    #         line = read_obj.readline()
+    #         if isinstance(line, type(u"")):
+    #             line = line.encode("utf-8")
+    #         write_obj.write(line)
+    #     read_obj.close()
+    #     write_obj.close()
@@ -11,31 +11,32 @@
 #                                                                       #
 #########################################################################
 import sys, os
 
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp
+
 class Paragraphs:
     """
     =================
     Purpose
     =================
     Write paragraph tags for a tokenized file. (This module won't be any use to use
     to you unless you use it as part of the other modules.)
     -------------
     Method
     -------------
     RTF does not tell you when a paragraph begins. It only tells you when the
     paragraph ends.
     In order to make paragraphs out of this limited info, the parser starts in the
     body of the documents and assumes it is not in a paragraph. It looks for clues
     to begin a paragraph. Text starts a paragraph; so does an inline field or
     list-text. If an end of paragraph marker (\par) is found, then this indicates
     a blank paragraph.
     Once a paragraph is found, the state changes to 'paragraph.' In this state,
     clues are looked to for the end of a paragraph. The end of a paragraph marker
     (\par) marks the end of a paragraph. So does the end of a footnote or heading;
-    a paragraph definintion; the end of a field-block; and the beginning of a
+    a paragraph definition; the end of a field-block; and the beginning of a
     section. (How about the end of a section or the end of a field-block?)
     """
     def __init__(self,
             in_file,
@@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_empty_para = write_empty_para
         self.__run_level = run_level
         self.__write_to = better_mktemp()
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__paragraph_dict = {
             'cw<pf<par-end___'  : self.__close_para_func,   # end of paragraph
             'mi<mk<headi_-end'  : self.__close_para_func,   # end of header or footer
-            ##'cw<pf<par-def___'  : self.__close_para_func,   # paragraph definition
+            ## 'cw<pf<par-def___'  : self.__close_para_func,   # paragraph definition
             # 'mi<mk<fld-bk-end'  : self.__close_para_func,   # end of field-block
             'mi<mk<fldbk-end_'  : self.__close_para_func,   # end of field-block
             'mi<mk<body-close'  : self.__close_para_func,   # end of body
@@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
             'mi<mk<pict-start'  : self.__start_para_func,
             'cw<pf<page-break'  : self.__empty_pgbk_func,   # page break
         }
+
     def __before_body_func(self, line):
         """
         Required:
@@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'not_paragraph'
         self.__write_obj.write(line)
+
     def __not_paragraph_func(self, line):
         """
         Required:
@@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
         if action:
             action(line)
         self.__write_obj.write(line)
+
     def __paragraph_func(self, line):
         """
         Required:
@@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
             action(line)
         else:
             self.__write_obj.write(line)
+
     def __start_para_func(self, line):
         """
         Requires:
@@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
             )
         self.__write_obj.write(self.__start2_marker)
         self.__state = 'paragraph'
+
     def __empty_para_func(self, line):
         """
         Requires:
@@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
             'mi<tg<empty_____<para\n'
             )
         self.__write_obj.write(self.__end_marker)   # marker for later parsing
+
     def __empty_pgbk_func(self, line):
         """
         Requires:
@@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_obj.write(
             'mi<tg<empty_____<page-break\n'
             )
+
     def __close_para_func(self, line):
         """
         Requires:
@@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_obj.write(self.__end_marker)   # marker for later parser
         self.__write_obj.write(line)
         self.__state = 'not_paragraph'
+
     def __bogus_para__def_func(self, line):
         """
         Requires:
@@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
         if a \pard occurs in a paragraph, I want to ignore it. (I believe)
         """
         self.__write_obj.write('mi<mk<bogus-pard\n')
+
     def make_paragraphs(self):
         """
         Requires:
@@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
             only other state is 'paragraph'.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module sections.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        try:
+                            sys.stderr.write('no matching state in module paragraphs.py\n')
+                            sys.stderr.write(self.__state + '\n')
+                        except:
+                            pass
+                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "paragraphs.data")
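The paragraph logic described in the docstring is a two-state machine: text-like tokens open a paragraph, end-of-paragraph tokens close it. A compressed toy version, handling only one opening and one closing token (token names follow the module; the marker spellings here are illustrative, the real pass writes its own start and end markers):

    def mark_paragraphs(tokens):
        out, state = [], 'not_paragraph'
        for line in tokens:
            info = line[:16]
            if state == 'not_paragraph' and info == 'tx<nu<__________':
                out.append('mi<mk<para-start\n')   # illustrative marker
                state = 'paragraph'
            out.append(line)
            if state == 'paragraph' and info == 'cw<pf<par-end___':
                out.append('mi<mk<para-end__\n')   # illustrative marker
                state = 'not_paragraph'
        return out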
@ -11,16 +11,24 @@
|
|||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys,os
|
import sys,os
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy
|
from calibre.ebooks.rtf2xml import copy
|
||||||
|
|
||||||
class Preamble:
|
class Preamble:
|
||||||
"""
|
"""
|
||||||
Fix the reamaing parts of the preamble. This module does very little. It
|
Fix the reamaing parts of the preamble. This module does very little. It
|
||||||
makes sure that no text gets put in the revision of list table. In the
|
makes sure that no text gets put in the revision of list table. In the
|
||||||
future, when I understand how to interprett he revision table and list
|
future, when I understand how to interpret the revision table and list
|
||||||
table, I will make these methods more functional.
|
table, I will make these methods more functional.
|
||||||
"""
|
"""
|
||||||
def __init__(self, file, bug_handler, platform, default_font, code_page,
|
def __init__(self, file,
|
||||||
copy=None, temp_dir=None):
|
bug_handler,
|
||||||
|
platform,
|
||||||
|
default_font,
|
||||||
|
code_page,
|
||||||
|
copy=None,
|
||||||
|
temp_dir=None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Required:
|
Required:
|
||||||
file--file to parse
|
file--file to parse
|
||||||
@ -44,6 +52,7 @@ class Preamble:
|
|||||||
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
|
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
|
||||||
else:
|
else:
|
||||||
self.__write_to = "info_table_info.data"
|
self.__write_to = "info_table_info.data"
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -62,12 +71,14 @@ class Preamble:
|
|||||||
         'mi<mk<revtbl-beg' : self.__found_revision_table_func,
         'mi<mk<body-open_' : self.__found_body_func,
         }

     def __default_func(self, line):
         action = self.__default_dict.get(self.__token_info)
         if action:
             action(line)
         else:
             self.__write_obj.write(line)

     def __found_rtf_head_func(self, line):
         """
         Requires:
@ -84,8 +95,10 @@ class Preamble:
             '<platform>%s\n' % (self.__default_font, self.__code_page,
                 self.__platform)
             )

     def __found_list_table_func(self, line):
         self.__state = 'list_table'

     def __list_table_func(self, line):
         if self.__token_info == 'mi<mk<listabend_':
             self.__state = 'default'
@ -93,8 +106,10 @@ class Preamble:
             pass
         else:
             self.__write_obj.write(line)

     def __found_revision_table_func(self, line):
         self.__state = 'revision'

     def __revision_table_func(self, line):
         if self.__token_info == 'mi<mk<revtbl-end':
             self.__state = 'default'
@ -102,11 +117,14 @@ class Preamble:
             pass
         else:
             self.__write_obj.write(line)

     def __found_body_func(self, line):
         self.__state = 'body'
         self.__write_obj.write(line)

     def __body_func(self, line):
         self.__write_obj.write(line)

     def fix_preamble(self):
         """
         Requires:
@ -119,20 +137,15 @@ class Preamble:
         the list table.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module preamble_rest.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write(
+                            'no matching state in module preamble_rest.py\n' + self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "preamble_div.data")
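The fix_preamble hunk above replaces a hand-rolled readline() loop with with-blocks and direct iteration over the file object, so both files are closed even if a handler raises. A minimal standalone sketch of the same state-dispatch loop; the names (dispatch_lines, state_dict) are hypothetical, not calibre's API:

    import sys

    def dispatch_lines(in_path, out_path, state_dict, state='default'):
        with open(in_path, 'r') as read_obj:        # both files are closed
            with open(out_path, 'w') as write_obj:  # even if a handler raises
                for line in read_obj:
                    action = state_dict.get(state)
                    if action is None:
                        sys.stderr.write('no matching state: %s\n' % state)
                        continue  # unlike the original, skip rather than call None
                    action(line, write_obj)

    # Example: a one-state dict that copies every line through unchanged
    # (assumes 'in.data' exists):
    dispatch_lines('in.data', 'out.data',
                   {'default': lambda line, out: out.write(line)})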

@ -11,43 +11,44 @@
 # #
 #########################################################################
 import sys, os

 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp

 class Sections:
     """
     =================
     Purpose
     =================
     Write section tags for a tokenized file. (This module won't be any use to use
     to you unless you use it as part of the other modules.)
     ---------------
     logic
     ---------------
     The tags for the first section breaks have already been written.
     RTF stores section breaks with the \sect tag. Each time this tag is
     encountered, add one to the counter.
     When I encounter the \sectd tag, I want to collect all the appropriate tokens
     that describe the section. When I reach a \pard, I know I an stop collecting
     tokens and write the section tags.
     The exception to this method occurs when sections occur in field blocks, such
     as the index. Normally, two section break occur within the index and other
     field-blocks. (If less or more section breaks occurr, this code may not work.)
-    I want the sections to occurr outside of the index. That is, the index
+    I want the sections to occur outside of the index. That is, the index
     should be nested inside one section tag. After the index is complete, a new
     section should begin.
     In order to write the sections outside of the field blocks, I have to store
     all of the field block as a string. When I ecounter the \sect tag, add one to
     the section counter, but store this number in a list. Likewise, store the
     information describing the section in another list.
     When I reach the end of the field block, choose the first item from the
     numbered list as the section number. Choose the first item in the description
     list as the values and attributes of the section. Enclose the field string
     between the section tags.
     Start a new section outside the field-block strings. Use the second number in
     the list; use the second item in the description list.
     CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
     Instead, ingore all section information in a field-block.
     """
     def __init__(self,
             in_file,
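The docstring above describes the bookkeeping in prose. A rough toy model of that counting scheme (hypothetical and simplified, not the real Sections class): every \sect bumps a counter, and inside a field block the section numbers and descriptions are buffered rather than written out immediately.

    class SectionCounter(object):
        """Toy model of the logic described above, not calibre's Sections."""

        def __init__(self):
            self.count = 0
            self.in_field_block = False
            self.pending = []  # (number, description) pairs seen in a field block

        def handle_sect(self, description):
            self.count += 1
            if self.in_field_block:
                self.pending.append((self.count, description))  # defer
            else:
                print('<section num="%d" desc="%s">' % (self.count, description))

        def end_field_block(self):
            # Per the 2004-04-26 change noted above, section info inside a
            # field block is simply discarded rather than replayed.
            self.in_field_block = False
            self.pending = []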

@ -137,8 +137,9 @@ def _config(): # {{{
     c.add_opt('LRF_ebook_viewer_options', default=None,
             help=_('Options for the LRF ebook viewer'))
     c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
-        'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
-        help=_('Formats that are viewed using the internal viewer'))
+        'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
+        'SNB', 'HTMLZ'], help=_(
+        'Formats that are viewed using the internal viewer'))
     c.add_opt('column_map', default=ALL_COLUMNS,
             help=_('Columns to be displayed in the book list'))
     c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

@ -10,7 +10,7 @@ from functools import partial

 from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog,
     QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize,
-    QCoreApplication)
+    QCoreApplication, pyqtSignal)

 from calibre import isbytestring, sanitize_file_name_unicode
 from calibre.constants import filesystem_encoding, iswindows
@ -142,6 +142,7 @@ class ChooseLibraryAction(InterfaceAction):
     dont_add_to = frozenset(['context-menu-device'])
     action_add_menu = True
     action_menu_clone_qaction = _('Switch/create library...')
+    restore_view_state = pyqtSignal(object)

     def genesis(self):
         self.base_text = _('%d books')
@ -206,6 +207,17 @@ class ChooseLibraryAction(InterfaceAction):
             self.maintenance_menu.addAction(ac)

         self.choose_menu.addMenu(self.maintenance_menu)
+        self.view_state_map = {}
+        self.restore_view_state.connect(self._restore_view_state,
+                type=Qt.QueuedConnection)
+
+    @property
+    def preserve_state_on_switch(self):
+        ans = getattr(self, '_preserve_state_on_switch', None)
+        if ans is None:
+            self._preserve_state_on_switch = ans = \
+                self.gui.library_view.preserve_state(require_selected_ids=False)
+        return ans

     def pick_random(self, *args):
         self.gui.iactions['Pick Random Book'].pick_random()
@ -221,6 +233,13 @@ class ChooseLibraryAction(InterfaceAction):
     def library_changed(self, db):
         self.stats.library_used(db)
         self.build_menus()
+        state = self.view_state_map.get(self.stats.canonicalize_path(
+            db.library_path), None)
+        if state is not None:
+            self.restore_view_state.emit(state)
+
+    def _restore_view_state(self, state):
+        self.preserve_state_on_switch.state = state

     def initialization_complete(self):
         self.library_changed(self.gui.library_view.model().db)
@ -401,8 +420,11 @@ class ChooseLibraryAction(InterfaceAction):
     def switch_requested(self, location):
         if not self.change_library_allowed():
             return
+        db = self.gui.library_view.model().db
+        current_lib = self.stats.canonicalize_path(db.library_path)
+        self.view_state_map[current_lib] = self.preserve_state_on_switch.state
         loc = location.replace('/', os.sep)
-        exists = self.gui.library_view.model().db.exists_at(loc)
+        exists = db.exists_at(loc)
         if not exists:
             d = MovedDialog(self.stats, location, self.gui)
             ret = d.exec_()
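Two details in this hunk carry the "preserve position when switching libraries" feature: the saved state is keyed by the canonicalized library path, and the restore goes through a signal connected with type=Qt.QueuedConnection, so _restore_view_state runs on a later event-loop pass, after the library switch has fully completed. A standalone sketch of the queued-connection behaviour (hypothetical example, not calibre code):

    from PyQt4.Qt import QCoreApplication, QObject, Qt, QTimer, pyqtSignal

    class StateKeeper(QObject):
        restore_view_state = pyqtSignal(object)

        def __init__(self):
            QObject.__init__(self)
            self.restore_view_state.connect(self._restore,
                    type=Qt.QueuedConnection)

        def _restore(self, state):
            print('restored: %r' % state)  # runs from the event loop, not emit()

    app = QCoreApplication([])
    keeper = StateKeeper()
    keeper.restore_view_state.emit({'selected_ids': [1, 2]})
    print('emit() returned before the slot ran')
    QTimer.singleShot(0, app.quit)
    app.exec_()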

@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from functools import partial
+from collections import Counter

 from PyQt4.Qt import QObject, QTimer

@ -117,13 +118,14 @@ class DeleteAction(InterfaceAction):

     def _get_selected_formats(self, msg, ids):
         from calibre.gui2.dialogs.select_formats import SelectFormats
-        fmts = set([])
+        c = Counter()
         db = self.gui.library_view.model().db
         for x in ids:
             fmts_ = db.formats(x, index_is_id=True, verify_formats=False)
             if fmts_:
-                fmts.update(frozenset([x.lower() for x in fmts_.split(',')]))
-        d = SelectFormats(list(sorted(fmts)), msg, parent=self.gui)
+                for x in frozenset([x.lower() for x in fmts_.split(',')]):
+                    c[x] += 1
+        d = SelectFormats(c, msg, parent=self.gui)
         if d.exec_() != d.Accepted:
             return None
         return d.selected_formats
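The flat set of format names becomes a collections.Counter, so the dialog can know how many of the selected books actually have each format (the matching change to SelectFormats itself is in a file not shown here). A worked example of the counting loop:

    from collections import Counter

    # Three books' format lists, as db.formats() might return them
    # (illustrative data):
    rows = ['EPUB,MOBI', 'EPUB', 'EPUB,PDF']
    c = Counter()
    for fmts_ in rows:
        for fmt in frozenset(x.lower() for x in fmts_.split(',')):
            c[fmt] += 1
    print(c)  # Counter({'epub': 3, 'mobi': 1, 'pdf': 1})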

@ -12,11 +12,11 @@ from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QRadioButton, QFrame,

 from calibre import as_unicode
 from calibre.constants import isosx
-from calibre.gui2 import error_dialog, question_dialog, open_local_file
+from calibre.gui2 import error_dialog, question_dialog, open_local_file, gprefs
 from calibre.gui2.actions import InterfaceAction
 from calibre.ptempfile import (PersistentTemporaryDirectory,
     PersistentTemporaryFile)
-from calibre.utils.config import prefs
+from calibre.utils.config import prefs, tweaks

 class TweakBook(QDialog):

@ -32,11 +32,16 @@ class TweakBook(QDialog):
             index_is_id=True))

         button = self.fmt_choice_buttons[0]
+        button_map = {unicode(x.text()):x for x in self.fmt_choice_buttons}
         of = prefs['output_format'].upper()
-        for x in self.fmt_choice_buttons:
-            if unicode(x.text()) == of:
-                button = x
-                break
+        df = tweaks.get('default_tweak_format', None)
+        lf = gprefs.get('last_tweak_format', None)
+        if df and df.lower() == 'remember' and lf in button_map:
+            button = button_map[lf]
+        elif df and df.upper() in button_map:
+            button = button_map[df.upper()]
+        elif of in button_map:
+            button = button_map[of]
         button.setChecked(True)

         self.init_state()
@ -148,6 +153,8 @@ class TweakBook(QDialog):

     def explode(self):
         self.show_msg(_('Exploding, please wait...'))
+        if len(self.fmt_choice_buttons) > 1:
+            gprefs.set('last_tweak_format', self.current_format.upper())
         QTimer.singleShot(5, self.do_explode)

     def ask_question(self, msg):
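The replacement logic resolves the pre-checked format button in priority order: a default_tweak_format tweak of 'remember' restores the last format tweaked (saved in explode() via gprefs), an explicit default_tweak_format wins next, and the preferred output format is the fallback. A plain-dict sketch of that resolution (hypothetical standalone version, not calibre's tweaks/gprefs stores):

    def pick_tweak_format(available, tweaks, gprefs, output_format):
        df = tweaks.get('default_tweak_format')
        lf = gprefs.get('last_tweak_format')
        if df and df.lower() == 'remember' and lf in available:
            return lf            # 'remember': reuse the last tweaked format
        if df and df.upper() in available:
            return df.upper()    # explicit default format
        if output_format in available:
            return output_format # fall back to the preferred output format
        return available[0]

    print(pick_tweak_format(['EPUB', 'AZW3'],
            {'default_tweak_format': 'remember'},
            {'last_tweak_format': 'AZW3'}, 'EPUB'))  # AZW3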

@ -161,8 +161,14 @@ class EditorWidget(QWebView): # {{{
         self.page().setContentEditable(True)

     def clear_text(self, *args):
+        us = self.page().undoStack()
+        us.beginMacro('clear all text')
         self.action_select_all.trigger()
-        self.action_cut.trigger()
+        self.action_remove_format.trigger()
+        self.exec_command('delete')
+        us.endMacro()
+        self.set_font_style()
+        self.setFocus(Qt.OtherFocusReason)

     def link_clicked(self, url):
         open_url(url)
@ -262,20 +268,22 @@ class EditorWidget(QWebView): # {{{

         def fset(self, val):
             self.setHtml(val)
-            fi = QFontInfo(QApplication.font(self))
-            f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
-            fam = unicode(fi.family()).strip().replace('"', '')
-            if not fam:
-                fam = 'sans-serif'
-            style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
-
-            # toList() is needed because PyQt on Debian is old/broken
-            for body in self.page().mainFrame().documentElement().findAll('body').toList():
-                body.setAttribute('style', style)
-            self.page().setContentEditable(True)
+            self.set_font_style()

         return property(fget=fget, fset=fset)

+    def set_font_style(self):
+        fi = QFontInfo(QApplication.font(self))
+        f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
+        fam = unicode(fi.family()).strip().replace('"', '')
+        if not fam:
+            fam = 'sans-serif'
+        style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
+
+        # toList() is needed because PyQt on Debian is old/broken
+        for body in self.page().mainFrame().documentElement().findAll('body').toList():
+            body.setAttribute('style', style)
+        self.page().setContentEditable(True)
+
     def keyPressEvent(self, ev):
         if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab):
             ev.ignore()
@ -627,4 +635,6 @@ if __name__ == '__main__':
     w = Editor()
     w.resize(800, 600)
     w.show()
+    w.html = '<b>testing</b>'
+    app.exec_()
     #print w.html
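clear_text now wraps select-all, remove-format and delete in a single macro on the page's QUndoStack, so one Ctrl+Z restores the cleared comment instead of undoing each step separately. A standalone QUndoStack illustration of beginMacro()/endMacro() (hypothetical example, not calibre code):

    from PyQt4.Qt import QUndoCommand, QUndoStack

    class Append(QUndoCommand):
        def __init__(self, buf, ch):
            QUndoCommand.__init__(self, 'append %s' % ch)
            self.buf, self.ch = buf, ch

        def redo(self):
            self.buf.append(self.ch)

        def undo(self):
            self.buf.pop()

    buf, us = [], QUndoStack()
    us.beginMacro('clear all text')
    for ch in 'abc':
        us.push(Append(buf, ch))  # push() also calls redo()
    us.endMacro()
    print(buf)  # ['a', 'b', 'c']
    us.undo()   # one undo reverts the whole macro
    print(buf)  # []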

@ -126,7 +126,8 @@ class BulkConfig(Config):
     def setup_output_formats(self, db, preferred_output_format):
         if preferred_output_format:
             preferred_output_format = preferred_output_format.lower()
-        output_formats = sorted(available_output_formats())
+        output_formats = sorted(available_output_formats(),
+                key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
         output_formats.remove('oeb')
         preferred_output_format = preferred_output_format if \
             preferred_output_format and preferred_output_format \
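The sort key maps EPUB and MOBI to the sentinels '!A' and '!B'; since '!' sorts before every letter, those two float to the top of the dropdown while the remaining formats stay alphabetical:

    formats = ['azw3', 'epub', 'fb2', 'mobi', 'pdf']
    print(sorted(formats,
                 key=lambda x: {'EPUB': '!A', 'MOBI': '!B'}.get(x.upper(), x)))
    # ['epub', 'mobi', 'azw3', 'fb2', 'pdf']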

@ -109,12 +109,18 @@
       </item>
       <item row="0" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_left">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -132,12 +138,18 @@
       </item>
       <item row="1" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_top">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -155,12 +167,18 @@
       </item>
       <item row="2" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_right">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -178,12 +196,18 @@
       </item>
       <item row="3" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_bottom">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
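All four margin spinboxes gain the same pair of properties: a minimum of -1 and a specialValueText. Qt shows specialValueText in place of the number whenever a spinbox sits at its minimum, so -1 acts as a "No margin" sentinel below the real 0-200 pt range. A minimal sketch of the equivalent setup in code:

    from PyQt4.Qt import QApplication, QDoubleSpinBox

    app = QApplication([])
    box = QDoubleSpinBox()
    box.setDecimals(1)
    box.setMinimum(-1.0)   # the sentinel value
    box.setMaximum(200.0)
    box.setSuffix(' pt')
    box.setSpecialValueText('No margin')
    box.setValue(-1.0)     # at the minimum, the special text is displayed
    print(box.text())      # 'No margin'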

@ -242,7 +242,8 @@ class Config(ResizableDialog, Ui_Dialog):
             preferred_output_format):
         if preferred_output_format:
             preferred_output_format = preferred_output_format.lower()
-        output_formats = sorted(available_output_formats())
+        output_formats = sorted(available_output_formats(),
+                key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
         output_formats.remove('oeb')
         input_format, input_formats = get_input_format_for_book(db, book_id,
             preferred_input_format)

@ -349,7 +349,8 @@ class Text(Base):
         return d.exec_()

     def edit(self):
-        if self.getter() != self.initial_val:
+        if (self.getter() != self.initial_val and (self.getter() or
+            self.initial_val)):
             d = self._save_dialog(self.parent, _('Values changed'),
                 _('You have changed the values. In order to use this '
                     'editor, you must either discard or apply these '
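The widened condition treats None and the empty string as equivalent: the "Values changed" prompt now fires only when the values differ and at least one of them is non-empty. A quick check of the truth table:

    def needs_prompt(current, initial):
        return bool(current != initial and (current or initial))

    print(needs_prompt(None, ''))   # False: both empty, not a real change
    print(needs_prompt('tag', ''))  # True: a real change
    print(needs_prompt('a', 'a'))   # False: unchanged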

@ -182,7 +182,8 @@ class SearchDialog(QDialog, Ui_Dialog):
         global box_values
         box_values = copy.deepcopy(self.box_last_values)
         if general:
-            ans.append(unicode(self.general_combo.currentText()) + ':"' + general + '"')
+            ans.append(unicode(self.general_combo.currentText()) + ':"' +
+                       self.mc + general + '"')
         if ans:
             return ' and '.join(ans)
         return ''
Some files were not shown because too many files have changed in this diff.