Merge from trunk

This commit is contained in:
Charles Haley 2012-06-09 15:43:22 +02:00
commit 79ca483efa
146 changed files with 48795 additions and 42140 deletions

View File

@ -19,6 +19,69 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.8.55
date: 2012-06-08
new features:
- title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style."
- title: "New, subtler look for the Tag Browser"
- title: "Driver for Trekstor Pyrus and Pantech Android Tablet"
tickets: [1008946, 1007929]
- title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard."
- title: "Allow user to customize trekstor plugin to send books into sub directories."
tickets: [1007646]
- title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now."
tickets: [1008810]
- title: "Save single format to disk: Only show the format available in the selected books."
tickets: [1007287]
bug fixes:
- title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table."
tickets: [1002119]
- title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected."
tickets: [1009718]
- title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI."
- title: "SONY T1 driver: Fix support for collections of books placed on the SD card"
tickets: [986044]
- title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats"
- title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists."
tickets: [1007932 ]
- title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author"
improved recipes:
- Various Polish recipes
- Vice Magazine
- EL Mundo Today
- Haaretz
- Good Housekeeping
- El Pais
- Christian Science Monitor
- Marketing Magazine
- Instapaper
new recipes:
- title: Various Philippine news sources
author: jde
- title: Natemat.pl and wirtualnemedia.pl
author: fenuks
- title: Rabble.ca
author: timtoo
- version: 0.8.54 - version: 0.8.54
date: 2012-05-31 date: 2012-05-31

68
recipes/banat_news.recipe Normal file
View File

@ -0,0 +1,68 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class BanatNews(BasicNewsRecipe):
title = 'Banat News'
custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
__author__ = 'jde'
__date__ = '31 May 2012'
__version__ = '1.0'
description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
language = 'ceb'
publisher = 'The Philippine STAR'
category = 'news, Philippines'
tags = 'news, Philippines'
cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'
oldest_article = 1.5 #days
max_articles_per_feed = 25
simultaneous_downloads = 10
publication_type = 'newspaper'
timefmt = ' [%a, %d %b %Y %I:%M %p]'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = False
remove_tags = [dict(name='img', attrs={'id':'Image1'}) #Logo
,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'}) #Section (Headlines, Nation, Metro, ...)
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'}) #Comments
,dict(name='img', attrs={'src':'images/post-comments.jpg'}) #View Comments
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}) #Zoom
]
conversion_options = { 'title' : custom_title,
'comments' : description,
'tags' : tags,
'language' : language,
'publisher' : publisher,
'authors' : publisher,
'smarten_punctuation' : True
}
feeds = [
('Balita' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101' )
,('Opinyon' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102' )
,('Kalingawan' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104' )
,('Showbiz' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62' )
,('Palaro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103' )
,('Imong Kapalaran' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105' )
]
# process the printer friendly version of article
def print_version(self, url):
return url.replace('/Article', '/ArticlePrinterFriendly')
# obtain title from printer friendly version of article; avoiding add_toc_thumbnail changing title when article has image
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip()

View File

@ -1,152 +1,110 @@
#!/usr/bin/env python __license__ = 'GPL v3'
__license__ = 'GPL v3' __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
__author__ = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini' '''
__copyright__ = '2009, Kovid Goyal and Sujata Raman' www.csmonitor.com
__version__ = 'v1.02' '''
__date__ = '10, January 2010'
__description__ = 'Providing context and clarity on national and international news, peoples and cultures'
'''csmonitor.com'''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class CSMonitor(BasicNewsRecipe):
class ChristianScienceMonitor(BasicNewsRecipe): title = 'The Christian Science Monitor - daily'
__author__ = 'Darko Miletic'
__author__ = 'Kovid Goyal' description = 'The Christian Science Monitor is an international news organization that delivers thoughtful, global coverage via its website, weekly magazine, daily news briefing, and email newsletters.'
description = 'Providing context and clarity on national and international news, peoples and cultures' publisher = 'The Christian Science Monitor'
category = 'news, politics, USA'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' oldest_article = 2
title = 'Christian Science Monitor' max_articles_per_feed = 200
publisher = 'The Christian Science Monitor' no_stylesheets = True
category = 'News, politics, culture, economy, general interest' encoding = 'utf8'
language = 'en'
encoding = 'utf-8'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 16
max_articles_per_feed = 20
use_embedded_content = False use_embedded_content = False
recursion = 10 language = 'en'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
extra_css = """
body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.head {font-family: Georgia,"Times New Roman",Times,serif}
.sByline,.caption{font-size: x-small}
.hide{display: none}
.sLoc{font-weight: bold}
ul{list-style-type: none}
"""
remove_javascript = True conversion_options = {
no_stylesheets = True 'comment' : description
requires_version = (0, 8, 39) , 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def preprocess_raw_html(self, raw, url): remove_tags = [
try: dict(name=['meta','link','iframe','object','embed'])
from html5lib import parse ,dict(attrs={'class':['podStoryRel','bottom-rel','hide']})
root = parse(raw, namespaceHTMLElements=False, ,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
treebuilder='lxml').getroot() ]
from lxml import etree keep_only_tags = [
for tag in root.xpath( dict(name='h1', attrs={'class':'head'})
'//script|//style|//noscript|//meta|//link|//object'): ,dict(name='h2', attrs={'class':'subhead'})
tag.getparent().remove(tag) ,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
for elem in list(root.iterdescendants(tag=etree.Comment)): ]
elem.getparent().remove(elem) remove_attributes=['xmlns:fb']
ans = etree.tostring(root, encoding=unicode)
ans = re.sub('.*<html', '<html', ans, flags=re.DOTALL)
return ans
except:
import traceback
traceback.print_exc()
raise
def index_to_soup(self, url): feeds = [
raw = BasicNewsRecipe.index_to_soup(self, url, (u'USA' , u'http://rss.csmonitor.com/feeds/usa' )
raw=True).decode('utf-8') ,(u'World' , u'http://rss.csmonitor.com/feeds/world' )
raw = self.preprocess_raw_html(raw, url) ,(u'Politics' , u'http://rss.csmonitor.com/feeds/politics' )
return BasicNewsRecipe.index_to_soup(self, raw) ,(u'Business' , u'http://rss.csmonitor.com/feeds/wam' )
,(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary' )
,(u'Books' , u'http://rss.csmonitor.com/feeds/books' )
,(u'Arts' , u'http://rss.csmonitor.com/feeds/arts' )
,(u'Environment' , u'http://rss.csmonitor.com/feeds/environment')
,(u'Innovation' , u'http://rss.csmonitor.com/feeds/scitech' )
,(u'Living' , u'http://rss.csmonitor.com/feeds/living' )
,(u'Science' , u'http://rss.csmonitor.com/feeds/science' )
,(u'The Culture' , u'http://rss.csmonitor.com/feeds/theculture' )
,(u'The Home Forum', u'http://rss.csmonitor.com/feeds/homeforum' )
,(u'Articles' , u'http://rss.csmonitor.com/feeds/csarticles' )
]
def append_page(self, soup, appendtag, position): def append_page(self, soup):
nav = soup.find('div',attrs={'class':'navigation'}) pager = soup.find('div', attrs={'class':'navigation'})
if nav: if pager:
pager = nav.findAll('a') nexttag = pager.find(attrs={'id':'next-button'})
for part in pager: if nexttag:
if 'Next' in part: nurl = 'http://www.csmonitor.com' + nexttag['href']
nexturl = ('http://www.csmonitor.com' + soup2 = self.index_to_soup(nurl)
re.findall(r'href="(.*?)"', str(part))[0]) texttag = soup2.find(attrs={'class':'sBody'})
soup2 = self.index_to_soup(nexturl) if texttag:
texttag = soup2.find('div', appendtag = soup.find(attrs={'class':'sBody'})
attrs={'class': re.compile('list-article-.*')}) for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
trash_c = soup2.findAll(attrs={'class': 'list-description'}) citem.extract()
trash_h = soup2.h1 self.append_page(soup2)
for tc in trash_c: tc.extract() texttag.extract()
trash_h.extract() pager.extract()
appendtag.append(texttag)
newpos = len(texttag.contents)
self.append_page(soup2, texttag, newpos)
texttag.extract()
appendtag.insert(position, texttag)
def preprocess_html(self, soup): def preprocess_html(self, soup):
PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*') self.append_page(soup)
html = str(soup) pager = soup.find('div', attrs={'class':'navigation'})
try: if pager:
print_found = PRINT_RE.findall(html) pager.extract()
except Exception: for item in soup.findAll('a'):
pass limg = item.find('img')
if print_found: if item.string is not None:
print_url = 'http://www.csmonitor.com' + print_found[0] str = item.string
print_soup = self.index_to_soup(print_url) item.replaceWith(str)
else: else:
self.append_page(soup, soup.body, 3) if limg:
item.name = 'div'
trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')}) item.attrs = []
trash_b = soup.findAll(attrs={'style': re.compile('.*')}) else:
trash_d = soup.findAll(attrs={'class': 'sByline'}) str = self.tag_to_string(item)
for ta in trash_a: ta.extract() item.replaceWith(str)
for tb in trash_b: tb.extract() for item in soup.findAll('img'):
for td in trash_d: td.extract() if 'scorecardresearch' in item['src']:
item.extract()
print_soup = soup else:
return print_soup if not item.has_key('alt'):
item['alt'] = 'image'
extra_css = ''' return soup
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
.byline{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
.postdate{color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
h3{color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
.photoCutline{ color:#333333 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
.photoCredit{ color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
#story{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
(u'Money' , u'http://rss.csmonitor.com/feeds/wam'),
(u'Learning' , u'http://rss.csmonitor.com/feeds/learning'),
(u'Living', u'http://rss.csmonitor.com/feeds/living'),
(u'Innovation', u'http://rss.csmonitor.com/feeds/scitech'),
(u'Gardening', u'http://rss.csmonitor.com/feeds/gardening'),
(u'Environment',u'http://rss.csmonitor.com/feeds/environment'),
(u'Arts', u'http://rss.csmonitor.com/feeds/arts'),
(u'Books', u'http://rss.csmonitor.com/feeds/books'),
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
]
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
dict(name=['div','a'], attrs={'class':
['storyToolbar cfx','podStoryRel','spacer3',
'divvy spacer7','comment','storyIncludeBottom',
'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]

View File

@ -7,7 +7,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror -UK' description = 'News as provided by The Daily Mirror -UK'
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
# last updated 28/4/12 # last updated 8/6/12
language = 'en_GB' language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -28,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
dict(name='div',attrs={'class' : 'lead-text'}), dict(name='div',attrs={'class' : 'lead-text'}),
dict(name='div',attrs={'class' : 'styleGroup clearfix'}), dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}), dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
dict(name='figure',attrs={'class' : 'clearfix'}), # dict(name='figure',attrs={'class' : 'clearfix'}),
dict(name='div',attrs={'class' :'body '}), dict(name='div',attrs={'class' :'body '}),
#dict(attrs={'class' : ['article-attr','byline append-1','published']}), #dict(attrs={'class' : ['article-attr','byline append-1','published']}),
@ -37,6 +37,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
dict(attrs={'class' : 'comment'}), dict(attrs={'class' : 'comment'}),
dict(name='title'), dict(name='title'),
dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}), dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
@ -89,6 +90,3 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
#cover_url = cov2 #cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png' #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url return cover_url

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElMundoTodayRecipe(BasicNewsRecipe): class ElMundoTodayRecipe(BasicNewsRecipe):
@ -7,11 +8,32 @@ class ElMundoTodayRecipe(BasicNewsRecipe):
category = 'Noticias, humor' category = 'Noticias, humor'
cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png' cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 30 max_articles_per_feed = 60
auto_cleanup = True auto_cleanup = False
no_stylesheets = True no_stylesheets = True
remove_javascript = True
language = 'es' language = 'es'
use_embedded_content = True use_embedded_content = False
preprocess_regexps = [
(re.compile(r'</title>.*<!--Begin Article Single-->', re.DOTALL),
lambda match: '</title><body>'),
#(re.compile(r'^\t{5}<a href.*Permanent Link to ">$'), lambda match: ''),
#(re.compile(r'\t{5}</a>$'), lambda match: ''),
(re.compile(r'<div class="social4i".*</body>', re.DOTALL),
lambda match: '</body>'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'post-wrapper'})
]
remove_attributes = [ 'href', 'title', 'alt' ]
extra_css = '''
.antetitulo{font-variant:small-caps; font-weight:bold} .articleinfo{font-size:small}
img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
'''
feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')] feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]

View File

@ -10,6 +10,7 @@ class Elektroda(BasicNewsRecipe):
category = 'electronics' category = 'electronics'
language = 'pl' language = 'pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True
remove_tags_before=dict(name='span', attrs={'class':'postbody'}) remove_tags_before=dict(name='span', attrs={'class':'postbody'})
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'}) remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
remove_tags=[dict(name='a', attrs={'href':'#top'})] remove_tags=[dict(name='a', attrs={'href':'#top'})]

View File

@ -1,5 +1,6 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.elpais.com www.elpais.com
''' '''
@ -7,23 +8,24 @@ www.elpais.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElPais_RSS(BasicNewsRecipe): class ElPais_RSS(BasicNewsRecipe):
title = 'El Pais' title = u'El País'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'el periodico global en Castellano' description = u'Noticias de última hora sobre la actualidad en España y el mundo: política, economía, deportes, cultura, sociedad, tecnología, gente, opinión, viajes, moda, televisión, los blogs y las firmas de EL PAÍS. Además especiales, vídeos, fotos, audios, gráficos, entrevistas, promociones y todos los servicios de EL PAÍS.'
publisher = 'EDICIONES EL PAIS, S.L.' publisher = 'EDICIONES EL PAIS, S.L.'
category = 'news, politics, finances, world, spain' category = 'news, politics, finances, world, spain'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.elpais.com/im/tit_logo.gif' masthead_url = 'http://ep01.epimg.net/iconos/v1.x/v1.0/logos/cabecera_portada.png'
extra_css = """ extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif } h1{font-family: Georgia,"Times New Roman",Times,serif }
h3{font-family: Arial,Helvetica,sans-serif} #subtitulo_noticia, .firma, .figcaption{font-size: small}
body{font-family: Arial,Helvetica,Garuda,sans-serif}
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
""" """
@ -34,49 +36,61 @@ class ElPais_RSS(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [dict(attrs={'class':['cabecera_noticia estirar','cabecera_noticia','','contenido_noticia']})] keep_only_tags = [
remove_tags = [ dict(attrs={'id':['titulo_noticia','subtitulo_noticia']})
dict(name=['meta','link','base','iframe','embed','object']) ,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']})
,dict(attrs={'class':['info_complementa','estructura_2col_der','votos estirar','votos']}) ]
,dict(attrs={'id':'utilidades'}) remove_tags = [
dict(name=['meta','link','base','iframe','embed','object'])
,dict(attrs={'class':'disposicion_vertical'})
] ]
remove_tags_after = dict(attrs={'id':'utilidades'})
remove_attributes = ['lang','border','width','height']
feeds = [ feeds = [
(u'Lo ultimo' , u'http://www.elpais.com/rss/feed.html?feedId=17046') (u'Lo ultimo' , u'http://ep00.epimg.net/rss/tags/ultimas_noticias.xml')
,(u'America Latina' , u'http://www.elpais.com/rss/feed.html?feedId=17041') ,(u'America Latina' , u'http://elpais.com/tag/rss/latinoamerica/a/' )
,(u'Mexico' , u'http://www.elpais.com/rss/feed.html?feedId=17042') ,(u'Mexico' , u'http://elpais.com/tag/rss/mexico/a/' )
,(u'Europa' , u'http://www.elpais.com/rss/feed.html?feedId=17043') ,(u'Europa' , u'http://elpais.com/tag/rss/europa/a/' )
,(u'Estados Unidos' , u'http://www.elpais.com/rss/feed.html?feedId=17044') ,(u'Estados Unidos' , u'http://elpais.com/tag/rss/estados_unidos/a/' )
,(u'Oriente proximo' , u'http://www.elpais.com/rss/feed.html?feedId=17045') ,(u'Oriente proximo' , u'http://elpais.com/tag/rss/oriente_proximo/a/' )
,(u'Espana' , u'http://www.elpais.com/rss/feed.html?feedId=1002' ) ,(u'Andalucia' , u'http://ep00.epimg.net/rss/ccaa/andalucia.xml' )
,(u'Andalucia' , u'http://www.elpais.com/rss/feed.html?feedId=17057') ,(u'Catalunia' , u'http://ep00.epimg.net/rss/ccaa/catalunya.xml' )
,(u'Catalunia' , u'http://www.elpais.com/rss/feed.html?feedId=17059') ,(u'Comunidad Valenciana' , u'http://ep00.epimg.net/rss/ccaa/valencia.xml' )
,(u'Comunidad Valenciana' , u'http://www.elpais.com/rss/feed.html?feedId=17061') ,(u'Madrid' , u'http://ep00.epimg.net/rss/ccaa/madrid.xml' )
,(u'Madrid' , u'http://www.elpais.com/rss/feed.html?feedId=1016' ) ,(u'Pais Vasco' , u'http://ep00.epimg.net/rss/ccaa/paisvasco.xml' )
,(u'Pais Vasco' , u'http://www.elpais.com/rss/feed.html?feedId=17062') ,(u'Galicia' , u'http://ep00.epimg.net/rss/ccaa/galicia.xml' )
,(u'Galicia' , u'http://www.elpais.com/rss/feed.html?feedId=17063') ,(u'Sociedad' , u'http://ep00.epimg.net/rss/sociedad/portada.xml' )
,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' ) ,(u'Deportes' , u'http://ep00.epimg.net/rss/deportes/portada.xml' )
,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' ) ,(u'Cultura' , u'http://ep00.epimg.net/rss/cultura/portada.xml' )
,(u'Deportes' , u'http://www.elpais.com/rss/feed.html?feedId=1007' ) ,(u'Cine' , u'http://elpais.com/tag/rss/cine/a/' )
,(u'Cultura' , u'http://www.elpais.com/rss/feed.html?feedId=1008' ) ,(u'Economía' , u'http://elpais.com/tag/rss/economia/a/' )
,(u'Cine' , u'http://www.elpais.com/rss/feed.html?feedId=17052') ,(u'Literatura' , u'http://elpais.com/tag/rss/libros/a/' )
,(u'Literatura' , u'http://www.elpais.com/rss/feed.html?feedId=17053') ,(u'Musica' , u'http://elpais.com/tag/rss/musica/a/' )
,(u'Musica' , u'http://www.elpais.com/rss/feed.html?feedId=17051') ,(u'Arte' , u'http://elpais.com/tag/rss/arte/a/' )
,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060') ,(u'Medio Ambiente' , u'http://elpais.com/tag/rss/medio_ambiente/a/' )
,(u'Tecnologia' , u'http://www.elpais.com/rss/feed.html?feedId=1005' ) ,(u'Tecnologia' , u'http://ep01.epimg.net/rss/tecnologia/portada.xml' )
,(u'Economia' , u'http://www.elpais.com/rss/feed.html?feedId=1006' ) ,(u'Ciencia' , u'http://ep00.epimg.net/rss/tags/c_ciencia.xml' )
,(u'Ciencia' , u'http://www.elpais.com/rss/feed.html?feedId=17068') ,(u'Salud' , u'http://elpais.com/tag/rss/salud/a/' )
,(u'Salud' , u'http://www.elpais.com/rss/feed.html?feedId=17074') ,(u'Ocio' , u'http://elpais.com/tag/rss/ocio/a/' )
,(u'Ocio' , u'http://www.elpais.com/rss/feed.html?feedId=17075') ,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' )
,(u'Justicia y Leyes' , u'http://www.elpais.com/rss/feed.html?feedId=17069') ,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' )
,(u'Guerras y conflictos' , u'http://www.elpais.com/rss/feed.html?feedId=17070') ,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' )
,(u'Politica' , u'http://www.elpais.com/rss/feed.html?feedId=17073') ,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' )
] ]
def print_version(self, url): def get_article_url(self, article):
return url + '?print=1' url = BasicNewsRecipe.get_article_url(self, article)
if url and (not('/album/' in url) and not('/futbol/partido/' in url)):
return url
self.log('Skipping non-article', url)
return None
def get_cover_url(self):
soup = self.index_to_soup('http://elpais.com/')
for image in soup.findAll('img'):
if image['src'].endswith('elpaisTodayMiddle.jpg'):
sstr = image['src']
return sstr.replace('elpaisTodayMiddle.jpg', 'elpaisToday.jpg')
return None
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -12,8 +12,8 @@ class Gameplay_pl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript= True remove_javascript= True
no_stylesheets= True no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})] keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news', 'news_container']})]
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})] remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi', 'news_tagi']}), dict(attrs={'usemap':'#map'}), dict(name='a', attrs={'class':['pin-it-button', 'twitter-share-button']})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')] feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):

View File

@ -8,12 +8,17 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
#auto_cleanup = True
remove_javascript = True remove_javascript = True
def print_version(self,url): def print_version(self,url):
segments = url.split('/') if '/tips-for-making-desserts?' in url:
printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:]) return None
return printURL segments = url.split('/')
segments[-1] = segments[-1].split('?')[0]
segments[-1] +='?page=all'
printURL = '/'.join(segments[0:3]) + '/print-this/' + segments[-1]
return printURL
def preprocess_html(self, soup): def preprocess_html(self, soup):
for alink in soup.findAll('a'): for alink in soup.findAll('a'):
@ -22,10 +27,19 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
alink.replaceWith(tstr) alink.replaceWith(tstr)
return soup return soup
feeds = [ (u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
(u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'), #feeds = [
(u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'), #(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
(u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'), #]
(u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
(u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
] feeds = [
(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
(u'Home and Organizing', u'http://www.goodhousekeeping.com/rss/home/'),
(u'Diet and Health', u'http://www.goodhousekeeping.com/rss/health/'),
(u'Beauty and Anti-Aging', u'http://www.goodhousekeeping.com/rss/beauty/'),
(u'Family and Relationships', u'http://www.goodhousekeeping.com/rss/family/'),
(u'Holidays', u'http://www.goodhousekeeping.com/rss/holidays/'),
(u'In the Test Kitchen', 'http://www.goodhousekeeping.com/rss/test-kitchen-blog/'),
]

View File

@ -12,13 +12,16 @@ class Gram_pl(BasicNewsRecipe):
no_stylesheets= True no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})] keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')] (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
(u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
#(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss')
]
def parse_feeds (self): def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self) feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds: for feed in feeds:
for article in feed.articles[:]: for article in feed.articles[:]:
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper(): if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
@ -56,4 +59,4 @@ class Gram_pl(BasicNewsRecipe):
for a in soup('a'): for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href'] a['href']=self.index + a['href']
return soup return soup

View File

@ -1,13 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GreenLinux(BasicNewsRecipe):
    """Fetch GreenLinux.pl, a Polish-language e-reader/IT news site, from its
    Feedburner feed and let calibre's auto-cleanup strip the page chrome."""

    # Identification shown in the calibre recipe browser.
    title = u'GreenLinux.pl'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'IT'

    # Static cover artwork for the generated periodical.
    cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'

    # Download window (days) and per-feed article cap.
    oldest_article = 15
    max_articles_per_feed = 100

    # Rely on calibre's heuristic cleanup rather than hand-written tag rules.
    auto_cleanup = True

    # Single combined feed covering the whole site.
    feeds = [(u'Newsy', u'http://feeds.feedburner.com/greenlinux')]

View File

@ -1,16 +1,15 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.haaretz.com www.haaretz.com
''' '''
import re import re
from calibre import strftime import urllib
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HaaretzPrint_en(BasicNewsRecipe): class Haaretz_en(BasicNewsRecipe):
title = 'Haaretz - print edition' title = 'Haaretz'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East." description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'Haaretz' publisher = 'Haaretz'
@ -21,10 +20,16 @@ class HaaretzPrint_en(BasicNewsRecipe):
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en_IL' language = 'en_IL'
needs_subscription = True
remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
PREFIX = 'http://www.haaretz.com' PREFIX = 'http://www.haaretz.com'
masthead_url = PREFIX + '/images/logos/logoGrey.gif' masthead_url = PREFIX + '/images/logos/HaaretzLogo.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } ' extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif }
h1, .articleBody {font-family: Georgia, serif}
.authorBar {font-size: small}
"""
preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')] preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
@ -44,53 +49,42 @@ class HaaretzPrint_en(BasicNewsRecipe):
feeds = [ feeds = [
(u'News' , PREFIX + u'/print-edition/news' ) (u'Headlines' , 'http://feeds.feedburner.com/haaretz/LBao' )
,(u'Opinion' , PREFIX + u'/print-edition/opinion' ) ,(u'Opinion' , 'http://feeds.feedburner.com/haaretz/opinions' )
,(u'Business' , PREFIX + u'/print-edition/business' ) ,(u'Defence and diplomacy' , 'http://feeds.feedburner.com/DefenseAndDiplomacy' )
,(u'Real estate' , PREFIX + u'/print-edition/real-estate' ) ,(u'National' , 'http://feeds.feedburner.com/haaretz/National' )
,(u'Sports' , PREFIX + u'/print-edition/sports' ) ,(u'International' , 'http://feeds.feedburner.com/InternationalRss' )
,(u'Travel' , PREFIX + u'/print-edition/travel' ) ,(u'Jewish World' , 'http://feeds.feedburner.com/JewishWorldRss' )
,(u'Books' , PREFIX + u'/print-edition/books' ) ,(u'Business' , 'http://feeds.feedburner.com/BusinessPrintRss' )
,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' ) ,(u'Real Estate' , 'http://feeds.feedburner.com/RealEstatePrintRss' )
,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' ) ,(u'Features' , 'http://feeds.feedburner.com/FeaturesPrintRss' )
,(u'Features' , PREFIX + u'/print-edition/features' ) ,(u'Arts & Leisure' , 'http://feeds.feedburner.com/ArtsAndLeisureRss' )
,(u'Books' , 'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false')
,(u'Food & Wine' , 'http://feeds.feedburner.com/FoodAndWinePrintRss' )
,(u'Sports' , 'http://feeds.feedburner.com/haaretz/Sport' )
] ]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.PREFIX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'cb':'parseEngReply'
,'newsso':'true'
,'fromlogin':'true'
,'layer':'eng_login'
,'userName':self.username
,'password':self.password
})
br.open('https://sso.haaretz.com/sso/sso/signIn',data)
return br
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
return self.browser.open_novisit(url).geturl()
def print_version(self, url): def print_version(self, url):
article = url.rpartition('/')[2] article = url.rpartition('/')[2]
return 'http://www.haaretz.com/misc/article-print-page/' + article return 'http://www.haaretz.com/misc/article-print-page/' + article
def parse_index(self): def preprocess_raw_html(self, raw, url):
totalfeeds = [] return '<html><head>'+raw[raw.find('</head>'):]
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll(attrs={'class':'text'}):
sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
desc = item.find('p')
description = ''
if sp:
if desc:
description = self.tag_to_string(desc)
link = sp.a
url = self.PREFIX + link['href']
title = self.tag_to_string(link)
times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
articles.append({
'title' :title
,'date' :times
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -8,15 +8,21 @@ class Historia_org_pl(BasicNewsRecipe):
category = 'history' category = 'history'
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
remove_empty_feeds=True remove_empty_feeds= True
no_stylesheets = True
use_embedded_content = True
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'), feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=atom'),
(u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'), (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=atom'),
(u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'), (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=atom'),
(u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'), (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=atom'),
(u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'), (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=atom'),
(u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'), (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=atom'),
(u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'), (u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=atom'),
(u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'), (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=atom'),
(u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')] (u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=atom')]
def print_version(self, url):
return url + '?tmpl=component&print=1&layout=default&page='

Binary file not shown.

After

Width:  |  Height:  |  Size: 326 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 312 B

View File

@ -15,6 +15,10 @@ class TheIndependentNew(BasicNewsRecipe):
#Flag to enable/disable image fetching (not business) #Flag to enable/disable image fetching (not business)
_FETCH_IMAGES = True _FETCH_IMAGES = True
#Set max gallery images here (respects _FETCH_IMAGES)
# -1 for infinite
_MAX_GALLERY_IMAGES = -1
#used for converting rating to stars #used for converting rating to stars
_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png' _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
@ -41,6 +45,7 @@ class TheIndependentNew(BasicNewsRecipe):
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}), dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
dict(attrs={'class' : ['autoplay','openBiogPopup']}), dict(attrs={'class' : ['autoplay','openBiogPopup']}),
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}), dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
dict(name='img',attrs={'alt' : ['view gallery']}),
dict(attrs={'style' : re.compile('.*')}), dict(attrs={'style' : re.compile('.*')}),
] ]
@ -119,15 +124,15 @@ class TheIndependentNew(BasicNewsRecipe):
if len(para.contents) and isinstance(para.contents[0],NavigableString) \ if len(para.contents) and isinstance(para.contents[0],NavigableString) \
and para.contents[0] == 'ADVERTORIAL FEATURE': and para.contents[0] == 'ADVERTORIAL FEATURE':
return None return None
# remove Suggested Topics # remove Suggested Topics
items_to_extract = [] items_to_extract = []
for item in soup.findAll('div',attrs={'class' : re.compile('.*RelatedArtTag.*')}): for item in soup.findAll('div',attrs={'class' : re.compile('.*RelatedArtTag.*')}):
items_to_extract.append(item) items_to_extract.append(item)
for item in items_to_extract: for item in items_to_extract:
item.extract() item.extract()
items_to_extract = [] items_to_extract = []
slideshow_elements = [] slideshow_elements = []
@ -171,25 +176,43 @@ class TheIndependentNew(BasicNewsRecipe):
for item in element.findAll('a',attrs={'href' : re.compile('.*')}): for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
if item.img is not None: if item.img is not None:
#use full size image #use full size image
images = []
img = item.findNext('img') img = item.findNext('img')
img['src'] = item['href'] if not '?action=gallery' in item['href']:
img['src'] = item['href']
#insert caption if available
if img.get('title') and (len(img['title']) > 1):
tag = Tag(soup,'h3') tag = Tag(soup,'h3')
text = NavigableString(img['title']) text = ''
try:
text = img['data-title']
except:
pass
if img.get('title') and (len(img['title']) > 1):
text = NavigableString(img['title'])
tag.insert(0,text) tag.insert(0,text)
images.append((img, tag))
#picture before text else:
gallery_images, remove_link = self._get_gallery_images(item['href'])
images = images + gallery_images
if remove_link:
gal_link = soup.find('a',attrs={'id' : 'view-gallery'})
if gal_link:
gal_link.extract()
img.extract() img.extract()
item.insert(0,img) for (img, title) in images:
item.insert(1,tag) #insert caption if available
if title:
#picture before text
img.extract()
item.insert(0,img)
item.insert(1,title)
# remove link # remove link
item.name = "div" item.name = "div"
item["class"]='image' item["class"]='image'
del item["href"] del item["href"]
#remove empty subtitles #remove empty subtitles
@ -317,13 +340,51 @@ class TheIndependentNew(BasicNewsRecipe):
for item in items_to_extract: for item in items_to_extract:
item.extract() item.extract()
# nickredding's fix for non-justified text # nickredding's fix for non-justified text
for ptag in soup.findAll('p',attrs={'align':'left'}): for ptag in soup.findAll('p',attrs={'align':'left'}):
del(ptag['align']) del(ptag['align'])
return soup return soup
def _get_gallery_images(self,url):
gallery_soup = self.index_to_soup(url)
images = []
remove_link = True
total = 1
try:
counter = gallery_soup.find('div',attrs={'id' : ['counter']})
total = counter.contents[0].split('/')
total = int(total[1].rstrip())
except:
total = 1
if self._MAX_GALLERY_IMAGES >= 0 and total > self._MAX_GALLERY_IMAGES:
total = self._MAX_GALLERY_IMAGES
remove_link = False
for i in range(1, total +1):
image, title = self._get_image_from_gallery(gallery_soup)
if image:
images.append((image,title))
next = url + '&ino=' + str(i + 1)
gallery_soup = self.index_to_soup(next)
images.reverse()
return images, remove_link
def _get_image_from_gallery(self,soup):
try:
container = soup.find('div',attrs={'id' : ['main-image']})
image = container.find('img')
if image:
title = soup.find('div',attrs={'id' : ['image-title']})
return image, title
except:
print 'error fetching gallery image'
return None
def _recurisvely_linearise_tag_tree( def _recurisvely_linearise_tag_tree(
self, self,
item, item,

View File

@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324038402(BasicNewsRecipe): class AdvancedUserRecipe1324038402(BasicNewsRecipe):
title = u'La Gazzetta del Mezzogiorno' title = u'La Gazzetta del Mezzogiorno'
language = 'it'
__author__ = 'faber1971' __author__ = 'faber1971'
description = 'Italian regional magazine - Apulia' description = 'Italian regional magazine - Apulia'
oldest_article = 1 oldest_article = 1

View File

@ -0,0 +1,77 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class MalayaBusinessInsight(BasicNewsRecipe):
    """Calibre recipe for the Malaya Business Insight, a Philippine
    broadsheet, pulling its section RSS feeds from malaya.com.ph."""

    # Base title, plus a download-time-stamped variant used as the book title
    # so successive downloads are distinguishable in the library.
    title = u'Malaya Business Insight'
    custom_title = "Malaya Business Insight - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '07 June 2012'
    __version__ = '1.2'

    description = "The Malaya Business Insight is a broadsheet newspaper in the Philippines. The newspaper's name was derived from the Filipino word that means 'freedom'."
    language = 'en_PH'
    publisher = 'Malaya Business Insight'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # Site logo doubles as both cover and masthead.
    cover_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'
    masthead_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Keep only the Joomla main-content container...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'ja-main'}),
    ]
    # ...and drop navigation, sharing and heading clutter inside it.
    remove_tags = [
        dict(name='a', attrs={'class': 'ja-back-btn'}),
        dict(name='li', attrs={'class': 'print-icon'}),
        dict(name='li', attrs={'class': 'email-icon'}),
        dict(name='p', attrs={'class': 'dnn'}),
        dict(name='span', attrs={'class': 'breadcrumbs pathway'}),
        dict(name='dt', attrs={'class': 'article-info-term'}),
        dict(name='div', attrs={'class': 'ja-articles-mainwrap'}),
        dict(name='h1', attrs={'class': 'componentheading'}),
        dict(name='div', attrs={'id': 'ja-content-mass-top'}),
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Business', u'http://www.malaya.com.ph/index.php/business?format=feed&amp;type=rss'),
        (u'Market', u'http://www.malaya.com.ph/index.php/business/market?format=feed&amp;type=rss'),
        (u'Shipping and Transportation', u'http://www.malaya.com.ph/index.php/business/shipping-and-transportation?format=feed&amp;type=rss'),
        (u'Business Incidental', u'http://www.malaya.com.ph/index.php/business/business-incidental?format=feed&amp;type=rss'),
        (u'Banking and Finance', u'http://www.malaya.com.ph/index.php/special-features/banking-and-finance?format=feed&amp;type=rss'),
        (u'Motoring', u'http://www.malaya.com.ph/index.php/special-features/motoring?format=feed&amp;type=rss'),
        (u'Info Tech - Telecoms', u'http://www.malaya.com.ph/index.php/special-features/infotech-telecoms?format=feed&amp;type=rss'),
        (u'Property', u'http://www.malaya.com.ph/index.php/special-features/property?format=feed&amp;type=rss'),
        (u'Environment', u'http://www.malaya.com.ph/index.php/special-features/environment?format=feed&amp;type=rss'),
        (u'Agriculture', u'http://www.malaya.com.ph/index.php/special-features/agriculture?format=feed&amp;type=rss'),
        (u'News - National', u'http://www.malaya.com.ph/index.php/news/nation?format=feed&amp;type=rss'),
        (u'News - International', u'http://www.malaya.com.ph/index.php/news/international?format=feed&amp;type=rss'),
        (u'Sports', u'http://www.malaya.com.ph/index.php/sports?format=feed&amp;type=rss'),
        (u'Entertainment', u'http://www.malaya.com.ph/index.php/entertainment?format=feed&amp;type=rss'),
        (u'Living', u'http://www.malaya.com.ph/index.php/living?format=feed&amp;type=rss'),
        (u'Opinion', u'http://www.malaya.com.ph/index.php/opinion?format=feed&amp;type=rss'),
    ]

View File

@ -0,0 +1,54 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class ManilaStandardToday(BasicNewsRecipe):
    """Calibre recipe for the Manila Standard Today, gathering its section
    RSS feeds from the manilastandardtoday.com sub-domains."""

    # Base title plus a timestamped book title so each download is unique.
    title = u'Manila Standard Today'
    custom_title = "Manila Standard Today - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'

    description = 'The Manila Standard Today is the fourth-largest broadsheet newspaper in the Philippines as of 2006. Initially established as the Manila Standard, it merged with another newspaper of record, Today, on March 6, 2005. It was the first newspaper merger in the Philippines.'
    language = 'en_PH'
    publisher = 'Manila Standard Today'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # NOTE(review): this points at a dated front-page scan (June 06 2012) and
    # will go stale; kept as-is to preserve behaviour.
    cover_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'
    masthead_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # The article body lives in the WordPress 'main' container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Headlines', u'http://news.manilastandardtoday.com/feed/'),
        (u'Nation', u'http://news.manilastandardtoday.com/archives/nation/feed/'),
        (u'Business', u'http://business.manilastandardtoday.com/feed/'),
        (u'Metro', u'http://news.manilastandardtoday.com/archives/metro/feed/'),
        (u'Sports', u'http://sports.manilastandardtoday.com/feed/'),
        (u'Entertainment', u'http://entertainment.manilastandardtoday.com/feed/'),
        (u'Opinion', u'http://opinion.manilastandardtoday.com/feed/'),
        (u'Lifestyle', u'http://lifestyle.manilastandardtoday.com/feed/'),
    ]

View File

@ -4,6 +4,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'News as provide by The Metro -UK' description = 'News as provide by The Metro -UK'
#timefmt = '' #timefmt = ''
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
#last update 9/6/12
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
#no_stylesheets = True #no_stylesheets = True
oldest_article = 1 oldest_article = 1
@ -11,7 +12,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
auto_cleanup = True auto_cleanup = True
encoding = 'UTF-8'
language = 'en_GB' language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif' masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

View File

@ -1,18 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class naczytniki(BasicNewsRecipe):
title = u'naczytniki.pl'
__author__ = 'fenuks'
masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
language = 'pl'
description ='everything about e-readers'
category='e-readers'
no_stylesheets=True
use_embedded_content=False
oldest_article = 7
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]

15
recipes/natemat_pl.recipe Normal file
View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NaTemat(BasicNewsRecipe):
    """Calibre recipe for NaTemat.pl, a Polish news and opinion portal,
    using its single all-articles RSS feed."""

    # Identification.
    title = u'NaTemat.pl'
    __author__ = 'fenuks'
    description = u'informacje, komentarze, opinie'
    category = 'news'
    language = 'pl'

    # Cover artwork for the generated periodical.
    cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'

    # Download window and volume limits.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep the main article container; strip buttons and related-article boxes.
    keep_only_tags = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class': ['button', 'block-inside style_default', 'article-related']}),
    ]

    feeds = [(u'Artyku\u0142y', u'http://natemat.pl/rss/wszystkie')]

View File

@ -1,23 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
class AdvancedUserRecipe1306061239(BasicNewsRecipe): class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'New Musical Express Magazine' title = u'New Musical Express Magazine'
__author__ = "scissors" description = 'Author D.Asbury. UK Rock & Pop Mag. '
language = 'en' __author__ = 'Dave Asbury'
# last updated 9/6/12
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 20
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg' #auto_cleanup = True
language = 'en_GB'
def get_cover_url(self):
soup = self.index_to_soup('http://www.magazinesdirect.com/categories/mens/tv-and-music/')
cov = soup.find(attrs={'title' : 'NME magazine subscriptions'})
cov2 = 'http://www.magazinesdirect.com'+cov['src']
print '***cov = ',cov2,' ***'
cover_url = str(cov2)
# print '**** Cov url =*', cover_url,'***'
#print '**** Cov url =*','http://www.magazinesdirect.com/article_images/articledir_3138/1569221/1_largelisting.jpg','***'
br = browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = str(cov2)
except:
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
return cover_url
masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
remove_tags = [ remove_tags = [
dict( attrs={'class':'clear_icons'}), dict( attrs={'class':'clear_icons'}),
dict( attrs={'class':'share_links'}), dict( attrs={'class':'share_links'}),
dict( attrs={'id':'right_panel'}), dict( attrs={'id':'right_panel'}),
dict( attrs={'class':'today box'}) dict( attrs={'class':'today box'}),
]
]
keep_only_tags = [ keep_only_tags = [
@ -28,7 +52,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
dict(attrs={'class' : 'bPosts'}), dict(attrs={'class' : 'bPosts'}),
dict(attrs={'class' : 'text'}), dict(attrs={'class' : 'text'}),
dict(attrs={'id' : 'article_gallery'}), dict(attrs={'id' : 'article_gallery'}),
#dict(attrs={'class' : 'image'}),
dict(attrs={'class' : 'article_text'}) dict(attrs={'class' : 'article_text'})
] ]
@ -36,7 +62,8 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
feeds = [ feeds = [
(u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'), (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'), #(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
(u'Blogs', u'http://www.nme.com/blog/index.php?blog=140&tempskin=_rss2'), (u'Reviews',u'http://feed43.com/4138608576351646.xml'),
(u'Bloggs',u'http://feed43.com/3326754333186048.xml'),
] ]

View File

@ -11,7 +11,7 @@ class OCLab(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
keep_only_tags=[dict(id='main')] keep_only_tags=[dict(id='main')]
remove_tags_after= dict(attrs={'class':'single-postmetadata'}) remove_tags_after= dict(attrs={'class':'single-postmetadata'})
remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar']})] remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-enjoy']})]
feeds = [(u'Wpisy', u'http://oclab.pl/feed/')] feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]

View File

@ -0,0 +1,73 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PilipinoStarNgayon(BasicNewsRecipe):
    """Calibre recipe for Pilipino Star Ngayon (philstar.com), a Tagalog
    daily tabloid.

    Downloads the section RSS feeds, converts each article via its
    printer-friendly page, and pulls the article title from the print
    page's header label so add_toc_thumbnail cannot change it.
    """

    # Base title plus a download-time-stamped book title so successive
    # downloads are distinguishable in the library.
    title = 'Pilipino Star Ngayon'
    custom_title = "Pilipino Star Ngayon - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'

    description = 'A daily Tabloid written in Tagalog, distributed in the Philippines. A tabloid style newspaper published in the national language - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'tgl'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # Publication logo doubles as cover and masthead.
    cover_url = 'http://www.philstar.com/images/logo_PSN.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_PSN.jpg'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Strip site chrome left on the printer-friendly page.
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # section name
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # comments link
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # view-comments image
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # zoom link
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        ('Litra-talk', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=535'),
        ('Bansa', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=92'),
        ('Probinsiya', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=49'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=93'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=94'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=95'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=96'),
        ('True Confessions', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=97'),
        ('Dr. Love', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=98'),
        ('Kutob', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=99'),
        ('Komiks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=100'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for an article page."""
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Take the article title from the print page's header label.

        This avoids add_toc_thumbnail changing the title when the article
        carries an image. Guarded so a missing or empty header label on an
        unusual page no longer aborts the whole download with an
        AttributeError/IndexError (the original dereferenced the find()
        result unconditionally).
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class Polska_times(BasicNewsRecipe): class Polska_times(BasicNewsRecipe):
title = u'Polska Times' title = u'Polska Times'
__author__ = 'fenuks' __author__ = 'fenuks'
@ -11,71 +10,20 @@ class Polska_times(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_emty_feeds= True remove_emty_feeds= True
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ] #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
keep_only_tags= [dict(id=['tytul-artykulu', 'kontent'])] remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags_after= dict(id='material-tagi') remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
remove_tags=[dict(attrs={'id':'reklama_srodtekst_0'}), dict(attrs={'id':'material-tagi'}), dict(name='div', attrs={'class':'zakladki'}), dict(attrs={'title':u'CZYTAJ TAKŻE'}), dict(attrs={'id':'podobne'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')] feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
def print_version(self, url):
return url.replace('artykul', 'drukuj')
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
if 'Advertisement' in soup.title: if 'Advertisement' in soup.title:
nexturl=soup.find('a')['href'] nexturl=soup.find('a')['href']
return self.index_to_soup(nexturl, raw=True) return self.index_to_soup(nexturl, raw=True)
    def append_page(self, soup, appendtag):
        """Follow the 'nastepna_strona' (next page) link of a multi-page
        article and append every subsequent page's text to *appendtag*."""
        nexturl=soup.find(id='nastepna_strona')
        while nexturl:
            soup2= self.index_to_soup(nexturl['href'])
            # Grab the next-page link of the freshly fetched page before its
            # body is mutated below.
            nexturl=soup2.find(id='nastepna_strona')
            pagetext = soup2.find(id='tresc')
            # Re-apply this recipe's remove_tags filters to the fetched page
            # (pages fetched here presumably bypass the framework's own
            # cleanup pass — NOTE(review): assumes every remove_tags entry
            # has an 'attrs' key; verify against the class definition).
            for dictionary in self.remove_tags:
                v=pagetext.findAll(attrs=dictionary['attrs'])
                for delete in v:
                    delete.extract()
            # Drop bold "read also" cross-promotion headers (Polish phrases).
            for b in pagetext.findAll(name='b'):
                if b.string:
                    if u'CZYTAJ TEŻ' in b.string or u'Czytaj także' in b.string or u'Czytaj też' in b.string or u'Zobacz także' in b.string:
                        b.extract()
            # Remove centered <h4><a> blocks (related-article link boxes).
            for center in pagetext.findAll(name='center'):
                if center.h4:
                    if center.h4.a:
                        center.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        # Finally strip pagination widgets from the combined document.
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()
    def image_article(self, soup, appendtag):
        """Walk a photo-gallery article via its <a class="nastepna"> (next)
        links and append each gallery page to *appendtag*."""
        nexturl=soup.find('a', attrs={'class':'nastepna'})
        # Remember already-seen "next" link tags so a gallery whose last page
        # links back to an earlier one cannot loop forever.
        urls=[]
        while nexturl:
            if nexturl not in urls:
                urls.append(nexturl)
            else:
                break
            # The "next" links are relative; resolve against the article base.
            soup2= self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
            nexturl=soup2.find('a', attrs={'class':'nastepna'})
            if nexturl in urls:
                break;
            pagetext = soup2.find(id='galeria-material')
            pos = len(appendtag.contents)
            appendtag.insert(pos, '<br />')
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            # Strip gallery navigation and thumbnail-strip containers.
            for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
                rem.extract()
        # Remove pagination widgets left in the combined document.
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()
def preprocess_html(self, soup):
if soup.find('a', attrs={'class':'nastepna'}):
self.image_article(soup, soup.body)
elif soup.find(id='nastepna_strona'):
self.append_page(soup, soup.body)
return soup
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/') soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
self.cover_url=soup.find(id='pojemnik').img['src'] self.cover_url=soup.find(id='pojemnik').img['src']
return getattr(self, 'cover_url', self.cover_url) return getattr(self, 'cover_url', self.cover_url)

View File

@ -0,0 +1,70 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class Freeman(BasicNewsRecipe):
    """Recipe for The Freeman (Cebu, Philippines), served via philstar.com."""

    title = 'The Freeman'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Freeman is a daily English-language newspaper published in Cebu, Philippines, by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [dict(name='img', attrs={'id':'Image1'})  # Logo
                  ,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'})  # Section (Headlines, Nation, Metro, ...)
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'})  # Comments
                  ,dict(name='img', attrs={'src':'images/post-comments.jpg'})  # View Comments
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'})  # Zoom
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    feeds = [
              ('Cebu News'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107' )
             ,('Freeman Opinion'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109' )
             ,('Metro Cebu'          , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531' )
             ,('Region'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530' )
             ,('Cebu Business'       , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108' )
             ,('Cebu Sports'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110' )
             ,('Cebu Lifestyle'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111' )
             ,('Cebu Entertainment'  , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51'  )
            ]

    # process the printer friendly version of article
    def print_version(self, url):
        return url.replace('/Article', '/ArticlePrinterFriendly')

    # obtain title from printer friendly version of article; avoiding
    # add_toc_thumbnail changing title when article has image
    def populate_article_metadata(self, article, soup, first):
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard against site layout changes: keep the feed-supplied title
        # when the header span is missing or empty instead of aborting the
        # whole download with an AttributeError/IndexError.
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -0,0 +1,88 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe for The Manila Bulletin (mb.com.ph), the Philippines' largest
    broadsheet by circulation."""
    title = u'The Manila Bulletin'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper by circulation."
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # Keep only the article container, its label, and the body content.
    keep_only_tags = [
                       dict(name='div', attrs={'class':'article node'})
                      ,dict(name='div', attrs={'class':'label'})
                      ,dict(name='div', attrs={'class':'content clear-block'})
                     ]

    # Strip print/mail widgets, the sidebar and the attachments table.
    remove_tags = [
                    dict(name='li', attrs={'class':'print_html'})
                   ,dict(name='li', attrs={'class':'print_html first'})
                   ,dict(name='li', attrs={'class':'print_mail'})
                   ,dict(name='li', attrs={'class':'print_mail last'})
                   ,dict(name='div', attrs={'class':'article-sidebar'})
                   ,dict(name='table', attrs={'id':'attachments'})
                  ]

    auto_cleanup = False

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # Commented-out feeds are intentionally disabled, not forgotten.
    feeds = [
              (u'Main News', u'http://www.mb.com.ph/feed/news/main')
#            , (u'Regional', u'http://www.mb.com.ph/feed/news/regional')
            , (u'Business', u'http://www.mb.com.ph/feed/business')
            , (u'Sports', u'http://www.mb.com.ph/feed/sports')
            , (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment')
            , (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion')
#            , (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture')
#            , (u'Environment', u'http://www.mb.com.ph/feed/news/environment')
            , (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology')
            , (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle')
#            , (u'Arts & Living', u'http://www.mb.com.ph/feed/lifestyle/arts-and-living')
#            , (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive')
#            , (u'Food', u'http://www.mb.com.ph/feed/lifestyle/food')
#            , (u'Travel', u'http://www.mb.com.ph/feed/lifestyle/travel')
#            , (u'Picture Perfect', u'http://www.mb.com.ph/feed/lifestyle/picture-perfect')
            ]

    # if use print version - convert url
    # http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    # http://www.mb.com.ph/print/361252
    #
#    def print_version(self,url):
#        segments = url.split('/')
#        printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
#        return printURL

View File

@ -0,0 +1,55 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaTimes(BasicNewsRecipe):
    """Recipe for The Manila Times (manilatimes.net), the oldest English
    language newspaper in the Philippines."""
    title = u'The Manila Times'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Manila Times - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Times is the oldest existing English language newspaper in the Philippines.'
    language = 'en_PH'
    publisher = 'The Manila Times'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    masthead_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # Strip print/email icons and the hit counter; the rest of the page is
    # handled by auto_cleanup below.
    remove_tags = [
                    dict(name='img', attrs={'alt':'Print'})
                   ,dict(name='img', attrs={'alt':'Email:'})
                   ,dict(name='dd', attrs={'class':'hits'})
                  ]

    auto_cleanup = True

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # NOTE(review): the feed URLs use '&amp;' rather than '&' — they look
    # HTML-escaped; verify the site accepts them before "fixing".
    feeds = [(u'Breaking News', u'http://www.manilatimes.net/index.php/news/breaking-news?format=feed&amp;type=rss'), (u'Top Stories', u'http://www.manilatimes.net/index.php/news/top-stories?format=feed&amp;type=rss'), (u'Headlines', u'http://www.manilatimes.net/index.php/news/headlines-mt?format=feed&amp;type=rss'), (u'Nation', u'http://www.manilatimes.net/index.php/news/nation?format=feed&amp;type=rss'), (u'Regions', u'http://www.manilatimes.net/index.php/news/regions?format=feed&amp;type=rss'), (u'World', u'http://www.manilatimes.net/index.php/news/world?format=feed&amp;type=rss'), (u'Top Business News', u'http://www.manilatimes.net/index.php/business/top-business-news?format=feed&amp;type=rss'), (u'Business Columnist', u'http://www.manilatimes.net/index.php/business/business-columnist?format=feed&amp;type=rss'), (u'Opinion - Editorials', u'http://www.manilatimes.net/index.php/opinion/editorials?format=feed&amp;type=rss'), (u'Opinion - Columnist', u'http://www.manilatimes.net/index.php/opinion/columnist1?format=feed&amp;type=rss'), (u'Opinion - Editorial Cartoon', u'http://www.manilatimes.net/index.php/opinion/editorial-cartoon?format=feed&amp;type=rss'), (u'Top Sports News', u'http://www.manilatimes.net/index.php/sports/top-sports-news?format=feed&amp;type=rss'), (u'Sports Columnist', u'http://www.manilatimes.net/index.php/sports/sports-columnist?format=feed&amp;type=rss'), (u'Life & Times', u'http://www.manilatimes.net/index.php/life-and-times?format=feed&amp;type=rss'), (u'Showtime', u'http://www.manilatimes.net/index.php/life-and-times/showtime?format=feed&amp;type=rss'), (u'Sunday Times', u'http://www.manilatimes.net/index.php/sunday-times?format=feed&amp;type=rss'), (u'Sunday Times Magazine', u'http://www.manilatimes.net/index.php/sunday-times/the-sunday-times-magazines?format=feed&amp;type=rss'), (u'Motoring News', u'http://www.manilatimes.net/index.php/fast-times/motoring-news?format=feed&amp;type=rss'), (u'Motoring Columnist',
        u'http://www.manilatimes.net/index.php/fast-times/motoring-columnist?format=feed&amp;type=rss'), (u'Technology', u'http://www.manilatimes.net/index.php/technology?format=feed&amp;type=rss')]

View File

@ -0,0 +1,129 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineDailyInquirer(BasicNewsRecipe):
    """Recipe for The Philippine Daily Inquirer (inquirer.net)."""
    title = 'The Philippine Daily Inquirer'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Philippine Daily Inquirer - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '03 June 2012'
    __version__ = '1.0'
    description = 'The Philippine Daily Inquirer is a widely read and circulated newspaper.'
    language = 'en_PH'
    publisher = 'The Philippine Daily Inquirer'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'
    masthead_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Cut everything after the article meta / taboola widget / clear break.
    remove_tags_after = [
                          dict(name='div', attrs={'id':'entryMeta'})
                         ,dict(name='div', attrs={'id':'taboola-div'})
                         ,dict(name='br', attrs={'class':'clear'})
                        ]

    # Strip navigation, sharing widgets and ad-redirect links.
    remove_tags = [
                    dict(name='div', attrs={'class':'recent'})
                   ,dict(name='div', attrs={'id':'sharefeature'})
                   ,dict(name='div', attrs={'id':'masthead_bg'})
                   ,dict(name='div', attrs={'id':'navmenu_main'})
                   ,dict(name='div', attrs={'id':'navmenu_channel'})
                   ,dict(name='div', attrs={'class':'breadcrumbs'})
                   ,dict(name='div', attrs={'id':'search_container'})
                   ,dict(name='a', attrs={'href':'http://ruby.inquirer.net/redirect/redirect.php?item_id=1143'})
                   ,dict(name='a', attrs={'href':'http://ruby.inquirer.net/redirect/redirect.php?item_id=1147'})
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # One RSS feed per site section; commented entries are deliberately off.
    feeds = [
              ('Headlines' , 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed' )
             ,('Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/feed' )
             ,('Nation' , 'http://newsinfo.inquirer.net/category/nation/feed' )
             ,('Nation - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed' )
             ,('Metro' , 'http://newsinfo.inquirer.net/category/metro/feed' )
             ,('Metro - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed' )
             ,('Regions' , 'http://newsinfo.inquirer.net/category/regions/feed' )
             ,('Regions - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed' )
#             ,('News' , 'http://www.inquirer.net/fullfeed' )
#             ,('More News' , 'http://newsinfo.inquirer.net/feed' )
             ,('Global Nation' , 'http://globalnation.inquirer.net/feed' )
             ,('Global Nation - Latest Stories' , 'http://globalnation.inquirer.net/category/latest-stories/feed' )
             ,('Global Nation - Philippines' , 'http://globalnation.inquirer.net/category/news/philippines/feed' )
             ,('Global Nation - Asia & Pacific' , 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed' )
             ,('Global Nation - Americas' , 'http://globalnation.inquirer.net/category/news/uscanada/feed' )
             ,('Global Nation - Middle East & Africa' , 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed' )
             ,('Global Nation - Europe' , 'http://globalnation.inquirer.net/category/news/europe/feed' )
             ,('Global Nation - Global Pinoy' , 'http://globalnation.inquirer.net/category/global-pinoy/feed' )
             ,('Global Nation - Events' , 'http://globalnation.inquirer.net/category/events/feed' )
             ,('Business' , 'http://business.inquirer.net/feed' )
             ,('Business - Latest Stories' , 'http://business.inquirer.net/category/latest-stories/feed' )
             ,('Business - Money' , 'http://business.inquirer.net/category/money/feed' )
             ,('Business - Science & Health' , 'http://business.inquirer.net/category/science-and-health/feed' )
             ,('Business - Motoring' , 'http://business.inquirer.net/category/motoring/feed' )
             ,('Business - Property Guide' , 'http://business.inquirer.net/category/property-guide/feed' )
             ,('Business - Columnists' , 'http://business.inquirer.net/category/columnists/feed' )
             ,('Sports' , 'http://sports.inquirer.net/feed' )
             ,('Sports - Latest Stories' , 'http://sports.inquirer.net/category/latest-stories/feed' )
             ,('Sports - Basketball' , 'http://sports.inquirer.net/category/section/basketball/feed' )
             ,('Sports - Boxing & MMA' , 'http://sports.inquirer.net/category/section/boxing-mma/feed' )
             ,('Sports - Golf' , 'http://sports.inquirer.net/category/section/golf/feed' )
             ,('Sports - Football' , 'http://sports.inquirer.net/category/section/other-sports/football/feed' )
             ,('Sports - Other Sports' , 'http://sports.inquirer.net/category/section/other-sports/feed' )
             ,('Technology' , 'http://technology.inquirer.net/feed' )
             ,('Technology Latest Stories' , 'http://technology.inquirer.net/category/latest-stories/feed' )
             ,('Entertainment' , 'http://entertainment.inquirer.net/feed' )
             ,('Entertainment - Headlines' , 'http://entertainment.inquirer.net/category/headlines/feed' )
             ,('Entertainment - Latest Stories' , 'http://entertainment.inquirer.net/category/latest-stories/feed' )
             ,('Entertainment - Movies' , 'http://movies.inquirer.net/feed' )
             ,('Lifestyle' , 'http://lifestyle.inquirer.net/feed' )
             ,('Lifestyle - Latest Stories' , 'http://lifestyle.inquirer.net/category/latest-stories/feed' )
             ,('Lifestyle - Arts & Books' , 'http://lifestyle.inquirer.net/category/arts-and-books/feed' )
             ,('Lifestyle - Wellness' , 'http://lifestyle.inquirer.net/category/wellness/feed' )
             ,('Lifestyle - Home & Entertaining' , 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed' )
             ,('Lifestyle - Parenting' , 'http://lifestyle.inquirer.net/category/parenting/feed' )
             ,('Lifestyle - Food' , 'http://lifestyle.inquirer.net/category/food/feed' )
             ,('Lifestyle - Fashion & Beauty' , 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed' )
             ,('Lifestyle - Super' , 'http://lifestyle.inquirer.net/category/super/feed' )
             ,('Lifestyle - 2BU' , 'http://lifestyle.inquirer.net/category/2bu/feed' )
             ,('Lifestyle - Sunday Lifestyle' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed' )
             ,('Lifestyle - Wedding' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed' )
             ,('Lifestyle - Travel' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed' )
             ,('Lifestyle - Relationship' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed' )
             ,('Opinion' , 'http://opinion.inquirer.net/feed' )
             ,('Opinion - Viewpoints' , 'http://opinion.inquirer.net/category/viewpoints/feed' )
             ,('Opinion - Talk of the Town' , 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed' )
             ,('Editorial' , 'http://opinion.inquirer.net/category/editorial/feed' )
             ,('Letters to the Editor' , 'http://opinion.inquirer.net/category/letters-to-the-editor/feed' )
             ,('Columns' , 'http://opinion.inquirer.net/category/columns/feed' )
             ,('Citizens Journalism' , 'http://newsinfo.inquirer.net/category/citizens-journalism/feed' )
             ,('Cebu - Daily News' , 'http://newsinfo.inquirer.net/category/cdn/feed' )
             ,('Cebu - More News' , 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed' )
             ,('Cebu - Community' , 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed' )
             ,('Cebu - Metro' , 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed' )
             ,('Cebu - Business' , 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed' )
             ,('Cebu - Sports' , 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed' )
             ,('Cebu - Visayas' , 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed' )
             ,('Cebu - Opinion' , 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed' )
            ]

View File

@ -0,0 +1,97 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineStar(BasicNewsRecipe):
    """Recipe for The Philippine Star (philstar.com)."""

    title = 'The Philippine Star'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Philippine Star - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Philippine Star is a daily English-language broadsheet newspaper based in Manila. It has the most subscribers of any newspaper in the Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    masthead_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [dict(name='img', attrs={'id':'Image1'})  # Logo
                  ,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'})  # Section (Headlines, Nation, Metro, ...)
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'})  # Comments
                  ,dict(name='img', attrs={'src':'images/post-comments.jpg'})  # View Comments
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'})  # Zoom
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    feeds = [
              ('Headlines'             , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=63'  )
             ,('Breaking News'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=200' )
             ,('News Feature'          , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=68'  )
             ,('Nation'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=67'  )
             ,('Metro'                 , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=65'  )
             ,('Business'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=66'  )
             ,('Sports'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=69'  )
             ,('Entertainment'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=70'  )
             ,('Science & Technology'  , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=75'  )
             ,('Networks'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=71'  )
             ,('Business as Usual'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=78'  )
             ,('Banking'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=74'  )
             ,('Motoring'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=72'  )
             ,('Real Estate'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=76'  )
             ,('Telecoms'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=73'  )
             ,('Agriculture'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=77'  )
             ,('Arts & Culture'        , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=79'  )
             ,('Food & Leisure'        , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=81'  )
             ,('Health & Family'       , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=80'  )
             ,('Education & Home'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=442' )
             ,('Travel & Tourism'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=87'  )
             ,('Newsmakers'            , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=88'  )
             ,('Business Life'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=82'  )
             ,('Fashion & Beauty'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=83'  )
             ,('For Men'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=446' )
             ,('Gadgets'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=449' )
             ,('Sunday Life'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=86'  )
             ,('Supreme'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=448' )
             ,('Opinion'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=64'  )
             ,('Letters to the Editor' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=135' )
             ,('Starweek Magazine'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=90'  )
             ,('Modern Living'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=85'  )
             ,('YStyle'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=451' )
             ,('Allure'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=89'  )
             ,('Weather'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=116' )
            ]

    # process the printer friendly version of article
    def print_version(self, url):
        return url.replace('/Article', '/ArticlePrinterFriendly')

    # obtain title from printer friendly version of article; avoiding
    # add_toc_thumbnail changing title when article has image
    def populate_article_metadata(self, article, soup, first):
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard against site layout changes: keep the feed-supplied title
        # when the header span is missing or empty instead of aborting the
        # whole download with an AttributeError/IndexError.
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -13,10 +13,11 @@ class tvn24(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags=[dict(id='tvn24_wiadomosci_detal'), dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']})] keep_only_tags=[dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']}), dict(attrs={'class':'mainLeftColumn'})]
remove_tags_after= dict(name='div', attrs={'class':'socialBoxesBottom'}) remove_tags=[dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text']})]
remove_tags=[dict(attrs={'class':['tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo', 'textSize', 'obj_ukrytydruk obj_ramka1_r', 'related newsNews align-right', 'box', 'newsUserList', 'watchMaterial text']})] #remove_tags_after= dict(attrs={'class':'articleAuthors mb30 mt5 grey_v6'})
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), (u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')] feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -1,4 +1,5 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ViceESRecipe(BasicNewsRecipe): class ViceESRecipe(BasicNewsRecipe):
@ -7,11 +8,33 @@ class ViceESRecipe(BasicNewsRecipe):
description = u'La página web oficial de la revista Vice España' description = u'La página web oficial de la revista Vice España'
category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología' category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología'
cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif' cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'
oldest_article = 20 oldest_article = 14
max_articles_per_feed = 30 max_articles_per_feed = 100
auto_cleanup = True auto_cleanup = False
no_stylesheets = True no_stylesheets = True
language = 'es' language = 'es'
use_embedded_content = False
remove_javascript = True
publication_type = 'magazine'
recursions=10
match_regexps = [r'/read/.*\?Contentpage=[2-9]$']
keep_only_tags = [
dict(attrs={'class':['article_title','article_content','next']})
]
remove_tags = [
dict(attrs={'class':['social_buttons','search','tweet','like','inline_socials'
,'stumblebadge','plusone']})
]
extra_css = '''
.author{font-size:small}
img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
'''
preprocess_regexps = [
(re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
]
feeds = [('Vice', 'http://www.vice.com/es/rss')] feeds = [('Vice', 'http://www.vice.com/es/rss')]

View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class WirtualneMedia(BasicNewsRecipe):
    """Recipe for wirtualnemedia.pl, a Polish media/advertising news portal."""

    title = u'wirtualnemedia.pl'
    __author__ = 'fenuks'
    description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
    category = 'internet'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    masthead_url = 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
    cover_url = 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'

    # Drop the site chrome; articles sit between these two containers.
    remove_tags = [dict(id=['header', 'footer'])]

    feeds = [
        (u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
        (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
        (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
        (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
        (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
        (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
        (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
        (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
        (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
        (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL (artykul -> print)."""
        return url.replace('artykul', 'print')

View File

@ -375,7 +375,6 @@ class Build(Command):
"common/common.h", "common/common.h",
"common/config_file.h", "common/config_file.h",
"style/blurhelper.h", "style/blurhelper.h",
"style/dialogpixmaps.h",
"style/fixx11h.h", "style/fixx11h.h",
"style/pixmaps.h", "style/pixmaps.h",
"style/qtcurve.h", "style/qtcurve.h",

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n" "Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-27 18:24+0000\n" "PO-Revision-Date: 2012-06-06 17:20+0000\n"
"Last-Translator: Jellby <Unknown>\n" "Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n" "Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-28 04:54+0000\n" "X-Launchpad-Export-Date: 2012-06-07 04:40+0000\n"
"X-Generator: Launchpad (build 15149)\n" "X-Generator: Launchpad (build 15353)\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
@ -383,7 +383,7 @@ msgstr "Tibetano amdo"
#. name for ady #. name for ady
msgid "Adyghe" msgid "Adyghe"
msgstr "Adyghe" msgstr "Adigué"
#. name for adz #. name for adz
msgid "Adzera" msgid "Adzera"
@ -951,7 +951,7 @@ msgstr "Alune"
#. name for alq #. name for alq
msgid "Algonquin" msgid "Algonquin"
msgstr "Algonquin" msgstr "Algonquino"
#. name for alr #. name for alr
msgid "Alutor" msgid "Alutor"
@ -1451,7 +1451,7 @@ msgstr "Araona"
#. name for arp #. name for arp
msgid "Arapaho" msgid "Arapaho"
msgstr "Arapaho" msgstr "Arapa"
#. name for arq #. name for arq
msgid "Arabic; Algerian" msgid "Arabic; Algerian"
@ -4363,7 +4363,7 @@ msgstr "Jalkunan"
#. name for bxm #. name for bxm
msgid "Buriat; Mongolia" msgid "Buriat; Mongolia"
msgstr "Buriat de Mongolia" msgstr "Buriato de Mongolia"
#. name for bxn #. name for bxn
msgid "Burduna" msgid "Burduna"
@ -4383,7 +4383,7 @@ msgstr "Beele"
#. name for bxr #. name for bxr
msgid "Buriat; Russia" msgid "Buriat; Russia"
msgstr "Buriat de Rusia" msgstr "Buriato de Rusia"
#. name for bxs #. name for bxs
msgid "Busam" msgid "Busam"
@ -4391,7 +4391,7 @@ msgstr "Busam"
#. name for bxu #. name for bxu
msgid "Buriat; China" msgid "Buriat; China"
msgstr "Buriat de China" msgstr "Buriato de China"
#. name for bxv #. name for bxv
msgid "Berakou" msgid "Berakou"
@ -4999,7 +4999,7 @@ msgstr "Mari (Rusia)"
#. name for chn #. name for chn
msgid "Chinook jargon" msgid "Chinook jargon"
msgstr "Chinook" msgstr "Jerga chinook"
#. name for cho #. name for cho
msgid "Choctaw" msgid "Choctaw"
@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)"
#. name for dep #. name for dep
msgid "Delaware; Pidgin" msgid "Delaware; Pidgin"
msgstr "Delaware pidyin" msgstr "Pidyin delaware"
#. name for deq #. name for deq
msgid "Dendi (Central African Republic)" msgid "Dendi (Central African Republic)"
@ -6723,7 +6723,7 @@ msgstr "Darai"
#. name for dsb #. name for dsb
msgid "Sorbian; Lower" msgid "Sorbian; Lower"
msgstr "Sorabo inferior" msgstr "Bajo sorabo"
#. name for dse #. name for dse
msgid "Dutch Sign Language" msgid "Dutch Sign Language"
@ -7831,7 +7831,7 @@ msgstr "Gabri"
#. name for gac #. name for gac
msgid "Great Andamanese; Mixed" msgid "Great Andamanese; Mixed"
msgstr "Gran Andamanés mixto" msgstr "Gran andamanés mixto"
#. name for gad #. name for gad
msgid "Gaddang" msgid "Gaddang"
@ -8479,23 +8479,23 @@ msgstr "Bajo alemán medio"
#. name for gmm #. name for gmm
msgid "Gbaya-Mbodomo" msgid "Gbaya-Mbodomo"
msgstr "" msgstr "Gbaya-Mbodomo"
#. name for gmn #. name for gmn
msgid "Gimnime" msgid "Gimnime"
msgstr "" msgstr "Gimnime"
#. name for gmu #. name for gmu
msgid "Gumalu" msgid "Gumalu"
msgstr "" msgstr "Gumalu"
#. name for gmv #. name for gmv
msgid "Gamo" msgid "Gamo"
msgstr "" msgstr "Gamo"
#. name for gmx #. name for gmx
msgid "Magoma" msgid "Magoma"
msgstr "" msgstr "Magoma"
#. name for gmy #. name for gmy
msgid "Greek; Mycenaean" msgid "Greek; Mycenaean"
@ -8503,11 +8503,11 @@ msgstr "Griego micénico"
#. name for gna #. name for gna
msgid "Kaansa" msgid "Kaansa"
msgstr "" msgstr "Kaansa"
#. name for gnb #. name for gnb
msgid "Gangte" msgid "Gangte"
msgstr "" msgstr "Gangte"
#. name for gnc #. name for gnc
msgid "Guanche" msgid "Guanche"
@ -8515,15 +8515,15 @@ msgstr "Guanche"
#. name for gnd #. name for gnd
msgid "Zulgo-Gemzek" msgid "Zulgo-Gemzek"
msgstr "" msgstr "Zulgo-Gemzek"
#. name for gne #. name for gne
msgid "Ganang" msgid "Ganang"
msgstr "" msgstr "Ganang"
#. name for gng #. name for gng
msgid "Ngangam" msgid "Ngangam"
msgstr "" msgstr "Ngangam"
#. name for gnh #. name for gnh
msgid "Lere" msgid "Lere"
@ -8535,7 +8535,7 @@ msgstr ""
#. name for gnk #. name for gnk
msgid "//Gana" msgid "//Gana"
msgstr "" msgstr "//Gana"
#. name for gnl #. name for gnl
msgid "Gangulu" msgid "Gangulu"
@ -8555,7 +8555,7 @@ msgstr "Gondi septentrional"
#. name for gnq #. name for gnq
msgid "Gana" msgid "Gana"
msgstr "" msgstr "Gana"
#. name for gnr #. name for gnr
msgid "Gureng Gureng" msgid "Gureng Gureng"
@ -8563,11 +8563,11 @@ msgstr ""
#. name for gnt #. name for gnt
msgid "Guntai" msgid "Guntai"
msgstr "" msgstr "Guntai"
#. name for gnu #. name for gnu
msgid "Gnau" msgid "Gnau"
msgstr "" msgstr "Gnau"
#. name for gnw #. name for gnw
msgid "Guaraní; Western Bolivian" msgid "Guaraní; Western Bolivian"
@ -8575,35 +8575,35 @@ msgstr "Guaraní boliviano occidental"
#. name for gnz #. name for gnz
msgid "Ganzi" msgid "Ganzi"
msgstr "" msgstr "Ganzi"
#. name for goa #. name for goa
msgid "Guro" msgid "Guro"
msgstr "" msgstr "Guro"
#. name for gob #. name for gob
msgid "Playero" msgid "Playero"
msgstr "" msgstr "Playero"
#. name for goc #. name for goc
msgid "Gorakor" msgid "Gorakor"
msgstr "" msgstr "Gorakor"
#. name for god #. name for god
msgid "Godié" msgid "Godié"
msgstr "" msgstr "Godié"
#. name for goe #. name for goe
msgid "Gongduk" msgid "Gongduk"
msgstr "" msgstr "Gongduk"
#. name for gof #. name for gof
msgid "Gofa" msgid "Gofa"
msgstr "" msgstr "Gofa"
#. name for gog #. name for gog
msgid "Gogo" msgid "Gogo"
msgstr "" msgstr "Gogo"
#. name for goh #. name for goh
msgid "German; Old High (ca. 750-1050)" msgid "German; Old High (ca. 750-1050)"
@ -8611,19 +8611,19 @@ msgstr "Alto alemán antiguo (ca. 750-1050)"
#. name for goi #. name for goi
msgid "Gobasi" msgid "Gobasi"
msgstr "" msgstr "Gobasi"
#. name for goj #. name for goj
msgid "Gowlan" msgid "Gowlan"
msgstr "" msgstr "Gowlan"
#. name for gok #. name for gok
msgid "Gowli" msgid "Gowli"
msgstr "" msgstr "Gowli"
#. name for gol #. name for gol
msgid "Gola" msgid "Gola"
msgstr "" msgstr "Gola"
#. name for gom #. name for gom
msgid "Konkani; Goan" msgid "Konkani; Goan"
@ -8635,15 +8635,15 @@ msgstr "Gondi"
#. name for goo #. name for goo
msgid "Gone Dau" msgid "Gone Dau"
msgstr "" msgstr "Gone Dau"
#. name for gop #. name for gop
msgid "Yeretuar" msgid "Yeretuar"
msgstr "" msgstr "Yeretuar"
#. name for goq #. name for goq
msgid "Gorap" msgid "Gorap"
msgstr "" msgstr "Gorap"
#. name for gor #. name for gor
msgid "Gorontalo" msgid "Gorontalo"
@ -8651,7 +8651,7 @@ msgstr "Gorontalo"
#. name for gos #. name for gos
msgid "Gronings" msgid "Gronings"
msgstr "" msgstr "Gronings"
#. name for got #. name for got
msgid "Gothic" msgid "Gothic"
@ -8659,15 +8659,15 @@ msgstr "Gótico"
#. name for gou #. name for gou
msgid "Gavar" msgid "Gavar"
msgstr "" msgstr "Gavar"
#. name for gow #. name for gow
msgid "Gorowa" msgid "Gorowa"
msgstr "" msgstr "Gorowa"
#. name for gox #. name for gox
msgid "Gobu" msgid "Gobu"
msgstr "" msgstr "Gobu"
#. name for goy #. name for goy
msgid "Goundo" msgid "Goundo"
@ -9683,7 +9683,7 @@ msgstr ""
#. name for hsb #. name for hsb
msgid "Sorbian; Upper" msgid "Sorbian; Upper"
msgstr "" msgstr "Alto sorabo"
#. name for hsh #. name for hsh
msgid "Hungarian Sign Language" msgid "Hungarian Sign Language"
@ -19291,7 +19291,7 @@ msgstr ""
#. name for nwc #. name for nwc
msgid "Newari; Old" msgid "Newari; Old"
msgstr "Newari antiguo" msgstr "Newarí antiguo"
#. name for nwe #. name for nwe
msgid "Ngwe" msgid "Ngwe"
@ -19311,7 +19311,7 @@ msgstr ""
#. name for nwx #. name for nwx
msgid "Newar; Middle" msgid "Newar; Middle"
msgstr "Newari medio" msgstr "Newarí medio"
#. name for nwy #. name for nwy
msgid "Nottoway-Meherrin" msgid "Nottoway-Meherrin"
@ -23027,7 +23027,7 @@ msgstr ""
#. name for sia #. name for sia
msgid "Sami; Akkala" msgid "Sami; Akkala"
msgstr "" msgstr "Sami de Akkala"
#. name for sib #. name for sib
msgid "Sebop" msgid "Sebop"
@ -23127,11 +23127,11 @@ msgstr ""
#. name for sjd #. name for sjd
msgid "Sami; Kildin" msgid "Sami; Kildin"
msgstr "" msgstr "Sami de Kildin"
#. name for sje #. name for sje
msgid "Sami; Pite" msgid "Sami; Pite"
msgstr "" msgstr "Sami de Pite"
#. name for sjg #. name for sjg
msgid "Assangori" msgid "Assangori"
@ -23139,7 +23139,7 @@ msgstr ""
#. name for sjk #. name for sjk
msgid "Sami; Kemi" msgid "Sami; Kemi"
msgstr "" msgstr "Sami de Kemi"
#. name for sjl #. name for sjl
msgid "Sajalong" msgid "Sajalong"
@ -23171,11 +23171,11 @@ msgstr ""
#. name for sjt #. name for sjt
msgid "Sami; Ter" msgid "Sami; Ter"
msgstr "" msgstr "Sami de Ter"
#. name for sju #. name for sju
msgid "Sami; Ume" msgid "Sami; Ume"
msgstr "" msgstr "Sami de Ume"
#. name for sjw #. name for sjw
msgid "Shawnee" msgid "Shawnee"
@ -23407,7 +23407,7 @@ msgstr ""
#. name for smj #. name for smj
msgid "Lule Sami" msgid "Lule Sami"
msgstr "Sami lule" msgstr "Sami de Lule"
#. name for smk #. name for smk
msgid "Bolinao" msgid "Bolinao"
@ -23423,7 +23423,7 @@ msgstr ""
#. name for smn #. name for smn
msgid "Sami; Inari" msgid "Sami; Inari"
msgstr "" msgstr "Sami de Inari"
#. name for smo #. name for smo
msgid "Samoan" msgid "Samoan"
@ -23443,7 +23443,7 @@ msgstr ""
#. name for sms #. name for sms
msgid "Sami; Skolt" msgid "Sami; Skolt"
msgstr "" msgstr "Sami de Skolt"
#. name for smt #. name for smt
msgid "Simte" msgid "Simte"
@ -24339,7 +24339,7 @@ msgstr "Subanen central"
#. name for syc #. name for syc
msgid "Syriac; Classical" msgid "Syriac; Classical"
msgstr "" msgstr "Siríaco clásico"
#. name for syi #. name for syi
msgid "Seki" msgid "Seki"
@ -28235,7 +28235,7 @@ msgstr ""
#. name for xal #. name for xal
msgid "Kalmyk" msgid "Kalmyk"
msgstr "" msgstr "Calmuco"
#. name for xam #. name for xam
msgid "/Xam" msgid "/Xam"

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 54) numeric_version = (0, 8, 55)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -276,6 +276,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
from calibre.ebooks.metadata.odt import get_metadata from calibre.ebooks.metadata.odt import get_metadata
return get_metadata(stream) return get_metadata(stream)
class DocXMetadataReader(MetadataReaderPlugin):
name = 'Read DOCX metadata'
file_types = set(['docx'])
description = _('Read metadata from %s files')%'DOCX'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.docx import get_metadata
return get_metadata(stream)
class OPFMetadataReader(MetadataReaderPlugin): class OPFMetadataReader(MetadataReaderPlugin):
name = 'Read OPF metadata' name = 'Read OPF metadata'

View File

@ -672,6 +672,12 @@ class KindleFireOutput(KindleDXOutput):
dpi = 169.0 dpi = 169.0
comic_screen_size = (570, 1016) comic_screen_size = (570, 1016)
@classmethod
def tags_to_string(cls, tags):
# The idiotic fire doesn't obey the color:white directive
from xml.sax.saxutils import escape
return escape(', '.join(tags))
class IlliadOutput(OutputProfile): class IlliadOutput(OutputProfile):
name = 'Illiad' name = 'Illiad'

View File

@ -72,6 +72,7 @@ class ANDROID(USBMS):
# Sony Ericsson # Sony Ericsson
0xfce : { 0xfce : {
0xd12e : [0x0100], 0xd12e : [0x0100],
0xe156 : [0x226],
0xe15d : [0x226], 0xe15d : [0x226],
0xe14f : [0x0226], 0xe14f : [0x0226],
0x614f : [0x0226, 0x100], 0x614f : [0x0226, 0x100],

View File

@ -178,7 +178,13 @@ class DevicePlugin(Plugin):
if cvid == vid: if cvid == vid:
if pid in products: if pid in products:
if hasattr(self.VENDOR_ID, 'keys'): if hasattr(self.VENDOR_ID, 'keys'):
cbcd = self.VENDOR_ID[vid][pid] try:
cbcd = self.VENDOR_ID[vid][pid]
except KeyError:
# Vendor vid does not have product pid, pid
# exists for some other vendor in this
# device
continue
else: else:
cbcd = self.BCD cbcd = self.BCD
if self.test_bcd(bcd, cbcd): if self.test_bcd(bcd, cbcd):

View File

@ -224,16 +224,18 @@ class TREKSTOR(USBMS):
FORMATS = ['epub', 'txt', 'pdf'] FORMATS = ['epub', 'txt', 'pdf']
VENDOR_ID = [0x1e68] VENDOR_ID = [0x1e68]
PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, 0x0056,
0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091 0x003e, # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
] ]
BCD = [0x0002] BCD = [0x0002, 0x100]
EBOOK_DIR_MAIN = 'Ebooks' EBOOK_DIR_MAIN = 'Ebooks'
VENDOR_NAME = 'TREKSTOR' VENDOR_NAME = 'TREKSTOR'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7', WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7',
'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0'] 'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS']
SUPPORTS_SUB_DIRS = True
SUPPORTS_SUB_DIRS_DEFAULT = False
class EEEREADER(USBMS): class EEEREADER(USBMS):

View File

@ -268,20 +268,92 @@ class PRST1(USBMS):
collections = booklist.get_collections(collections_attributes) collections = booklist.get_collections(collections_attributes)
with closing(sqlite.connect(dbpath)) as connection: with closing(sqlite.connect(dbpath)) as connection:
self.remove_orphaned_records(connection, dbpath)
self.update_device_books(connection, booklist, source_id, self.update_device_books(connection, booklist, source_id,
plugboard, dbpath) plugboard, dbpath)
self.update_device_collections(connection, booklist, collections, source_id) self.update_device_collections(connection, booklist, collections, source_id, dbpath)
debug_print('PRST1: finished update_device_database') debug_print('PRST1: finished update_device_database')
def update_device_books(self, connection, booklist, source_id, plugboard, def remove_orphaned_records(self, connection, dbpath):
dbpath):
from sqlite3 import DatabaseError from sqlite3 import DatabaseError
opts = self.settings() try:
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS] cursor = connection.cursor()
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS] debug_print("Removing Orphaned Collection Records")
# Purge any collections references that point into the abyss
query = 'DELETE FROM collections WHERE content_id NOT IN (SELECT _id FROM books)'
cursor.execute(query)
query = 'DELETE FROM collections WHERE collection_id NOT IN (SELECT _id FROM collection)'
cursor.execute(query)
debug_print("Removing Orphaned Book Records")
# Purge any references to books not in this database
# Idea is to prevent any spill-over where these wind up applying to some other book
query = 'DELETE FROM %s WHERE content_id NOT IN (SELECT _id FROM books)'
cursor.execute(query%'annotation')
cursor.execute(query%'bookmark')
cursor.execute(query%'current_position')
cursor.execute(query%'freehand')
cursor.execute(query%'history')
cursor.execute(query%'layout_cache')
cursor.execute(query%'preference')
cursor.close()
except DatabaseError:
import traceback
tb = traceback.format_exc()
raise DeviceError((('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb)
def get_lastrowid(self, cursor):
# SQLite3 + Python has a fun issue on 32-bit systems with integer overflows.
# Issue a SQL query instead, getting the value as a string, and then converting to a long python int manually.
query = 'SELECT last_insert_rowid()'
cursor.execute(query)
row = cursor.fetchone()
return long(row[0])
def get_database_min_id(self, source_id):
sequence_min = 0L
if source_id == 1:
sequence_min = 4294967296L
return sequence_min
def set_database_sequence_id(self, connection, table, sequence_id):
cursor = connection.cursor()
# Update the sequence Id if it exists
query = 'UPDATE sqlite_sequence SET seq = ? WHERE name = ?'
t = (sequence_id, table,)
cursor.execute(query, t)
# Insert the sequence Id if it doesn't
query = ('INSERT INTO sqlite_sequence (name, seq) '
'SELECT ?, ? '
'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
cursor.execute(query, (table, sequence_id, table,))
cursor.close()
def read_device_books(self, connection, source_id, dbpath):
from sqlite3 import DatabaseError
sequence_min = self.get_database_min_id(source_id)
sequence_max = sequence_min
sequence_dirty = 0
debug_print("Book Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
try: try:
cursor = connection.cursor() cursor = connection.cursor()
@ -300,27 +372,70 @@ class PRST1(USBMS):
' any notes/highlights, etc.')%dbpath)+' Underlying error:' ' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb) '\n'+tb)
# Get the books themselves, but keep track of any that are less than the minimum.
# Record what the max id being used is as well.
db_books = {} db_books = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
lpath = row[0].replace('\\', '/') lpath = row[0].replace('\\', '/')
db_books[lpath] = row[1] db_books[lpath] = row[1]
if row[1] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[1])
# Work-around for Sony Bug (SD Card DB not using right SQLite sequence) # If the database is 'dirty', then we should fix up the Ids and the sequence number
if source_id == 1: if sequence_dirty == 1:
# Update any existing sequence numbers in the table that aren't in the required range debug_print("Book Sequence Dirty for Source Id: %d"%source_id)
sdcard_sequence_start = '4294967296' sequence_max = sequence_max + 1
query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?' for book, bookId in db_books.items():
t = (sdcard_sequence_start, sdcard_sequence_start,) if bookId < sequence_min:
cursor.execute(query, t) # Record the new Id and write it to the DB
db_books[book] = sequence_max
sequence_max = sequence_max + 1
# Insert sequence numbers for tables we will be manipulating, if they don't already exist # Fix the Books DB
query = ('INSERT INTO sqlite_sequence (name, seq) ' query = 'UPDATE books SET _id = ? WHERE file_path = ?'
'SELECT ?, ? ' t = (db_books[book], book,)
'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)'); cursor.execute(query, t)
cursor.execute(query, ('books',sdcard_sequence_start,'books',))
cursor.execute(query, ('collection',sdcard_sequence_start,'collection',)) # Fix any references so that they point back to the right book
cursor.execute(query, ('collections',sdcard_sequence_start,'collections',)) t = (db_books[book], bookId,)
query = 'UPDATE collections SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE annotation SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE bookmark SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE current_position SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE deleted_markups SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE dic_histories SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE freehand SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE history SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE layout_cache SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE preference SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
self.set_database_sequence_id(connection, 'books', sequence_max)
debug_print("Book Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
cursor.close()
return db_books
def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS]
db_books = self.read_device_books(connection, source_id, dbpath)
cursor = connection.cursor()
for book in booklist: for book in booklist:
# Run through plugboard if needed # Run through plugboard if needed
@ -365,10 +480,10 @@ class PRST1(USBMS):
modified_date, lpath, modified_date, lpath,
os.path.basename(lpath), book.size, book.mime) os.path.basename(lpath), book.size, book.mime)
cursor.execute(query, t) cursor.execute(query, t)
book.bookId = cursor.lastrowid book.bookId = self.get_lastrowid(cursor)
if upload_covers: if upload_covers:
self.upload_book_cover(connection, book, source_id) self.upload_book_cover(connection, book, source_id)
debug_print('Inserted New Book: ' + book.title) debug_print('Inserted New Book: (%u) '%book.bookId + book.title)
else: else:
query = ''' query = '''
UPDATE books UPDATE books
@ -400,26 +515,111 @@ class PRST1(USBMS):
connection.commit() connection.commit()
cursor.close() cursor.close()
def update_device_collections(self, connection, booklist, collections, def read_device_collections(self, connection, source_id, dbpath):
source_id): from sqlite3 import DatabaseError
cursor = connection.cursor()
sequence_min = self.get_database_min_id(source_id)
sequence_max = sequence_min
sequence_dirty = 0
debug_print("Collection Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
try:
cursor = connection.cursor()
if collections:
# Get existing collections # Get existing collections
query = 'SELECT _id, title FROM collection' query = 'SELECT _id, title FROM collection'
cursor.execute(query) cursor.execute(query)
except DatabaseError:
import traceback
tb = traceback.format_exc()
raise DeviceError((('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb)
db_collections = {} db_collections = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
db_collections[row[1]] = row[0] db_collections[row[1]] = row[0]
if row[0] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[0])
# If the database is 'dirty', then we should fix up the Ids and the sequence number
if sequence_dirty == 1:
debug_print("Collection Sequence Dirty for Source Id: %d"%source_id)
sequence_max = sequence_max + 1
for collection, collectionId in db_collections.items():
if collectionId < sequence_min:
# Record the new Id and write it to the DB
db_collections[collection] = sequence_max
sequence_max = sequence_max + 1
# Fix the collection DB
query = 'UPDATE collection SET _id = ? WHERE title = ?'
t = (db_collections[collection], collection, )
cursor.execute(query, t)
# Fix any references in existing collections
query = 'UPDATE collections SET collection_id = ? WHERE collection_id = ?'
t = (db_collections[collection], collectionId,)
cursor.execute(query, t)
self.set_database_sequence_id(connection, 'collection', sequence_max)
debug_print("Collection Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
# Fix up the collections table now...
sequence_dirty = 0
sequence_max = sequence_min
debug_print("Collections Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
query = 'SELECT _id FROM collections'
cursor.execute(query)
db_collection_pairs = []
for i, row in enumerate(cursor):
db_collection_pairs.append(row[0])
if row[0] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[0])
if sequence_dirty == 1:
debug_print("Collections Sequence Dirty for Source Id: %d"%source_id)
sequence_max = sequence_max + 1
for pairId in db_collection_pairs:
if pairId < sequence_min:
# Record the new Id and write it to the DB
query = 'UPDATE collections SET _id = ? WHERE _id = ?'
t = (sequence_max, pairId,)
cursor.execute(query, t)
sequence_max = sequence_max + 1
self.set_database_sequence_id(connection, 'collections', sequence_max)
debug_print("Collections Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
cursor.close()
return db_collections
def update_device_collections(self, connection, booklist, collections,
source_id, dbpath):
if collections:
db_collections = self.read_device_collections(connection, source_id, dbpath)
cursor = connection.cursor()
for collection, books in collections.items(): for collection, books in collections.items():
if collection not in db_collections: if collection not in db_collections:
query = 'INSERT INTO collection (title, source_id) VALUES (?,?)' query = 'INSERT INTO collection (title, source_id) VALUES (?,?)'
t = (collection, source_id) t = (collection, source_id)
cursor.execute(query, t) cursor.execute(query, t)
db_collections[collection] = cursor.lastrowid db_collections[collection] = self.get_lastrowid(cursor)
debug_print('Inserted New Collection: ' + collection) debug_print('Inserted New Collection: (%u) '%db_collections[collection] + collection)
# Get existing books in collection # Get existing books in collection
query = ''' query = '''
@ -483,9 +683,8 @@ class PRST1(USBMS):
cursor.execute(query, t) cursor.execute(query, t)
debug_print('Deleted Collection: ' + collection) debug_print('Deleted Collection: ' + collection)
connection.commit()
connection.commit() cursor.close()
cursor.close()
def rebuild_collections(self, booklist, oncard): def rebuild_collections(self, booklist, oncard):
debug_print('PRST1: starting rebuild_collections') debug_print('PRST1: starting rebuild_collections')

View File

@ -30,6 +30,8 @@ class DeviceConfig(object):
SUPPORTS_SUB_DIRS = False SUPPORTS_SUB_DIRS = False
SUPPORTS_SUB_DIRS_FOR_SCAN = False # This setting is used when scanning for SUPPORTS_SUB_DIRS_FOR_SCAN = False # This setting is used when scanning for
# books when SUPPORTS_SUB_DIRS is False # books when SUPPORTS_SUB_DIRS is False
SUPPORTS_SUB_DIRS_DEFAULT = True
MUST_READ_METADATA = False MUST_READ_METADATA = False
SUPPORTS_USE_AUTHOR_SORT = False SUPPORTS_USE_AUTHOR_SORT = False
@ -57,7 +59,7 @@ class DeviceConfig(object):
c = Config('device_drivers_%s' % name, _('settings for device drivers')) c = Config('device_drivers_%s' % name, _('settings for device drivers'))
c.add_opt('format_map', default=cls.FORMATS, c.add_opt('format_map', default=cls.FORMATS,
help=_('Ordered list of formats the device will accept')) help=_('Ordered list of formats the device will accept'))
c.add_opt('use_subdirs', default=True, c.add_opt('use_subdirs', default=cls.SUPPORTS_SUB_DIRS_DEFAULT,
help=_('Place files in sub directories if the device supports them')) help=_('Place files in sub directories if the device supports them'))
c.add_opt('read_metadata', default=True, c.add_opt('read_metadata', default=True,
help=_('Read metadata from files on device')) help=_('Read metadata from files on device'))

View File

@ -382,7 +382,8 @@ class USBMS(CLI, Device):
os.makedirs(self.normalize_path(self._main_prefix)) os.makedirs(self.normalize_path(self._main_prefix))
def write_prefix(prefix, listid): def write_prefix(prefix, listid):
if prefix is not None and isinstance(booklists[listid], self.booklist_class): if (prefix is not None and len(booklists) > listid and
isinstance(booklists[listid], self.booklist_class)):
if not os.path.exists(prefix): if not os.path.exists(prefix):
os.makedirs(self.normalize_path(prefix)) os.makedirs(self.normalize_path(prefix))
with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f: with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:

View File

@ -8,6 +8,8 @@ from itertools import cycle
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
class EPUBInput(InputFormatPlugin): class EPUBInput(InputFormatPlugin):
name = 'EPUB Input' name = 'EPUB Input'
@ -18,18 +20,24 @@ class EPUBInput(InputFormatPlugin):
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
def decrypt_font(self, key, path): def decrypt_font(self, key, path, algorithm):
raw = open(path, 'rb').read() is_adobe = algorithm == ADOBE_OBFUSCATION
crypt = raw[:1024] crypt_len = 1024 if is_adobe else 1040
key = cycle(iter(key)) with open(path, 'rb') as f:
decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt]) raw = f.read()
crypt = bytearray(raw[:crypt_len])
key = cycle(iter(bytearray(key)))
decrypt = bytes(bytearray(x^key.next() for x in crypt))
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(decrypt) f.write(decrypt)
f.write(raw[1024:]) f.write(raw[crypt_len:])
def process_encryption(self, encfile, opf, log): def process_encryption(self, encfile, opf, log):
from lxml import etree from lxml import etree
import uuid import uuid, hashlib
idpf_key = opf.unique_identifier
if idpf_key:
idpf_key = hashlib.sha1(idpf_key).digest()
key = None key = None
for item in opf.identifier_iter(): for item in opf.identifier_iter():
scheme = None scheme = None
@ -39,8 +47,8 @@ class EPUBInput(InputFormatPlugin):
if (scheme and scheme.lower() == 'uuid') or \ if (scheme and scheme.lower() == 'uuid') or \
(item.text and item.text.startswith('urn:uuid:')): (item.text and item.text.startswith('urn:uuid:')):
try: try:
key = str(item.text).rpartition(':')[-1] key = bytes(item.text).rpartition(':')[-1]
key = list(map(ord, uuid.UUID(key).bytes)) key = uuid.UUID(key).bytes
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -50,14 +58,16 @@ class EPUBInput(InputFormatPlugin):
root = etree.parse(encfile) root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'): for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
algorithm = em.get('Algorithm', '') algorithm = em.get('Algorithm', '')
if algorithm != 'http://ns.adobe.com/pdf/enc#RC': if algorithm not in {ADOBE_OBFUSCATION,
'http://www.idpf.org/2008/embedding'}:
return False return False
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0] cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
uri = cr.get('URI') uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/'))) path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
if key is not None and os.path.exists(path): tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
if (tkey and os.path.exists(path)):
self._encrypted_font_uris.append(uri) self._encrypted_font_uris.append(uri)
self.decrypt_font(key, path) self.decrypt_font(tkey, path, algorithm)
return True return True
except: except:
import traceback import traceback

View File

@ -223,6 +223,8 @@ class MOBIOutput(OutputFormatPlugin):
else: else:
# Add rasterized SVG images # Add rasterized SVG images
resources.add_extra_images() resources.add_extra_images()
if hasattr(self.oeb, 'inserted_metadata_jacket'):
self.workaround_fire_bugs(self.oeb.inserted_metadata_jacket)
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts) mobimlizer(oeb, opts)
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
@ -236,6 +238,18 @@ class MOBIOutput(OutputFormatPlugin):
from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
CSSCleanup(log, opts)(item, stylizer) CSSCleanup(log, opts)(item, stylizer)
def workaround_fire_bugs(self, jacket):
# The idiotic Fire crashes when trying to render the table used to
# layout the jacket
from calibre.ebooks.oeb.base import XHTML
for table in jacket.data.xpath('//*[local-name()="table"]'):
table.tag = XHTML('div')
for tr in table.xpath('descendant::*[local-name()="tr"]'):
cols = tr.xpath('descendant::*[local-name()="td"]')
tr.tag = XHTML('div')
for td in cols:
td.tag = XHTML('span' if cols else 'div')
class AZW3Output(OutputFormatPlugin): class AZW3Output(OutputFormatPlugin):
name = 'AZW3 Output' name = 'AZW3 Output'

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from lxml import etree
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.zipfile import ZipFile
from calibre.utils.magick.draw import identify_data
from calibre.ebooks.oeb.base import DC11_NS
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
NSMAP = {'dc':DC11_NS,
'cp':'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
def XPath(expr):
return etree.XPath(expr, namespaces=NSMAP)
def _read_doc_props(raw, mi):
    """Populate *mi* (a Metadata object) from the raw XML bytes of a
    DOCX docProps/core.xml part: title, tags, authors and comments."""
    from calibre.ebooks.metadata import string_to_authors
    root = etree.fromstring(raw, parser=RECOVER_PARSER)

    # Title: the first non-blank dc:title wins
    title_elems = XPath('//dc:title')(root)
    if title_elems:
        text = title_elems[0].text
        if text and text.strip():
            mi.title = text.strip()

    # Tags come from dc:subject elements plus the whitespace/comma
    # separated cp:keywords field. Commas inside a subject are mapped
    # to underscores so the tag survives comma-separated storage.
    collected = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            collected.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for chunk in keywords.text.split():
                collected.extend(part.strip() for part in chunk.split(','))
    if collected:
        mi.tags = collected

    # Authors: every non-blank dc:creator, split via calibre's
    # author-string parser
    author_list = []
    for creator in XPath('//dc:creator')(root):
        if creator.text and creator.text.strip():
            author_list.extend(string_to_authors(creator.text))
    if author_list:
        mi.authors = author_list

    # Comments: the text content of the first dc:description, if any
    descriptions = XPath('//dc:description')(root)
    if descriptions:
        mi.comments = etree.tostring(descriptions[0], method='text',
                encoding=unicode)
def _read_app_props(raw, mi):
    """Read a DOCX docProps/app.xml part: the Company field, when
    present and non-blank, becomes the publisher on *mi*."""
    root = etree.fromstring(raw, parser=RECOVER_PARSER)
    matches = root.xpath('//*[local-name()="Company"]')
    if matches:
        text = matches[0].text
        if text and text.strip():
            mi.publisher = text.strip()
def get_metadata(stream):
    """Return a Metadata object for the DOCX file open in *stream*.

    Title/authors/tags/comments are read from docProps/core.xml and
    the publisher from docProps/app.xml. The first embedded raster
    image with roughly cover-like proportions (aspect ratio 0.8-1.8,
    area >= 12000 px) is used as the cover.
    """
    with ZipFile(stream, 'r') as zf:
        mi = Metadata(_('Unknown'))
        cdata = None
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if zi.filename.lower() == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif zi.filename.lower() == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except:
                    continue
                # Fix: a corrupt image reporting a zero/negative width
                # previously raised an uncaught ZeroDivisionError here,
                # since the division happens outside the try block.
                if width <= 0 or height <= 0:
                    continue
                if 0.8 <= height/width <= 1.8 and height*width >= 12000:
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata
    return mi
if __name__ == '__main__':
    # Quick manual check: print the metadata of the DOCX file given
    # on the command line
    import sys
    with open(sys.argv[-1], 'rb') as stream:
        print(get_metadata(stream))

View File

@ -991,6 +991,21 @@ class OPF(object): # {{{
for item in self.identifier_path(self.metadata): for item in self.identifier_path(self.metadata):
yield item yield item
@property
def unique_identifier(self):
    # The package element's unique-identifier attribute names the id
    # of the element holding the book's unique identifier. Return that
    # element's text with any scheme prefix (e.g. 'urn:uuid:')
    # stripped, or None when it cannot be located.
    ident_id = None
    for attr in self.root.attrib:
        if attr.endswith('unique-identifier'):
            ident_id = self.root.attrib[attr]
            break
    if not ident_id:
        return None
    for elem in self.root.xpath('//*[@id=%r]' % ident_id):
        text = elem.text
        if text:
            return text.rpartition(':')[-1]
    return None
def guess_cover(self): def guess_cover(self):
''' '''
Try to guess a cover. Needed for some old/badly formed OPF files. Try to guess a cover. Needed for some old/badly formed OPF files.

View File

@ -338,8 +338,15 @@ class OEBReader(object):
href = elem.get('href') href = elem.get('href')
path = urlnormalize(urldefrag(href)[0]) path = urlnormalize(urldefrag(href)[0])
if path not in manifest.hrefs: if path not in manifest.hrefs:
self.logger.warn(u'Guide reference %r not found' % href) corrected_href = None
continue for href in manifest.hrefs:
if href.lower() == path.lower():
corrected_href = href
break
if corrected_href is None:
self.logger.warn(u'Guide reference %r not found' % href)
continue
href = corrected_href
guide.add(elem.get('type'), elem.get('title'), href) guide.add(elem.get('type'), elem.get('title'), href)
def _find_ncx(self, opf): def _find_ncx(self, opf):

View File

@ -15,10 +15,10 @@ class Clean(object):
if 'cover' not in self.oeb.guide: if 'cover' not in self.oeb.guide:
covers = [] covers = []
for x in ('other.ms-coverimage-standard', for x in ('other.ms-coverimage-standard', 'coverimagestandard',
'other.ms-titleimage-standard', 'other.ms-titleimage', 'other.ms-titleimage-standard', 'other.ms-titleimage',
'other.ms-coverimage', 'other.ms-thumbimage-standard', 'other.ms-coverimage', 'other.ms-thumbimage-standard',
'other.ms-thumbimage'): 'other.ms-thumbimage', 'thumbimagestandard'):
if x in self.oeb.guide: if x in self.oeb.guide:
href = self.oeb.guide[x].href href = self.oeb.guide[x].href
item = self.oeb.manifest.hrefs[href] item = self.oeb.manifest.hrefs[href]

View File

@ -72,6 +72,7 @@ class Jacket(object):
item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
self.oeb.spine.insert(0, item, True) self.oeb.spine.insert(0, item, True)
self.oeb.inserted_metadata_jacket = item
def remove_existing_jacket(self): def remove_existing_jacket(self):
for x in self.oeb.spine[:4]: for x in self.oeb.spine[:4]:

View File

@ -46,6 +46,7 @@ class SVGRasterizer(object):
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...') oeb.logger.info('Rasterizing SVG images...')
self.stylizer_cache = {}
self.oeb = oeb self.oeb = oeb
self.opts = context self.opts = context
self.profile = context.dest self.profile = context.dest
@ -116,29 +117,35 @@ class SVGRasterizer(object):
elem.attrib[XLINK('href')] = data elem.attrib[XLINK('href')] = data
return svg return svg
def stylizer(self, item):
ans = self.stylizer_cache.get(item, None)
if ans is None:
ans = Stylizer(item.data, item.href, self.oeb, self.opts,
self.profile)
self.stylizer_cache[item] = ans
return ans
def rasterize_spine(self): def rasterize_spine(self):
for item in self.oeb.spine: for item in self.oeb.spine:
html = item.data self.rasterize_item(item)
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.rasterize_item(item, stylizer)
def rasterize_item(self, item, stylizer): def rasterize_item(self, item):
html = item.data html = item.data
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
for elem in xpath(html, '//h:img[@src]'): for elem in xpath(html, '//h:img[@src]'):
src = urlnormalize(elem.attrib['src']) src = urlnormalize(elem.attrib['src'])
image = hrefs.get(item.abshref(src), None) image = hrefs.get(item.abshref(src), None)
if image and image.media_type == SVG_MIME: if image and image.media_type == SVG_MIME:
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_external(elem, style, item, image) self.rasterize_external(elem, style, item, image)
for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME): for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME):
data = urlnormalize(elem.attrib['data']) data = urlnormalize(elem.attrib['data'])
image = hrefs.get(item.abshref(data), None) image = hrefs.get(item.abshref(data), None)
if image and image.media_type == SVG_MIME: if image and image.media_type == SVG_MIME:
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_external(elem, style, item, image) self.rasterize_external(elem, style, item, image)
for elem in xpath(html, '//svg:svg'): for elem in xpath(html, '//svg:svg'):
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_inline(elem, style, item) self.rasterize_inline(elem, style, item)
def rasterize_inline(self, elem, style, item): def rasterize_inline(self, elem, style, item):

View File

@ -13,7 +13,7 @@ from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500' APP_UID = 'libprs500'
from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx, from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
config_dir) config_dir, filesystem_encoding)
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.date import UNDEFINED_DATE from calibre.utils.date import UNDEFINED_DATE
@ -742,6 +742,35 @@ class Application(QApplication):
'pyd' if iswindows else 'so')) 'pyd' if iswindows else 'so'))
pi.load_style(path, 'Calibre') pi.load_style(path, 'Calibre')
self.setPalette(orig_pal) self.setPalette(orig_pal)
style = self.style()
icon_map = {}
pcache = {}
for k, v in {
'DialogYesButton': u'ok.png',
'DialogNoButton': u'window-close.png',
'DialogCloseButton': u'window-close.png',
'DialogOkButton': u'ok.png',
'DialogCancelButton': u'window-close.png',
'DialogHelpButton': u'help.png',
'DialogOpenButton': u'document_open.png',
'DialogSaveButton': u'save.png',
'DialogApplyButton': u'ok.png',
'DialogDiscardButton': u'trash.png',
'MessageBoxInformation': u'dialog_information.png',
'MessageBoxWarning': u'dialog_warning.png',
'MessageBoxCritical': u'dialog_error.png',
'MessageBoxQuestion': u'dialog_question.png',
}.iteritems():
if v not in pcache:
p = I(v)
if isinstance(p, bytes):
p = p.decode(filesystem_encoding)
# if not os.path.exists(p): raise ValueError(p)
pcache[v] = p
v = pcache[v]
icon_map[type('')(getattr(style, 'SP_'+k))] = v
style.setProperty(u'calibre_icon_map', icon_map)
self.__icon_map_memory_ = icon_map
def setup_styles(self, force_calibre_style): def setup_styles(self, force_calibre_style):
self.original_font = QFont(QApplication.font()) self.original_font = QFont(QApplication.font())

View File

@ -100,19 +100,6 @@ class MetadataSingleDialogBase(ResizableDialog):
geom = gprefs.get('metasingle_window_geometry3', None) geom = gprefs.get('metasingle_window_geometry3', None)
if geom is not None: if geom is not None:
self.restoreGeometry(bytes(geom)) self.restoreGeometry(bytes(geom))
self.title.resizeEvent = self.fix_push_buttons
def fix_push_buttons(self, *args):
# Ensure all PushButtons stay the same consistent height throughout this
# dialog. Without this, the buttons inside scrollareas get shrunk,
# while the buttons outside them do not, leading to weirdness.
# Further, buttons with and without icons have different minimum sizes
# so things look even more out of whack.
ht = self.title.height() + 2
for but in self.findChildren(QPushButton):
but.setMaximumHeight(ht)
but.setMinimumHeight(ht)
return TitleEdit.resizeEvent(self.title, *args)
# }}} # }}}
def create_basic_metadata_widgets(self): # {{{ def create_basic_metadata_widgets(self): # {{{
@ -525,6 +512,8 @@ class MetadataSingleDialogBase(ResizableDialog):
' [Alt+Left]')%prev ' [Alt+Left]')%prev
self.prev_button.setToolTip(tip) self.prev_button.setToolTip(tip)
self.prev_button.setEnabled(prev is not None) self.prev_button.setEnabled(prev is not None)
self.button_box.button(self.button_box.Ok).setDefault(True)
self.button_box.button(self.button_box.Ok).setFocus(Qt.OtherFocusReason)
self(self.db.id(self.row_list[self.current_row])) self(self.db.id(self.row_list[self.current_row]))
def break_cycles(self): def break_cycles(self):
@ -993,7 +982,7 @@ def edit_metadata(db, row_list, current_row, parent=None, view_slot=None,
return d.changed, d.rows_to_refresh return d.changed, d.rows_to_refresh
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from calibre.gui2 import Application as QApplication
app = QApplication([]) app = QApplication([])
from calibre.library import db as db_ from calibre.library import db as db_
db = db_() db = db_()

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog, from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog,
QAbstractListModel, Qt, QIcon, QKeySequence, QStyleFactory) QAbstractListModel, Qt, QIcon, QKeySequence)
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form from calibre.gui2.preferences.look_feel_ui import Ui_Form
@ -104,11 +104,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('widget_style', gprefs, restart_required=True, choices= r('widget_style', gprefs, restart_required=True, choices=
[(_('System default'), 'system'), (_('Calibre style'), [(_('System default'), 'system'), (_('Calibre style'),
'calibre')]) 'calibre')])
styles = set(map(unicode, QStyleFactory.keys()))
if 'QtCurve' not in styles:
# Can happen in linux
for x in ('opt', 'label'):
getattr(self, x+'_widget_style').setVisible(False)
r('cover_flow_queue_length', config, restart_required=True) r('cover_flow_queue_length', config, restart_required=True)

View File

@ -206,12 +206,12 @@ class Preferences(QMainWindow):
self.cw.layout().addWidget(self.stack) self.cw.layout().addWidget(self.stack)
self.bb = QDialogButtonBox(QDialogButtonBox.Close) self.bb = QDialogButtonBox(QDialogButtonBox.Close)
self.wizard_button = self.bb.addButton(_('Run welcome wizard'), self.wizard_button = self.bb.addButton(_('Run welcome wizard'),
self.bb.DestructiveRole) self.bb.ActionRole)
self.wizard_button.setIcon(QIcon(I('wizard.png'))) self.wizard_button.setIcon(QIcon(I('wizard.png')))
self.wizard_button.clicked.connect(self.run_wizard, self.wizard_button.clicked.connect(self.run_wizard,
type=Qt.QueuedConnection) type=Qt.QueuedConnection)
self.bb.button(self.bb.Close).setDefault(True)
self.cw.layout().addWidget(self.bb) self.cw.layout().addWidget(self.bb)
self.bb.button(self.bb.Close).setDefault(True)
self.bb.rejected.connect(self.close, type=Qt.QueuedConnection) self.bb.rejected.connect(self.close, type=Qt.QueuedConnection)
self.setCentralWidget(self.cw) self.setCentralWidget(self.cw)
self.browser = Browser(self) self.browser = Browser(self)
@ -381,8 +381,8 @@ class Preferences(QMainWindow):
return QMainWindow.closeEvent(self, *args) return QMainWindow.closeEvent(self, *args)
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from calibre.gui2 import Application
app = QApplication([]) app = Application([])
app app
gui = init_gui() gui = init_gui()

View File

@ -42,7 +42,7 @@ class ProceedQuestion(QDialog):
ic.setMaximumHeight(100) ic.setMaximumHeight(100)
ic.setScaledContents(True) ic.setScaledContents(True)
ic.setStyleSheet('QLabel { margin-right: 10px }') ic.setStyleSheet('QLabel { margin-right: 10px }')
self.bb = QDialogButtonBox(QDialogButtonBox.Yes|QDialogButtonBox.No) self.bb = QDialogButtonBox()
self.bb.accepted.connect(self.accept) self.bb.accepted.connect(self.accept)
self.bb.rejected.connect(self.reject) self.bb.rejected.connect(self.reject)
self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole) self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
@ -59,6 +59,7 @@ class ProceedQuestion(QDialog):
_('Show detailed information about this error')) _('Show detailed information about this error'))
self.det_msg = QPlainTextEdit(self) self.det_msg = QPlainTextEdit(self)
self.det_msg.setReadOnly(True) self.det_msg.setReadOnly(True)
self.bb.setStandardButtons(self.bb.Yes|self.bb.No)
self.bb.button(self.bb.Yes).setDefault(True) self.bb.button(self.bb.Yes).setDefault(True)
l.addWidget(ic, 0, 0, 1, 1) l.addWidget(ic, 0, 0, 1, 1)
@ -121,10 +122,10 @@ class ProceedQuestion(QDialog):
self.det_msg.setVisible(False) self.det_msg.setVisible(False)
self.det_msg_toggle.setVisible(bool(question.det_msg)) self.det_msg_toggle.setVisible(bool(question.det_msg))
self.det_msg_toggle.setText(self.show_det_msg) self.det_msg_toggle.setText(self.show_det_msg)
self.bb.button(self.bb.Yes).setDefault(True)
self.do_resize() self.do_resize()
self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
self.show() self.show()
self.bb.button(self.bb.Yes).setDefault(True)
self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
def __call__(self, callback, payload, html_log, log_viewer_title, title, def __call__(self, callback, payload, html_log, log_viewer_title, title,
msg, det_msg='', show_copy_button=False, cancel_callback=None, msg, det_msg='', show_copy_button=False, cancel_callback=None,
@ -164,7 +165,14 @@ class ProceedQuestion(QDialog):
self.log_viewer = ViewLog(q.log_viewer_title, log, self.log_viewer = ViewLog(q.log_viewer_title, log,
parent=self) parent=self)
if __name__ == '__main__': def main():
app = QApplication([]) from calibre.gui2 import Application
ProceedQuestion(None).exec_() app = Application([])
p = ProceedQuestion(None)
p(lambda p:None, None, 'ass', 'ass', 'testing', 'testing')
p.exec_()
app
if __name__ == '__main__':
main()

View File

@ -104,7 +104,7 @@ class TagsView(QTreeView): # {{{
self.setStyleSheet(''' self.setStyleSheet('''
QTreeView { QTreeView {
background-color: palette(window); background-color: palette(window);
color: palette(text); color: palette(window-text);
border: none; border: none;
} }
@ -117,7 +117,7 @@ class TagsView(QTreeView): # {{{
QTreeView::item:hover { QTreeView::item:hover {
background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1); background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1);
border: 1px solid #bfcde4; border: 1px solid #bfcde4;
border-radius: 8px; border-radius: 6px;
} }
''') ''')

View File

@ -20,7 +20,7 @@ class TOCView(QTreeView):
self.setStyleSheet(''' self.setStyleSheet('''
QTreeView { QTreeView {
background-color: palette(window); background-color: palette(window);
color: palette(text); color: palette(window-text);
border: none; border: none;
} }
QTreeView::item { QTreeView::item {
@ -32,7 +32,7 @@ class TOCView(QTreeView):
QTreeView::item:hover { QTreeView::item:hover {
background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1); background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1);
border: 1px solid #bfcde4; border: 1px solid #bfcde4;
border-radius: 8px; border-radius: 6px;
} }
QHeaderView::section { QHeaderView::section {
background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1, background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More