[Sync] Sync with trunk, revision 6907

Changelog.yaml (184 changed lines)
@@ -4,6 +4,190 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 
+- version: 0.7.28
+  date: 2010-11-12
+
+  new features:
+    - title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
+
+    - title: "Driver for Nook Color, Eken M001"
+
+    - title: "Add a tweak to turn off double clicking to open viewer"
+
+    - title: "Catalog generation: Add indication when a book has no formats"
+      tickets: [7376]
+
+    - title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
+
+    - title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
+
+  bug fixes:
+    - title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
+      tickets: [7488]
+
+    - title: "Bulk convert dialog: Hide useless restore defaults button."
+      tickets: [7471]
+
+    - title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
+      tickets: [7355]
+
+    - title: "Fix some SONY readers not being detected on windows"
+      tickets: [7413]
+
+    - title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
+      tickets: [7455]
+
+    - title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
+      tickets: [7506]
+
+    - title: "Sony driver: Ignore invalid strings when updating XML database"
+
+    - title: "Content Server: Add day to displayed date in /mobile book listing"
+
+    - title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
+      tickets: [7481]
+
+    - title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
+      tickets: [7472]
+
+    - title: "Fix not enough vertical space for text in the preferences dialog category listing"
+
+    - title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
+
+    - title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
+
+    - title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
+
+    - title: "Fix bug in regex used to extract charset from <meta> tags"
+
+    - title: "MOBI Output: Add support for the <q> tag"
+
+  improved recipes:
+    - Zeit Online
+    - Gamespot Review
+    - Politika
+    - Pagina12
+    - Irish Times
+    - elektrolese
+
+  new recipes:
+    - title: "Handelsblatt and European Voice"
+      author: "malfi"
+
+    - title: "Polityka and Newsweek"
+      author: "Mateusz Kielar"
+
+    - title: "MarcTV"
+      author: "Marc Toensing"
+
+    - title: "Rolling Stone"
+      author: "Darko Miletic"
+
+    - title: "Vedomosti"
+      author: "Nikolai Kotchetkov"
+
+    - title: "Hola.com"
+      author: "bmsleight"
+
+    - title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazin"
+      author: "BlonG"
+
+    - title: "SC Print Magazine"
+      author: "Tony Maro"
+
+    - title: "Diario Sport"
+      author: "Jefferson Frantz"
+
+- version: 0.7.27
+  date: 2010-11-05
+
+  new features:
+    - title: "The book list behavior has changed"
+      type: major
+      description: >
+       "Now double clicking on an entry in the book list will open it in the viewer. To edit metadata, single click a previously selected entry instead. This is consistent with
+       the usage in most operating systems, so should be most intuitive for new users. Also, typing any key no longer starts an edit; instead press F2 (Enter on OS X) to start editing
+       the current cell. And you now have to double click instead of single clicking the book details panel to open the detailed info dialog for the book."
+
+    - title: "Added a new HTML output format plugin, which converts the input document to a ZIP file. The zip file contains HTML pages suitable for display in a website"
+
+    - title: "Support for iRiver Cover Story and Digma Q600"
+
+    - title: "Add a search button (labelled Go!) to explicitly run a search with the text currently in the quick search box"
+
+    - title: "Add author to the calibre generated book jacket"
+      tickets: [7402]
+
+    - title: "Add the title of the destination book to the merge dialog warning message"
+
+    - title: "calibre-server: Make auto reload control separate from --develop with a new command line option --auto-reload"
+
+  bug fixes:
+    - title: "Fix book details panel not being updated after a delete-merge"
+      tickets: [7426]
+
+    - title: "Fix clicking in the search box launching a search if you have search as you type enabled"
+      tickets: [7425]
+
+    - title: "Use a browser widget to display book details for more robustness and better performance when viewing large HTML comments"
+
+    - title: "Fix cover browser not updated after copy to library and delete"
+      tickets: [7416]
+
+    - title: "Fix regression that broke sending non calibre EPUB files to the iPad. Also handle failure to set cover in iTunes gracefully"
+      tickets: [7356]
+
+    - title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
+      tickets: [7321]
+
+    - title: "Convert comments to HTML for the book details panel in a separate thread to make scrolling through the book list faster when large comments are present"
+
+    - title: "calibre-server: Fix regression that broke --daemonize"
+
+    - title: "EPUB Input: Handle ncx files that have <navpoint> elements with no content correctly."
+      tickets: [7396]
+
+    - title: "SNB Output: Fix a bug in handling the <pre> tag"
+
+    - title: "MOBI Output: Don't ignore hidden anchors."
+      tickets: [7384]
+
+    - title: "Fix a bug where switching libraries while generating a catalog could generate the catalog for the wrong library"
+
+    - title: "MOBI Output: Fix regression that broke conversion of anchors inside superscripts/subscripts."
+      tickets: [7368]
+
+    - title: "Content server: Fix various minor bugs"
+      tickets: [7379, 6768, 7354]
+
+    - title: "Amazon metadata download plugin: Make it more robust and add an option to auto convert HTML to text"
+
+    - title: "Re-arrange the send to device menu to make it harder to accidentally trigger the send and delete actions"
+
+  improved recipes:
+    - Danas
+    - Fudzilla
+    - Zeit Online
+    - New York Times
+    - Mediapart
+
+  new recipes:
+    - title: "Ynet and Calcalist"
+      author: "marbs"
+
+    - title: "El Faro de Vigo"
+      author: "Jefferson Frantz"
+
+    - title: "Clic_RBS"
+      author: "avoredo"
+
+    - title: "Correio da Manha"
+      author: "jmst"
+
+    - title: "Rue89"
+      author: "Louis Gesbert"
+
 - version: 0.7.26
   date: 2010-10-30
 
@@ -81,6 +81,14 @@ p.unread_book {
     text-indent:-2em;
 }
 
+p.wishlist_item {
+    text-align:left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:2em;
+    text-indent:-2em;
+}
+
 p.date_read {
     text-align:left;
     margin-top:0px;
@@ -104,3 +112,14 @@ hr.annotations_divider {
     margin-top:0em;
     margin-bottom:0em;
 }
+
+td.publisher, td.date {
+    font-weight:bold;
+    text-align:center;
+}
+td.rating {
+    text-align: center;
+}
+td.thumbnail img {
+    -webkit-box-shadow: 6px 6px 6px #888;
+}
@@ -4,7 +4,7 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<title>..:: calibre library ::.. {title}</title>
+<title>..:: calibre {library} ::.. {title}</title>
 <meta http-equiv="X-UA-Compatible" content="IE=100" />
 <link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
 
@@ -41,7 +41,7 @@
 <div class="area">
     <div class="bubble">
     <p><a href="{prefix}/browse" title="Return to top level"
-    >→ home ←</a></p>
+    >→ {home} ←</a></p>
     </div>
 </div>
 <div id="nav-container">
@@ -80,7 +80,7 @@
 <form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
     <input value="{initial_search}" type="text" title="Search" name="query"
         class="search_input" />
-    <input type="submit" value="Search" title="Search" alt="Search" />
+    <input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
 </form>
 </div>
 <div> </div>
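The three template hunks above swap hard-coded UI strings for {library}, {home} and {Search} placeholders, presumably so the server can substitute localized values when it renders the page. A minimal sketch of that kind of substitution, assuming Python str.format-style templating; the translations dict and the German string are hypothetical, not calibre's actual rendering code:

# Hypothetical illustration of filling the new placeholders.
TEMPLATE = '<input type="submit" value="{Search}" title="{Search}" alt="{Search}" />'
translations = {'Search': 'Suche'}  # hypothetical localized UI string
print(TEMPLATE.format(**translations))
# <input type="submit" value="Suche" title="Suche" alt="Suche" />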
@@ -211,3 +211,9 @@ generate_cover_title_font = None
 # Absolute path to a TTF font file to use as the font for the footer in the
 # default cover
 generate_cover_foot_font = None
+
+
+# Behavior of doubleclick on the books list. Choices:
+# open_viewer, do_nothing, edit_cell. Default: open_viewer.
+# Example: doubleclick_on_library_view = 'do_nothing'
+doubleclick_on_library_view = 'open_viewer'
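The new tweak above is what lets users turn off the double-click-to-open-viewer behavior introduced in 0.7.27. A minimal sketch of how consuming code might look it up, assuming calibre's tweaks mapping in calibre.utils.config; the dispatch below is illustrative, not the actual GUI code:

from calibre.utils.config import tweaks

# Read the tweak, falling back to the documented default.
action = tweaks.get('doubleclick_on_library_view', 'open_viewer')
if action == 'open_viewer':
    pass  # open the selected book in the viewer
elif action == 'edit_cell':
    pass  # begin editing the clicked cell
# 'do_nothing' deliberately falls through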
BIN  resources/images/format-text-bold.png (new file, 5.0 KiB)
BIN  resources/images/format-text-italic.png (new file, 4.1 KiB)
BIN  resources/images/format-text-strikethrough.png (new file, 5.9 KiB)
BIN  resources/images/format-text-underline.png (new file, 4.4 KiB)
BIN  resources/images/hotmail.png (new file, 2.6 KiB)
BIN  resources/images/news/avto-magazin.png (new file, 1.4 KiB)
BIN  resources/images/news/dnevnik.png (new file, 861 B)
BIN  resources/images/news/rollingstone.png (new file, 1.3 KiB)
BIN  resources/images/news/siol.png (new file, 423 B)
@@ -39,7 +39,16 @@
 .cbj_title {
     font-size: x-large;
     text-align: center;
 }
 
+/*
+** Author
+*/
+.cbj_author {
+    font-size: medium;
+    text-align: center;
+    margin-bottom: 1ex;
+}
+
 /*
 ** Table containing Series, Publication Year, Rating and Tags
 
@@ -7,6 +7,7 @@
 <body>
 <div class="cbj_banner">
     <div class="cbj_title">{title}</div>
+    <div class="cbj_author">{author}</div>
     <table class="cbj_header">
         <tr class="cbj_series">
             <td class="cbj_label">{series_label}:</td>
resources/recipes/avto-magazin.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
avto-magazin.si
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Avto Magazin'
    __author__ = u'BlonG'
    description = u'Za avtomobilisti\u010dne frike, poznavalce in nedeljske \u0161oferje.'
    oldest_article = 7
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {'linearize_tables': True}

    cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        # dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'voteConfirmation'}),
        dict(name='div', attrs={'id':'InsideVote'}),
        dict(name='div', attrs={'class':'Zone234'}),
        dict(name='div', attrs={'class':'Comments'}),
        dict(name='div', attrs={'class':'sorodneNovice'}),
        dict(name='div', attrs={'id':'footer'}),
    ]

    feeds = [
        (u'Novice', u'http://www.avto-magazin.si/rss/')
    ]
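The recipe above and the other new recipe files in this sync all share the same BasicNewsRecipe shape: declare metadata and feeds as class attributes, then whitelist and blacklist tags to isolate the article body. A minimal sketch of that shared skeleton; the site and selectors are invented for illustration:

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title          = u'Example Site'     # name shown in the calibre GUI
    language       = 'en'
    oldest_article = 7                   # days of history to download
    max_articles_per_feed = 20
    no_stylesheets = True                # drop the site's own CSS

    # keep only the article container; strip navigation, ads, comments
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags    = [dict(name='div', attrs={'class': 'comments'})]

    feeds = [(u'News', u'http://example.com/rss')]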
resources/recipes/calcalist.recipe (new file, 43 lines)
@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import re

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.'
    cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
    title = u'Calcalist'
    language = 'he'
    __author__ = 'marbs'
    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width']
    simultaneous_downloads = 5
    keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    max_articles_per_feed = 100
    preprocess_regexps = [
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
             (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
             (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
             (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
             (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
             (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
             (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
             (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
             (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
             (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
             (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
             (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
             (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]

    def print_version(self, url):
        split1 = url.split("-")
        print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
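The print_version above splices the article id that follows the first '-' in the feed URL into Calcalist's print-preview path. Traced on a hypothetical article URL:

url = 'http://www.calcalist.co.il/local/articles/0,7340,L-3481234,00.html'  # hypothetical
split1 = url.split('-')
print('http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1])
# http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-3481234,00.html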
resources/recipes/clic_rbs.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ClicRBS(BasicNewsRecipe):
    title = u'ClicRBS'
    language = 'pt'
    __author__ = 'arvoredo'
    oldest_article = 3
    max_articles_per_feed = 9
    cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'

    remove_tags = [
        dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
    ]

    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
    remove_tags_after = dict(name='div', attrs={'class':'extra'})
    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
    remove_tags_after = dict(name='ul', attrs={'class':'lista'})

    feeds = [
        (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
        , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
        , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
        , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
        , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
        , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
        , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
        , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
        , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
        , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
        , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
        , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
        , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
        , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
        , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
        , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
    ]

    extra_css = '''
        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-size:large; color:#082963; font-weight:bold;}
        #ident{color:#0179B4; font-size:xx-small;}
        p{color:#000000;font-weight:normal;}
        .commentario p{color:#007BB5; font-style:italic;}
    '''
resources/recipes/cm_journal.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe

class CMJornal_pt(BasicNewsRecipe):
    title = 'Correio da Manha - Portugal'
    __author__ = 'jmst'
    description = 'As noticias de Portugal e do Mundo'
    publisher = 'Cofina Media'
    category = ''
    oldest_article = 1
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'pt'
    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
        }

    keep_only_tags = [
          dict(name=['h2','h1'])
        , dict(name='div', attrs={'class': ['news']})
        ]

    remove_tags = [
          dict(name=['object','embed','iframe'])
        , dict(name='a', attrs={'href':['#']})
        ]

    feeds = [
          (u'Actualidade', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009')
        , (u'Portugal', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010')
        , (u'Economia', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011')
        , (u'Mundo', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091')
        , (u'Desporto', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012')
        , (u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
        ]

    def print_version(self, url):
        return url.replace('noticia.aspx', 'Imprimir.aspx')
@@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
     remove_empty_feeds = True
     extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                     @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-                    .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
+                    .article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
                     .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
                     .antrfileText{border-left: 2px solid #999999;
                     margin-left: 0.8em;
@@ -59,11 +59,14 @@ class Danas(BasicNewsRecipe):
                  ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark
                  ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark
                  ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark
+                 ,(re.compile(u'\u00f4'), lambda match: '“') # latin small letter o with circumflex
+                 ,(re.compile(u'\u00f6'), lambda match: '”') # latin small letter o with diaeresis
+                 ,(re.compile(u'\u00e1'), lambda match: ' ') # latin small letter a with acute
                  ]
 
     keep_only_tags = [dict(name='div', attrs={'id':'left'})]
     remove_tags = [
-                    dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
+                    dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
                     ,dict(name='div', attrs={'id':'comments'})
                     ,dict(name=['object','link','iframe','meta'])
                   ]
resources/recipes/deredactie.recipe (new file, 61 lines)
@@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class deredactie(BasicNewsRecipe):
    title = u'Deredactie.be'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
    language = 'de'
    keep_only_tags = []
    __author__ = 'malfi'
    keep_only_tags.append(dict(name='div', attrs={'id': 'articlehead'}))
    keep_only_tags.append(dict(name='div', attrs={'id': 'articlebody'}))
    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'story'}))
    remove_tags.append(dict(name='div', attrs={'id': 'useractions'}))
    remove_tags.append(dict(name='hr'))

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def parse_index(self):
        categories = []
        catnames = {}
        soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
        for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}):
            a = elem.find('a', href=True)
            m = re.search('(?<=/)[^/]*$', a['href'])
            cat = str(m.group(0))
            categories.append(cat)
            catnames[cat] = a['title']
            self.log("found cat %s\n" % catnames[cat])

        feeds = []

        for cat in categories:
            articles = []
            soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
            for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
                skip_this_article = False
                url = a['href'].strip()
                if url.startswith('/'):
                    url = 'http://www.deredactie.be' + url
                myarticle = ({'title': self.tag_to_string(a), 'url': url, 'description': '', 'date': ''})
                for article in articles:
                    if article['url'] == url:
                        skip_this_article = True
                        self.log("SKIPPING DUP %s" % url)
                        break
                if skip_this_article:
                    continue
                articles.append(myarticle)
                self.log("Adding URL %s\n" % url)
            if articles:
                feeds.append((catnames[cat], articles))
        return feeds
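The parse_index above rejects duplicate links with a linear scan over the articles collected so far; with many articles per category, a set of seen URLs performs the same check in constant time per lookup. A sketch of that alternative, not the committed code; 'candidates' stands in for the scraped links:

seen = set()
articles = []
for url, title in candidates:  # hypothetical (url, title) pairs
    if url in seen:
        continue               # same skip-duplicates behaviour as above
    seen.add(url)
    articles.append({'title': title, 'url': url, 'description': '', 'date': ''})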
resources/recipes/diario_sport.recipe (new file, 42 lines)
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class DiarioSport(BasicNewsRecipe):
    title = u'Diario Sport'
    oldest_article = 2
    max_articles_per_feed = 75
    __author__ = 'Jefferson Frantz'
    description = 'Todas las noticias del Barça y del mundo del deporte en general'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    no_stylesheets = True

    feeds = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]

    extra_css = '''
        h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
    '''

    keep_only_tags = [dict(name='div', attrs={'id':['noticiasMedio']})]

    remove_tags = [
        dict(name=['object','link','script','ul'])
        ,dict(name='div', attrs={'id':['scrAdSense','herramientas2','participacion','participacion2','bloque1resultados','bloque2resultados','cont_vinyetesAnt','tinta','noticiasSuperior','cintillopublicidad2']})
        ,dict(name='p', attrs={'class':['masinformacion','hora']})
        ,dict(name='a', attrs={'class':["'link'"]})
        ,dict(name='div', attrs={'class':['addthis_toolbox addthis_default_style','firma','pretitularnoticia']})
        ,dict(name='form', attrs={'id':['formularioDeBusquedaAvanzada']})
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def postprocess_html(self, soup, first_fetch):
        img = soup.find('img', src='/img/videos/mascaravideo.png')
        if img is not None:
            img.extract()

        return soup
resources/recipes/dnevnik.recipe (new file, 63 lines)
@@ -0,0 +1,63 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
dnevnik.si
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
        Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
        bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
        pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
        dru\u017ebe.'''
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'fb_article_top'}),
        dict(name='div', attrs={'class':'related'}),
        dict(name='div', attrs={'class':'fb_article_foot'}),
        dict(name='div', attrs={'class':'spreading'}),
        dict(name='dl', attrs={'class':'ad'}),
        dict(name='p', attrs={'class':'report'}),
        dict(name='div', attrs={'class':'hfeed comments'}),
        dict(name='dl', attrs={'id':'entryPanel'}),
        dict(name='dl', attrs={'class':'infopush ip_wide'}),
        dict(name='div', attrs={'class':'sidebar'}),
        dict(name='dl', attrs={'class':'bottom'}),
        dict(name='div', attrs={'id':'footer'}),
    ]

    feeds = [
        (u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13')
        ,(u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14')
        ,(u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116')
        ,(u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5')
        ,(u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15')
        ,(u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17')
        ,(u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18')
        ,(u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19')
        ,(u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20')
        ,(u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21')
        ,(u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
    ]
resources/recipes/el_faro.recipe (new file, 77 lines)
@@ -0,0 +1,77 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ElFaroDeVigo(BasicNewsRecipe):
    title = u'El Faro de Vigo'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Jefferson Frantz'
    description = 'Noticias de Vigo'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    encoding = 'cp1252'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]

    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
                   h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
                   h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
                   .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
                   .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']

        url = 'http://estaticos00.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos01.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos02.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()

        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        divs = soup.findAll(True, {'class':'enlacenegrita10'})
        for div in divs:
            div['align'] = 'left'

        return soup

    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]

    remove_tags = [
        dict(name=['object','link','script','ul','iframe','ol'])
        ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd', 'noticiadd2']})
        ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
    ]
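The preprocess_html in el_faro.recipe above repeats an identical find-and-extract block for the three estaticos00/01/02 hosts; the same logic can be written once as a loop. A sketch of the equivalent refactor, not the committed code:

def remove_share_bars(soup):
    for host in ('estaticos00', 'estaticos01', 'estaticos02'):
        url = ('http://%s.farodevigo.es//elementosWeb/mediaweb/'
               'images/compartir/barrapunto.gif' % host)
        fitem = soup.find('img', src=url)
        if fitem:
            fitem.parent.extract()  # drop the whole share-bar container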
@@ -1,38 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-'''
-Fetch elektrolese.
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class elektrolese(BasicNewsRecipe):
-
-    title = u'elektrolese'
-    description = 'News about electronic publishing'
-    __author__ = 'Oliver Niesner'
-    use_embedded_content = False
-    timefmt = ' [%a %d %b %Y]'
-    language = 'de'
-
-    oldest_article = 14
-    max_articles_per_feed = 50
-    no_stylesheets = True
-    conversion_options = {'linearize_tables': True}
-    encoding = 'utf-8'
-
-
-    remove_tags_after = [dict(id='comments')]
-    filter_regexps = [r'ad\.doubleclick\.net']
-
-    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
-                   dict(id='comments'),
-                   dict(id='Navbar1')]
-
-
-    feeds = [(u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss')]
resources/recipes/eu_commission.recipe (new file, 58 lines)
@@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe

LANGUAGE = 'de'

def feedlink(num):
    return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=' + \
        str(num) + '&lang=' + LANGUAGE

class EUCommissionPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
    __author__ = 'malfi'
    language = LANGUAGE
    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'class': 'pressReleaseContentMain'}))
    remove_tags = []

    feeds = [
        (u'Pressemitteilung des Tages', feedlink(64)),
        (u'Presidency', feedlink(137)),
        (u'Foreign affairs and security policy', feedlink(138)),
        (u'Agriculture and rural development', feedlink(139)),
        (u'Budget and financial programming', feedlink(140)),
        (u'Climate action', feedlink(141)),
        (u'Competition', feedlink(142)),
        (u'Development', feedlink(143)),
        (u'Digital agenda', feedlink(144)),
        (u'Economic and monetary affairs', feedlink(145)),
        (u'Education, culture, multilingualism and youth', feedlink(146)),
        (u'Employment, social Affairs and inclusion', feedlink(147)),
        (u'Energy', feedlink(148)),
        (u'Enlargement and European neighbourhood policy', feedlink(149)),
        (u'Environment', feedlink(150)),
        (u'Health and consumer policy', feedlink(151)),
        (u'Home affairs', feedlink(152)),
        (u'Industry and entrepreneurship', feedlink(153)),
        (u'Inter-Institutional relations and administration', feedlink(154)),
        (u'Internal market and services', feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response', feedlink(156)),
        (u'Justice, fundamental rights and citizenship', feedlink(157)),
        (u'Maritime affairs and fisheries', feedlink(158)),
        (u'Regional policy', feedlink(159)),
        (u'Research and innovation', feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud', feedlink(161)),
        (u'Trade', feedlink(162)),
        (u'Transport', feedlink(163))
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
resources/recipes/european_voice.recipe (new file, 51 lines)
@@ -0,0 +1,51 @@
from calibre.web.feeds.news import BasicNewsRecipe

class EuropeanVoice(BasicNewsRecipe):
    title = u'European Voice'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
    feeds = [
        (u'Whole site', u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis', u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment', u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports', u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People', u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career', u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies', u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents', u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics', u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business', u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade', u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society', u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy', u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport', u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change', u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment', u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food', u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society', u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice', u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs', u'http://www.europeanvoice.com/Rss/27.xml')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url + '?bPrint=1'

    def preprocess_html(self, soup):
        denied = soup.findAll(True, text='Subscribers')
        if denied:
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup
@@ -33,7 +33,7 @@ class Fudzilla(BasicNewsRecipe):
               ]
 
     feeds = [
-        (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+        (u'Posts', u'http://www.fudzilla.com/?format=feed')
             ]
 
     preprocess_regexps = [
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__author__ = u'Marc T\xf6nsing'
+__author__ = u'Marc Toensing'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
@@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
     no_javascript = True
 
     feeds = [
+        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
         ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
         ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
         ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
 
     def get_article_url(self, article):
         return article.get('link') + '?print=1'
-
-
resources/recipes/german_gov.recipe (new file, 28 lines)
@@ -0,0 +1,28 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class GermanGovermentPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der Bundesregierung'
    oldest_article = 14
    __author__ = 'malfi'
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name='h2'))
    keep_only_tags.append(dict(name='div', attrs={'class': 'textblack'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'subtitle'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'text'}))
    remove_tags = []
    feeds = [(u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        m = re.search(r'^(.*).html$', url)
        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
 
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2010, Szing'
 __docformat__ = 'restructuredtext en'
 
 '''
@@ -10,49 +10,52 @@ globeandmail.com
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class GlobeAndMail(BasicNewsRecipe):
-    title = u'Globe and Mail'
-    language = 'en_CA'
-    __author__ = 'Kovid Goyal'
+class AdvancedUserRecipe1287083651(BasicNewsRecipe):
+    title = u'Globe & Mail'
+    __license__ = 'GPL v3'
+    __author__ = 'Szing'
     oldest_article = 2
-    max_articles_per_feed = 10
     no_stylesheets = True
-    extra_css = '''
-        h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
-        h4 {margin-top: 0px;}
-        #byline { font-family: monospace; font-weight:bold; }
-        #placeline {font-weight:bold;}
-        #credit {margin-top:0px;}
-        .tag {font-size: 22pt;}'''
-    description = 'Canada\'s national newspaper'
-    keep_only_tags = [dict(name='article')]
-    remove_tags = [dict(name='aside'),
-        dict(name='footer'),
-        dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
-        dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
-    ]
-    feeds = [
-        (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
-        (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
-        (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
-        (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
-        (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
-        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
-        (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
-        (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
-        (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
-        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
-        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
-        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
-        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
-        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
-        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
-    ]
-
-    def get_article_url(self, article):
-        url = BasicNewsRecipe.get_article_url(self, article)
-        if '/video/' not in url:
-            return url
+    max_articles_per_feed = 100
+    encoding = 'utf8'
+    publisher = 'Globe & Mail'
+    language = 'en_CA'
+    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
+
+    feeds = [
+        (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
+        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
+        (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
+        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
+        (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
+        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
+        (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
+        (u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
+        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
+        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
+        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
+        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss'),
+        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss')
+    ]
+
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='h2', attrs={'id':'articletitle'}),
+        dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
+        dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
+        dict(name='id', attrs={'class':'article'}),
+        dict(name='table', attrs={'class':'todays-market'}),
+        dict(name='header', attrs={'id':'leadheader'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
+    ]
+
+    # this has to be here or the text in the article appears twice.
+    remove_tags_after = [dict(id='article')]
+
+    # Use the mobile version rather than the web version
+    def print_version(self, url):
+        return url + '&service=mobile'
41
resources/recipes/handelsblatt.recipe
Normal file
@ -0,0 +1,41 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Handelsblatt(BasicNewsRecipe):
    title = u'Handelsblatt'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
    language = 'de'

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))

    remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]

    feeds = [
        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        m = re.search('(?<=;)[0-9]*', url)
        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
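For reference, print_version above pulls the numeric document id that follows the ';' in an article URL and splices it into Handelsblatt's print template. A standalone sketch of the same transformation; the article URL here is a hypothetical example of that shape, not a real link:

import re

# Hypothetical article URL of the assumed shape: the doc id follows the ';'.
url = 'http://www.handelsblatt.com/politik/konjunktur/nachrichten;2684214'
m = re.search('(?<=;)[0-9]*', url)
print u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
# -> http://www.handelsblatt.com/_b=2684214,_p=21,_t=ftprint,doc_page=0;printpage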
38
resources/recipes/hola.recipe
Normal file
@ -0,0 +1,38 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
'''
hola.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Hola(BasicNewsRecipe):
    title = u'Hola'
    __author__ = 'bmsleight'
    description = 'diario de actualidad, moda y belleza.'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'es'

    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'id':'cuerpo'})
    ]

    feeds = [
        (u'Famosos'        , u'http://www.hola.com/famosos/rss.xml'        ),
        (u'Realeza'        , u'http://www.hola.com/realeza/rss.xml'        ),
        (u'Cine'           , u'http://www.hola.com/cine/rss.xml'           ),
        (u'Música'         , u'http://www.hola.com/musica/rss.xml'         ),
        (u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml'   ),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Niños'          , u'http://www.hola.com/ninos/rss.xml'          ),
        (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        return url
@ -33,13 +33,14 @@ class IrishTimes(BasicNewsRecipe):
            ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
          ]

    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
            u = 'http://www.irishtimes.com' + \
                (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01..htm','_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u

    def get_article_url(self, article):
        return article.link
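For readers decoding the print_version above: feedsportal redirector URLs encode the real Irish Times path after a fixed 69-character prefix, with '0C' standing for '/' and '0A' for '0'. A sketch of the reverse mapping on a made-up encoded tail (illustrative, not a captured URL):

# Made-up encoded tail; real tails are what url[69:] yields in the recipe.
tail = '0Cnewspaper0Cfrontpage0C20A10A11150C12345670Bhtml/story01..htm'
path = tail.replace('0C', '/').replace('0A', '0')
print 'http://www.irishtimes.com' + path.replace('0Bhtml/story01..htm', '_pf.html')
# -> http://www.irishtimes.com/newspaper/frontpage/20101115/1234567_pf.html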
@ -38,6 +38,7 @@ class LaJornada_mx(BasicNewsRecipe):
        .loc{font-weight: bold}
        .carton{text-align: center}
        .credit{font-weight: bold}
        .sumario{font-weight: bold; text-align: center}
        .text{margin-top: 1.4em}
        p.inicial{display: inline; font-size: xx-large; font-weight: bold}
        p.s-s{display: inline; text-indent: 0}
35
resources/recipes/marctv.recipe
Normal file
@ -0,0 +1,35 @@
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch MarcTV.
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MarcTVde(BasicNewsRecipe):

    title = 'Marc Toensings Visionen'
    description = 'Marc Toensings Visionen'
    language = 'de'
    __author__ = 'Marc Toensing'

    max_articles_per_feed = 40
    oldest_article = 665
    use_embedded_content = False

    remove_tags = []
    keep_only_tags = [dict(name='div', attrs={'class':["content"]})]

    feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]

    extra_css = '#wrapper .entry p img{width:620px; height: 270px;}'

    def get_cover_url(self):
        return 'http://marctv.de/marctv.png'
@ -1,7 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/10/31: skip repeated articles in section pages
'''

import datetime
@ -23,42 +25,37 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    recursions = 0
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'

    keep_only_tags = [dict(name='h1'),
                      dict(attrs={'id':['newscontent01','newscontent02']})]

    def get_fetchdate(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 5.30am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.5/24)
        return dt_local.strftime("%Y%m%d")

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()
        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet']})
        current_articles = []
        included_urls = []
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' + url
            if url not in included_urls:
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles
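The included_urls list added above is what implements this release's "skip repeated articles in section pages" fix: each resolved article URL is remembered and later duplicates on the same section page are dropped. The pattern in isolation, on made-up hrefs:

# Made-up hrefs; the second 'gaa1.htm' is skipped as a duplicate.
included_urls = []
current_articles = []
for href in ['gaa1.htm', 'gaa2.htm', 'gaa1.htm']:
    url = 'http://news.mingpao.com/20101112/' + href
    if url not in included_urls:
        current_articles.append({'title': href, 'url': url, 'description': ''})
        included_urls.append(url)
print len(current_articles)   # -> 2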
57
resources/recipes/mmc_rtv.recipe
Normal file
@ -0,0 +1,57 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.rtvslo.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class MMCRTV(BasicNewsRecipe):
    title = u'MMC RTV Slovenija'
    __author__ = u'BlonG'
    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        split_url = url.split("/")
        print_url = 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + split_url[-1]
        return print_url

    keep_only_tags = [
        dict(name='div', attrs={'class':'title'}),
        dict(name='div', attrs={'id':'newsbody'}),
        dict(name='div', attrs={'id':'newsblocks'}),
    ]

    # remove_tags = [
    #     dict(name='div', attrs={'id':'newsblocks'}),
    # ]

    feeds = [
        (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
        (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
        (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
        (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
        (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
        (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
        (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
    ]

    # def preprocess_html(self, soup):
    #     newsblocks = soup.find('div', attrs={'id':'newsblocks'})
    #     soup.find('div', attrs={'id':'newsbody'}).insert(-1, newsblocks)
    #     return soup
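print_version above assumes the article id is the final path segment of the URL; a sketch with a hypothetical rtvslo.si article URL:

# Hypothetical article URL; only the trailing id matters to print_version.
url = 'http://www.rtvslo.si/slovenija/primer-naslova/123456'
print 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + url.split('/')[-1]
# -> http://www.rtvslo.si/index.php?c_mod=news&op=print&id=123456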
68
resources/recipes/newsweek_polska.recipe
Normal file
@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Newsweek(BasicNewsRecipe):
    EDITION = 0

    title = u'Newsweek Polska'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))

    extra_css = '''
        .body {font-size: small}
        .author {font-size: x-small}
        .lead {font-size: x-small}
        .title{font-size: x-large; font-weight: bold}
    '''

    def print_version(self, url):
        return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

    def find_last_full_issue(self):
        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')

    def parse_index(self):
        self.find_last_full_issue()
        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
        img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
        self.cover_url = img['src']
        feeds = []
        parent = soup.find(id='content-left-big')
        for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
            section = self.tag_to_string(txt).capitalize()
            articles = list(self.find_articles(txt))
            feeds.append((section, articles))
        return feeds

    def find_articles(self, txt):
        for a in txt.findAllNext( attrs={'class':['strong','hr']}):
            if a.name == 'div':
                break
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.newsweek.pl' + a['href'],
                'date' : '',
                'description' : ''
            }
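The print_version above relies on EDITION having been set by find_last_full_issue; it strips the issue-specific prefix and appends '/print'. A sketch with hypothetical values (the edition number and article slug are made up):

EDITION = 1521   # hypothetical edition number
url = 'http://www.newsweek.pl/artykuly/wydanie/%d/przykladowy-artykul,12345,1,1.html' % EDITION
print url.replace('http://www.newsweek.pl/artykuly/wydanie/' + str(EDITION),
                  'http://www.newsweek.pl/artykuly') + '/print'
# -> http://www.newsweek.pl/artykuly/przykladowy-artykul,12345,1,1.html/print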
35
resources/recipes/now_toronto.recipe
Normal file
@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on Lars Jacob's Taz Digiabo recipe

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'

import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class NowToronto(BasicNewsRecipe):
    title = u'Now Toronto'
    description = u'Now Toronto'
    __author__ = 'Starson17'
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name = 'feedburner:origlink').string
        f = urllib2.urlopen(url)
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0,_('downloading epub'))
        tmp.write(f.read())
        tmp.close()
        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0,_('extracting epub'))
        zfile.extractall(self.output_dir)
        tmp.close()
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1,_('epub downloaded and extracted'))
        return index
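build_index above skips HTML scraping entirely: the publisher ships a ready-made EPUB, which is just a ZIP container, so the recipe downloads it and unpacks it over the output directory. The unpack step in isolation (paths are made up):

import zipfile

# Made-up paths: an already-downloaded EPUB and the recipe's output directory.
zfile = zipfile.ZipFile('/tmp/now_toronto.epub', 'r')
zfile.extractall('/tmp/recipe_output')   # leaves content.opf at the top level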
@ -5,65 +5,61 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = True

    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    includeSections = []  # by default, all sections included

    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 100

    if headlinesOnly:
        title='New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title='New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')
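The includeSections/excludeSections knobs are applied later by filter_ans, which walks the (section, articles) list and deletes non-matching entries. The filtering logic reduces to this (sample data is made up):

# Made-up (section, articles) pairs as filter_ans receives them.
ans = [('Politics', ['a1']), ('Sports', ['a2']), ('Arts', ['a3'])]
includeSections = ['Politics', 'Arts']
excludeSections = ['Arts']

kept = [(name, arts) for name, arts in ans
        if (includeSections == [] or name in includeSections)
        and name not in excludeSections]
print [name for name, arts in kept]   # -> ['Politics']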
@ -82,6 +78,7 @@ class NYTimes(BasicNewsRecipe):
            'entry-response module',
            'icon enlargeThis',
            'leftNavTabs',
            'metaFootnote',
            'module box nav',
            'nextArticleLink',
            'nextArticleLink clearfix',
@ -89,12 +86,13 @@ class NYTimes(BasicNewsRecipe):
            'relatedSearchesModule',
            'side_tool',
            'singleAd',
            re.compile('^subNavigation'),
            re.compile('^leaderboard'),
            re.compile('^module'),
            ]}),
        dict(id=[
            'adxLeaderboard',
            'adxSponLink',
            'archive',
            'articleExtras',
            'articleInline',
@ -105,87 +103,98 @@ class NYTimes(BasicNewsRecipe):
            'footer',
            'header',
            'header_search',
            'inlineBox',
            'login',
            'masthead',
            'masthead-nav',
            'memberTools',
            'navigation',
            'portfolioInline',
            'readerReviews',
            'readerReviewsCount',
            'relatedArticles',
            'relatedTopics',
            'respond',
            'side_search',
            'side_index',
            'side_tool',
            'toolsRight',
            ]),
        dict(name=['script', 'noscript', 'style','form','hr'])]

    no_stylesheets = True
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def filter_ans(self, ans) :
        total_article_count = 0
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.nytimes.com/auth/login')
            br.select_form(name='login')
            br['USERID'] = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
@ -213,6 +222,9 @@ class NYTimes(BasicNewsRecipe):
        cover = None
        return cover

    def short_title(self):
        return self.title

    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
@ -255,157 +267,184 @@ class NYTimes(BasicNewsRecipe):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&amp;' with '&'
            massaged = re.sub("&amp;","&", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_todays_index(self):

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=True)).strip()

        articles = {}
        key = None
        ans = []
        url_list = []

        def handle_article(div):
            a = div.find('a', href=True)
            if not a:
                return
            url = re.sub(r'\?.*', '', a['href'])
            if not url.startswith("http"):
                return
            if not url.endswith(".html"):
                return
            if 'podcast' in url:
                return
            if '/video/' in url:
                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            author = ''
            authorAttribution = div.find(True, attrs={'class':'byline'})
            if authorAttribution:
                author = self.tag_to_string(authorAttribution, use_alt=False)
            else:
                authorAttribution = div.find(True, attrs={'class':'byline'})
                if authorAttribution:
                    author = self.tag_to_string(authorAttribution, use_alt=False)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author,
                     content=''))

        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
                key = key.replace('Op-ed','Op-Ed')
                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_headline_index(self):

        articles = {}
        ans = []
        url_list = []

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

        # Fetch the content table
        content_table = soup.find('table',{'id':'content'})
        if content_table is None:
            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
            return None

        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
            for div_sec in td_col.findAll('div',recursive=False):
                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                    section_name = re.sub(r'^ *$','',section_name)
                    if section_name == '':
                        continue
                    section_name = string.capwords(section_name)
                    if section_name == 'U.s.':
                        section_name = 'U.S.'
                    elif section_name == 'Op-ed':
                        section_name = 'Op-Ed'
                    pubdate = strftime('%a, %d %b')

                    search_div = div_sec
                    for next_tag in h6_sec_name.findNextSiblings(True):
                        if next_tag.__class__.__name__ == 'Tag':
                            if next_tag.name == 'div':
                                search_div = next_tag
                            break

                    # Get the articles
                    for h3_item in search_div.findAll('h3'):
                        byline = h3_item.h6
                        if byline is not None:
                            author = self.tag_to_string(byline, use_alt=False)
                        else:
                            author = ''
                        a = h3_item.find('a', href=True)
                        if not a:
                            continue
                        url = re.sub(r'\?.*', '', a['href'])
                        if not url.startswith("http"):
                            continue
                        if not url.endswith(".html"):
                            continue
                        if 'podcast' in url:
                            continue
                        if 'video' in url:
                            continue
                        url += '?pagewanted=all'
                        if url in url_list:
                            continue
                        url_list.append(url)
                        self.log("URL %s" % url)
                        title = self.tag_to_string(a, use_alt=True).strip()
                        desc = h3_item.find('p')
                        if desc is not None:
                            description = self.tag_to_string(desc, use_alt=False)
                        else:
                            description = ''
                        if not articles.has_key(section_name):
                            ans.append(section_name)
                            articles[section_name] = []
                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_index(self):
        if self.headlinesOnly:
            return self.parse_headline_index()
        else:
            return self.parse_todays_index()

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        kicker_tag = soup.find(attrs={'class':'kicker'})
        if kicker_tag: # remove Op-Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):
@ -422,8 +461,9 @@ class NYTimes(BasicNewsRecipe):
            firstImg = inlineImgs[0]
            for inlineImg in inlineImgs[1:]:
                inlineImg.extract()
            # Move firstImg before article body
            #article_body = soup.find(True, {'id':'articleBody'})
            cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
            if cgFirst:
                # Strip all sibling NavigableStrings: noise
                navstrings = cgFirst.findAll(text=True, recursive=False)
@ -443,30 +483,18 @@ class NYTimes(BasicNewsRecipe):
            if headline_found:
                cgFirst.insert(insertLoc,firstImg)
            else:
                self.log(">>> No class:'columnGroup first' found <<<")

        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
                cTag.insert(0, c)
                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@ -506,17 +534,6 @@ class NYTimes(BasicNewsRecipe):
                bTag.insert(0, subhead.contents[0])
                subhead.replaceWith(bTag)

        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@ -532,11 +549,3 @@ class NYTimes(BasicNewsRecipe):
        return soup
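Both NYTimes variants flatten in-article links through strip_anchors, so that anchor tags without images are replaced by their plain text. A minimal illustration on a snippet:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<p>See <a href="/2010/story.html">this story</a> for more.</p>')
for a in soup.findAll('a'):
    if a.img is None:   # keep image links, drop text-only ones
        a.replaceWith(a.renderContents().decode('cp1252','replace'))
print soup   # -> <p>See this story for more.</p>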
@ -5,52 +5,186 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = False

    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    includeSections = []  # by default, all sections included

    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 100

    if headlinesOnly:
        title='New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title='New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    needs_subscription = True
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')

    remove_tags_before = dict(id='article')
    remove_tags_after = dict(id='article')
    remove_tags = [dict(attrs={'class':[
                            'articleFooter',
                            'articleTools',
                            'columnGroup doubleRule',
                            'columnGroup singleRule',
                            'columnGroup last',
                            'doubleRule',
                            'dottedLine',
                            'entry-meta',
                            'entry-response module',
                            'icon enlargeThis',
                            'leftNavTabs',
                            'metaFootnote',
                            'module box nav',
                            'nextArticleLink',
                            'nextArticleLink clearfix',
                            'post-tools',
                            'relatedSearchesModule',
                            'side_tool',
                            'singleAd',
                            re.compile('^subNavigation'),
                            re.compile('^leaderboard'),
                            re.compile('^module'),
                            ]}),
                   dict(id=[
                            'adxLeaderboard',
                            'adxSponLink',
                            'archive',
                            'articleExtras',
                            'articleInline',
                            'blog_sidebar',
                            'businessSearchBar',
                            'cCol',
                            'entertainmentSearchBar',
                            'footer',
                            'header',
                            'header_search',
                            'inlineBox',
                            'login',
                            'masthead',
                            'masthead-nav',
                            'memberTools',
                            'navigation',
                            'portfolioInline',
                            'readerReviews',
                            'readerReviewsCount',
                            'relatedArticles',
                            'relatedTopics',
                            'respond',
                            'side_search',
                            'side_index',
                            'side_tool',
                            'toolsRight',
                            ]),
                   dict(name=['script', 'noscript', 'style','form','hr'])]

    no_stylesheets = True
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def filter_ans(self, ans) :
        total_article_count = 0
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -60,22 +194,19 @@ class NYTimes(BasicNewsRecipe):
            br['USERID'] = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
        # Skip ad pages served before actual article
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is not None:
            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
            url += '?pagewanted=all'
            self.log.warn("Skipping ad to article at '%s'" % url)
            return self.index_to_soup(url, raw=True)

    def get_cover_url(self):
        cover = None
@ -93,12 +224,57 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
return cover
|
return cover
|
||||||
|
|
||||||
def short_title(self):
|
def short_title(self):
|
||||||
return 'New York Times'
|
return self.title
|
||||||
|
|
||||||
-    def parse_index(self):
-        self.encoding = 'cp1252'
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
-        self.encoding = decode
+    def index_to_soup(self, url_or_raw, raw=False):
+        '''
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
+        '''
+        def get_the_soup(docEncoding, url_or_raw, raw=False) :
+            if re.match(r'\w+://', url_or_raw):
+                f = self.browser.open(url_or_raw)
+                _raw = f.read()
+                f.close()
+                if not _raw:
+                    raise RuntimeError('Could not fetch index from %s'%url_or_raw)
+            else:
+                _raw = url_or_raw
+            if raw:
+                return _raw
+
+            if not isinstance(_raw, unicode) and self.encoding:
+                _raw = _raw.decode(docEncoding, 'replace')
+            massage = list(BeautifulSoup.MARKUP_MASSAGE)
+            massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
+            return BeautifulSoup(_raw, markupMassage=massage)
+
+        # Entry point
+        print "index_to_soup()"
+        soup = get_the_soup( self.encoding, url_or_raw )
+        contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
+        docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
+        if docEncoding == '' :
+            docEncoding = self.encoding
+
+        if self.verbose > 2:
+            self.log( "  document encoding: '%s'" % docEncoding)
+        if docEncoding != self.encoding :
+            soup = get_the_soup(docEncoding, url_or_raw)
+
+        return soup
+
+    def massageNCXText(self, description):
+        # Kindle TOC descriptions won't render certain characters
+        if description:
+            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
+            # Replace '&#038;' with '&'
+            massaged = re.sub("&#038;","&", massaged)
+            return self.fixChars(massaged)
+        else:
+            return description
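The override boils down to a two-pass decode: fetch once with the recipe's default encoding, read the charset the page itself declares in its <meta http-equiv="Content-Type"> tag, and refetch-decode only if the two disagree. The same idea reduced to a sketch; the helper name and regex are illustrative, not calibre API:

    import re
    import urllib2

    def sniff_and_decode(url, default='cp1252'):
        raw = urllib2.urlopen(url).read()
        # look for e.g. content="text/html; charset=utf-8" near the top
        m = re.search(r'charset=([\w-]+)', raw[:4096])
        encoding = m.group(1) if m else default
        return raw.decode(encoding, 'replace')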
+    def parse_todays_index(self):
+
         def feed_title(div):
             return ''.join(div.findAll(text=True, recursive=True)).strip()
@ -119,12 +295,13 @@ class NYTimes(BasicNewsRecipe):
                 return
             if 'podcast' in url:
                 return
+            if '/video/' in url:
+                return
             url += '?pagewanted=all'
             if url in url_list:
                 return
             url_list.append(url)
             title = self.tag_to_string(a, use_alt=True).strip()
-            #self.log("Title: %s" % title)
             description = ''
             pubdate = strftime('%a, %d %b')
             summary = div.find(True, attrs={'class':'summary'})
@ -140,6 +317,7 @@ class NYTimes(BasicNewsRecipe):
                 author = self.tag_to_string(authorAttribution, use_alt=False)
             feed = key if key is not None else 'Uncategorized'
             if not articles.has_key(feed):
+                ans.append(feed)
                 articles[feed] = []
             articles[feed].append(
                 dict(title=title, url=url, date=pubdate,
@ -147,46 +325,228 @@ class NYTimes(BasicNewsRecipe):
                     content=''))

+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
-        # Find each instance of class="section-headline", class="story", class="story headline"
+        # Find each article
         for div in soup.findAll(True,
             attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

             if div['class'] in ['section-headline','sectionHeader']:
                 key = string.capwords(feed_title(div))
-                articles[key] = []
-                ans.append(key)
-                #self.log('Section: %s' % key)
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')

             elif div['class'] in ['story', 'story headline'] :
                 handle_article(div)
             elif div['class'] == 'headlinesOnly multiline flush':
                 for lidiv in div.findAll('li'):
                     handle_article(lidiv)

-        # ans = self.sort_index_by(ans, {'The Front Page':-1,
-        #                     'Dining In, Dining Out':1,
-        #                     'Obituaries':2})
         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+    def parse_headline_index(self):
+
+        articles = {}
+        ans = []
+        url_list = []
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None
+
+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
+
+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+                    pubdate = strftime('%a, %d %b')
+
+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break
+
+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+    def parse_index(self):
+        if self.headlinesOnly:
+            return self.parse_headline_index()
+        else:
+            return self.parse_todays_index()
+
+    def strip_anchors(self,soup):
+        paras = soup.findAll(True)
+        for para in paras:
+            aTags = para.findAll('a')
+            for a in aTags:
+                if a.img is None:
+                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
+        return soup
-        return ans
     def preprocess_html(self, soup):

         kicker_tag = soup.find(attrs={'class':'kicker'})
-        if kicker_tag:
+        if kicker_tag: # remove Op_Ed author head shots
             tagline = self.tag_to_string(kicker_tag)
-            #self.log("FOUND KICKER %s" % tagline)
             if tagline=='Op-Ed Columnist':
                 img_div = soup.find('div','inlineImage module')
-                #self.log("Searching for photo")
                 if img_div:
                     img_div.extract()
-                    #self.log("Photo deleted")
-        refresh = soup.find('meta', {'http-equiv':'refresh'})
-        if refresh is None:
-            return soup
-        content = refresh.get('content').partition('=')[2]
-        raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
-        return BeautifulSoup(raw.decode('cp1252', 'replace'))
+        return self.strip_anchors(soup)
+    def postprocess_html(self,soup, first_fetch):
+
+        if self.one_picture_per_article:
+            # Remove all images after first
+            largeImg = soup.find(True, {'class':'articleSpanImage'})
+            inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
+            if largeImg:
+                for inlineImg in inlineImgs:
+                    inlineImg.extract()
+            else:
+                if inlineImgs:
+                    firstImg = inlineImgs[0]
+                    for inlineImg in inlineImgs[1:]:
+                        inlineImg.extract()
+                    # Move firstImg before article body
+                    #article_body = soup.find(True, {'id':'articleBody'})
+                    cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
+                    if cgFirst:
+                        # Strip all sibling NavigableStrings: noise
+                        navstrings = cgFirst.findAll(text=True, recursive=False)
+                        [ns.extract() for ns in navstrings]
+                        headline_found = False
+                        tag = cgFirst.find(True)
+                        insertLoc = 0
+                        while True:
+                            insertLoc += 1
+                            if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
+                                headline_found = True
+                                break
+                            tag = tag.nextSibling
+                            if not tag:
+                                headline_found = False
+                                break
+                        if headline_found:
+                            cgFirst.insert(insertLoc,firstImg)
+                    else:
+                        self.log(">>> No class:'columnGroup first' found <<<")

+        # Change captions to italic
+        for caption in soup.findAll(True, {'class':'caption'}) :
+            if caption and caption.contents[0]:
+                cTag = Tag(soup, "p", [("class", "caption")])
+                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
+                mp_off = c.find("More Photos")
+                if mp_off >= 0:
+                    c = c[:mp_off]
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

+        # Change <nyt_headline> to <h2>
+        h1 = soup.find('h1')
+        if h1:
+            headline = h1.find("nyt_headline")
+            if headline:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                h1.replaceWith(tag)
+        else:
+            # Blog entry - replace headline, remove <hr> tags
+            headline = soup.find('title')
+            if headline:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                soup.insert(0, tag)
+                hrs = soup.findAll('hr')
+                for hr in hrs:
+                    hr.extract()

+        # Change <h1> to <h3> - used in editorial blogs
+        masthead = soup.find("h1")
+        if masthead:
+            # Nuke the href
+            if masthead.a:
+                del(masthead.a['href'])
+            tag = Tag(soup, "h3")
+            tag.insert(0, self.fixChars(masthead.contents[0]))
+            masthead.replaceWith(tag)

+        # Change <span class="bold"> to <b>
+        for subhead in soup.findAll(True, {'class':'bold'}) :
+            if subhead.contents:
+                bTag = Tag(soup, "b")
+                bTag.insert(0, subhead.contents[0])
+                subhead.replaceWith(bTag)

+        divTag = soup.find('div',attrs={'id':'articleBody'})
+        if divTag:
+            divTag['class'] = divTag['id']

+        # Add class="authorId" to <div> so we can format with CSS
+        divTag = soup.find('div',attrs={'id':'authorId'})
+        if divTag and divTag.contents[0]:
+            tag = Tag(soup, "p")
+            tag['class'] = "authorId"
+            tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
+                use_alt=False)))
+            divTag.replaceWith(tag)

+        return soup
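A pattern worth noting in postprocess_html() above: with the BeautifulSoup 3 API these recipes use, markup is rewritten by building a fresh Tag bound to the soup and swapping it in with replaceWith(). In isolation, with placeholder markup (calibre ships this BeautifulSoup module):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

    soup = BeautifulSoup('<h1>Old headline</h1>')
    h1 = soup.find('h1')
    tag = Tag(soup, 'h2')              # new element bound to this soup
    tag['class'] = 'headline'
    tag.insert(0, h1.renderContents()) # carry the old contents over
    h1.replaceWith(tag)                # swap it into the tree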
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es'
     remove_empty_feeds = True
+    publication_type = 'newspaper'
     masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        #autor{font-weight: bold}
+        #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
+        #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
+        .fgprincipal{font-size: large; font-weight: bold}
+        """

     conversion_options = {
         'comment' : description
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
         , 'language' : language
     }

-    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
+    remove_tags = [
+        dict(name=['meta','link'])
+        ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
+    ]
+    remove_attributes=['lang']

     feeds = [
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
+        for item in soup.findAll('span', attrs={'id':'seccion'}):
+            it = item.a
+            it.name='span'
+            del it['href']
+            del it['title']
+        for item in soup.findAll('p'):
+            it = item.find('h3')
+            if it:
+                it.name='span'
         return soup
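The new preprocess_html() lines above use a small trick that recurs in several of these recipes: demote an <a> to a <span> and drop its link attributes, so section labels survive conversion without becoming dead links. The trick in isolation, with BeautifulSoup 3 idioms and an illustrative selector:

    for a in soup.findAll('a', attrs={'class':'category'}):
        a.name = 'span'               # keep the text, lose the link
        for attr in ('href', 'title'):
            if a.has_key(attr):       # BeautifulSoup 3 attribute test
                del a[attr]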
resources/recipes/pc_lab.recipe (new file)
@ -0,0 +1,70 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from PC Lab website"
    language = 'pl'
    oldest_article = 30.0
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'class':['substance']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['chapters']})
        ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['navigation']})
    ]

    #links to RSS feeds
    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]

    #load second and subsequent page content
    # in: soup - full page with 'next' button
    # out: appendtag - tag to which new page is to be added
    def append_page(self, soup, appendtag):
        # find the 'Next' button
        pager = soup.find('div', attrs={'class':'next'})

        if pager:
            #search for 'a' element with link to next page (exit if not found)
            a = pager.find('a')
            if a:
                nexturl = a['href']

                soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)

                pagetext_substance = soup2.find('div', attrs={'class':'substance'})
                pagetext = pagetext_substance.find('div', attrs={'class':'data'})
                pagetext.extract()

                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                pos = len(appendtag.contents)

                self.append_page(soup2, appendtag)

    def preprocess_html(self, soup):
        # soup.body contains no title and no navigator, they are in soup
        self.append_page(soup, soup.body)

        # finally remove some tags
        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
        [tag.extract() for tag in tags]

        return soup
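append_page() above recurses once per follow-on page. An iterative version of the same multi-page stitch avoids deep recursion on very long articles; this is a sketch under the same selectors, not a drop-in replacement:

    def append_pages(self, soup, appendtag):
        while True:
            pager = soup.find('div', attrs={'class':'next'})
            a = pager.find('a') if pager else None
            if a is None:
                break                       # no 'Next' button: done
            soup = self.index_to_soup('http://pclab.pl/' + a['href'])
            substance = soup.find('div', attrs={'class':'substance'})
            pagetext = substance.find('div', attrs={'class':'data'})
            pagetext.extract()
            appendtag.insert(len(appendtag.contents), pagetext)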
@ -1,13 +1,10 @@
-#!/usr/bin/env python

 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Politika(BasicNewsRecipe):
     title = 'Politika Online'
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    remove_javascript = True
     encoding = 'utf8'
+    delay = 1
     language = 'sr'
-    lang = 'sr-Latn-RS'
-    direction = 'ltr'
+    publication_type = 'newspaper'
+    masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css = """
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: Arial,Helvetica,sans1,sans-serif}
+        h1{font-family: "Times New Roman",Times,serif1,serif}
+        .articledescription{font-family: sans1, sans-serif}
+        """

     conversion_options = {
         'comment' : description
         , 'tags' : category
         , 'publisher' : publisher
-        , 'language' : lang
+        , 'language' : language
-        , 'pretty_print' : True
     }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
+    remove_tags_after = dict(attrs={'class':'online_date'})
-    remove_tags = [
-        dict(name='div', attrs={'class':['send_print','txt-komentar']})
-        ,dict(name=['object','link','a'])
-        ,dict(name='h1', attrs={'class':'box_header-tags'})
-    ]
+    remove_tags = [dict(name=['link','meta','iframe','embed','object'])]

     feeds = [
         (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
         ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
-        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
+        ,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
         ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
-        ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
+        ,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
         ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
         ,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
-        ,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
+        ,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
     ]

     def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
         for item in soup.findAll(style=True):
             del item['style']
-        ftag = soup.find('div',attrs={'class':'content_center_border'})
-        if ftag.has_key('align'):
-            del ftag['align']
-        return self.adeify_images(soup)
+        for item in soup.findAll('a', attrs={'class':'category'}):
+            item.name='span'
+            if item.has_key('href'):
+                del item['href']
+            if item.has_key('title'):
+                del item['title']
+        return soup

resources/recipes/polityka.recipe (new file)
@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Polityka(BasicNewsRecipe):

    title = u'Polityka'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine. Last archive issue'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags_after = dict(dict(name = 'div', attrs = {'class' : 'box_footer'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))

    extra_css = '''
        h1 {font-size: x-large; font-weight: bold}
        '''

    def parse_index(self):
        soup = self.index_to_soup('http://archiwum.polityka.pl/')
        box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
        feeds = []
        last = 0
        self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
        last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']

        while True:
            index = self.index_to_soup(last_edition)

            box_list = index.findAll('div', attrs={'class' : 'box_list'})
            if len(box_list) == 0:
                break

            articles = {}
            for box in box_list:
                for div in box.findAll('div', attrs={'class': 'list_tresc'}):
                    article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
                    section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
                    print section
                    if not articles.has_key(section):
                        articles[section] = []
                    articles[section].append( {
                        'title' : self.tag_to_string(div.a),
                        'url' : 'http://archiwum.polityka.pl' + div.a['href'],
                        'date' : '',
                        'description' : ''
                        })

            for section in articles:
                feeds.append((section, articles[section]))

            last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
            last = last + 1

        return feeds
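The parse_index() loop above walks consecutive issues by rewriting the '/wydanie/<n>' issue number embedded in the archive URL until a page with no article boxes comes back. The core idea with an explicit counter; the URL pattern is taken from the code above, the generator shape is illustrative:

    def walk_editions(recipe, start_url, first_issue):
        n = first_issue
        url = start_url
        while True:
            soup = recipe.index_to_soup(url)
            if not soup.findAll('div', attrs={'class':'box_list'}):
                break    # ran past the newest issue
            yield soup
            url = url.replace('/wydanie/%d' % n, '/wydanie/%d' % (n + 1))
            n += 1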
resources/recipes/rollingstone.recipe (new file)
@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class RollingStone(BasicNewsRecipe):
    title = 'Rolling Stone Magazine - free content'
    __author__ = 'Darko Miletic'
    description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
    publisher = 'Werner Media inc.'
    category = 'news, music, USA, world'
    oldest_article = 15
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
    extra_css = """
        body{font-family: Georgia,Times,serif }
        img{margin-bottom: 0.4em; display:block}
        """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    preprocess_regexps = [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
        ,(re.compile(r'</title>.*?</head>' , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n' )
    ]

    keep_only_tags=[
        dict(attrs={'class':['headerImgHolder','headerContent']})
        ,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
        ,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
    ]

    remove_tags = [
        dict(name=['meta','iframe','object','embed'])
        ,dict(attrs={'id':'mpStoryHeader'})
        ,dict(attrs={'class':'relatedTopics'})
    ]
    remove_attributes=['lang','onclick','width','height','name']
    remove_tags_before=dict(attrs={'class':'bloggerInfo'})
    remove_tags_after=dict(attrs={'class':'relatedTopics'})

    feeds = [
        (u'All News' , u'http://www.rollingstone.com/siteServices/rss/allNews' )
        ,(u'All Blogs' , u'http://www.rollingstone.com/siteServices/rss/allBlogs' )
        ,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
        ,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
        ,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

resources/recipes/scprint.recipe (new file)
@ -0,0 +1,73 @@
from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed

class SCPrintMagazine(BasicNewsRecipe):
    title = u'SC Print Magazine'
    __author__ = u'Tony Maro'
    description = u'Last print version of the data security magazine'
    INDEX = "http://www.scmagazineus.com/issuearchive/"
    no_stylesheets = True
    language = 'en'
    keep_only_tags = [dict(id=['article','review'])]
    remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
    LOG_IN = 'http://www.scmagazineus.com/login/'
    tags = 'News,SC Magazine'
    needs_subscription = True

    def parse_index(self):
        articles = []
        issuelink = printsections = None

        soup = self.index_to_soup(self.INDEX)
        sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
        if sectit is not None:
            linkt = sectit.find('a')
            issuelink = linkt['href']
            imgt = sectit.find('img')
            self.cover_url = imgt['src']

        if issuelink is not None:
            issue = self.index_to_soup(issuelink)
            if issue is not None:
                printsections = issue.findAll('div',attrs={'class':'PrintSection'})
            if printsections is not None:
                for printsection in printsections:
                    onesection = []
                    sectiontitle = printsection.find('h3').contents[0]
                    articlesec = printsection.findAll('div',attrs={'class':'IssueArchiveFormat'})
                    if articlesec is not None:
                        ''' got articles '''
                        for onearticle in articlesec:
                            ''' process one article '''
                            arttitlet = onearticle.find('h3')
                            if arttitlet is not None:
                                mylink = arttitlet.find('a')
                                if mylink is not None:
                                    if mylink.has_key('title'):
                                        arttitle = mylink['title']
                                    else:
                                        arttitle = 'unknown'
                                    if mylink.has_key('href'):
                                        artlink = mylink['href']
                                        artlink = artlink.replace("/article","/printarticle")
                                        artlink = artlink.replace("/review","/printreview")
                                        deck = onearticle.find('div',attrs={'class':'deck'})
                                        if deck is not None:
                                            deck = deck.contents[0]
                                        onesection.append({'title':arttitle, 'url':artlink, 'description':deck,'date':''})
                    articles.append((sectiontitle, onesection))

        return articles

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOG_IN)
        br.select_form(name='aspnetForm')
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
        raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
        if 'Logout</a>' not in raw:
            raise LoginFailed(
                _('Failed to log in, check your username and password for'
                  ' the calibre Periodicals service.'))
        return br
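get_browser() above is the standard needs_subscription pattern: log in once through the shared mechanize browser, verify the response actually looks logged-in, and hand the authenticated browser back to the downloader. Stripped to its shape, with placeholder site, form, and field names:

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://example.com/login')      # placeholder URL
        br.select_form(name='login')             # placeholder form name
        br['username'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        if 'Logout' not in raw:                  # crude success check
            raise LoginFailed(_('Failed to log in'))
        return br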
resources/recipes/siol.recipe (new file)
@ -0,0 +1,55 @@
# coding: utf-8
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.siol.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Siol(BasicNewsRecipe):
    title = u'Siol.net'
    __author__ = u'BlonG'
    description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
    oldest_article = 3
    language = 'sl'
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    html2lrf_options = ['--base-font-size', '10']

    keep_only_tags = [
        dict(name='div', attrs={'id':'idContent'}),
    ]

    remove_tags = [
        dict(name='span', attrs={'class':'com1'}),
        dict(name='div', attrs={'class':'relation'}),
        dict(name='p', attrs={'class':'path'}),
        dict(name='div', attrs={'class':'clear_r'}),
        dict(name='div', attrs={'id':'appendix'}),
        dict(name='div', attrs={'id':'rail'}),
        dict(name='div', attrs={'id':'div_comments'}),
        dict(name='div', attrs={'class':'thumbs'}),
    ]

    feeds = [
        (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija')
        ,(u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice')
        ,(u'EU', u'http://www.siol.net/rss.aspx?path=EU')
        ,(u'Svet', u'http://www.siol.net/rss.aspx?path=Svet')
        ,(u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo')
        ,(u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal')
        ,(u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi')
        ,(u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto')
        ,(u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija')
        ,(u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV')
    ]
@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
     __author__ = 'noxxx'
     max_articles_per_feed = 100
     description = 'tagesanzeiger.ch: Nichts verpassen'
-    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
     language = 'de'

     conversion_options = {
@ -4,7 +4,7 @@ class Tagesschau(BasicNewsRecipe):
     title = 'Tagesschau'
     description = 'Nachrichten der ARD'
     publisher = 'ARD'
-    language = 'de_DE'
+    language = 'de'

     __author__ = 'Florian Andreas Pfaff'
     oldest_article = 7

resources/recipes/vedomosti.recipe (new file)
@ -0,0 +1,195 @@
#!/usr/bin/env python

u'''
Ведомости
'''

from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class VedomostiRecipe(BasicNewsRecipe):
    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    #Add feed names if you want them to be sorted (feeds of this list appear first)
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    keep_only_tags = [dict(name='td', attrs={'class' : ['second_content']})]

    remove_tags_after = [dict(name='div', attrs={'class' : 'article_text'})]

    remove_tags = [dict(name='div', attrs={'class' : ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    #base URL for relative links
    base_url = u'http://www.vedomosti.ru'

    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
                'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
                'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
                '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
                '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
                '.article_desc {font-size: 1em; font-style:italic;}'

    def parse_index(self):
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if feedData.feed.has_key('title'):
                self.title = feedData.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if feedData.feed.has_key('description'):
                self.description = feedData.feed.description
                self.log("parse_index: Description updated to: ", self.description)

            def get_virtual_feed_articles(feed):
                if feeds.has_key(feed):
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            #Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '');
                title = item.get('title', '');
                if '' == link or '' == title:
                    continue
                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''};
                if not item.has_key('tags'):
                    get_virtual_feed_articles('_default').append(article)
                    continue
                for tag in item.tags:
                    addedToDefault = False
                    term = tag.get('term', '')
                    if '' == term:
                        if (not addedToDefault):
                            get_virtual_feed_articles('_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            #Get feed list
            #Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if (not feeds.has_key(feedName)): continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result = result + feeds.values()

            return result

        except Exception, err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        #self.log('Original: ', soup.prettify())

        #Find article
        contents = soup.find('div', {'class':['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        #Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        #Find article image
        newstop = soup.find('div', {'class':['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = Tag(soup, 'div')
                imgDiv['class'] = 'article_img'

                if img.has_key('width'):
                    del(img['width'])
                if img.has_key('height'):
                    del(img['height'])

                #find description
                element = img.parent.nextSibling

                img.extract()
                imgDiv.insert(0, img)

                while element:
                    # advance before testing, so non-Tag nodes cannot stall the loop
                    nextElement = element.nextSibling
                    if isinstance(element, Tag) and 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement

                contents.insert(1, imgDiv)

        #find article abstract
        abstract = soup.find('p', {'class':['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        #Find article authors
        authorsDiv = soup.find('div', {'class':['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        #Fix urls that use relative path
        urls = contents.findAll('a');
        if urls:
            for url in urls:
                if not url.has_key('href'):
                    continue
                if '/' == url['href'][0]:
                    url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class':['second_content']})
        if body:
            body.replaceWith(contents)

        self.log('Result: ', soup.prettify())
        return soup
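parse_index() above sorts RSS entries into "virtual feeds" keyed by each entry's tags, with untagged items falling into a '_default' bucket. The bucketing logic on its own, assuming the feedparser entry shape used above:

    def bucket_by_tag(entries):
        feeds = {}
        for item in entries:
            terms = [t.get('term', '') for t in item.get('tags', [])]
            terms = [t for t in terms if t] or ['_default']
            for term in terms:
                feeds.setdefault(term, []).append(item)
        return feeds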
@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
             ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
             ('Style',
              'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
-            ('Sports',
-             'http://feeds.washingtonpost.com/wp-dyn/rss/linkset/2010/08/19/LI2010081904067_xml'),
+            ('NFL Sports',
+             'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
+            ('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
             ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
             ]

resources/recipes/ynet.recipe (new file)
@ -0,0 +1,72 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe
import mechanize

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement to not hurt the sites advertising income.'
    cover_url = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg'
    title = u'Ynet'
    __author__ = 'marbs'
    language = 'he'
    extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}'
    remove_attributes = ['width']
    simultaneous_downloads = 5
    keep_only_tags =dict(name='div', attrs={'id':'articleContainer'})
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    max_articles_per_feed = 100
    preprocess_regexps = [
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    def preprocess_html(self, soup):
        soup.html['dir'] = 'rtl'
        soup.body['dir'] = 'rtl'
        return soup

    feeds =[(u'\u05d7\u05d3\u05e9\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss2.xml'),
            (u'\u05db\u05dc\u05db\u05dc\u05d4',
             u'http://www.ynet.co.il/Integration/StoryRss6.xml'),
            (u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss437.xml'),
            (u'\u05e1\u05e4\u05d5\u05e8\u05d8',
             u'http://www.ynet.co.il/Integration/StoryRss3.xml'),
            (u'\u05ea\u05e8\u05d1\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss538.xml'),
            (u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4',
             u'http://www.ynet.co.il/Integration/StoryRss3262.xml'),
            (u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss1208.xml'),
            (u'\u05d9\u05e8\u05d5\u05e7',
             u'http://www.ynet.co.il/Integration/StoryRss4872.xml'),
            (u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss544.xml'),
            (u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'),
            (u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss598.xml'),
            (u'\u05d4\u05d5\u05e8\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss3052.xml'),
            (u'\u05d0\u05d5\u05db\u05dc',
             u'http://www.ynet.co.il/Integration/StoryRss975.xml'),
            (u'\u05d9\u05d4\u05d3\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss4403.xml'),
            (u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2',
             u'http://www.ynet.co.il/Integration/StoryRss2142.xml'),
            (u'\u05d9\u05d7\u05e1\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss3925.xml'),
            (u'\u05d3\u05e2\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss194.xml')]

    def print_version(self, url):
        #remove from here
        br = BasicNewsRecipe.get_browser()
        br.open(url)
        br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}]))
        #to here to stop supporting ynet...
        split1 = url.split("-")
        print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
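print_version() above derives the printer-friendly page from the article id that follows the first '-' in the article URL. Worked through on a made-up URL of the same shape:

    url = 'http://www.ynet.co.il/articles/0,7340,L-1234567,00.html'
    article_id = url.split('-')[1]        # -> '1234567,00.html'
    print_url = ('http://www.ynet.co.il/Ext/Comp/ArticleLayout/'
                 'CdaArticlePrintPreview/1,2506,L-' + article_id)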
@ -6,22 +6,25 @@ Fetch Die Zeit.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
class ZeitDe(BasicNewsRecipe):
|
class ZeitDe(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'ZEIT Online'
|
title = 'Zeit Online'
|
||||||
description = 'ZEIT Online'
|
description = 'Zeit Online'
|
||||||
language = 'de'
|
language = 'de'
|
||||||
lang = 'de_DE'
|
|
||||||
|
|
||||||
__author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
|
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
|
||||||
use_embedded_content = False
|
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
remove_empty_feeds = True
|
|
||||||
no_stylesheets = True
|
remove_tags = [
|
||||||
no_javascript = True
|
dict(name='iframe'),
|
||||||
encoding = 'utf-8'
|
dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
|
||||||
|
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
|
||||||
|
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
|
||||||
|
]
|
||||||
|
|
||||||
|
keep_only_tags = [dict(id=['main'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
|
('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
|
||||||
@ -40,71 +43,31 @@ class ZeitDe(BasicNewsRecipe):
|
|||||||
('Sport', 'http://newsfeed.zeit.de/sport/index'),
|
('Sport', 'http://newsfeed.zeit.de/sport/index'),
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
|
||||||
.supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
|
|
||||||
.title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
|
|
||||||
.caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
|
|
||||||
.quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
|
|
||||||
.quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
|
|
||||||
.headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
|
|
||||||
.inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
|
|
||||||
img.inline{float:none}
|
|
||||||
.intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
|
|
||||||
.ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
|
|
||||||
.infobox {border-style: solid; border-width: 1px;padding:8px;}
|
|
||||||
.infobox dt {font-weight:700;}
|
|
||||||
'''
|
|
||||||
#filter_regexps = [r'ad.de.doubleclick.net/']
|
#filter_regexps = [r'ad.de.doubleclick.net/']
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':["article"]}) ,
|
|
||||||
dict(name='ul', attrs={'class':["tools"]}) ,
|
|
||||||
]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
|
|
||||||
dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
|
|
||||||
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
|
|
||||||
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_attributes = ['style', 'font']
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
ans = article.get('link',None)
|
ans = article.get('link',None)
|
||||||
ans += "?page=all"
|
ans += "?page=all&print=true"
|
||||||
|
|
||||||
if 'video' in ans or 'quiz' in ans :
|
if 'video' in ans or 'quiz' in ans or 'blog' in ans :
|
||||||
ans = None
|
ans = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for tag in soup.findAll(name=['ul','li']):
|
||||||
|
tag.name = 'div'
|
||||||
|
|
||||||
|
soup.html['xml:lang'] = self.lang
|
||||||
|
soup.html['lang'] = self.lang
|
||||||
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
try:
|
try:
|
||||||
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
||||||
return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
|
return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
|
||||||
except:
|
except:
|
||||||
return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
|
return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
soup.html['xml:lang'] = self.lang
|
|
||||||
soup.html['lang'] = self.lang
|
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
title = soup.find('h2', attrs={'class':'title'})
|
|
||||||
if title is None:
|
|
||||||
print "no title"
|
|
||||||
return soup
|
|
||||||
info = Tag(soup,'ul',[('class','ebinfobox')])
|
|
||||||
tools = soup.find('ul', attrs={'class':'tools'})
|
|
||||||
#author = tools.find('li','author first')
|
|
||||||
for tag in ['author first', 'date', 'date first', 'author', 'source']:
|
|
||||||
line = tools.find('li', tag)
|
|
||||||
if line:
|
|
||||||
info.insert(0,line)
|
|
||||||
title.parent.insert(0,info)
|
|
||||||
tools.extract()
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
|
||||||
resources/templates/html_export_default.css (new file)
@@ -0,0 +1,60 @@
body{
  margin:0px;
  padding: 0.5em;
  background-color:#F6F3E9;
  font-size:12px;
  font-family:Arial, Helvetica, sans-serif;
}

.calibreMeta{
  background-color:#39322B;
  color:white;
  padding:10px;
}

.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
  color:white;
}

.calibreMeta h1{
  margin:0px;
  font-size:18px;
  background-color:#39322B;
}

.calibreEbookContent{
  padding:20px;
}

.calibreEbNav, .calibreEbNavTop{
  clear:both;
  background-color:#39322B;
  color:white;
  padding:10px;
  text-align:center;
}

.calibreEbNavTop{
  margin-bottom:20px;
}

.calibreEbNav a, .calibreEbNavTop a{
  padding:0px 5px;
}

.calibreTocIndex{
  line-height:18px;
}

.calibreToc{
  float:left;
  margin:20px;
  width:300px;
  background-color:#39322B;
  color:white;
  padding:10px;
}

.calibreEbookContent{
  width:600px;
  float:left;
}
resources/templates/html_export_default.tmpl (new file)
@@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    ${head_content}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />

  </head>
  <body>

    <div class="calibreMeta">
      <div class="calibreMetaTitle">
        ${pos1=1}$
        ${for title in meta.titles():}$
          ${if pos1:}$
            <h1>
              <a href="${tocUrl}$">${print title}$</a>
            </h1>
          ${:else:}$
            <div class="calibreMetaSubtitle">${print title}$</div>
          ${:endif}$
          ${pos1=0}$
        ${:endfor}$
      </div>
      <div class="calibreMetaAuthor">
        ${print ', '.join(meta.creators())}$
      </div>
    </div>

    <div class="calibreMain">

      <div class="calibreEbookContent">
        ${if prevLink or nextLink:}$
          <div class="calibreEbNavTop">
            ${if prevLink:}$
              <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:else:}$
              <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:endif}$

            ${if nextLink:}$
              <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
            ${:endif}$
          </div>
        ${:endif}$

        ${ebookContent}$
      </div>

      ${if has_toc:}$
        <div class="calibreToc">
          <h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
          ${print toc()}$
        </div>
      ${:endif}$

      <div class="calibreEbNav">
        ${if prevLink:}$
          <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
        ${:else:}$
          <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
        ${:endif}$

        <a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>

        ${if nextLink:}$
          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
        ${:endif}$
      </div>

    </div>

  </body>
</html>
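The ${...}$ delimiters above are evaluated by the bundled templite engine that the new HTML Output plugin (further down in this commit) drives via Templite(...).render(**kwargs). A minimal sketch of how one of these fragments renders, assuming the same import path and render signature that output.py uses:

    from templite import Templite

    t = Templite(u'${if nextLink:}$<a href="${nextLink}$">next</a>${:else:}$<span>end</span>${:endif}$')
    print t.render(nextLink=u'chapter2.html')   # -> <a href="chapter2.html">next</a>
    print t.render(nextLink=None)               # -> <span>end</span>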
resources/templates/html_export_default_index.tmpl (new file)
@@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />

    <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
    <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />

    <title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>

    ${for item in meta:}$
      <meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
    ${:endfor}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />
  </head>
  <body>

    <div class="calibreMeta">
      <div class="calibreMetaTitle">
        ${pos1=1}$
        ${for title in meta.titles():}$
          ${if pos1:}$
            <h1>
              <a href="${tocUrl}$">${print title}$</a>
            </h1>
          ${:else:}$
            <div class="calibreMetaSubtitle">${print title}$</div>
          ${:endif}$
          ${pos1=0}$
        ${:endfor}$
      </div>
      <div class="calibreMetaAuthor">
        ${print ', '.join(meta.creators()),}$
      </div>
    </div>

    <div class="calibreMain">
      <div class="calibreEbookContent">

        ${if has_toc:}$
          <div class="calibreTocIndex">
            <h2>${print _('Table of contents'),}$</h2>
            ${toc}$
          </div>
        ${:else:}$
          <h2>${print _('No table of contents present'),}$</h2>
          <div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
        ${:endif}$

      </div>

      <div class="calibreEbNav">
        ${if nextLink:}$
          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
        ${:endif}$
      </div>
    </div>

  </body>
</html>
@@ -348,8 +348,12 @@ class Build(Command):
     VERSION = 1.0.0
     CONFIG += %s
     ''')%(ext.name, ' '.join(ext.headers), ' '.join(ext.sources), archs)
+    pro = pro.replace('\\', '\\\\')
     open(ext.name+'.pro', 'wb').write(pro)
-    subprocess.check_call([QMAKE, '-o', 'Makefile', ext.name+'.pro'])
+    qmc = [QMAKE, '-o', 'Makefile']
+    if iswindows:
+        qmc += ['-spec', 'win32-msvc2008']
+    subprocess.check_call(qmc + [ext.name+'.pro'])
     subprocess.check_call([make, '-f', 'Makefile'])
     objects = glob.glob(obj_pat)
     return list(map(self.a, objects))
@@ -11,7 +11,7 @@ import subprocess, tempfile, os, time
 from setup import Command, installer_name
 from setup.build_environment import HOST, PROJECT

-BASE_RSYNC = 'rsync -avz --delete'.split()
+BASE_RSYNC = ['rsync', '-avz', '--delete']
 EXCLUDES = []
 for x in [
     'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',

@@ -42,13 +42,13 @@ class Push(Command):
     threads = []
     for host in (
         r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
-        'kovid@ox:calibre'
+        'kovid@ox:calibre',
+        r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
     ):
         rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
         print '\n\nPushing to:', host, '\n'
         threads.append(Thread(target=subprocess.check_call, args=(rcmd,)))
         threads[-1].start()
-        subprocess.check_call(rcmd)
     for thread in threads:
         thread.join()
@@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
 from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

-QT_DIR = 'C:\\Qt\\4.6.3'
+QT_DIR = 'Q:\\Qt\\4.7.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUSB_DIR = 'C:\\libusb'
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@@ -28,15 +28,16 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-  easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

 Qt
 --------

 Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::

-  configure -opensource -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc -no-qt3support -webkit -xmlpatterns -no-phonon
-  nmake
+  configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license && nmake

 SIP
 -----
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import subprocess, tempfile, os, time, sys
+import subprocess, tempfile, os, time, sys, telnetlib
 from threading import RLock

 from setup import Command

@@ -28,7 +28,12 @@ else:
     def process_default(self, event):
         name = getattr(event,
                 'name', None)
-        if name and os.path.splitext(name)[1] == '.py':
+        if not name:
+            return
+        ext = os.path.splitext(name)[1]
+        reload = False
+        if ext == '.py':
+            reload = True
             print
             print name, 'changed'
             self.command.kill_server()

@@ -36,6 +41,9 @@ else:
             print self.command.prompt,
             sys.stdout.flush()
+            if reload:
+                self.command.reload_browser(delay=1)

 class Server(Command):

@@ -75,6 +83,19 @@ class Server(Command):
         self.notifier.start()
         self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)

+    def reload_browser(self, delay=0.1):
+        time.sleep(delay)
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            t.read_until("repl>")
+            t.write('BrowserReload();')
+            t.read_until("repl>")
+            t.close()
+        except:
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+
     def run(self, opts):
         self.lock = RLock()
         tdir = tempfile.gettempdir()
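The hard-coded port 4242, the repl> prompt and the BrowserReload(); command match the defaults of the Firefox MozRepl extension; that dependency is an inference from the strings above, not something the commit states, so treat it as an assumption of this development setup rather than a documented requirement.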
@@ -85,8 +106,13 @@ class Server(Command):
         print
         self.watch()

+        first = True
         while True:
             self.launch_server()
+            if not first:
+                self.reload_browser()
+            first = False
+
             try:
                 raw_input(self.prompt)
             except:
@@ -21,8 +21,6 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
     filesystem_encoding, plugins, config_dir
 from calibre.startup import winutil, winutilerror

-import mechanize
-
 uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo

 if False:

@@ -269,7 +267,8 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
     :param honor_time: If True honors pause time in refresh requests
     :param max_time: Maximum time in seconds to wait during a refresh request
     '''
-    opener = mechanize.Browser()
+    from calibre.utils.browser import Browser
+    opener = Browser()
     opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
     opener.set_handle_robots(False)
     opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
|||||||
def replace_entities(raw):
|
def replace_entities(raw):
|
||||||
return _ent_pat.sub(entity_to_unicode, raw)
|
return _ent_pat.sub(entity_to_unicode, raw)
|
||||||
|
|
||||||
|
def xml_replace_entities(raw):
|
||||||
|
return _ent_pat.sub(xml_entity_to_unicode, raw)
|
||||||
|
|
||||||
def prepare_string_for_xml(raw, attribute=False):
|
def prepare_string_for_xml(raw, attribute=False):
|
||||||
raw = _ent_pat.sub(entity_to_unicode, raw)
|
raw = _ent_pat.sub(entity_to_unicode, raw)
|
||||||
raw = raw.replace('&', '&').replace('<', '<').replace('>', '>')
|
raw = raw.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||||
|
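A short sketch of the difference between the two helpers, assuming _ent_pat and the result_exceptions table behave as the definitions above suggest: replace_entities decodes every entity, while xml_replace_entities leaves the XML-significant ones encoded so the result can be re-embedded in markup. The exact exception set is an assumption here since the table is defined outside this hunk.

    from calibre import replace_entities, xml_replace_entities

    print replace_entities('&amp; &copy;')      # -> '& (c)' with the ampersand decoded
    print xml_replace_entities('&amp; &copy;')  # -> '&amp; (c)', ampersand kept encoded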
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.7.26'
+__version__ = '0.7.28'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
|
|||||||
os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
|
os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
if not os.access(config_dir, os.W_OK) or not os.access(config_dir, os.X_OK):
|
if not os.path.exists(config_dir) or \
|
||||||
|
not os.access(config_dir, os.W_OK) or not \
|
||||||
|
os.access(config_dir, os.X_OK):
|
||||||
print 'No write acces to', config_dir, 'using a temporary dir instead'
|
print 'No write acces to', config_dir, 'using a temporary dir instead'
|
||||||
import tempfile, atexit
|
import tempfile, atexit
|
||||||
config_dir = tempfile.mkdtemp(prefix='calibre-config-')
|
config_dir = tempfile.mkdtemp(prefix='calibre-config-')
|
||||||
|
@@ -2,9 +2,7 @@ import os.path
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import textwrap
-import os
-import glob
+import textwrap, os, glob, functools
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
     MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
|
|||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
def get_metadata(self, stream, ftype):
|
||||||
if ftype == 'cbr':
|
if ftype == 'cbr':
|
||||||
from calibre.libunrar import extract_member as extract_first
|
from calibre.libunrar import extract_first_alphabetically as extract_first
|
||||||
extract_first
|
extract_first
|
||||||
else:
|
else:
|
||||||
from calibre.libunzip import extract_member as extract_first
|
from calibre.libunzip import extract_member
|
||||||
|
extract_first = functools.partial(extract_member,
|
||||||
|
sort_alphabetically=True)
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
ret = extract_first(stream)
|
ret = extract_first(stream)
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
|
|||||||
from calibre.ebooks.rtf.output import RTFOutput
|
from calibre.ebooks.rtf.output import RTFOutput
|
||||||
from calibre.ebooks.tcr.output import TCROutput
|
from calibre.ebooks.tcr.output import TCROutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
from calibre.ebooks.html.output import HTMLOutput
|
||||||
from calibre.ebooks.snb.output import SNBOutput
|
from calibre.ebooks.snb.output import SNBOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
@ -453,7 +454,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
|||||||
from calibre.devices.apple.driver import ITUNES
|
from calibre.devices.apple.driver import ITUNES
|
||||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||||
from calibre.devices.cybook.driver import CYBOOK
|
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||||
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
||||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
||||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR
|
BOOQ, ELONEX, POCKETBOOK301, MENTOR
|
||||||
@ -461,7 +462,7 @@ from calibre.devices.iliad.driver import ILIAD
|
|||||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||||
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
||||||
from calibre.devices.nook.driver import NOOK
|
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||||
from calibre.devices.prs505.driver import PRS505
|
from calibre.devices.prs505.driver import PRS505
|
||||||
from calibre.devices.android.driver import ANDROID, S60
|
from calibre.devices.android.driver import ANDROID, S60
|
||||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||||
@ -475,7 +476,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
|
|||||||
SOVOS, PICO
|
SOVOS, PICO
|
||||||
from calibre.devices.sne.driver import SNE
|
from calibre.devices.sne.driver import SNE
|
||||||
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
||||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
|
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
|
||||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||||
from calibre.devices.kobo.driver import KOBO
|
from calibre.devices.kobo.driver import KOBO
|
||||||
|
|
||||||
@ -525,6 +526,7 @@ plugins += [
|
|||||||
RTFOutput,
|
RTFOutput,
|
||||||
TCROutput,
|
TCROutput,
|
||||||
TXTOutput,
|
TXTOutput,
|
||||||
|
HTMLOutput,
|
||||||
SNBOutput,
|
SNBOutput,
|
||||||
]
|
]
|
||||||
# Order here matters. The first matched device is the one used.
|
# Order here matters. The first matched device is the one used.
|
||||||
@ -533,6 +535,7 @@ plugins += [
|
|||||||
HANLINV5,
|
HANLINV5,
|
||||||
BLACKBERRY,
|
BLACKBERRY,
|
||||||
CYBOOK,
|
CYBOOK,
|
||||||
|
ORIZON,
|
||||||
ILIAD,
|
ILIAD,
|
||||||
IREXDR1000,
|
IREXDR1000,
|
||||||
IREXDR800,
|
IREXDR800,
|
||||||
@ -546,6 +549,7 @@ plugins += [
|
|||||||
KINDLE2,
|
KINDLE2,
|
||||||
KINDLE_DX,
|
KINDLE_DX,
|
||||||
NOOK,
|
NOOK,
|
||||||
|
NOOK_COLOR,
|
||||||
PRS505,
|
PRS505,
|
||||||
ANDROID,
|
ANDROID,
|
||||||
S60,
|
S60,
|
||||||
@ -586,6 +590,7 @@ plugins += [
|
|||||||
AVANT,
|
AVANT,
|
||||||
MENTOR,
|
MENTOR,
|
||||||
SWEEX,
|
SWEEX,
|
||||||
|
Q600,
|
||||||
KOGAN,
|
KOGAN,
|
||||||
PDNOVEL,
|
PDNOVEL,
|
||||||
SPECTRA,
|
SPECTRA,
|
||||||
@ -892,4 +897,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
|
|||||||
Email, Server, Plugins, Tweaks, Misc]
|
Email, Server, Plugins, Tweaks, Misc]
|
||||||
|
|
||||||
#}}}
|
#}}}
|
||||||
|
|
||||||
|
@@ -250,8 +250,11 @@ class OutputProfile(Plugin):
     #: If True, the date is appended to the title of downloaded news
     periodical_date_in_title = True

-    #: The character used to represent a star in ratings
+    #: Characters used in jackets and catalogs
+    missing_char = u'x'
     ratings_char = u'*'
+    empty_ratings_char = u' '
+    read_char = u'+'

     #: Unsupported unicode characters to be replaced during preprocessing
     unsupported_unicode_chars = []

@@ -287,7 +290,12 @@ class iPadOutput(OutputProfile):
             'macros': {'border-width': '{length}|medium|thick|thin'}
         }
     ]
-    ratings_char = u'\u2605'
+
+    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
+    ratings_char = u'\u2605' # filled star
+    empty_ratings_char = u'\u2606' # hollow star
+    read_char = u'\u2713' # check mark

     touchscreen = True
     # touchscreen_news_css {{{
     touchscreen_news_css = u'''

@@ -498,7 +506,6 @@ class SonyReaderLandscapeOutput(SonyReaderOutput):
     screen_size = (784, 1012)
     comic_screen_size = (784, 1012)

-
 class MSReaderOutput(OutputProfile):

     name = 'Microsoft Reader'

@@ -582,7 +589,12 @@ class KindleOutput(OutputProfile):
     fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
     supports_mobi_indexing = True
     periodical_date_in_title = False

+    missing_char = u'x\u2009'
+    empty_ratings_char = u'\u2606'
     ratings_char = u'\u2605'
+    read_char = u'\u2713'

     mobi_ems_per_blockquote = 2.0

     @classmethod

@@ -603,6 +615,8 @@ class KindleDXOutput(OutputProfile):
     #comic_screen_size = (741, 1022)
     supports_mobi_indexing = True
     periodical_date_in_title = False
+    ratings_char = u'\u2605'
+    read_char = u'\u2713'
     mobi_ems_per_blockquote = 2.0

     @classmethod
@@ -23,6 +23,9 @@ class ANDROID(USBMS):
             : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
             0xc92 : [0x100]},

+            # Eken
+            0x040d : { 0x8510 : [0x0001] },
+
             # Motorola
             0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
                 0x4285 : [0x216]},
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

@@ -1221,12 +1221,19 @@ class ITUNES(DriverBase):
             return thumb

         if isosx:
+            # The following commands generate an error, but the artwork does in fact
+            # get sent to the device. Seems like a bug in Apple's automation interface?
+            # Could also be a problem with the integrity of the cover data?
             if lb_added:
-                lb_added.artworks[1].data_.set(cover_data)
+                try:
+                    lb_added.artworks[1].data_.set(cover_data)
+                except:
+                    if DEBUG:
+                        self.log.warning("  iTunes automation interface reported an error"
+                            " when adding artwork to '%s' in the iTunes Library" % metadata.title)
+                    pass

             if db_added:
-                # The following command generates an error, but the artwork does in fact
-                # get sent to the device. Seems like a bug in Apple's automation interface
                 try:
                     db_added.artworks[1].data_.set(cover_data)
                 except:

@@ -2521,11 +2528,11 @@ class ITUNES(DriverBase):
                 metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                     old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
             else:
-                metadata.timestamp = isoformat(now())
+                metadata.timestamp = now()
                 if DEBUG:
                     self.log.info("   add timestamp: %s" % metadata.timestamp)
         else:
-            metadata.timestamp = isoformat(now())
+            metadata.timestamp = now()
             if DEBUG:
                 self.log.warning("   missing <metadata> block in OPF file")
                 self.log.info("   add timestamp: %s" % metadata.timestamp)
@@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):

     VENDOR_ID = [0x0fca]
     PRODUCT_ID = [0x8004, 0x0004]
-    BCD = [0x0200, 0x0107, 0x0210, 0x0201]
+    BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]

     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
@@ -5,7 +5,7 @@ __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 '''
-Device driver for Bookeen's Cybook Gen 3 and Opus
+Device driver for Bookeen's Cybook Gen 3 and Opus and Orizon
 '''

 import os

@@ -56,3 +56,23 @@ class CYBOOK(USBMS):
         if isunix:
             return device_info[3] == 'Bookeen' and (device_info[4] == 'Cybook Gen3' or device_info[4] == 'Cybook Opus')
         return True
+
+class ORIZON(CYBOOK):
+
+    name = 'Orizon Device Interface'
+    gui_name = 'Orizon'
+    description = _('Communicate with the Cybook Orizon eBook reader.')
+
+    BCD = [0x319]
+
+    WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD')
+    WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD')
+
+    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'
+
+    @classmethod
+    def can_handle(cls, device_info, debug=False):
+        if isunix:
+            return device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Orizon'
+        return True
@@ -74,9 +74,9 @@ class DevicePlugin(Plugin):
         if bcd is None or len(bcd) == 0:
             return True
         for c in bcd:
-            # Bug in winutil.get_usb_devices converts a to :
-            rev = ('rev_%4.4x'%c).replace('a', ':')
-            if rev in device_id:
+            rev = 'rev_%4.4x'%c
+            # Bug in winutil.get_usb_devices sometimes converts a to :
+            if rev in device_id or rev.replace('a', ':') in device_id:
                 return True
         return False
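A worked illustration of the fixed match, assuming Windows device ids of the usual usb\vid_...&pid_...&rev_... form; the BCD value 0x031a here is hypothetical, chosen because only revisions containing a hexadecimal 'a' are affected by the winutil bug, so both spellings must be probed:

    def bcd_matches(device_id, bcd):
        for c in bcd:
            rev = 'rev_%4.4x'%c
            if rev in device_id or rev.replace('a', ':') in device_id:
                return True
        return False

    print bcd_matches(r'usb\vid_1006&pid_4023&rev_031a', [0x031a])  # True, healthy id
    print bcd_matches(r'usb\vid_1006&pid_4023&rev_031:', [0x031a])  # True, mangled id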
@@ -17,15 +17,15 @@ class IRIVER_STORY(USBMS):
     supported_platforms = ['windows', 'osx', 'linux']

     # Ordered list of supported formats
-    FORMATS = ['epub', 'pdf', 'txt']
+    FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']

     VENDOR_ID = [0x1006]
-    PRODUCT_ID = [0x4023]
+    PRODUCT_ID = [0x4023, 0x4025]
     BCD = [0x0323]

     VENDOR_NAME = 'IRIVER'
-    WINDOWS_MAIN_MEM = 'STORY'
-    WINDOWS_CARD_A_MEM = 'STORY'
+    WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05']
+    WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']

     #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
     #OSX_CARD_A_MEM = 'Kindle Card Storage Media'
@@ -503,7 +503,11 @@ class KOBO(USBMS):
             ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

             ContentID = self.contentid_from_path(book.path, ContentType)
-            datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
+
+            t = (ContentID,)
+            cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
+            result = cursor.fetchone()
+            datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'

             t = (datelastread,ContentID,)
@@ -72,6 +72,15 @@ class SWEEX(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True

+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):

     name = 'Kogan Device Interface'
|
|||||||
|
|
||||||
BCD = [0x222]
|
BCD = [0x222]
|
||||||
|
|
||||||
EBOOK_DIR_MAIN = 'eBooks/Kobo'
|
EBOOK_DIR_MAIN = 'eBooks'
|
||||||
|
|
||||||
def upload_cover(self, path, filename, metadata, filepath):
|
def upload_cover(self, path, filename, metadata, filepath):
|
||||||
coverdata = getattr(metadata, 'thumbnail', None)
|
coverdata = getattr(metadata, 'thumbnail', None)
|
||||||
if coverdata and coverdata[2]:
|
if coverdata and coverdata[2]:
|
||||||
with open(os.path.join(path, '.thumbnail', filename+'.jpg'), 'wb') as coverfile:
|
dirpath = os.path.join(path, '.thumbnail')
|
||||||
|
if not os.path.exists(dirpath):
|
||||||
|
os.makedirs(dirpath)
|
||||||
|
with open(os.path.join(dirpath, filename+'.jpg'), 'wb') as coverfile:
|
||||||
coverfile.write(coverdata[2])
|
coverfile.write(coverdata[2])
|
||||||
|
|
||||||
|
|
||||||
|
@@ -80,3 +80,14 @@ class NOOK(USBMS):

     def sanitize_path_components(self, components):
         return [x.replace('#', '_') for x in components]
+
+class NOOK_COLOR(NOOK):
+    gui_name = _('Nook Color')
+    description = _('Communicate with the Nook Color eBook reader.')
+
+    PRODUCT_ID = [0x002]
+    BCD = [0x216]
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_DISK'
+
+    EBOOK_DIR_MAIN = 'My Files/Books'
@@ -610,7 +610,11 @@ class XMLCache(object):
         # is not new, compare its Sony DB date against localtime and gmtime.
         # Count the matches. When we must set a date, use the one with the most
         # matches. Use localtime if the case of a tie, and hope it is right.
-        timestamp = os.path.getmtime(path)
+        try:
+            timestamp = os.path.getmtime(path)
+        except:
+            debug_print('Failed to get timestamp for:', path)
+            timestamp = time.time()
         rec_date = record.get('date', None)

         def clean(x):

@@ -619,13 +623,20 @@ class XMLCache(object):
             x.replace(u'\0', '')
             return x

+        def record_set(k, v):
+            try:
+                record.set(k, clean(v))
+            except:
+                # v is not suitable for XML, ignore
+                pass
+
         if not getattr(book, '_new_book', False): # book is not new
             if record.get('tz', None) is not None:
                 use_tz_var = True
             if strftime(timestamp, zone=time.gmtime) == rec_date:
                 gtz_count += 1
             elif strftime(timestamp, zone=time.localtime) == rec_date:
                 ltz_count += 1
         else: # book is new. Set the time using the current votes
             if use_tz_var:
                 tz = time.localtime

@@ -639,22 +650,25 @@ class XMLCache(object):
                 debug_print("Use GMT TZ for new book", book.lpath)
         date = strftime(timestamp, zone=tz)
         record.set('date', clean(date))
-        record.set('size', clean(str(os.stat(path).st_size)))
+        try:
+            record.set('size', clean(str(os.stat(path).st_size)))
+        except:
+            record.set('size', '0')
         title = book.title if book.title else _('Unknown')
-        record.set('title', clean(title))
+        record_set('title', title)
         ts = book.title_sort
         if not ts:
             ts = title_sort(title)
-        record.set('titleSorter', clean(ts))
+        record_set('titleSorter', ts)
        if self.use_author_sort:
             if book.author_sort:
                 aus = book.author_sort
             else:
                 debug_print('Author_sort is None for book', book.lpath)
                 aus = authors_to_sort_string(book.authors)
-            record.set('author', clean(aus))
+            record_set('author', aus)
         else:
-            record.set('author', clean(authors_to_string(book.authors)))
+            record_set('author', authors_to_string(book.authors))
         ext = os.path.splitext(path)[1]
         if ext:
             ext = ext[1:].lower()
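record_set exists because lxml refuses attribute values that are not legal XML; note that clean() as quoted discards the result of replace(), so in practice the try/except in record_set is what actually absorbs bad strings (this matches the changelog entry "Sony driver: Ignore invalid strings when updating XML database"). A hedged illustration of the failure mode it swallows; the exact exception type is an assumption about lxml's validation:

    from lxml import etree

    rec = etree.Element('text')
    try:
        rec.set('title', u'bad \x0b title')   # U+000B is not a valid XML 1.0 character
    except ValueError:
        pass   # ignored, as in record_set above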
@@ -32,7 +32,7 @@ def detect(aBuf):
 ENCODING_PATS = [
     re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
         re.IGNORECASE),
-    re.compile(r'''<meta\s+?[^<>]+?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
+    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
         re.IGNORECASE)
 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')
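The quantifier change from [^<>]+? to [^<>]*? lets the charset pattern match <meta> tags whose first attribute is content=; the changelog entry "Fix bug in regex used to extract charset from <meta> tags" refers to this. A quick check:

    import re

    pat = re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
        re.IGNORECASE)
    raw = '<meta content="text/html; charset=utf-8" http-equiv="Content-Type">'
    print pat.search(raw).group(1)   # -> utf-8; the old +? pattern finds no match here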
@@ -144,7 +144,10 @@ class DocAnalysis(object):

         # Normalize the histogram into percents
         totalLines = len(self.lines)
-        h = [ float(count)/totalLines for count in hRaw ]
+        if totalLines > 0:
+            h = [ float(count)/totalLines for count in hRaw ]
+        else:
+            h = []
         #print "\nhRaw histogram lengths are: "+str(hRaw)
         #print "   percents are: "+str(h)+"\n"
src/calibre/ebooks/html/meta.py (new file)
@@ -0,0 +1,33 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.oeb.base import namespace, barename, DC11_NS

class EasyMeta(object):

    def __init__(self, meta):
        self.meta = meta

    def __iter__(self):
        meta = self.meta
        for item_name in meta.items:
            for item in meta[item_name]:
                if namespace(item.term) == DC11_NS:
                    yield { 'name': barename(item.term), 'value': item.value }

    def __len__(self):
        count = 0
        for item in self:
            count = count+1
        return count

    def titles(self):
        for item in self.meta['title']:
            yield item.value

    def creators(self):
        for item in self.meta['creator']:
            yield item.value
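A hedged usage sketch of EasyMeta, mirroring how the templates above and the HTML Output plugin below consume it; oeb_book stands in for a parsed OEB book object supplied by the conversion pipeline:

    meta = EasyMeta(oeb_book.metadata)
    print ', '.join(meta.creators())        # author line, as rendered in the templates
    for item in meta:                       # Dublin Core terms, used for the DC <meta> tags
        print item['name'], '=', item['value']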
src/calibre/ebooks/html/output.py (new file)
@@ -0,0 +1,209 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'

import os, re, shutil

from calibre.utils import zipfile

from os.path import dirname, abspath, relpath, exists, basename

from lxml import etree
from templite import Templite

from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile

from urllib import unquote

from calibre.ebooks.html.meta import EasyMeta

class HTMLOutput(OutputFormatPlugin):

    name = 'HTML Output'
    author = 'Fabian Grassl'
    file_type = 'zip'

    options = set([
        OptionRecommendation(name='template_css',
            help=_('CSS file used for the output instead of the default file')),

        OptionRecommendation(name='template_html_index',
            help=_('Template used for generation of the html index file instead of the default file')),

        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the html contents of the book instead of the default file')),

        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated ZIP file to the '
                'specified directory. WARNING: The contents of the directory '
                'will be deleted.')
        ),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Generate table of contents
        '''
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
                if parent is None:
                    parent = etree.Element('ul')
                elif len(current_node.nodes):
                    parent = element(parent, ('ul'))
                for node in current_node.nodes:
                    point = element(parent, 'li')
                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
                    link = element(point, 'a', href=href)
                    title = node.title
                    if title:
                        title = re.sub(r'\s+', ' ', title)
                    link.text=title
                    build_node(node, point)
                return parent
            wrap = etree.Element('div')
            wrap.append(build_node(oeb_book.toc))
            return wrap

    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        root = self.generate_toc(oeb_book, ref_url, output_dir)
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                xml_declaration=False)

    def convert(self, oeb_book, output_path, input_plugin, opts, log):

        # read template files
        if opts.template_html_index is not None:
            template_html_index_data = open(opts.template_html_index, 'rb').read()
        else:
            template_html_index_data = P('templates/html_export_default_index.tmpl', data=True)

        if opts.template_html is not None:
            template_html_data = open(opts.template_html, 'rb').read()
        else:
            template_html_data = P('templates/html_export_default.tmpl', data=True)

        if opts.template_css is not None:
            template_css_data = open(opts.template_css, 'rb').read()
        else:
            template_css_data = P('templates/html_export_default.css', data=True)

        template_html_index_data = template_html_index_data.decode('utf-8')
        template_html_data = template_html_data.decode('utf-8')
        template_css_data = template_css_data.decode('utf-8')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        tempdir = os.path.realpath(PersistentTemporaryDirectory())
        output_file = os.path.join(tempdir,
                basename(re.sub(r'\.zip', '', output_path)+'.html'))
        output_dir = re.sub(r'\.html', '', output_file)+'_files'

        if not exists(output_dir):
            os.makedirs(output_dir)

        css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
        with open(css_path, 'wb') as f:
            f.write(template_css_data.encode('utf-8'))

        with open(output_file, 'wb') as f:
            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
            cssLink = relpath(abspath(css_path), dirname(output_file))
            tocUrl = relpath(output_file, dirname(output_file))
            t = templite.render(has_toc=bool(oeb_book.toc.count()),
                    toc=html_toc, meta=meta, nextLink=nextLink,
                    tocUrl=tocUrl, cssLink=cssLink,
                    firstContentPageLink=nextLink)
            f.write(t)

        with CurrentDir(output_dir):
            for item in oeb_book.manifest:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                if not exists(dir):
                    os.makedirs(dir)
                if item.spine_position is not None:
                    with open(path, 'wb') as f:
                        pass
                else:
                    with open(path, 'wb') as f:
                        f.write(str(item))
                    item.unload_data_from_memory(memory=path)

            for item in oeb_book.spine:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                root = item.data.getroottree()

                # get & clean HTML <HEAD>-data
                head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
                head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)
                head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>', head_content)

                # get & clean HTML <BODY>-data
                body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
                ebook_content = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ebook_content)

                # generate link to next page
                if item.spine_position+1 < len(oeb_book.spine):
                    nextLink = oeb_book.spine[item.spine_position+1].href
                    nextLink = relpath(abspath(nextLink), dir)
                else:
                    nextLink = None

                # generate link to previous page
                if item.spine_position > 0:
                    prevLink = oeb_book.spine[item.spine_position-1].href
                    prevLink = relpath(abspath(prevLink), dir)
                else:
                    prevLink = None

                cssLink = relpath(abspath(css_path), dir)
                tocUrl = relpath(output_file, dir)
                firstContentPageLink = oeb_book.spine[0].href

                # render template
                templite = Templite(template_html_data)
                toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
                t = templite.render(ebookContent=ebook_content,
                        prevLink=prevLink, nextLink=nextLink,
                        has_toc=bool(oeb_book.toc.count()), toc=toc,
                        tocUrl=tocUrl, head_content=head_content,
                        meta=meta, cssLink=cssLink,
                        firstContentPageLink=firstContentPageLink)

                # write html to file
                with open(path, 'wb') as f:
                    f.write(t)
                item.unload_data_from_memory(memory=path)

        zfile = ZipFile(output_path, "w")
        zfile.add_dir(output_dir, basename(output_dir))
        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

        if opts.extract_to:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.makedirs(opts.extract_to)
            zfile.extractall(opts.extract_to)
            self.log('Zip file extracted to', opts.extract_to)

        zfile.close()

        # cleanup temp dir
        shutil.rmtree(tempdir)
@@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS
 import sys, re

 from lxml import html
+from lxml.html import soupparser

 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

 def find_asin(br, isbn):
     q = 'http://www.amazon.com/s?field-keywords='+isbn

@@ -47,13 +49,12 @@ def get_social_metadata(title, authors, publisher, isbn):
         return mi
     br = browser()
     asin = to_asin(br, isbn)
-    if asin:
-        if get_metadata(br, asin, mi):
-            return mi
+    if asin and get_metadata(br, asin, mi):
+        return mi
     from calibre.ebooks.metadata.xisbn import xisbn
     for i in xisbn.get_associated_isbns(isbn):
         asin = to_asin(br, i)
-        if get_metadata(br, asin, mi):
+        if asin and get_metadata(br, asin, mi):
             return mi
     return mi

@@ -70,7 +71,10 @@ def get_metadata(br, asin, mi):
         return False
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
     ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
     if ratings:
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')

@@ -95,13 +99,13 @@ def get_metadata(br, asin, mi):
     # remove all attributes from tags
     desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
     # Collapse whitespace
-    desc = re.sub('\n+', '\n', desc)
-    desc = re.sub(' +', ' ', desc)
+    #desc = re.sub('\n+', '\n', desc)
+    #desc = re.sub(' +', ' ', desc)
     # Remove the notice about text referring to out of print editions
     desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
     # Remove comments
     desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-    mi.comments = desc
+    mi.comments = sanitize_comments_html(desc)

     return True

@@ -112,7 +116,7 @@ def main(args=sys.argv):
     print

     # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
     print

     # Random tests
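The switch from html.fromstring to soupparser.fromstring trades speed for tolerance: the soupparser backend hands the raw markup to BeautifulSoup first, so it copes with product pages that the stricter parser mangles or rejects, and the new try/except turns any remaining parse failure into a clean False. A minimal illustration, assuming BeautifulSoup 3.0.x is installed as the windows build notes earlier in this commit require:

    from lxml.html import soupparser

    root = soupparser.fromstring('<p><b>unclosed markup')
    print root.tag   # parsed anyway, despite the unterminated tags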
@@ -109,9 +109,11 @@ class OCFZipReader(OCFReader):
             raise EPubException("not a ZIP .epub OCF container")
         self.root = root
         if self.root is None:
-            self.root = os.getcwdu()
-            if hasattr(stream, 'name'):
-                self.root = os.path.abspath(os.path.dirname(stream.name))
+            name = getattr(stream, 'name', False)
+            if name:
+                self.root = os.path.abspath(os.path.dirname(name))
+            else:
+                self.root = os.getcwdu()
         super(OCFZipReader, self).__init__()

     def open(self, name, mode='r'):
@ -12,6 +12,7 @@ from calibre.utils.logging import default_log
|
|||||||
from calibre.utils.titlecase import titlecase
|
from calibre.utils.titlecase import titlecase
|
||||||
from calibre.customize import Plugin
|
from calibre.customize import Plugin
|
||||||
from calibre.ebooks.metadata.covers import check_for_cover
|
from calibre.ebooks.metadata.covers import check_for_cover
|
||||||
|
from calibre.utils.html2text import html2text
|
||||||
|
|
||||||
metadata_config = None
|
metadata_config = None
|
||||||
|
|
||||||
@ -48,6 +49,11 @@ class MetadataSource(Plugin): # {{{
|
|||||||
#: member.
|
#: member.
|
||||||
string_customization_help = None
|
string_customization_help = None
|
||||||
|
|
||||||
|
#: Set this to true if your plugin returns HTML markup in comments.
|
||||||
|
#: Then if the user disables HTML, calibre will automagically convert
|
||||||
|
#: the HTML to Markdown.
|
||||||
|
has_html_comments = False
|
||||||
|
|
||||||
type = _('Metadata download')
|
type = _('Metadata download')
|
||||||
|
|
||||||
def __call__(self, title, author, publisher, isbn, verbose, log=None,
|
def __call__(self, title, author, publisher, isbn, verbose, log=None,
|
||||||
@ -79,6 +85,13 @@ class MetadataSource(Plugin): # {{{
|
|||||||
mi.comments = None
|
mi.comments = None
|
||||||
if not c.get('tags', True):
|
if not c.get('tags', True):
|
||||||
mi.tags = []
|
mi.tags = []
|
||||||
|
if self.has_html_comments and mi.comments and \
|
||||||
|
c.get('textcomments', False):
|
||||||
|
try:
|
||||||
|
mi.comments = html2text(mi.comments)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
mi.comments = None
|
||||||
|
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.exception = e
|
self.exception = e
|
||||||
@@ -132,11 +145,17 @@ class MetadataSource(Plugin): # {{{
             setattr(w, '_'+x, cb)
             cb.setChecked(c.get(x, True))
             w._layout.addWidget(cb)

+        cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
+        setattr(w, '_textcomments', cb)
+        cb.setChecked(c.get('textcomments', False))
+        w._layout.addWidget(cb)
+
         return w

     def save_settings(self, w):
         dl_settings = {}
-        for x in ('rating', 'tags', 'comments'):
+        for x in ('rating', 'tags', 'comments', 'textcomments'):
             dl_settings[x] = getattr(w, '_'+x).isChecked()
         c = self.config_store()
         c.set(self.name, dl_settings)
@@ -210,6 +229,8 @@ class Amazon(MetadataSource): # {{{
     metadata_type = 'social'
     description = _('Downloads social metadata from amazon.com')

+    has_html_comments = True
+
     def fetch(self):
         if not self.isbn:
             return
@@ -182,6 +182,7 @@ class TOC(list):
             except:
                 play_order = 1
             href = fragment = text = None
+            nd = dest
             nl = nl_path(np)
             if nl:
                 nl = nl[0]
@@ -190,17 +191,14 @@ class TOC(list):
                 text += etree.tostring(txt, method='text',
                         encoding=unicode, with_tail=False)
             content = content_path(np)
-            if not content or not text:
-                return
-            content = content[0]
-            src = get_attr(content, attr='src')
-            if src is None:
-                return
-
-            purl = urlparse(unquote(content.get('src')))
-            href, fragment = purl[2], purl[5]
-            nd = dest.add_item(href, fragment, text)
-            nd.play_order = play_order
+            if content and text:
+                content = content[0]
+                src = get_attr(content, attr='src')
+                if src:
+                    purl = urlparse(unquote(content.get('src')))
+                    href, fragment = purl[2], purl[5]
+                    nd = dest.add_item(href, fragment, text)
+                    nd.play_order = play_order

             for c in np_path(np):
                 process_navpoint(c, nd)
@@ -275,7 +275,15 @@ class MobiMLizer(object):
         # <mbp:frame-set/> does not exist lalalala
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return
+            id_ = elem.get('id', None)
+            if id_:
+                # Keep anchors so people can use display:none
+                # to generate hidden TOCs
+                elem.clear()
+                elem.text = None
+                elem.set('id', id_)
+            else:
+                return
         tag = barename(elem.tag)
         istate = copy.copy(istates[-1])
         istate.rendered = False
@@ -355,11 +363,15 @@ class MobiMLizer(object):
                 if value == getattr(self.profile, prop):
                     result = '100%'
                 else:
+                    # Amazon's renderer does not support
+                    # img sizes in units other than px
+                    # See #7520 for test case
                     try:
-                        ems = int(round(float(value) / self.profile.fbase))
+                        pixs = int(round(float(value) / \
+                                (72./self.profile.dpi)))
                     except:
                         continue
-                    result = "%dem" % ems
+                    result = "%d"%pixs
                 istate.attrib[prop] = result
             elif tag == 'hr' and asfloat(style['width']) > 0:
                 prop = style['width'] / self.profile.width
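The arithmetic behind the new branch: CSS lengths arrive in points, 72 pt equal one inch, so dividing by 72/dpi converts points to device pixels. A worked example with an illustrative 168 DPI profile:

    dpi = 168.
    value = 36.                                  # length in points
    pixs = int(round(float(value) / (72./dpi)))  # 36 * 168 / 72 = 84
    assert pixs == 84                            # 36 pt renders as 84 px
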
@@ -378,6 +390,15 @@ class MobiMLizer(object):
             for attr in ('rowspan', 'colspan','width','border','scope'):
                 if attr in elem.attrib:
                     istate.attrib[attr] = elem.attrib[attr]
+        if tag == 'q':
+            t = elem.text
+            if not t:
+                t = ''
+            elem.text = u'\u201c' + t
+            t = elem.tail
+            if not t:
+                t = ''
+            elem.tail = u'\u201d' + t
         text = None
         if elem.text:
             if istate.preserve:
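MOBI has no native <q> element, so the new branch fakes one typographically: an opening curly quote is prepended to the element's text and a closing one to its tail. The same transformation in isolation, using lxml directly:

    # -*- coding: utf-8 -*-
    from lxml import etree

    p = etree.fromstring('<p>She said <q>hello</q> and left.</p>')
    q = p.find('q')
    q.text = u'\u201c' + (q.text or '')   # opening double quote
    q.tail = u'\u201d' + (q.tail or '')   # closing double quote
    print etree.tostring(p, encoding=unicode)
    # -> <p>She said <q>“hello”</q> and left.</p>
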
@@ -406,6 +427,12 @@ class MobiMLizer(object):
             parent = bstate.para if bstate.inline is None else bstate.inline
             if parent is not None:
                 vtag = etree.SubElement(parent, XHTML(vtag))
+                # Add anchors
+                for child in vbstate.body:
+                    if child is not vbstate.para:
+                        vtag.append(child)
+                    else:
+                        break
                 for child in vbstate.para:
                     vtag.append(child)
                 return
@@ -221,7 +221,10 @@ class MetadataHeader(BookHeader):
         else:
             end = self.section_offset(number + 1)
         self.stream.seek(start)
-        return self.stream.read(end - start)
+        try:
+            return self.stream.read(end - start)
+        except OverflowError:
+            return self.stream.read(os.stat(self.stream.name).st_size - start)


 class MobiReader(object):
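The try/except covers MOBI files whose record sizes were stored as a bogus long integer: end - start can then overflow what read() accepts, so the fallback clamps the read to the real file size. The pattern on its own (read_record is a hypothetical helper, not calibre API):

    import os

    def read_record(stream, start, end):
        stream.seek(start)
        try:
            return stream.read(end - start)
        except OverflowError:
            # corrupt size field: read to the end of the actual file instead
            return stream.read(os.stat(stream.name).st_size - start)
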
@@ -398,6 +401,8 @@ class MobiReader(object):
             elem.getparent().remove(elem)
         fname = self.name.encode('ascii', 'replace')
         fname = re.sub(r'[\x08\x15\0]+', '', fname)
+        if not fname:
+            fname = 'dummy'
         htmlfile = os.path.join(output_dir,
                 ascii_filename(fname) + '.html')
         try:
@@ -564,6 +569,10 @@ class MobiReader(object):
             for attr in self.IMAGE_ATTRS:
                 recindex = attrib.pop(attr, None) or recindex
             if recindex is not None:
+                try:
+                    recindex = '%05d'%int(recindex)
+                except:
+                    pass
                 attrib['src'] = 'images/%s.jpg' % recindex
         for attr in ('width', 'height'):
             if attr in attrib:
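The %05d step simply zero-pads the record index so the generated src matches the filenames the image extractor writes out:

    recindex = '42'
    print '%05d' % int(recindex)   # -> '00042', i.e. images/00042.jpg
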
@@ -787,6 +787,8 @@ class Manifest(object):
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
             data = self.oeb.html_preprocessor(data)
+            # There could be null bytes in data if it had &#0; entities in it
+            data = data.replace('\0', '')

             # Remove DOCTYPE declaration as it messes up parsing
             # In particular, it causes tostring to insert xmlns
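Why the scrub is needed: an &#0; entity decodes to a literal NUL byte, which the XML parser used downstream rejects. Minimal demonstration:

    data = u'<p>bad\x00byte</p>'      # what an &#0; entity decodes to
    data = data.replace('\0', '')
    assert data == u'<p>badbyte</p>'  # now safe to hand to lxml
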
@@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
                 with open(path, 'wb') as f:
                     f.write(str(item))
                 item.unload_data_from_memory(memory=path)
-
-
@@ -143,11 +143,17 @@ def render_jacket(mi, output_profile,
     if comments:
         comments = comments_to_html(comments)

+    try:
+        author = mi.format_authors()
+    except:
+        author = ''
+
     def generate_html(comments):
         args = dict(xmlns=XHTML_NS,
                     title_str=title_str,
                     css=css,
                     title=title,
+                    author=author,
                     pubdate_label=_('Published'), pubdate=pubdate,
                     series_label=_('Series'), series=series,
                     rating_label=_('Rating'), rating=rating,
@@ -133,7 +133,11 @@ class DetectStructure(object):


     def elem_to_link(self, item, elem, counter):
-        text = xml2text(elem)
+        text = xml2text(elem).strip()
+        if not text:
+            text = elem.get('title', '')
+        if not text:
+            text = elem.get('alt', '')
         text = text[:100].strip()
         id = elem.get('id', 'calibre_toc_%d'%counter)
         elem.set('id', id)
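With this change a TOC entry built from, say, an image-only heading falls back from the element's text to its title attribute, then to its alt attribute. A quick check with lxml (the tostring call stands in for calibre's xml2text):

    from lxml import etree

    elem = etree.fromstring('<h1 title="Chapter One"><img src="c1.png"/></h1>')
    text = etree.tostring(elem, method='text', encoding=unicode).strip()
    if not text:
        text = elem.get('title', '')
    if not text:
        text = elem.get('alt', '')
    print text   # -> Chapter One
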
@@ -223,7 +223,6 @@ class MessageBox(QMessageBox):
         if default_button is not None:
             self.setDefaultButton(default_button)
-

     def copy_to_clipboard(self):
         QApplication.clipboard().setText('%s: %s\n\n%s' %
                 (self.title, self.msg, self.det_msg))
@@ -715,13 +714,13 @@ def build_forms(srcdir, info=None):
         dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
         dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
         dat = pat.sub(sub, dat)
+        dat = dat.replace('from QtWebKit.QWebView import QWebView',
+            'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')

         if form.endswith('viewer%smain.ui'%os.sep):
             info('\t\tPromoting WebView')
             dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
             dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
-            dat = dat.replace('from QtWebKit.QWebView import QWebView',
-                'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
             dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'

         open(compiled_form, 'wb').write(dat)
@@ -192,14 +192,15 @@ class EditMetadataAction(InterfaceAction):
                     _('At least two books must be selected for merging'),
                     show=True)
         dest_id, src_books, src_ids = self.books_to_merge(rows)
+        title = self.gui.library_view.model().db.title(dest_id, index_is_id=True)
         if safe_merge:
             if not confirm('<p>'+_(
                 'Book formats and metadata from the selected books '
-                'will be added to the <b>first selected book.</b> '
+                'will be added to the <b>first selected book</b> (%s). '
                 'ISBN will <i>not</i> be merged.<br><br> '
                 'The second and subsequently selected books will not '
                 'be deleted or changed.<br><br>'
-                'Please confirm you want to proceed.')
+                'Please confirm you want to proceed.')%title
                 +'</p>', 'merge_books_safe', self.gui):
                 return
             self.add_formats(dest_id, src_books)
@@ -207,14 +208,14 @@ class EditMetadataAction(InterfaceAction):
         else:
             if not confirm('<p>'+_(
                 'Book formats and metadata from the selected books will be merged '
-                'into the <b>first selected book</b>. '
+                'into the <b>first selected book</b> (%s). '
                 'ISBN will <i>not</i> be merged.<br><br>'
                 'After merger the second and '
                 'subsequently selected books will be <b>deleted</b>. <br><br>'
                 'All book formats of the first selected book will be kept '
                 'and any duplicate formats in the second and subsequently selected books '
                 'will be permanently <b>deleted</b> from your computer.<br><br> '
-                'Are you <b>sure</b> you want to proceed?')
+                'Are you <b>sure</b> you want to proceed?')%title
                 +'</p>', 'merge_books', self.gui):
                 return
         if len(rows)>5:
@@ -233,6 +234,7 @@ class EditMetadataAction(InterfaceAction):
             ci = self.gui.library_view.model().index(dest_row, 0)
             if ci.isValid():
                 self.gui.library_view.setCurrentIndex(ci)
+                self.gui.library_view.model().current_changed(ci, ci)

     def add_formats(self, dest_id, src_books, replace=False):
         for src_book in src_books:
@@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
         self.qaction.setMenu(self.view_menu)
         ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
-

     def location_selected(self, loc):
         enabled = loc == 'library'
         for action in list(self.view_menu.actions())[1:]:
@@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
         rows = self.gui.current_view().selectionModel().selectedRows()
         self._view_books(rows)

+    def view_triggered(self, index):
+        self._view_books([index])
+
     def view_specific_book(self, index):
         self._view_books([index])

@@ -5,11 +5,13 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, collections
+import os, collections, sys
+from Queue import Queue

-from PyQt4.Qt import QLabel, QPixmap, QSize, QWidget, Qt, pyqtSignal, \
-    QVBoxLayout, QScrollArea, QPropertyAnimation, QEasingCurve, \
-    QSizePolicy, QPainter, QRect, pyqtProperty
+from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, \
+    QPropertyAnimation, QEasingCurve, QThread, QApplication, QFontInfo, \
+    QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette
+from PyQt4.QtWebKit import QWebView

 from calibre import fit_image, prepare_string_for_xml
 from calibre.gui2.widgets import IMAGE_EXTENSIONS
@@ -67,10 +69,7 @@ class CoverView(QWidget): # {{{

     def __init__(self, vertical, parent=None):
         QWidget.__init__(self, parent)
-        self.setMaximumSize(QSize(120, 120))
-        self.setMinimumSize(QSize(120 if vertical else 20, 120 if vertical else
-            20))
-        self._current_pixmap_size = self.maximumSize()
+        self._current_pixmap_size = QSize(120, 120)
         self.vertical = vertical

         self.animation = QPropertyAnimation(self, 'current_pixmap_size', self)
@@ -79,8 +78,9 @@ class CoverView(QWidget): # {{{
         self.animation.setStartValue(QSize(0, 0))
         self.animation.valueChanged.connect(self.value_changed)

-        self.setSizePolicy(QSizePolicy.Expanding if vertical else
-                QSizePolicy.Minimum, QSizePolicy.Expanding)
+        self.setSizePolicy(
+                QSizePolicy.Expanding if vertical else QSizePolicy.Minimum,
+                QSizePolicy.Expanding)

         self.default_pixmap = QPixmap(I('book.png'))
         self.pixmap = self.default_pixmap
@@ -109,20 +109,6 @@ class CoverView(QWidget): # {{{
         self.current_pixmap_size = QSize(self.pwidth, self.pheight)
         self.animation.setEndValue(self.current_pixmap_size)

-    def relayout(self, parent_size):
-        if self.vertical:
-            self.setMaximumSize(parent_size.width(),
-                min(int(parent_size.height()/2.),int(4/3. * parent_size.width())+1))
-        else:
-            self.setMaximumSize(1+int(3/4. * parent_size.height()),
-                parent_size.height())
-        self.resize(self.maximumSize())
-        self.animation.stop()
-        self.do_layout()
-
-    def sizeHint(self):
-        return self.maximumSize()
-
     def show_data(self, data):
         self.animation.stop()
         same_item = data.get('id', True) == self.data.get('id', False)
@@ -165,70 +151,187 @@ class CoverView(QWidget): # {{{
 # }}}

 # Book Info {{{
-class Label(QLabel):
+
+class RenderComments(QThread):

-    mr = pyqtSignal(object)
+    rdone = pyqtSignal(object, object)
+
+    def __init__(self, parent):
+        QThread.__init__(self, parent)
+        self.queue = Queue()
+        self.start()
+
+    def run(self):
+        while True:
+            try:
+                rows, comments = self.queue.get()
+            except:
+                break
+            import time
+            time.sleep(0.001)
+            oint = sys.getcheckinterval()
+            sys.setcheckinterval(5)
+            try:
+                self.rdone.emit(rows, comments_to_html(comments))
+            except:
+                pass
+            sys.setcheckinterval(oint)
+
+
+class BookInfo(QWebView):

     link_clicked = pyqtSignal(object)

-    def __init__(self):
-        QLabel.__init__(self)
-        self.setTextFormat(Qt.RichText)
-        self.setText('')
-        self.setWordWrap(True)
-        self.setAlignment(Qt.AlignTop)
-        self.linkActivated.connect(self.link_activated)
+    def __init__(self, vertical, parent=None):
+        QWebView.__init__(self, parent)
+        self.vertical = vertical
+        self.renderer = RenderComments(self)
+        self.renderer.rdone.connect(self._show_data, type=Qt.QueuedConnection)
+        self.page().setLinkDelegationPolicy(self.page().DelegateAllLinks)
+        self.linkClicked.connect(self.link_activated)
         self._link_clicked = False
-        self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)

     def link_activated(self, link):
         self._link_clicked = True
-        link = unicode(link)
+        link = unicode(link.toString())
         self.link_clicked.emit(link)

-    def mouseReleaseEvent(self, ev):
-        QLabel.mouseReleaseEvent(self, ev)
-        if not self._link_clicked:
-            self.mr.emit(ev)
-        self._link_clicked = False
-
-class BookInfo(QScrollArea):
-
-    def __init__(self, vertical, parent=None):
-        QScrollArea.__init__(self, parent)
-        self.vertical = vertical
-        self.setWidgetResizable(True)
-        self.label = Label()
-        self.setWidget(self.label)
-        self.link_clicked = self.label.link_clicked
-        self.mr = self.label.mr
-        self.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
+    def turnoff_scrollbar(self, *args):
+        self.page().mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

     def show_data(self, data):
-        self.label.setText('')
         rows = render_rows(data)
         rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
             k, t in rows])
-        comments = ''
-        if data.get(_('Comments'), '') not in ('', u'None'):
-            comments = data[_('Comments')]
-            comments = comments_to_html(comments)
+        comments = data.get(_('Comments'), '')
+        if comments and comments != u'None':
+            self.renderer.queue.put((rows, comments))
+        self._show_data(rows, '')
+
+    def _show_data(self, rows, comments):
+        f = QFontInfo(QApplication.font(self.parent())).pixelSize()
+        p = unicode(QApplication.palette().color(QPalette.Normal,
+            QPalette.Window).name())
+        c = unicode(QApplication.palette().color(QPalette.Normal,
+            QPalette.WindowText).name())
+        templ = u'''\
+        <html>
+            <head>
+            <style type="text/css">
+                body, td {background-color: %s; font-size: %dpx; color: %s }
+                a { text-decoration: none; color: blue }
+            </style>
+            </head>
+            <body>
+            %%s
+            </body>
+        <html>
+        '''%(p, f, c)
         if self.vertical:
             if comments:
                 rows += u'<tr><td colspan="2">%s</td></tr>'%comments
-            self.label.setText(u'<table>%s</table>'%rows)
+            self.setHtml(templ%(u'<table>%s</table>'%rows))
         else:
             left_pane = u'<table>%s</table>'%rows
             right_pane = u'<div>%s</div>'%comments
-            self.label.setText(u'<table><tr><td valign="top" '
+            self.setHtml(templ%(u'<table><tr><td valign="top" '
                 'style="padding-right:2em">%s</td><td valign="top">%s</td></tr></table>'
-                % (left_pane, right_pane))
+                % (left_pane, right_pane)))
+
+    def mouseDoubleClickEvent(self, ev):
+        ev.ignore()
+
+# }}}
+
+class DetailsLayout(QLayout): # {{{
+
+    def __init__(self, vertical, parent):
+        QLayout.__init__(self, parent)
+        self.vertical = vertical
+        self._children = []
+
+        self.min_size = QSize(190, 200) if vertical else QSize(120, 120)
+        self.setContentsMargins(0, 0, 0, 0)
+
+    def minimumSize(self):
+        return QSize(self.min_size)
+
+    def addItem(self, child):
+        if len(self._children) > 2:
+            raise ValueError('This layout can only manage two children')
+        self._children.append(child)
+
+    def itemAt(self, i):
+        try:
+            return self._children[i]
+        except:
+            pass
+        return None
+
+    def takeAt(self, i):
+        try:
+            self._children.pop(i)
+        except:
+            pass
+        return None
+
+    def count(self):
+        return len(self._children)
+
+    def sizeHint(self):
+        return QSize(self.min_size)
+
+    def setGeometry(self, r):
+        QLayout.setGeometry(self, r)
+        self.do_layout(r)
+
+    def cover_height(self, r):
+        mh = min(int(r.height()/2.), int(4/3. * r.width())+1)
+        try:
+            ph = self._children[0].widget().pixmap.height()
+        except:
+            ph = 0
+        if ph > 0:
+            mh = min(mh, ph)
+        return mh
+
+    def cover_width(self, r):
+        mw = 1 + int(3/4. * r.height())
+        try:
+            pw = self._children[0].widget().pixmap.width()
+        except:
+            pw = 0
+        if pw > 0:
+            mw = min(mw, pw)
+        return mw
+
+    def do_layout(self, rect):
+        if len(self._children) != 2:
+            return
+        left, top, right, bottom = self.getContentsMargins()
+        r = rect.adjusted(+left, +top, -right, -bottom)
+        x = r.x()
+        y = r.y()
+        cover, details = self._children
+        if self.vertical:
+            ch = self.cover_height(r)
+            cover.setGeometry(QRect(x, y, r.width(), ch))
+            cover.widget().do_layout()
+            y += ch + 5
+            details.setGeometry(QRect(x, y, r.width(), r.height()-ch-5))
+        else:
+            cw = self.cover_width(r)
+            cover.setGeometry(QRect(x, y, cw, r.height()))
+            cover.widget().do_layout()
+            x += cw + 5
+            details.setGeometry(QRect(x, y, r.width() - cw - 5, r.height()))
 # }}}

 class BookDetails(QWidget): # {{{

-    resized = pyqtSignal(object)
     show_book_info = pyqtSignal()
     open_containing_folder = pyqtSignal(int)
     view_specific_format = pyqtSignal(int, object)
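The new DetailsLayout takes over the sizing that CoverView.relayout used to do: in vertical mode the cover gets at most half the pane height, capped at a 4:3 height-to-width ratio, and the details pane receives whatever is left minus a 5 px gap. The same arithmetic with illustrative numbers:

    # vertical pane 300 px wide, 900 px tall
    width, height = 300, 900
    mh = min(int(height/2.), int(4/3. * width) + 1)   # min(450, 401)
    print mh               # -> 401: the aspect cap wins
    print height - mh - 5  # -> 494 px left for the details pane
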
@@ -269,23 +372,14 @@ class BookDetails(QWidget): # {{{
     def __init__(self, vertical, parent=None):
         QWidget.__init__(self, parent)
         self.setAcceptDrops(True)
-        self._layout = QVBoxLayout()
-        if not vertical:
-            self._layout.setDirection(self._layout.LeftToRight)
+        self._layout = DetailsLayout(vertical, self)
         self.setLayout(self._layout)

         self.cover_view = CoverView(vertical, self)
-        self.cover_view.relayout(self.size())
-        self.resized.connect(self.cover_view.relayout, type=Qt.QueuedConnection)
         self._layout.addWidget(self.cover_view)
         self.book_info = BookInfo(vertical, self)
         self._layout.addWidget(self.book_info)
         self.book_info.link_clicked.connect(self._link_clicked)
-        self.book_info.mr.connect(self.mouseReleaseEvent)
-        if vertical:
-            self.setMinimumSize(QSize(190, 200))
-        else:
-            self.setMinimumSize(120, 120)
         self.setCursor(Qt.PointingHandCursor)

     def _link_clicked(self, link):
@@ -299,17 +393,15 @@ class BookDetails(QWidget): # {{{
             open_local_file(val)


-    def mouseReleaseEvent(self, ev):
+    def mouseDoubleClickEvent(self, ev):
         ev.accept()
         self.show_book_info.emit()

-    def resizeEvent(self, ev):
-        self.resized.emit(self.size())
-
     def show_data(self, data):
-        self.cover_view.show_data(data)
         self.book_info.show_data(data)
-        self.setToolTip('<p>'+_('Click to open Book Details window') +
+        self.cover_view.show_data(data)
+        self._layout.do_layout(self.rect())
+        self.setToolTip('<p>'+_('Double-click to open Book Details window') +
                 '<br><br>' + _('Path') + ': ' + data.get(_('Path'), ''))

     def reset_info(self):
@@ -23,7 +23,9 @@ class PluginWidget(QWidget,Ui_Form):
             ('generate_recently_added', True),
             ('note_tag','*'),
             ('numbers_as_text', False),
-            ('read_tag','+')]
+            ('read_tag','+'),
+            ('wishlist_tag','Wishlist'),
+            ]


     # Output synced to the connected device?
@@ -42,28 +42,28 @@
     </property>
    </widget>
   </item>
-  <item row="2" column="0">
+  <item row="3" column="0">
    <widget class="QLabel" name="label_4">
     <property name="text">
      <string>Additional note tag prefix:</string>
     </property>
    </widget>
   </item>
-  <item row="2" column="1">
+  <item row="3" column="1">
    <widget class="QLineEdit" name="note_tag">
     <property name="toolTip">
      <string extracomment="Default: *"/>
     </property>
    </widget>
   </item>
-  <item row="4" column="1">
+  <item row="5" column="1">
    <widget class="QLineEdit" name="exclude_genre">
     <property name="toolTip">
      <string extracomment="Default: \[[\w]*\]"/>
     </property>
    </widget>
   </item>
-  <item row="4" column="0">
+  <item row="5" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Regex pattern describing tags to exclude as genres:</string>
@@ -76,7 +76,7 @@
     </property>
    </widget>
   </item>
-  <item row="5" column="1">
+  <item row="6" column="1">
    <widget class="QLabel" name="label_6">
     <property name="text">
      <string>Regex tips:
@@ -88,7 +88,7 @@
     </property>
    </widget>
   </item>
-  <item row="6" column="0">
+  <item row="7" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@@ -101,34 +101,44 @@
     </property>
    </spacer>
   </item>
-  <item row="8" column="0">
+  <item row="9" column="0">
    <widget class="QCheckBox" name="generate_titles">
     <property name="text">
      <string>Include 'Titles' Section</string>
     </property>
    </widget>
   </item>
-  <item row="10" column="0">
+  <item row="11" column="0">
    <widget class="QCheckBox" name="generate_recently_added">
     <property name="text">
      <string>Include 'Recently Added' Section</string>
     </property>
    </widget>
   </item>
-  <item row="11" column="0">
+  <item row="12" column="0">
    <widget class="QCheckBox" name="numbers_as_text">
     <property name="text">
      <string>Sort numbers as text</string>
     </property>
    </widget>
   </item>
-  <item row="9" column="0">
+  <item row="10" column="0">
    <widget class="QCheckBox" name="generate_series">
     <property name="text">
      <string>Include 'Series' Section</string>
     </property>
    </widget>
   </item>
+  <item row="2" column="1">
+   <widget class="QLineEdit" name="wishlist_tag"/>
+  </item>
+  <item row="2" column="0">
+   <widget class="QLabel" name="label_5">
+    <property name="text">
+     <string>Wishlist tag:</string>
+    </property>
+   </widget>
+  </item>
  </layout>
 </widget>
 <resources/>
143
src/calibre/gui2/comments_editor.py
Normal file
@@ -47,6 +47,8 @@ class BulkConfig(Config):
                 self.show_pane)
         self.connect(self.groups, SIGNAL('entered(QModelIndex)'),
                 self.show_group_help)
+        rb = self.buttonBox.button(self.buttonBox.RestoreDefaults)
+        rb.setVisible(False)
         self.groups.setMouseTracking(True)

@@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
     if log is None:
         log = Log()
     from calibre.library import db
+    from calibre.utils.config import prefs
+    prefs.refresh()
     db = db()
     db.catalog_plugin_on_device_temp_mapping = dbspec
