[Sync] Sync with trunk, revision 6907

2025-07-09 03:04:10 -04:00 · 2010-11-16 23:35:43 +08:00 · 2010-11-16 23:35:43 +08:00 · e77eca28c3
commit e77eca28c3
parent ce9f35a6cc fee80b5cf2
160 changed files with 33981 additions and 19620 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,190 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+- version: 0.7.28
+  date: 2010-11-12
+
+  new features:
+    - title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
+
+    - title: "Driver for Nook Color, Eken M001"
+
+    - title: "Add a tweak to turn off double clicking to open viewer"
+
+    - title: "Catalog generation: Add indication when a book has no formats"
+      tickets: [7376]
+
+    - title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
+
+    - title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
+
+  bug fixes:
+    - title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
+      tickets: [7488]
+
+    - title: "Bulk convert dialog: Hide useless restore defaults button."
+      tickets: [7471]
+
+    - title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
+      tickets: [7355]
+
+    - title: "Fix some SONY readers not being detected on windows"
+      tickets: [7413]
+
+    - title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
+      tickets: [7455]
+
+    - title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
+      tickets: [7506]
+
+    - title: "Sony driver: Ignore invalid strings when updating XML database"
+
+    - title: "Content Server: Add day to displayed date in /mobile book listing"
+
+    - title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
+      tickets: [7481]
+
+    - title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
+      tickets: [7472]
+
+    - title: "Fix not enough vertical space for text in the preferences dialog category listing"
+
+    - title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
+
+    - title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
+
+    - title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
+
+    - title: "Fix bug in regex used to extract charset from <meta> tags"
+
+    - title: "MOBI Output: Add support for the <q> tag"
+
+  improved recipes:
+    - Zeit Online
+    - Gamespot Review
+    - Politika
+    - Pagina12
+    - Irish Times
+    - elektrolese
+
+  new recipes:
+    - title: "Handelsblatt and European Voice"
+      author: "malfi"
+      
+    - title: "Polityka and Newsweek"
+      author: "Mateusz Kielar"
+
+    - title: "MarcTV"
+      author: "Marc Toensings"
+
+    - title: "Rolling Stone"
+      author: "Darko Miletic"
+
+    - title: "Vedomosti"
+      author: "Nikolai Kotchetkov"
+
+    - title: "Hola.com"
+      author: "bmsleight"
+
+    - title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazin"
+      author: "BlonG"
+
+    - title: "SC Print Magazine"
+      author: "Tony Maro"
+
+    - title: "Diario Sport"
+      author: "Jefferson Frantz"
+
+- version: 0.7.27
+  date: 2010-11-05
+
+  new features:
+    - title: "The book list behavior has changed"
+      type: major
+      description: >
+        "Now double clicking on an entry in the book list will open it in the viewer. To edit metadata single click a previously selected entry instead. This is consistent with
+        the usage in most operating systems, so should be most intuitive for new users. Also typing any key no longer starts an edit, instead press F2 (Enter on OS X) to start editing
+        the current cell. Also you now have to double click instead of single clicking the book details panel to open the detailed info dialog for the book."
+
+    - title: "Added a new HTML output format plugin, which converts the input document to a ZIP file. The zip file contains HTML pages suitable for display in a website"
+
+    - title: "Support for iRiver Cover Story and Digma Q600"
+
+    - title: "Add a search button (labelled Go!) to explicitly run a search with the text currently in the quick search box"
+
+    - title: "Add author to the calibre generated book jacket"
+      tickets: [7402]
+
+    - title: "Add the title of the destination book to the merge dialog warning message"
+
+    - title: "calibre-server: Make auto reload control separate from --develop with a new command line option --auto-reload"
+
+  bug fixes:
+    - title: "Fix book details panel not being updated after a delete-merge"
+      tickets: [7426]
+
+    - title: "Fix clicking in the search box launches a search if you have search as you type enabled"
+      tickets: [7425]
+
+    - title: "Use a browser widget to display book details for more robustness and better performance when viewing large HTML comments"
+
+    - title: "Fix cover browser not updated after copy to library and delete"
+      tickets: [7416]
+
+    - title: "Fix regression that broke sending non calibre EPUB files to the iPad. Also handle failure to set cover in iTunes gracefully"
+      tickets: [7356]
+
+    - title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
+      tickets: [7321]
+
+    - title: "Convert comments to HTML for book details panel in separate thread to make scrolling through the book list faster when large comments are present"
+
+    - title: "calibre-server: Fix regression that broke --daemonize"
+
+    - title: "EPUB Input: Handle ncx files that have <navpoint> elements with no content correctly."
+      tickets: [7396]
+
+    - title: "SNBOutput: Fixed a bug in handling pre tag"
+
+    - title: "MOBI Output: Don't ignore hidden anchors."
+      tickets: [7384]
+
+    - title: "Fix switching libraries and generating a catalog could generate a catalog for the wrong library"
+
+    - title: "MOBI Output: Fix regression that broke conversion of anchors inside superscripts/subscripts."
+      tickets: [7368]
+
+    - title: "Content server: Fix various minor bugs"
+      tickets: [7379, 6768, 7354]
+
+    - title: "Amazon metadata download plugin: Make it more robust and add option to auto convert HTML to text"
+
+    - title: "Re-arrange send to device menu to make it harder to accidentally trigger the send and delete actions"
+      
+
+  improved recipes:
+    - Danas
+    - Fudzilla
+    - Zeit Online
+    - New York Times
+    - Mediapart
+
+  new recipes:
+    - title: "Ynet and Calcalist"
+      author: "marbs"
+      
+    - title: "El Faro de Vigo"
+      author: "Jefferson Frantz"
+
+    - title: "Clic_RBS"
+      author: "arvoredo"
+
+    - title: "Correio da Manha"
+      author: "jmst"
+
+    - title: "Rue89"
+      author: "Louis Gesbert"
+
 - version: 0.7.26
  date: 2010-10-30

--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@ -81,6 +81,14 @@ p.unread_book {
 	text-indent:-2em;
 	}

+p.wishlist_item {
+	text-align:left;
+	margin-top:0px;
+	margin-bottom:0px;
+	margin-left:2em;
+	text-indent:-2em;
+	}
+
 p.date_read {
 	text-align:left;
 	margin-top:0px;
@ -104,3 +112,14 @@ hr.annotations_divider {
 	margin-top:0em;
 	margin-bottom:0em;
 	}
+
+td.publisher, td.date {
+	font-weight:bold;
+	text-align:center;
+	}
+td.rating {
+	text-align: center;
+	}
+td.thumbnail img {
+	-webkit-box-shadow: 6px 6px 6px #888;
+	}
--- a/resources/content_server/browse/browse.html
+++ b/resources/content_server/browse/browse.html
@ -4,7 +4,7 @@
 <html>
 <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-    <title>..:: calibre library ::.. {title}</title>
+    <title>..:: calibre {library} ::.. {title}</title>
    <meta http-equiv="X-UA-Compatible" content="IE=100" />
    <link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />

@ -41,7 +41,7 @@
            <div class="area">
              <div class="bubble">
                  <p><a href="{prefix}/browse" title="Return to top level"
-                      >&rarr;&nbsp;home&nbsp;&larr;</a></p>
+                      >&rarr;&nbsp;{home}&nbsp;&larr;</a></p>
              </div>
            </div>
            <div id="nav-container">&nbsp;
@ -80,7 +80,7 @@
                <form name="search_form"  action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
                    <input value="{initial_search}" type="text" title="Search" name="query"
                                        class="search_input" />&nbsp;
-                    <input type="submit" value="Search" title="Search" alt="Search" />
+                    <input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
                </form>
            </div>
            <div>&nbsp;</div>
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -211,3 +211,9 @@ generate_cover_title_font = None
 # Absolute path to a TTF font file to use as the font for the footer in the
 # default cover
 generate_cover_foot_font = None
+
+
+# Behavior of doubleclick on the books list. Choices:
+# open_viewer, do_nothing, edit_cell. Default: open_viewer.
+# Example: doubleclick_on_library_view = 'do_nothing'
+doubleclick_on_library_view = 'open_viewer'
--- a/resources/images/format-text-bold.png
+++ b/resources/images/format-text-bold.png
--- a/resources/images/format-text-italic.png
+++ b/resources/images/format-text-italic.png
--- a/resources/images/format-text-strikethrough.png
+++ b/resources/images/format-text-strikethrough.png
--- a/resources/images/format-text-underline.png
+++ b/resources/images/format-text-underline.png
--- a/resources/images/hotmail.png
+++ b/resources/images/hotmail.png
--- a/resources/images/news/avto-magazin.png
+++ b/resources/images/news/avto-magazin.png
--- a/resources/images/news/dnevnik.png
+++ b/resources/images/news/dnevnik.png
--- a/resources/images/news/rollingstone.png
+++ b/resources/images/news/rollingstone.png
--- a/resources/images/news/siol.png
+++ b/resources/images/news/siol.png
--- a/resources/jacket/stylesheet.css
+++ b/resources/jacket/stylesheet.css
@ -41,6 +41,15 @@
 	text-align: center;
 }

+/*
+**	Author
+*/
+.cbj_author {
+	font-size: medium;
+	text-align: center;
+    margin-bottom: 1ex;
+}
+
 /*
 **	Table containing Series, Publication Year, Rating and Tags
 */
--- a/resources/jacket/template.xhtml
+++ b/resources/jacket/template.xhtml
@ -7,6 +7,7 @@
    <body>
        <div class="cbj_banner">
            <div class="cbj_title">{title}</div>
+            <div class="cbj_author">{author}</div>
            <table class="cbj_header">
                <tr class="cbj_series">
                    <td class="cbj_label">{series_label}:</td>
--- a/resources/recipes/avto-magazin.recipe
+++ b/resources/recipes/avto-magazin.recipe
@ -0,0 +1,46 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, BlonG'
+'''
+avto-magazin.si
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+class Dnevnik(BasicNewsRecipe):
+  title = u'Avto Magazin'
+  __author__ = u'BlonG'
+  description = u'Za avtomobilisti\xc4\x8dne frike, poznavalce in nedeljske \xc5\xa1oferje.'
+  oldest_article = 7
+  max_articles_per_feed = 20
+  labguage = 'sl'
+  no_stylesheets = True
+  use_embedded_content = False
+
+  conversion_options = {'linearize_tables' : True}
+
+
+  cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
+
+  extra_css = '''
+	h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+	h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+	p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+	body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+
+  keep_only_tags = [
+	dict(name='div', attrs={'id':'_iprom_inStream'}),
+#	dict(name='div', attrs={'class':'entry-content'}),
+	]
+
+  remove_tags = [
+	dict(name='div', attrs={'id':'voteConfirmation'}),
+	dict(name='div', attrs={'id':'InsideVote'}),
+	dict(name='div', attrs={'class':'Zone234'}),
+	dict(name='div', attrs={'class':'Comments'}),
+	dict(name='div', attrs={'class':'sorodneNovice'}),
+	dict(name='div', attrs={'id':'footer'}),
+	]
+
+
+  feeds = [
+	(u'Novice', u'http://www.avto-magazin.si/rss/')
+	]
--- a/resources/recipes/calcalist.recipe
+++ b/resources/recipes/calcalist.recipe
@ -0,0 +1,43 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import re
+
+class AdvancedUserRecipe1283848012(BasicNewsRecipe):
+    description   = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.'
+    cover_url      = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
+    title          = u'Calcalist'
+    language              = 'he'
+    __author__ = 'marbs'
+    extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
+    simultaneous_downloads = 5
+    remove_javascript     = True
+    timefmt        = '[%a, %d %b, %Y]'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_attributes = ['width']
+    simultaneous_downloads = 5
+    keep_only_tags =dict(name='div', attrs={'id':'articleContainer'})
+    remove_tags = [dict(name='p', attrs={'text':['&nbsp;']})]
+    max_articles_per_feed = 100
+    preprocess_regexps = [
+        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
+        ]
+
+
+    feeds          = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
+                           (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
+                           (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
+                           (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
+                           (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
+                           (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
+                           (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
+                           (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
+                           (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
+                           (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
+                           (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
+                           (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
+                           (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]
+
+    def print_version(self, url):
+        split1 = url.split("-")
+        print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
+        return print_url
--- a/resources/recipes/clic_rbs.recipe
+++ b/resources/recipes/clic_rbs.recipe
@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ClicRBS(BasicNewsRecipe):
+    title          = u'ClicRBS'
+    language = 'pt'
+    __author__ = 'arvoredo'
+    oldest_article = 3
+    max_articles_per_feed = 9
+    cover_url             = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'
+
+    remove_tags = [
+                       dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
+                        ]
+
+    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
+    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
+    remove_tags_after = dict(name='div', attrs={'class':'extra'})
+    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
+    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
+    remove_tags_after = dict(name='ul', attrs={'class':'lista'})
+
+    feeds = [
+               (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
+             , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
+             , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
+             , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
+             , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
+             , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
+             , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
+             , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
+             , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
+             , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
+             , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
+             , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
+             , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
+             , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
+             , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
+             , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
+            ]
+
+    extra_css = '''
+                    cite{color:#007BB5; font-size:xx-small; font-style:italic;}
+                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+                    h3{font-size:large; color:#082963; font-weight:bold;}
+                    #ident{color:#0179B4; font-size:xx-small;}
+                    p{color:#000000;font-weight:normal;}
+                    .commentario p{color:#007BB5; font-style:italic;}
+                '''
--- a/resources/recipes/cm_journal.recipe
+++ b/resources/recipes/cm_journal.recipe
@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CMJornal_pt(BasicNewsRecipe):
+    title                 = 'Correio da Manha - Portugal'
+    __author__            = 'jmst'
+    description           = 'As noticias de Portugal e do Mundo'
+    publisher             = 'Cofina Media'
+    category              = ''
+    oldest_article        = 1
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'pt'
+    extra_css             = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags = [
+                        dict(name=['h2','h1'])
+                      , dict(name='div', attrs={'class': ['news']})
+                     ]
+
+    remove_tags = [
+                    dict(name=['object','embed','iframe'])
+                   ,dict(name='a',attrs={'href':['#']})
+                  ]
+
+    feeds = [
+              (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' )
+             ,(u'Portugal'    , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010'    )
+             ,(u'Economia' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' )
+             ,(u'Mundo' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' )
+             ,(u'Desporto' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012'  )
+             ,(u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
+            ]
+
+    def print_version(self, url):
+        return url.replace('noticia.aspx', 'Imprimir.aspx')
+
--- a/resources/recipes/danas.recipe
+++ b/resources/recipes/danas.recipe
@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
    remove_empty_feeds    = True
    extra_css             = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-                                .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
+                                .article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
                                .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
                                .antrfileText{border-left: 2px solid #999999; 
                                              margin-left: 0.8em; 
@ -59,11 +59,14 @@ class Danas(BasicNewsRecipe):
                          ,(re.compile(u'\u201d'), lambda match: '&rdquo;') # right double quotation mark
                          ,(re.compile(u'\u201e'), lambda match: '&ldquo;') # double low-9 quotation mark                          
                          ,(re.compile(u'\u201f'), lambda match: '&rdquo;') # double high-reversed-9 quotation mark
+                          ,(re.compile(u'\u00f4'), lambda match: '&ldquo;') # latin small letter o with circumflex
+                          ,(re.compile(u'\u00f6'), lambda match: '&rdquo;') # latin small letter o with dieaeresis
+                          ,(re.compile(u'\u00e1'), lambda match: '&nbsp;' ) # latin small letter a with acute
                         ]

    keep_only_tags     = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
-                     dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
+                     dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
                    ,dict(name='div', attrs={'id':'comments'})
                    ,dict(name=['object','link','iframe','meta'])
                  ]
--- a/resources/recipes/deredactie.recipe
+++ b/resources/recipes/deredactie.recipe
@ -0,0 +1,61 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class deredactie(BasicNewsRecipe):
+    title          = u'Deredactie.be'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
+    language = 'de'
+    keep_only_tags = []
+    __author__ = 'malfi'
+    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlehead'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlebody'}))
+    remove_tags = []
+    remove_tags.append(dict(name = 'div', attrs = {'id': 'story'}))
+    remove_tags.append(dict(name = 'div', attrs = {'id': 'useractions'}))
+    remove_tags.append(dict(name = 'hr'))
+
+    extra_css = '''
+     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+     '''
+    def parse_index(self):
+        categories = []
+        catnames = {}
+        soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
+        for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
+              a = elem.find('a', href=True)
+              m = re.search('(?<=/)[^/]*$', a['href'])
+              cat = str(m.group(0))
+              categories.append(cat)
+              catnames[cat] = a['title']
+              self.log("found cat %s\n" % catnames[cat])
+
+        feeds = []
+
+        for cat in categories:
+            articles = []
+            soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
+            for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
+                skip_this_article = False
+                url = a['href'].strip()
+                if url.startswith('/'):
+                    url = 'http://www.deredactie.be' + url
+                myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
+                for article in articles :
+                    if article['url'] == url :
+                        skip_this_article = True
+                        self.log("SKIPPING DUP %s" % url)
+                        break
+                if skip_this_article :
+                        continue;
+                articles.append(myarticle)
+                self.log("Adding URL %s\n" %url)
+            if articles:
+                feeds.append((catnames[cat], articles))
+        return feeds
+
--- a/resources/recipes/diario_sport.recipe
+++ b/resources/recipes/diario_sport.recipe
@ -0,0 +1,42 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioSport(BasicNewsRecipe):
+    title          = u'Diario Sport'
+    oldest_article = 2
+    max_articles_per_feed = 75
+    __author__  = 'Jefferson Frantz'
+    description = 'Todas las noticias del Barça y del mundo del deporte en general'
+    timefmt = ' [%d %b, %Y]'
+    language = 'es'
+    no_stylesheets = True
+
+    feeds          = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]
+
+    extra_css              = '''
+                                h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
+                                '''
+
+    keep_only_tags = [dict(name='div', attrs={'id':['noticiasMedio']})]
+
+    remove_tags        = [
+                             dict(name=['object','link','script','ul'])
+                            ,dict(name='div', attrs={'id':['scrAdSense','herramientas2','participacion','participacion2','bloque1resultados','bloque2resultados','cont_vinyetesAnt','tinta','noticiasSuperior','cintillopublicidad2']})
+                            ,dict(name='p', attrs={'class':['masinformacion','hora']})
+                            ,dict(name='a', attrs={'class':["'link'"]})
+                            ,dict(name='div', attrs={'class':['addthis_toolbox addthis_default_style','firma','pretitularnoticia']})
+                            ,dict(name='form', attrs={'id':['formularioDeBusquedaAvanzada']})
+                         ]
+
+    def preprocess_html(self, soup):
+            for item in soup.findAll(style=True):
+               del item['style']
+            return soup
+
+
+    def postprocess_html(self, soup, first_fetch):
+        img = soup.find('img',src='/img/videos/mascaravideo.png')
+        if not img is None:
+            img.extract()
+
+        return soup
+
--- a/resources/recipes/dnevnik.recipe
+++ b/resources/recipes/dnevnik.recipe
@ -0,0 +1,63 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, BlonG'
+'''
+dnevnik.si
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+class Dnevnik(BasicNewsRecipe):
+  title = u'Dnevnik.si'
+  __author__ = u'BlonG'
+  description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
+  Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
+  bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
+  pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
+  dru\u017ebe.'''
+  oldest_article = 3
+  max_articles_per_feed = 20
+  language = 'sl'
+  no_stylesheets = True
+  use_embedded_content = False
+
+  cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'
+
+  extra_css = '''
+	h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+	h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+	p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+	body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+
+  keep_only_tags = [
+	dict(name='div', attrs={'id':'_iprom_inStream'}),
+	dict(name='div', attrs={'class':'entry-content'}),
+	]
+
+  remove_tags = [
+	dict(name='div', attrs={'class':'fb_article_top'}),
+	dict(name='div', attrs={'class':'related'}),
+	dict(name='div', attrs={'class':'fb_article_foot'}),
+	dict(name='div', attrs={'class':'spreading'}),
+	dict(name='dl', attrs={'class':'ad'}),
+	dict(name='p', attrs={'class':'report'}),
+	dict(name='div', attrs={'class':'hfeed comments'}),
+	dict(name='dl', attrs={'id':'entryPanel'}),
+	dict(name='dl', attrs={'class':'infopush ip_wide'}),
+	dict(name='div', attrs={'class':'sidebar'}),
+	dict(name='dl', attrs={'class':'bottom'}),
+	dict(name='div', attrs={'id':'footer'}),
+	]
+
+
+  feeds = [
+	(u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13')
+	,(u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14')
+	,(u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116')
+	,(u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5')
+	,(u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15')
+	,(u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17')
+	,(u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18')
+	,(u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19')
+	,(u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20')
+	,(u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21')
+	,(u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
+	]
--- a/resources/recipes/el_faro.recipe
+++ b/resources/recipes/el_faro.recipe
@ -0,0 +1,77 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElFaroDeVigo(BasicNewsRecipe):
+    '''Recipe for the Spanish newspaper El Faro de Vigo (farodevigo.es).'''
+
+    title          = u'El Faro de Vigo'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    __author__  = 'Jefferson Frantz'
+    description = 'Noticias de Vigo'
+    timefmt = ' [%d %b, %Y]'
+    language = 'es'
+    encoding               = 'cp1252'
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Entries commented out with '##' are disabled feeds.
+    feeds          = [
+##                        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
+##                        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
+                        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
+                        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
+                        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
+##                        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
+                        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
+                        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
+                        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
+                        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
+                        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
+                        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
+                        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
+                        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]
+
+    extra_css              = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
+                                h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
+                                h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
+                                .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
+                                .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''
+
+
+    def preprocess_html(self, soup):
+        '''Strip inline styles and the block holding the 'barrapunto' icon.
+
+        Returns the soup after passing it through ``adeify_images``.
+        '''
+        for item in soup.findAll(style=True):
+            del item['style']
+
+        # The same icon is referenced on three static hosts (estaticos00..02).
+        # For each host, drop the parent of the first matching image.
+        for host_no in range(3):
+            url = ('http://estaticos%02d.farodevigo.es//elementosWeb/mediaweb'
+                   '/images/compartir/barrapunto.gif' % host_no)
+            fitem = soup.find('img',src=url)
+            if fitem:
+                fitem.parent.extract()
+
+        return self.adeify_images(soup)
+
+    def postprocess_html(self, soup, first_fetch):
+        '''Set ``align="left"`` on every element with class 'enlacenegrita10'.'''
+        for tag in soup.findAll(True, {'class':'enlacenegrita10'}):
+            tag['align'] = 'left'
+
+        return soup
+
+
+    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]
+
+    remove_tags        = [
+                             dict(name=['object','link','script','ul','iframe','ol'])
+                            ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd']})
+                            ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
+
+                         ]
+
+
--- a/resources/recipes/elektrolese.recipe
+++ b/resources/recipes/elektrolese.recipe
@ -1,38 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-'''
-Fetch elektrolese.
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class elektrolese(BasicNewsRecipe):
-
-    title = u'elektrolese'
-    description = 'News about electronic publishing'
-    __author__ = 'Oliver Niesner'
-    use_embedded_content   = False
-    timefmt = ' [%a %d %b %Y]'
-    language = 'de'
-
-    oldest_article = 14
-    max_articles_per_feed = 50
-    no_stylesheets = True
-    conversion_options = {'linearize_tables':True}
-    encoding = 'utf-8'
-
-
-    remove_tags_after = [dict(id='comments')]
-    filter_regexps = [r'ad\.doubleclick\.net']
-
-    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
-                   dict(id='comments'),
-                   dict(id='Navbar1')]
-
-
-
-    feeds =  [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
-
-
--- a/resources/recipes/eu_commission.recipe
+++ b/resources/recipes/eu_commission.recipe
@ -0,0 +1,58 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+LANGUAGE = 'de'
+
+def feedlink(num):
+    '''Return the europa.eu syndication RSS URL for feed id ``num`` in LANGUAGE.'''
+    pieces = [u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=',
+              str(num), '&lang=', LANGUAGE]
+    return u''.join(pieces)
+
+class EUCommissionPress(BasicNewsRecipe):
+    '''Press releases of the EU Commission, one feed per policy area.'''
+
+    title          = u'Pressemitteilungen der EU Kommission pro Politikbereich'
+    __author__ = 'malfi'
+    language = LANGUAGE
+    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    # Only the main press-release container is kept; nothing is removed from it.
+    keep_only_tags = [dict(name = 'div', attrs = {'class': 'pressReleaseContentMain'})]
+    remove_tags = []
+
+    # (feed title, URL built by feedlink() from the numeric feed id)
+    feeds          = [
+                      (u'Pressemitteilung des Tages',feedlink(64)),
+                      (u'Presidency',feedlink(137)),
+                      (u'Foreign affairs and security policy',feedlink(138)),
+                      (u'Agriculture and rural development',feedlink(139)),
+                      (u'Budget and financial programming ',feedlink(140)),
+                      (u'Climate action',feedlink(141)),
+                      (u'Competition',feedlink(142)),
+                      (u'Development',feedlink(143)),
+                      (u'Digital agenda',feedlink(144)),
+                      (u'Economic and monetary affairs',feedlink(145)),
+                      (u'Education, culture, multilingualism and youth ',feedlink(146)),
+                      (u'Employment, social Affairs and inclusion ',feedlink(147)),
+                      (u'Energy',feedlink(148)),
+                      (u'Enlargment and European neighbourhood policy ',feedlink(149)),
+                      (u'Environment',feedlink(150)),
+                      (u'Health and consumer policy',feedlink(151)),
+                      (u'Home affairs',feedlink(152)),
+                      (u'Industry and entrepreneurship',feedlink(153)),
+                      (u'Inter-Institutional relations and administration',feedlink(154)),
+                      (u'Internal market and services',feedlink(155)),
+                      (u'International cooperation, humanitarian aid and crisis response',feedlink(156)),
+                      (u'Justice, fundamental rights and citizenship',feedlink(157)),
+                      (u'Maritime affairs and fisheries',feedlink(158)),
+                      (u'Regional policy',feedlink(159)),
+                      (u'Research and innovation',feedlink(160)),
+                      (u'Taxation and customs union, audit and anti-fraud',feedlink(161)),
+                      (u'Trade',feedlink(162)),
+                      (u'Transport',feedlink(163))
+                      ]
+    extra_css = '''
+     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+     '''
+
--- a/resources/recipes/european_voice.recipe
+++ b/resources/recipes/european_voice.recipe
@ -0,0 +1,51 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class EuropeanVoice(BasicNewsRecipe):
+    '''Recipe for europeanvoice.com, using the print view of each article.'''
+
+    title          = u'European Voice'
+    __author__     = 'malfi'
+    language = 'en'
+    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
+
+    oldest_article = 14
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    keep_only_tags    = [dict(name='div', attrs={'id':'articleLeftColumn'})]
+    remove_tags    = [dict(name='div', attrs={'id':'BreadCrump'})]
+
+    feeds          = [
+        (u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
+        (u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
+        (u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
+        (u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
+        (u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
+        (u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
+        (u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
+        (u'EVents',u'http://www.europeanvoice.com/Rss/10.xml'),
+        (u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
+        (u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
+        (u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
+        (u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
+        (u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
+        (u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
+        (u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
+        (u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
+        (u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
+        (u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
+        (u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
+        (u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
+    ]
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+        '''
+
+    def print_version(self, url):
+        # The print view is the article URL plus the bPrint query flag.
+        return url + '?bPrint=1'
+
+    def preprocess_html(self, soup):
+        # A text node equal to 'Subscribers' is treated as the paywall marker;
+        # raising here aborts the download of that article.
+        if soup.findAll(True,text='Subscribers'):
+            raise Exception('Article skipped, because content can only be seen with subscription')
+        return soup
+
--- a/resources/recipes/fudzilla.recipe
+++ b/resources/recipes/fudzilla.recipe
@ -33,7 +33,7 @@ class Fudzilla(BasicNewsRecipe):
                   ]

    feeds = [
-             (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+            (u'Posts', u'http://www.fudzilla.com/?format=feed')
             ]

    preprocess_regexps = [
--- a/resources/recipes/gamespot.recipe
+++ b/resources/recipes/gamespot.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__author__ = u'Marc T\xf6nsing'
+__author__    = u'Marc Toensing'

 from calibre.web.feeds.news import BasicNewsRecipe

@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
    no_javascript = True

    feeds =  [
+               ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
               ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
               ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
               ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):

    def get_article_url(self, article):
        return article.get('link') + '?print=1'
-
-
--- a/resources/recipes/german_gov.recipe
+++ b/resources/recipes/german_gov.recipe
@ -0,0 +1,28 @@
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GermanGovermentPress(BasicNewsRecipe):
+    '''Press releases of the German federal government (bundesregierung.de).'''
+
+    title          = u'Pressemitteilungen der Bundesregierung'
+    oldest_article = 14
+    __author__ = 'malfi'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
+    language = 'de'
+    keep_only_tags = [
+        dict(name = 'h2'),
+        dict(name = 'div', attrs = {'class': 'textblack'}),
+        dict(name = 'div', attrs = {'class': 'subtitle'}),
+        dict(name = 'div', attrs = {'class': 'text'}),
+    ]
+    remove_tags = []
+    feeds          = [ (u'Pressemitteilungen',u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf') ]
+    extra_css = '''
+     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+     '''
+    def print_version(self, url):
+        '''Return the print-layout ("Druckansicht") variant of an article URL.
+
+        URLs that do not end in ``.html`` are returned unchanged instead of
+        raising on the failed match.
+        '''
+        # The dot is escaped so that only a literal '.html' suffix is rewritten.
+        m = re.search(r'^(.*)\.html$', url)
+        if m is None:
+            return url
+        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
--- a/resources/recipes/globe_and_mail.recipe
+++ b/resources/recipes/globe_and_mail.recipe
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'

-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2010, Szing'
 __docformat__ = 'restructuredtext en'

 '''
@ -10,49 +10,52 @@ globeandmail.com

 from calibre.web.feeds.news import BasicNewsRecipe

-class GlobeAndMail(BasicNewsRecipe):
-    title = u'Globe and Mail'
-    language = 'en_CA'
-
-    __author__ = 'Kovid Goyal'
+class AdvancedUserRecipe1287083651(BasicNewsRecipe):
+    title          = u'Globe & Mail'
+    __license__   = 'GPL v3'
+    __author__ = 'Szing'
    oldest_article = 2
-    max_articles_per_feed = 10
    no_stylesheets = True
-    extra_css = '''
-    h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
-    h4 {margin-top: 0px;}
-    #byline { font-family: monospace; font-weight:bold; }
-    #placeline {font-weight:bold;}
-    #credit {margin-top:0px;}
-    .tag {font-size: 22pt;}'''
-    description = 'Canada\'s national newspaper'
-    keep_only_tags = [dict(name='article')]
-    remove_tags = [dict(name='aside'),
-                   dict(name='footer'),
-                   dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
-                   dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
-                  ]
+    max_articles_per_feed = 100
+    encoding               = 'utf8'
+    publisher              = 'Globe & Mail'
+    language               = 'en_CA'
+    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
+
    feeds          = [
-            (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
-            (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
-            (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
-            (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
-            (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
+      (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
      (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
-            (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
-            (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
-            (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
-            (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
+      (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
+      (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
+      (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
      (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
+      (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
+      (u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
      (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
      (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
-            (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
      (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-            (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
+      (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss'),
+      (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss')
    ]

-    def get_article_url(self, article):
-        url = BasicNewsRecipe.get_article_url(self, article)
-        if '/video/' not in url:
-            return url
+    keep_only_tags = [
+      dict(name='h1'),
+      dict(name='h2', attrs={'id':'articletitle'}),
+      dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
+      dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
+      dict(name='id', attrs={'class':'article'}),
+      dict(name='table', attrs={'class':'todays-market'}),
+      dict(name='header', attrs={'id':'leadheader'})
+    ]
+
+    remove_tags = [
+      dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
+    ]
+
+    #this has to be here or the text in the article appears twice.
+    remove_tags_after = [dict(id='article')]
+
+    #Use the mobile version rather than the web version
+    def print_version(self, url):
+        # Append the mobile-view flag; start a query string if the URL has none,
+        # otherwise '&service=mobile' would be glued onto the path.
+        separator = '&' if '?' in url else '?'
+        return url + separator + 'service=mobile'

--- a/resources/recipes/handelsblatt.recipe
+++ b/resources/recipes/handelsblatt.recipe
@ -0,0 +1,41 @@
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Handelsblatt(BasicNewsRecipe):
+    '''Recipe for handelsblatt.com, using the printer-friendly article pages.'''
+
+    title          = u'Handelsblatt'
+    __author__ = 'malfi'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
+    language = 'de'
+    keep_only_tags = [
+        dict(name = 'div', attrs = {'class': 'structOneCol'}),
+        dict(name = 'div', attrs = {'id': 'fullText'}),
+    ]
+    remove_tags    = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]
+
+    feeds          = [
+                        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
+                        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
+                        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
+                        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
+                        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
+                        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
+                        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
+                        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
+                        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
+                        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
+                     ]
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+        '''
+
+    def print_version(self, url):
+        '''Build the print-page URL from the numeric id that follows a ';'.
+
+        Returns ``url`` unchanged when it contains no ';<digits>' part, rather
+        than failing on a missing match or producing a URL with an empty id.
+        '''
+        # '+' (not '*') so that an empty run of digits is never accepted as id.
+        m = re.search('(?<=;)[0-9]+', url)
+        if m is None:
+            return url
+        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
+
+
--- a/resources/recipes/hola.recipe
+++ b/resources/recipes/hola.recipe
@ -0,0 +1,38 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
+'''
+hola.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Hackaday(BasicNewsRecipe):
+    # Recipe for hola.com (see title below); articles are addressed by feed GUID.
+    title                 = u'Hola'
+    __author__            = 'bmsleight'
+    description           = 'diario de actualidad, moda y belleza.'
+    language              = 'es'
+
+    oldest_article        = 10
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    keep_only_tags = [dict(name='div', attrs={'id':'cuerpo'})]
+
+    feeds = [
+        (u'Famosos', u'http://www.hola.com/famosos/rss.xml'),
+        (u'Realeza', u'http://www.hola.com/realeza/rss.xml'),
+        (u'Cine', u'http://www.hola.com/cine/rss.xml'),
+        (u'Música', u'http://www.hola.com/musica/rss.xml'),
+        (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'),
+        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
+        (u'Niños', u'http://www.hola.com/ninos/rss.xml'),
+        (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
+    ]
+
+    def get_article_url(self, article):
+        # The feed entry's 'guid' is used as the article URL (None when absent).
+        return article.get('guid')
--- a/resources/recipes/irish_times.recipe
+++ b/resources/recipes/irish_times.recipe
@ -33,13 +33,14 @@ class IrishTimes(BasicNewsRecipe):
                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
                    ]

-
    def print_version(self, url):
         if url.count('rss.feedsportal.com'):
-            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+             # Rebuild the irishtimes.com print URL from the feedsportal-encoded link.
+             u = 'http://www.irishtimes.com' + \
+ (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
         else:
             u = url.replace('.html','_pf.html')
         return u

+
    def get_article_url(self, article):
        return article.link
--- a/resources/recipes/la_jornada.recipe
+++ b/resources/recipes/la_jornada.recipe
@ -38,6 +38,7 @@ class LaJornada_mx(BasicNewsRecipe):
                                .loc{font-weight: bold} 
                                .carton{text-align: center}
                                .credit{font-weight: bold}
+                                .sumario{font-weight: bold; text-align: center}
                                .text{margin-top: 1.4em}
                                p.inicial{display: inline; font-size: xx-large; font-weight: bold}
                                p.s-s{display: inline; text-indent: 0}
--- a/resources/recipes/marctv.recipe
+++ b/resources/recipes/marctv.recipe
@ -0,0 +1,35 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch MarcTV.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MarcTVde(BasicNewsRecipe):
+    '''Recipe for marctv.de ("Marc Toensings Visionen").'''
+
+    title = 'Marc Toensings Visionen'
+
+    description = 'Marc Toensings Visionen'
+
+    language = 'de'
+
+    __author__ = 'Marc Toensing'
+
+    max_articles_per_feed = 40
+
+    oldest_article = 665
+
+    use_embedded_content = False
+
+    remove_tags = []
+
+    # Must be a list of tag specifications; a bare dict would be iterated
+    # key by key instead of being applied as one specification.
+    keep_only_tags = [dict(name='div', attrs={'class':["content"]})]
+
+    feeds          = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]
+
+    # NOTE(review): the selector starts with '.#', which is not valid CSS -
+    # left untouched until the intended selector is confirmed.
+    extra_css = '.#wrapper .entry p img{width:620px; height: 270px;}'
+
+    def get_cover_url(self):
+        '''Return the fixed cover image URL.'''
+        return 'http://marctv.de/marctv.png'
--- a/resources/recipes/ming_pao.recipe
+++ b/resources/recipes/ming_pao.recipe
@ -1,7 +1,9 @@
-cense__   = 'GPL v3'
+__license__   = 'GPL v3'
 __copyright__ = '2010, Eddie Lau'
 '''
 modified from Singtao Toronto calibre recipe by rty
+Change Log:
+2010/10/31: skip repeated articles in section pages
 '''

 import datetime
@ -23,14 +25,13 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    recursions = 0
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
-
    keep_only_tags = [dict(name='h1'),
                      dict(attrs={'id':['newscontent01','newscontent02']})]

    def get_fetchdate(self):
        dt_utc = datetime.datetime.utcnow()
-        # convert UTC to local hk time
-        dt_local = dt_utc - datetime.timedelta(-8.0/24)
+        # convert UTC to local hk time - at around HKT 5.30am, all news are available
+        dt_local = dt_utc - datetime.timedelta(-2.5/24)
        return dt_local.strftime("%Y%m%d")

    def parse_index(self):
@ -47,18 +48,14 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet']})
        current_articles = []
+        included_urls = []
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            if url not in included_urls:
                current_articles.append({'title': title, 'url': url, 'description':''})
+                included_urls.append(url)
        return current_articles

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-           del item['style']
-        for item in soup.findAll(width=True):
-           del item['width']
-        return soup
-
--- a/resources/recipes/mmc_rtv.recipe
+++ b/resources/recipes/mmc_rtv.recipe
@ -0,0 +1,57 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, BlonG'
+'''
+www.rtvslo.si
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MMCRTV(BasicNewsRecipe):
+    # Recipe for www.rtvslo.si; articles are fetched through the print view.
+    title = u'MMC RTV Slovenija'
+    __author__ = u'BlonG'
+    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
+    language = 'sl'
+    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'
+
+    oldest_article = 3
+    max_articles_per_feed = 20
+    no_stylesheets = True
+    use_embedded_content = False
+
+    extra_css = '''
+            h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+            h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+            p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+            body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+            '''
+
+    def print_version(self, url):
+        # The last path segment of the article URL is passed as the print id.
+        article_id = url.split("/")[-1]
+        return 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + article_id
+
+    # 'newsblocks' is kept alongside the title and the article body.
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'title'}),
+        dict(name='div', attrs={'id':'newsbody'}),
+        dict(name='div', attrs={'id':'newsblocks'}),
+    ]
+
+    feeds = [
+        (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
+        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
+        (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
+        (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
+        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
+        (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
+        (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
+        (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
+        (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
+    ]
+
--- a/resources/recipes/newsweek_polska.recipe
+++ b/resources/recipes/newsweek_polska.recipe
@ -0,0 +1,68 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Newsweek(BasicNewsRecipe):
+	'''Recipe for the weekly magazine Newsweek Polska (newsweek.pl).'''
+
+	# Identifier of the issue to download; set by find_last_full_issue().
+	EDITION = 0
+
+	title = u'Newsweek Polska'
+	__author__ = 'Mateusz Kielar'
+	description = 'Weekly magazine'
+	encoding = 'utf-8'
+	no_stylesheets = True
+	# Content comes from newsweek.pl, i.e. it is Polish, not English.
+	language = 'pl'
+	remove_javascript = True
+
+	keep_only_tags =[]
+	keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))
+
+	remove_tags =[]
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))
+
+	extra_css = '''
+					.body {font-size: small}
+					.author {font-size: x-small}
+					.lead {font-size: x-small}
+					.title{font-size: x-large; font-weight: bold}
+					'''
+
+	def print_version(self, url):
+		'''Map an issue-scoped article URL to its '/print' page.'''
+		return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
+
+	def find_last_full_issue(self):
+		'''Locate the issue to download and store its id in ``self.EDITION``.
+
+		Starting at the issue-cover frame, follows the link inside the first
+		attribute-less <span> twice, then reads the id from the '_parent' link.
+		'''
+		frames = 'http://www.newsweek.pl/Frames/'
+		page = self.index_to_soup(frames + 'IssueCover.aspx')
+		for hop in range(2):
+			link = page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
+			page = self.index_to_soup(frames + link)
+		self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+
+	def parse_index(self):
+		'''Return ``[(section title, [article dict, ...]), ...]`` for the issue.'''
+		self.find_last_full_issue()
+		soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
+		img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
+		# A missing cover image should not abort the whole download.
+		if img is not None:
+			self.cover_url = img['src']
+		feeds = []
+		parent = soup.find(id='content-left-big')
+		for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
+			section = self.tag_to_string(txt).capitalize()
+			articles = list(self.find_articles(txt))
+			feeds.append((section, articles))
+		return feeds
+
+	def find_articles(self, txt):
+		'''Yield article dicts for the section that starts at heading ``txt``.
+
+		Walks the following elements whose class is 'strong' or 'hr' and stops
+		at the first <div> among them.
+		'''
+		for a in txt.findAllNext( attrs={'class':['strong','hr']}):
+			# Exact comparison: a substring test against "div" would also
+			# match tag names such as 'i' or 'd'.
+			if a.name == 'div':
+				break
+			yield {
+				'title' : self.tag_to_string(a),
+				'url'   : 'http://www.newsweek.pl'+a['href'],
+				'date'  : '',
+				'description' : ''
+				}
+
+
--- a/resources/recipes/now_toronto.recipe
+++ b/resources/recipes/now_toronto.recipe
@ -0,0 +1,35 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+#Based on Lars Jacob's Taz Digiabo recipe
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Starson17'
+
+import os, urllib2, zipfile
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+class NowToronto(BasicNewsRecipe):
+    '''
+    Recipe that downloads a ready-made Now Toronto EPUB instead of
+    scraping individual articles.
+
+    build_index() fetches the EPUB referenced by the FeedBurner feed and
+    unpacks it into self.output_dir.
+    '''
+    title = u'Now Toronto'
+    description = u'Now Toronto'
+    __author__ = 'Starson17'
+    conversion_options = {
+        'no_default_epub_cover' : True
+    }
+
+    def build_index(self):
+        '''
+        Download the EPUB edition and extract it into self.output_dir.
+
+        The download URL is the text of the first <feedburner:origlink>
+        element of the feed. Returns the path of 'content.opf' inside
+        self.output_dir.
+        '''
+        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
+        soup = self.index_to_soup(epub_feed)
+        url = soup.find(name = 'feedburner:origlink').string
+        f = urllib2.urlopen(url)
+        try:
+            tmp = PersistentTemporaryFile(suffix='.epub')
+            try:
+                self.report_progress(0,_('downloading epub'))
+                tmp.write(f.read())
+            finally:
+                tmp.close()
+        finally:
+            # The HTTP response used to be left open.
+            f.close()
+        zfile = zipfile.ZipFile(tmp.name, 'r')
+        try:
+            self.report_progress(0,_('extracting epub'))
+            # NOTE(review): extractall() trusts the member paths of a
+            # downloaded archive - confirm that is acceptable here.
+            zfile.extractall(self.output_dir)
+        finally:
+            # Previously tmp.close() was called a second time at this point
+            # and the archive itself was never closed.
+            zfile.close()
+        index = os.path.join(self.output_dir, 'content.opf')
+        self.report_progress(1,_('epub downloaded and extracted'))
+        return index
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -5,65 +5,61 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import re
-import time
-from calibre import entity_to_unicode
+import re, string, time
+from calibre import entity_to_unicode, strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
-Comment, BeautifulStoneSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

 class NYTimes(BasicNewsRecipe):

-    title       = 'New York Times Top Stories'
-    __author__  = 'GRiker'
-    language = 'en'
-    requires_version = (0, 7, 5)
-    description = 'Top Stories from the New York Times'
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = True

-    # List of sections typically included in Top Stories.  Use a keyword from the
-    # right column in the excludeSectionKeywords[] list to skip downloading that section
-    sections = {
-                 'arts'             :   'Arts',
-                 'business'         :   'Business',
-                 'diningwine'       :   'Dining & Wine',
-                 'editorials'       :   'Editorials',
-                 'health'           :   'Health',
-                 'magazine'         :   'Magazine',
-                 'mediaadvertising' :   'Media & Advertising',
-                 'newyorkregion'    :   'New York/Region',
-                 'oped'             :   'Op-Ed',
-                 'politics'         :   'Politics',
-                 'science'          :   'Science',
-                 'sports'           :   'Sports',
-                 'technology'       :   'Technology',
-                 'topstories'       :   'Top Stories',
-                 'travel'           :   'Travel',
-                 'us'               :   'U.S.',
-                 'world'            :   'World'
-               }
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    #    includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.

-    # Add section keywords from the right column above to skip that section
-    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
-    # excludeSectionKeywords = ['Sports', 'Dining']
-    # Fetch only Business and Technology
-    # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
-    # Fetch only Top Stories
-    # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
-    # By default, no sections are skipped.
-    excludeSectionKeywords = []
+    includeSections = []  # by default, all sections included
+
+    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
+    # Otherwise, the sections named will be excluded. For example,
+    #
+    #    excludeSections = ['Politics','Sports']
+    #
+    # would cause the Politics and Sports sections to be excluded. This parameter can be used
+    # in conjunction with includeSections although in most cases using one or the other, but
+    # not both, is sufficient.
+
+    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists).  If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
+
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
-    max_articles_per_feed = 40
+    max_articles_per_feed = 100
+
+
+    if headlinesOnly:
+        title='New York Times Headlines'
+        description = 'Headlines from the New York Times'
+    else:
+        title='New York Times'
+        description = 'Today\'s New York Times'
+
+    __author__  = 'GRiker/Kovid Goyal/Nick Redding'
+    language = 'en'
+    requires_version = (0, 7, 5)
+

    timefmt = ''
-    needs_subscription = True
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')

@ -82,6 +78,7 @@ class NYTimes(BasicNewsRecipe):
                            'entry-response module',
                            'icon enlargeThis',
                            'leftNavTabs',
+                            'metaFootnote',
                            'module box nav',
                            'nextArticleLink',
                            'nextArticleLink clearfix',
@ -89,12 +86,13 @@ class NYTimes(BasicNewsRecipe):
                            'relatedSearchesModule',
                            'side_tool',
                            'singleAd',
-                            'subNavigation clearfix',
-                            'subNavigation tabContent active',
-                            'subNavigation tabContent active clearfix',
+                            re.compile('^subNavigation'),
+                            re.compile('^leaderboard'),
+                            re.compile('^module'),
                            ]}),
                   dict(id=[
                            'adxLeaderboard',
+                            'adxSponLink',
                            'archive',
                            'articleExtras',
                            'articleInline',
@ -105,87 +103,98 @@ class NYTimes(BasicNewsRecipe):
                            'footer',
                            'header',
                            'header_search',
+                            'inlineBox',
                            'login',
                            'masthead',
                            'masthead-nav',
                            'memberTools',
                            'navigation',
                            'portfolioInline',
+                            'readerReviews',
+                            'readerReviewsCount',
                            'relatedArticles',
+                            'relatedTopics',
                            'respond',
                            'side_search',
                            'side_index',
                            'side_tool',
                            'toolsRight',
                            ]),
-                   dict(name=['script', 'noscript', 'style'])]
-
+                   dict(name=['script', 'noscript', 'style','form','hr'])]
    no_stylesheets = True
-    extra_css = '.headline      {text-align:    left;}\n    \
-                 .byline        {font-family:   monospace;  \
-                                 text-align:    left;       \
-                                 margin-top:    0px;        \
-                                 margin-bottom: 0px;}\n     \
-                 .dateline      {font-size:     small;      \
-                                 margin-top:    0px;        \
-                                 margin-bottom: 0px;}\n     \
-                 .timestamp     {font-size:     small;      \
-                                 margin-top:    0px;        \
-                                 margin-bottom: 0px;}\n     \
-                 .source        {text-align:    left;}\n    \
-                 .image         {text-align:    center;}\n  \
-                 .credit        {text-align:    right;      \
-                                 font-size:     small;      \
-                                 margin-top:    0px;        \
-                                 margin-bottom: 0px;}\n     \
-                 .articleBody   {text-align:    left;}\n    \
-                 .authorId      {text-align:    left;       \
-                                 font-style:    italic;}\n  '
+    extra_css = '''
+                .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+                .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .timestamp { text-align: left; font-size: small; }
+                .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                a:link {text-decoration: none; }
+                .articleBody { }
+                .authorId {text-align: left; }
+                .image {text-align: center;}
+                .source {text-align: left; }'''

-    def dump_ans(self, ans) :
+    def filter_ans(self, ans) :
        total_article_count = 0
-        for section in ans :
+        idx = 0
+        idx_max = len(ans)-1
+        while idx <= idx_max:
+            if self.includeSections != []:
+                if ans[idx][0] not in self.includeSections:
+                    print "SECTION NOT INCLUDED: ",ans[idx][0]
+                    del ans[idx]
+                    idx_max = idx_max-1
+                    continue
+            if ans[idx][0] in self.excludeSections:
+                print "SECTION EXCLUDED: ",ans[idx][0]
+                del ans[idx]
+                idx_max = idx_max-1
+                continue
            if self.verbose:
-                self.log("section %s: %d articles" % (section[0], len(section[1])) )
-            for article in section[1]:
+                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
+            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
+            idx = idx+1
+
        self.log( "Queued %d articles" % total_article_count )
+        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
-        fixed = re.sub("\x91","&#8216;",string)
+        fixed = re.sub("\x91","‘",string)

        # Replace rsquo (\x92)
-        fixed = re.sub("\x92","&#8217;",fixed)
+        fixed = re.sub("\x92","’",fixed)

        # Replace ldquo (\x93)
-        fixed = re.sub("\x93","&#8220;",fixed)
+        fixed = re.sub("\x93","“",fixed)

        # Replace rdquo (\x94)
-        fixed = re.sub("\x94","&#8221;",fixed)
+        fixed = re.sub("\x94","”",fixed)

        # Replace ndash (\x96)
-        fixed = re.sub("\x96","&#8211;",fixed)
+        fixed = re.sub("\x96","–",fixed)

        # Replace mdash (\x97)
-        fixed = re.sub("\x97","&#8212;",fixed)
+        fixed = re.sub("\x97","—",fixed)

        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
-            try:
            br.open('http://www.nytimes.com/auth/login')
            br.select_form(name='login')
            br['USERID']   = self.username
            br['PASSWORD'] = self.password
-                br.submit()
-            except:
-                self.log("\nFailed to login")
+            raw = br.submit().read()
+            if 'Please try again' in raw:
+                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
@ -213,6 +222,9 @@ class NYTimes(BasicNewsRecipe):
            cover = None
        return cover

+    def short_title(self):
+        # The short title is simply the full recipe title, unchanged.
+        return self.title
+
    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
@ -255,157 +267,184 @@ class NYTimes(BasicNewsRecipe):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
-            # Replace '&' with '&#38;'
-            massaged = re.sub("&","&#38;", massaged)
+            # Replace '&' with '&'
+            massaged = re.sub("&","&", massaged)
            return self.fixChars(massaged)
        else:
            return description

-    def parse_index(self):
+    def parse_todays_index(self):
+
+        def feed_title(div):
+            return ''.join(div.findAll(text=True, recursive=True)).strip()
+
        articles = {}
+        key = None
        ans = []
+        url_list = []

-        feed = key = 'All Top Stories'
-        articles[key] = []
-        ans.append(key)
-        self.log("Scanning 1 section ...")
-
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
-
-        # Fetch the outer table
-        table = soup.find('table')
-        previousTable = table
-
-        # Find the deepest table containing the stories
-        while True :
-            table = table.find('table')
-            if table.find(text=re.compile('top stories start')) :
-                previousTable = table
-                continue
-            else :
-                table = previousTable
-                break
-
-        # There are multiple subtables, find the one containing the stories
-        for block in table.findAll('table') :
-            if block.find(text=re.compile('top stories start')) :
-                table = block
-                break
-            else :
-                continue
-
-        # Again there are multiple subtables, find the one containing the stories
-        for storyblock in table.findAll('table') :
-            if storyblock.find(text=re.compile('top stories start')) :
-                break
-            else :
-                continue
-
-        skipThisSection = False
-        todays_article_count = 0
-        # Within this table are <font face="times new roman, times, san serif"> entries
-        self.log("Fetching feed Top Stories")
-        for tr in storyblock.findAllNext('tr'):
-            if tr.find('span') is not None :
-
-                sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
-                                                         'times new roman,times, sans serif',
-                                                         'times new roman, times, sans serif']})
-                section = None
-                bylines = []
-                descriptions = []
-                pubdate = None
-
-                # Get the Section title
-                for (x,i) in enumerate(sectionblock.contents) :
-                    skipThisSection = False
-                    # Extract the section title
-                    if ('Comment' in str(i.__class__)) :
-                        if 'start(name=' in i :
-                            section = i[i.find('=')+1:-2]
-
-                        if not self.sections.has_key(section) :
-                            skipThisSection = True
-                            break
-
-                        # Check for excluded section
-                        if len(self.excludeSectionKeywords):
-                            key = self.sections[section]
-                            excluded = re.compile('|'.join(self.excludeSectionKeywords))
-                            if excluded.search(key) or articles.has_key(key):
-                                skipThisSection = True
-                                break
-
-                # Get the bylines and descriptions
-                if not skipThisSection :
-                    lines = sectionblock.contents
-                    contentStrings = []
-
-                    for line in lines:
-                        if not isinstance(line, Comment) and line.strip and line.strip() > "":
-                            contentStrings.append(line.strip())
-
-                    # Gather the byline/description pairs
-                    bylines = []
-                    descriptions = []
-                    for contentString in contentStrings:
-                        if contentString[0:3] == 'By ' and contentString[3].isupper() :
-                            bylines.append(contentString)
-                        else:
-                            descriptions.append(contentString)
-
-                    # Fetch the article titles and URLs
-                    articleCount = len(sectionblock.findAll('span'))
-                    todays_article_count += articleCount
-                    for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
-                        a = span.find('a', href=True)
+        def handle_article(div):
+            a = div.find('a', href=True)
+            if not a:
+                return
            url = re.sub(r'\?.*', '', a['href'])
+            if not url.startswith("http"):
+                return
+            if not url.endswith(".html"):
+                return
+            if 'podcast' in url:
+                return
+            if '/video/' in url:
+                return
            url += '?pagewanted=all'
-
-                        title = self.tag_to_string(a, use_alt=True)
-                        # prepend the section name
-                        title = self.sections[section] + " &middot; " + title
-
-                        if not isinstance(title, unicode):
-                            title = title.decode('utf-8', 'replace')
-
-                        # Allow for unattributed, undescribed entries "Editor's Note"
-                        if i >= len(descriptions) :
-                            description = None
+            if url in url_list:
+                return
+            url_list.append(url)
+            title = self.tag_to_string(a, use_alt=True).strip()
+            description = ''
+            pubdate = strftime('%a, %d %b')
+            summary = div.find(True, attrs={'class':'summary'})
+            if summary:
+                description = self.tag_to_string(summary, use_alt=False)
+            author = ''
+            authorAttribution = div.find(True, attrs={'class':'byline'})
+            if authorAttribution:
+                author = self.tag_to_string(authorAttribution, use_alt=False)
            else:
-                            description = descriptions[i]
-
-                        if len(bylines) == articleCount :
-                            author = bylines[i]
-                        else :
-                            author = None
-
-                        # Check for duplicates
-                        duplicateFound = False
-                        if len(articles[feed]) > 1:
-                            for article in articles[feed] :
-                                if url == article['url'] :
-                                    duplicateFound = True
-                                    break
-
-                            if duplicateFound:
-                                # Continue fetching, don't add this article
-                                todays_article_count -= 1
-                                continue
-
+                authorAttribution = div.find(True, attrs={'class':'byline'})
+                if authorAttribution:
+                    author = self.tag_to_string(authorAttribution, use_alt=False)
+            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
+                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                            dict(title=title, url=url, date=pubdate,
-                                 description=description, author=author, content=''))
-#        self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories"))
+                                description=description, author=author,
+                                content=''))
+
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
+
+        # Find each article
+        for div in soup.findAll(True,
+            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
+
+            if div['class'] in ['section-headline','sectionHeader']:
+                key = string.capwords(feed_title(div))
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')
+            elif div['class'] in ['story', 'story headline'] :
+                handle_article(div)
+            elif div['class'] == 'headlinesOnly multiline flush':
+                for lidiv in div.findAll('li'):
+                    handle_article(lidiv)

-        ans = self.sort_index_by(ans, {'Top Stories':-1})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        self.dump_ans(ans)
-        return ans
+        return self.filter_ans(ans)
+
+    def parse_headline_index(self):
+        '''
+        Build the feed list from the "Today's Headlines" page.
+
+        Sections are the non-empty <h6> headings (style contains
+        'text-transform: uppercase') found in the direct <div> children of
+        every <td> whose id contains 'Column' inside <table id="content">.
+        The articles of a section are the <h3> items of the <div> that
+        directly follows the heading, or of the section <div> itself when
+        the next sibling tag is not a <div>.
+
+        An article is queued only if its URL (query string stripped) starts
+        with 'http', ends with '.html' and contains neither 'podcast' nor
+        'video'; every URL is queued at most once.
+
+        Returns the list of (section name, article list) tuples after
+        filter_ans(), or None when the content table cannot be found.
+        '''
+
+        articles = {}
+        ans = []
+        url_list = []
+        # Same date string for every article, so compute it once.
+        pubdate = strftime('%a, %d %b')
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None
+
+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
+
+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    # capwords() lower-cases everything after a word's first
+                    # character, so restore the two names it mangles.
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+
+                    # Only the first following sibling tag is inspected: use
+                    # it when it is a <div>, else search the section <div>.
+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break
+
+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            # Keyword was misspelled 'usa_alt', which raised
+                            # TypeError for every item that had a byline.
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            # First article of this section: remember the
+                            # order in which sections were encountered.
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+
+    def parse_index(self):
+        '''
+        Return the feed list from the parser selected by self.headlinesOnly:
+        parse_headline_index() when it is true, parse_todays_index() otherwise.
+        '''
+        parser = self.parse_headline_index if self.headlinesOnly else self.parse_todays_index
+        return parser()
+
+    def strip_anchors(self,soup):
+        '''
+        Unlink text links: every <a> for which `a.img` is None is replaced
+        by its rendered contents; anchors that have an <img> are left alone.
+
+        The soup is modified in place and also returned.
+        '''
+        paras = soup.findAll(True)
+        for para in paras:
+            aTags = para.findAll('a')
+            for a in aTags:
+                if a.img is None:
+                    # NOTE(review): the rendered contents are decoded as
+                    # cp1252 - confirm this matches the encoding that
+                    # renderContents() produces.
+                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
+        return soup
+

    def preprocess_html(self, soup):
+        '''
+        Clean up an article page before further processing.
+
+        When the first element with class 'kicker' reads 'Op-Ed Columnist',
+        the first <div class="inlineImage module"> is removed from the soup.
+        The soup is then passed through strip_anchors() and returned.
+        '''
+
+        kicker_tag = soup.find(attrs={'class':'kicker'})
+        if kicker_tag: # remove Op_Ed author head shots
+            tagline = self.tag_to_string(kicker_tag)
+            if tagline=='Op-Ed Columnist':
+                img_div = soup.find('div','inlineImage module')
+                if img_div:
+                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):
@ -422,8 +461,9 @@ class NYTimes(BasicNewsRecipe):
                    firstImg = inlineImgs[0]
                    for inlineImg in inlineImgs[1:]:
                        inlineImg.extract()
-                    # Move firstImg after headline
-                    cgFirst = soup.find(True, {'class':'columnGroup  first'})
+                    # Move firstImg before article body
+                    #article_body = soup.find(True, {'id':'articleBody'})
+                    cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
                    if cgFirst:
                        # Strip all sibling NavigableStrings: noise
                        navstrings = cgFirst.findAll(text=True, recursive=False)
@ -444,29 +484,17 @@ class NYTimes(BasicNewsRecipe):
                            cgFirst.insert(insertLoc,firstImg)
                    else:
                        self.log(">>> No class:'columnGroup first' found <<<")
-        # Change class="kicker" to <h3>
-        kicker = soup.find(True, {'class':'kicker'})
-        if kicker and kicker.contents[0]:
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
-                             use_alt=False)))
-            kicker.replaceWith(h3Tag)

-        # Change captions to italic -1
+        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
-                emTag = Tag(soup, "em")
+                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
-                emTag.insert(0, c)
-                #hrTag = Tag(soup, 'hr')
-                #hrTag['class'] = 'caption_divider'
-                hrTag = Tag(soup, 'div')
-                hrTag['class'] = 'divider'
-                emTag.insert(1, hrTag)
-                caption.replaceWith(emTag)
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@ -506,17 +534,6 @@ class NYTimes(BasicNewsRecipe):
                bTag.insert(0, subhead.contents[0])
                subhead.replaceWith(bTag)

-        # Synthesize a section header
-        dsk = soup.find('meta', attrs={'name':'dsk'})
-        if dsk and dsk.has_key('content'):
-            hTag = Tag(soup,'h3')
-            hTag['class'] = 'section'
-            hTag.insert(0,NavigableString(dsk['content']))
-            articleTag = soup.find(True, attrs={'id':'article'})
-            if articleTag:
-                articleTag.insert(0,hTag)
-
-        # Add class="articleBody" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@ -532,11 +549,3 @@ class NYTimes(BasicNewsRecipe):

        return soup

-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -5,52 +5,186 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import string, re, time
-from calibre import strftime
+import re, string, time
+from calibre import entity_to_unicode, strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-def decode(self, src):
-    enc = 'utf-8'
-    if 'iso-8859-1' in src:
-        enc = 'cp1252'
-    return src.decode(enc, 'ignore')
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

 class NYTimes(BasicNewsRecipe):

-    title       = u'New York Times'
-    __author__  = 'Kovid Goyal/Nick Redding'
-    language = 'en'
-    requires_version = (0, 6, 36)
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = False

-    description = 'Daily news from the New York Times (subscription version)'
-    timefmt = ' [%b %d]'
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    #    includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.
+
+    includeSections = []  # by default, all sections included
+
+    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
+    # Otherwise, the sections named will be excluded. For example,
+    #
+    #    excludeSections = ['Politics','Sports']
+    #
+    # would cause the Politics and Sports sections to be excluded. This parameter can be used
+    # in conjunction with includeSections although in most cases using one or the other, but
+    # not both, is sufficient.
+
+    excludeSections = []
+
+    # one_picture_per_article specifies that calibre should only use the first image
+    # from an article (if one exists).  If one_picture_per_article = True, the image
+    # will be moved to a location between the headline and the byline.
+    # If one_picture_per_article = False, all images from the article will be included
+    # and shown in their original location.
+    one_picture_per_article = True
+
+    # The maximum number of articles that will be downloaded
+    max_articles_per_feed = 100
+
+
+    if headlinesOnly:
+        title='New York Times Headlines'
+        description = 'Headlines from the New York Times'
+    else:
+        title='New York Times'
+        description = 'Today\'s New York Times'
+
+    __author__  = 'GRiker/Kovid Goyal/Nick Redding'
+    language = 'en'
+    requires_version = (0, 7, 5)
+
+
+    timefmt = ''
    needs_subscription = True
+    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
+
    remove_tags_before = dict(id='article')
    remove_tags_after  = dict(id='article')
-    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool','nextArticleLink',
-                                        'nextArticleLink clearfix','columnGroup doubleRule','doubleRule','entry-meta',
-                                        'icon enlargeThis','columnGroup  last','relatedSearchesModule']}),
-                   dict({'class':re.compile('^subNavigation')}),
-                   dict({'class':re.compile('^leaderboard')}),
-                   dict({'class':re.compile('^module')}),
-                   dict({'class':'metaFootnote'}),
-                   dict(id=['inlineBox','footer', 'toolsRight', 'articleInline','login','masthead',
-                            'navigation', 'archive', 'side_search', 'blog_sidebar','cCol','portfolioInline',
-                            'side_tool', 'side_index','header','readerReviewsCount','readerReviews',
-                            'relatedArticles', 'relatedTopics', 'adxSponLink']),
+    remove_tags = [dict(attrs={'class':[
+                            'articleFooter',
+                            'articleTools',
+                            'columnGroup doubleRule',
+                            'columnGroup singleRule',
+                            'columnGroup last',
+                            'columnGroup  last',
+                            'doubleRule',
+                            'dottedLine',
+                            'entry-meta',
+                            'entry-response module',
+                            'icon enlargeThis',
+                            'leftNavTabs',
+                            'metaFootnote',
+                            'module box nav',
+                            'nextArticleLink',
+                            'nextArticleLink clearfix',
+                            'post-tools',
+                            'relatedSearchesModule',
+                            'side_tool',
+                            'singleAd',
+                            re.compile('^subNavigation'),
+                            re.compile('^leaderboard'),
+                            re.compile('^module'),
+                            ]}),
+                   dict(id=[
+                            'adxLeaderboard',
+                            'adxSponLink',
+                            'archive',
+                            'articleExtras',
+                            'articleInline',
+                            'blog_sidebar',
+                            'businessSearchBar',
+                            'cCol',
+                            'entertainmentSearchBar',
+                            'footer',
+                            'header',
+                            'header_search',
+                            'inlineBox',
+                            'login',
+                            'masthead',
+                            'masthead-nav',
+                            'memberTools',
+                            'navigation',
+                            'portfolioInline',
+                            'readerReviews',
+                            'readerReviewsCount',
+                            'relatedArticles',
+                            'relatedTopics',
+                            'respond',
+                            'side_search',
+                            'side_index',
+                            'side_tool',
+                            'toolsRight',
+                            ]),
                   dict(name=['script', 'noscript', 'style','form','hr'])]
-    encoding = decode
    no_stylesheets = True
    extra_css = '''
-                .articleHeadline { margin-top:0.5em; margin-bottom:0.25em; }
-                .credit { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .byline { font-size: small; font-style:italic; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .dateline { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+                .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
                .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .timestamp { font-size: small; }
-                .caption { font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                a:link {text-decoration: none; }'''
+                .timestamp { text-align: left; font-size: small; }
+                .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                a:link {text-decoration: none; }
+                .articleBody { }
+                .authorId {text-align: left; }
+                .image {text-align: center;}
+                .source {text-align: left; }'''
+
+    def filter_ans(self, ans) :
+        '''
+        Apply includeSections/excludeSections to the parsed index.
+
+        ans: list of (section name, article list) pairs. It is filtered in place
+             and the same list object is returned.
+
+        A section is dropped when includeSections is non-empty and does not name it,
+        or when excludeSections names it. The number of articles in the surviving
+        sections is logged.
+        '''
+        kept = []
+        total_article_count = 0
+        for section in ans:
+            name = section[0]
+            if self.includeSections and name not in self.includeSections:
+                # Use the recipe logger (not a bare print) like the rest of this method
+                self.log("SECTION NOT INCLUDED: %s" % name)
+                continue
+            if name in self.excludeSections:
+                self.log("SECTION EXCLUDED: %s" % name)
+                continue
+            kept.append(section)
+            section_articles = section[1]
+            if self.verbose:
+                self.log("Section %s: %d articles" % (name, len(section_articles)) )
+            for article in section_articles:
+                total_article_count += 1
+                if self.verbose:
+                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
+                              article['url'].encode('cp1252','replace')))
+
+        # Keep the in-place contract: callers holding a reference to ans see the filtered list
+        ans[:] = kept
+        self.log( "Queued %d articles" % total_article_count )
+        return ans
+
+    def fixChars(self,string):
+        '''
+        Return string with the characters \x91, \x92, \x93, \x94, \x96 and \x97
+        replaced by the corresponding typographic quote or dash.
+        '''
+        # (character to find, replacement) pairs, applied in this order
+        substitutions = (
+            ("\x91", "‘"),   # lsquo
+            ("\x92", "’"),   # rsquo
+            ("\x93", "“"),   # ldquo
+            ("\x94", "”"),   # rdquo
+            ("\x96", "–"),   # ndash
+            ("\x97", "—"),   # mdash
+        )
+        fixed = string
+        for pattern, replacement in substitutions:
+            fixed = re.sub(pattern, replacement, fixed)
+        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -60,22 +194,19 @@ class NYTimes(BasicNewsRecipe):
            br['USERID']   = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
-            if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
+            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
-            #open('/t/log.html', 'wb').write(raw)
        return br

-    def get_masthead_url(self):
-        masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-        #masthead = 'http://members.cox.net/nickredding/nytlogo.gif'
-        br = BasicNewsRecipe.get_browser()
-        try:
-            br.open(masthead)
-        except:
-            self.log("\nMasthead unavailable")
-            masthead = None
-        return masthead
-
+    def skip_ad_pages(self, soup):
+        '''
+        Skip ad pages served before the actual article.
+
+        When soup contains an element whose name attribute is 'skip', the href of
+        that element's parent is taken as the forwarding link; its query string is
+        replaced by '?pagewanted=all' and the result of
+        index_to_soup(url, raw=True) is returned. Otherwise returns None.
+        '''
+        skip_tag = soup.find(True, {'name':'skip'})
+        if skip_tag is None:
+            return None
+
+        href = skip_tag.parent['href']
+        self.log.warn("Found forwarding link: %s" % href)
+        # The forwarding link is appended to the site root, minus its query string
+        url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', href) + '?pagewanted=all'
+        self.log.warn("Skipping ad to article at '%s'" % url)
+        return self.index_to_soup(url, raw=True)

    def get_cover_url(self):
        cover = None
@ -93,12 +224,57 @@ class NYTimes(BasicNewsRecipe):
        return cover

    def short_title(self):
-        return 'New York Times'
+        return self.title

-    def parse_index(self):
-        self.encoding = 'cp1252'
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
-        self.encoding = decode
+    def index_to_soup(self, url_or_raw, raw=False):
+        '''
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
+
+        url_or_raw: either a URL (anything matching scheme://) to download through
+                    self.browser, or markup that has already been fetched.
+        raw:        when true, return the fetched data unparsed.
+
+        Returns the raw data when raw is true, otherwise a BeautifulSoup. The markup
+        is parsed once with self.encoding; if the page's Content-Type meta tag names
+        a different charset it is parsed again with that charset.
+        Raises RuntimeError when a URL yields no data.
+        '''
+        def fetch(source):
+            # Download source if it is a URL, otherwise treat it as the markup itself
+            if not re.match(r'\w+://', source):
+                return source
+            f = self.browser.open(source)
+            try:
+                data = f.read()
+            finally:
+                f.close()
+            if not data:
+                raise RuntimeError('Could not fetch index from %s'%source)
+            return data
+
+        def make_soup(data, docEncoding):
+            # NOTE(review): decoding only happens when self.encoding is set, even on
+            # the second pass with a detected charset - confirm this is intended.
+            if not isinstance(data, unicode) and self.encoding:
+                data = data.decode(docEncoding, 'replace')
+            massage = list(BeautifulSoup.MARKUP_MASSAGE)
+            massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
+            return BeautifulSoup(data, markupMassage=massage)
+
+        # Entry point: fetch once and reuse the data for every parse
+        data = fetch(url_or_raw)
+        if raw:
+            # Honour the raw flag; skip_ad_pages() relies on it
+            return data
+
+        soup = make_soup(data, self.encoding)
+        contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
+        # Pull the charset token out of the meta tag, whatever the attribute order
+        charset = re.search(r'charset=([^"\'\s;/>]+)', str(contentType))
+        if charset:
+            docEncoding = charset.group(1)
+        else:
+            docEncoding = self.encoding
+
+        if self.verbose > 2:
+            self.log( "  document encoding: '%s'" % docEncoding)
+        if docEncoding != self.encoding :
+            soup = make_soup(data, docEncoding)
+
+        return soup
+
+    def massageNCXText(self, description):
+        '''
+        Prepare an article description for the table of contents.
+
+        HTML entities in description are converted to characters, every '&' is then
+        rewritten as the numeric entity '&#38;', and the result is passed through
+        fixChars(). A false description (None or empty) is returned unchanged.
+        '''
+        # Kindle TOC descriptions won't render certain characters
+        if description:
+            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
+            # Replace '&' with '&#38;' (replacing '&' with itself would be a no-op)
+            massaged = re.sub("&","&#38;", massaged)
+            return self.fixChars(massaged)
+        else:
+            return description
+
+    def parse_todays_index(self):

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=True)).strip()
@ -119,12 +295,13 @@ class NYTimes(BasicNewsRecipe):
                return
            if 'podcast' in url:
                return
+            if '/video/' in url:
+                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
-            #self.log("Title: %s" % title)
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
@ -140,6 +317,7 @@ class NYTimes(BasicNewsRecipe):
                    author = self.tag_to_string(authorAttribution, use_alt=False)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
+                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                            dict(title=title, url=url, date=pubdate,
@ -147,46 +325,228 @@ class NYTimes(BasicNewsRecipe):
                                content=''))


+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

-        # Find each instance of class="section-headline", class="story", class="story headline"
+
+        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
-                articles[key] = []
-                ans.append(key)
-                #self.log('Section: %s' % key)
-
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

-#        ans = self.sort_index_by(ans, {'The Front Page':-1,
-#                                      'Dining In, Dining Out':1,
-#                                     'Obituaries':2})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+
+    def parse_headline_index(self):
+        '''
+        Build the index from http://www.nytimes.com/pages/todaysheadlines/.
+
+        Sections are the uppercase-styled <h6> tags found in the columns of the
+        page's content table; articles are the <h3> items that follow each one.
+
+        Returns the (section name, article list) pairs after filter_ans() has been
+        applied, or None when the content table cannot be found.
+        '''
+
+        articles = {}
+        ans = []
+        url_list = []
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None
+
+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
+
+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    # capwords() lowercases everything after the first letter of a word
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+                    pubdate = strftime('%a, %d %b')
+
+                    # Articles are searched in the first tag after the heading when
+                    # that tag is a <div>, otherwise in the enclosing section <div>
+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break
+
+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            # keyword is use_alt; the former 'usa_alt' raised TypeError
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        # Each article URL is queued only once
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            # ans records sections in the order they are first seen
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+
+    def parse_index(self):
+        '''
+        Return the article index, taken from the headlines page when
+        headlinesOnly is set and from the today's-paper page otherwise.
+        '''
+        parser = self.parse_headline_index if self.headlinesOnly else self.parse_todays_index
+        return parser()
+
+    def strip_anchors(self,soup):
+        '''
+        Replace every <a> that contains no <img> with its rendered contents
+        (decoded as cp1252), leaving image links alone. Returns soup.
+        '''
+        for container in soup.findAll(True):
+            for anchor in container.findAll('a'):
+                if anchor.img is not None:
+                    # keep anchors that wrap an image
+                    continue
+                inner = anchor.renderContents().decode('cp1252','replace')
+                anchor.replaceWith(inner)
+        return soup

-        return ans

    def preprocess_html(self, soup):
+
        kicker_tag = soup.find(attrs={'class':'kicker'})
-        if kicker_tag:
+        if kicker_tag: # remove Op_Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
-            #self.log("FOUND KICKER %s" % tagline)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
-                #self.log("Searching for photo")
                if img_div:
                    img_div.extract()
-                    #self.log("Photo deleted")
-        refresh = soup.find('meta', {'http-equiv':'refresh'})
-        if refresh is None:
+        return self.strip_anchors(soup)
+
+    def postprocess_html(self,soup, first_fetch):
+        '''
+        Tidy a downloaded article: limit and reposition images, rebuild captions,
+        headlines and subheads, and tag the body and author line for CSS.
+
+        soup:        the parsed article; it is modified in place and returned.
+        first_fetch: not used by this method. (It was formerly named ``True``,
+                     which shadowed the builtin inside the method.)
+        '''
+
+        if self.one_picture_per_article:
+            # Remove all images after first
+            largeImg = soup.find(True, {'class':'articleSpanImage'})
+            inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
+            if largeImg:
+                for inlineImg in inlineImgs:
+                    inlineImg.extract()
+            elif inlineImgs:
+                firstImg = inlineImgs[0]
+                for inlineImg in inlineImgs[1:]:
+                    inlineImg.extract()
+                # Move firstImg to just after the headline
+                cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
+                if cgFirst:
+                    # Strip all direct-child NavigableStrings: noise
+                    for ns in cgFirst.findAll(text=True, recursive=False):
+                        ns.extract()
+                    # Insert the image right after the headline child, when there is one.
+                    # .get() is used because children without a class attribute would
+                    # raise KeyError on tag['class'].
+                    for position, tag in enumerate(cgFirst.contents):
+                        if isinstance(tag, Tag) and tag.get('class') == 'articleHeadline':
+                            cgFirst.insert(position + 1, firstImg)
+                            break
+                else:
+                    self.log(">>> No class:'columnGroup first' found <<<")
+
+        # Change captions to italic
+        for caption in soup.findAll(True, {'class':'caption'}) :
+            # Guard against empty caption elements before looking at contents[0]
+            if caption.contents and caption.contents[0]:
+                cTag = Tag(soup, "p", [("class", "caption")])
+                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
+                # Drop the trailing "More Photos" link text
+                mp_off = c.find("More Photos")
+                if mp_off >= 0:
+                    c = c[:mp_off]
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)
+
+        # Change <nyt_headline> to <h2>
+        h1 = soup.find('h1')
+        if h1:
+            headline = h1.find("nyt_headline")
+            if headline and headline.contents:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                h1.replaceWith(tag)
+        else:
+            # Blog entry - replace headline, remove <hr> tags
+            headline = soup.find('title')
+            if headline and headline.contents:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                soup.insert(0, tag)
+                for hr in soup.findAll('hr'):
+                    hr.extract()
+
+        # Change <h1> to <h3> - used in editorial blogs
+        masthead = soup.find("h1")
+        if masthead and masthead.contents:
+            # Nuke the href
+            if masthead.a:
+                del(masthead.a['href'])
+            tag = Tag(soup, "h3")
+            tag.insert(0, self.fixChars(masthead.contents[0]))
+            masthead.replaceWith(tag)
+
+        # Change <span class="bold"> to <b>
+        for subhead in soup.findAll(True, {'class':'bold'}) :
+            if subhead.contents:
+                bTag = Tag(soup, "b")
+                bTag.insert(0, subhead.contents[0])
+                subhead.replaceWith(bTag)
+
+        # Add class="articleBody" to <div> so we can format with CSS
+        divTag = soup.find('div',attrs={'id':'articleBody'})
+        if divTag:
+            divTag['class'] = divTag['id']
+
+        # Add class="authorId" to <div> so we can format with CSS
+        divTag = soup.find('div',attrs={'id':'authorId'})
+        if divTag and divTag.contents and divTag.contents[0]:
+            tag = Tag(soup, "p")
+            tag['class'] = "authorId"
+            tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
+                             use_alt=False)))
+            divTag.replaceWith(tag)
+
        return soup
-        content = refresh.get('content').partition('=')[2]
-        raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
-        return BeautifulSoup(raw.decode('cp1252', 'replace'))
-

--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
    use_embedded_content  = False
    language              = 'es'
    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }  '
+    extra_css             = """ 
+                               body{font-family: Arial,Helvetica,sans-serif } 
+                               img{margin-bottom: 0.4em; display:block}
+                               #autor{font-weight: bold} 
+                               #fecha,#epigrafe{font-size: 0.9em; margin: 5px} 
+                               #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
+                               .fgprincipal{font-size: large; font-weight: bold}
+                            """

    conversion_options = {
                          'comment'   : description
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
+    remove_tags = [
+                     dict(name=['meta','link'])
+                    ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
+                  ]
+    remove_attributes=['lang']


    feeds = [
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
    def preprocess_html(self, soup):
+        # Cleans the article in place and returns it: inline style attributes are
+        # removed, the link inside <span id="seccion"> is turned into a plain span,
+        # and an <h3> found inside a <p> is renamed to <span>.
        for item in soup.findAll(style=True):
            del item['style']
+        for item in soup.findAll('span', attrs={'id':'seccion'}):
+            it = item.a
+            # The span may hold no link; skip it instead of failing on None
+            if it is None:
+                continue
+            it.name='span'
+            del it['href']
+            del it['title']
+        for item in soup.findAll('p'):
+            it = item.find('h3')
+            if it:
+                it.name='span'
        return soup
--- a/resources/recipes/pc_lab.recipe
+++ b/resources/recipes/pc_lab.recipe
@ -0,0 +1,70 @@
+#!/usr/bin/env  python
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class PCLab(BasicNewsRecipe):
+    # Recipe for the PC Lab article feed. Articles can span several pages;
+    # preprocess_html() stitches the follow-up pages onto the first one.
+    cover_url             = 'http://pclab.pl/img/logo.png'
+    title                 = u"PC Lab"
+    __author__            = 'ravcio - rlelusz[at]gmail.com'
+    description           = u"Articles from PC Lab website"
+    language              = 'pl'
+    oldest_article        = 30.0
+    max_articles_per_feed = 100
+    recursions            = 0
+    encoding              = 'iso-8859-2'
+    no_stylesheets        = True
+    remove_javascript     = True
+    use_embedded_content  = False
+
+    keep_only_tags = [
+            dict(name='div', attrs={'class':['substance']})
+                     ]
+
+    remove_tags = [
+            dict(name='div', attrs={'class':['chapters']})
+            ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
+                  ]
+
+    remove_tags_after = [
+            dict(name='div', attrs={'class':['navigation']})
+                ]
+
+    #links to RSS feeds
+    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]
+
+    def append_page(self, soup, appendtag):
+        '''
+        Load the second and subsequent pages of an article.
+
+        soup:      full page containing the 'next' button
+        appendtag: tag to which the content of each following page is appended
+
+        Pages are followed until one has no 'next' link or lacks the expected
+        'substance'/'data' content divs.
+        '''
+        while True:
+            # find the 'Next' button
+            pager = soup.find('div', attrs={'class':'next'})
+            if pager is None:
+                return
+
+            #search for 'a' element with link to next page (exit if not found)
+            a = pager.find('a', href=True)
+            if a is None:
+                return
+
+            soup = self.index_to_soup('http://pclab.pl/' + a['href'])
+
+            # Stop quietly if the next page does not have the expected structure
+            pagetext_substance = soup.find('div', attrs={'class':'substance'})
+            if pagetext_substance is None:
+                return
+            pagetext = pagetext_substance.find('div', attrs={'class':'data'})
+            if pagetext is None:
+                return
+            pagetext.extract()
+
+            # append at the end of what has been collected so far
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+
+    def preprocess_html(self, soup):
+        '''
+        Merge all pages of the article into soup.body, then remove the
+        leftover tag/index/ad/navigation divs. Returns soup.
+        '''
+
+        # soup.body contains no title and no navigator, they are in soup
+        self.append_page(soup, soup.body)
+
+        # finally remove some tags
+        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
+        for tag in tags:
+            tag.extract()
+
+        return soup
--- a/resources/recipes/politika.recipe
+++ b/resources/recipes/politika.recipe
@ -1,13 +1,10 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Politika(BasicNewsRecipe):
    title                 = 'Politika Online'
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    remove_javascript     = True
    encoding              = 'utf8'
+    delay                 = 1
    language              = 'sr'
-
-    lang                 = 'sr-Latn-RS'
-    direction            = 'ltr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    publication_type      = 'newspaper'    
+    masthead_url          = 'http://static.politika.co.rs/images_new/politika.gif'    
+    extra_css             = """ 
+                               @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} 
+                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} 
+                               body{font-family: Arial,Helvetica,sans1,sans-serif}
+                               h1{font-family: "Times New Roman",Times,serif1,serif}
+                               .articledescription{font-family: sans1, sans-serif}
+                            """

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'  : language
                        }


    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags     = [dict(name='div', attrs={'class':'content_center_border'})]
-
-    remove_tags = [
-                    dict(name='div', attrs={'class':['send_print','txt-komentar']})
-                   ,dict(name=['object','link','a'])
-                   ,dict(name='h1', attrs={'class':'box_header-tags'})
-                  ]
-
+    keep_only_tags     = [dict(name='div', attrs={'class':'big_article_home item_details'})]
+    remove_tags_after  = dict(attrs={'class':'online_date'})
+    remove_tags        = [dict(name=['link','meta','iframe','embed','object'])]
    
    feeds          = [
                         (u'Politika'         , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml'         )
                        ,(u'Svet'             , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml'             )
-                        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
+                        ,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
                        ,(u'Pogledi'          , u'http://www.politika.rs/pogledi/index.lt.xml'                    )
                        ,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
                        ,(u'Tema dana'        , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml'        )
                        ,(u'Kultura'          , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml'          )
-                        ,(u'Zivot i stil'         , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'         )
+                        ,(u'Spektar'          , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'     )
                     ]

    def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
-        ftag = soup.find('div',attrs={'class':'content_center_border'})
-        if ftag.has_key('align'):
-           del ftag['align']
-        return self.adeify_images(soup)
+        for item in soup.findAll('a', attrs={'class':'category'}):
+            item.name='span'
+            if item.has_key('href'):            
+               del item['href']
+            if item.has_key('title'):            
+               del item['title']
+        return soup
--- a/resources/recipes/polityka.recipe
+++ b/resources/recipes/polityka.recipe
@ -0,0 +1,68 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Polityka(BasicNewsRecipe):
+
+	# Recipe that assembles an issue of the Polityka weekly from the
+	# archive site archiwum.polityka.pl (see parse_index).
+	title = u'Polityka'
+	__author__ = 'Mateusz Kielar'
+	description = 'Weekly magazine. Last archive issue'
+	encoding = 'utf-8'
+	no_stylesheets = True
+	# Polityka is published in Polish
+	language = 'pl'
+	remove_javascript = True
+
+	# keep only what lies between the section heading and the footer box ...
+	remove_tags_before = dict(name = 'h2', attrs = {'class' : 'box_nag'})
+	remove_tags_after = dict(name = 'div', attrs = {'class' : 'box_footer'})
+
+	# ... and drop those two boundary elements themselves
+	remove_tags = [
+		dict(name = 'h2', attrs = {'class' : 'box_nag'}),
+		dict(name = 'div', attrs = {'class' : 'box_footer'}),
+	]
+
+
+	extra_css = '''
+					h1 {font-size: x-large; font-weight: bold}
+					'''
+
+	def parse_index(self):
+		"""Build the list of (section, articles) tuples for the archive issue.
+
+		The last element of class 'box_img3' on the archive front page
+		supplies the cover image and the link to the edition. Every article
+		page is fetched once to read its section name from the 'box_nag'
+		heading. Follow-up edition pages are visited until one has no
+		'box_list' div or the edition URL can no longer be advanced.
+		"""
+		base = 'http://archiwum.polityka.pl'
+		soup = self.index_to_soup(base + '/')
+		box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
+		feeds = []
+		last = 0
+		self.cover_url = base + box_img3[-1].find('img')['src']
+		last_edition = base + box_img3[-1].find('a')['href']
+
+		while True:
+			index = self.index_to_soup(last_edition)
+
+			box_list = index.findAll('div', attrs={'class' : 'box_list'})
+			if not box_list:
+				break
+
+			articles = {}
+			for box in box_list:
+				for div in box.findAll('div', attrs={'class': 'list_tresc'}):
+					url = base + div.a['href']
+					# the section name is only read from the article page itself
+					article_page = self.index_to_soup(url)
+					heading = article_page.find('h2', attrs = {'class' : 'box_nag'})
+					section = self.tag_to_string(heading).split('/')[0].strip()
+					articles.setdefault(section, []).append( {
+						'title' : self.tag_to_string(div.a),
+						'url'   : url,
+						'date'  : '',
+						'description' : ''
+						})
+
+			for section in articles:
+				feeds.append((section, articles[section]))
+
+			next_edition = last_edition.replace(base + '/wydanie/' + str(last), base + '/wydanie/' + str(last + 1))
+			if next_edition == last_edition:
+				# The URL did not change, so the same page would be fetched
+				# (and its articles appended) forever.
+				break
+			last_edition = next_edition
+			last = last + 1
+
+		return feeds
+
--- a/resources/recipes/rollingstone.recipe
+++ b/resources/recipes/rollingstone.recipe
@ -0,0 +1,69 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+rollingstone.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RollingStone(BasicNewsRecipe):
+    # Recipe for the freely available rollingstone.com content, built from
+    # the site's RSS feeds listed in `feeds` below.
+    title                 = 'Rolling Stone Magazine - free content'
+    __author__            = 'Darko Miletic'
+    description           = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
+    # Rolling Stone is published by Wenner Media (was misspelled 'Werner')
+    publisher             = 'Wenner Media inc.'
+    category              = 'news, music, USA, world'
+    oldest_article        = 15
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'magazine'
+    masthead_url          = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
+    extra_css             = """
+                               body{font-family: Georgia,Times,serif }
+                               img{margin-bottom: 0.4em; display:block}
+                            """
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    # Cut everything between the opening html tag and <head>, and everything
+    # in <head> after the title, before the page is parsed.
+    preprocess_regexps = [
+                          (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
+                         ,(re.compile(r'</title>.*?</head>'     , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n'     )
+                         ]
+
+    keep_only_tags=[
+                     dict(attrs={'class':['headerImgHolder','headerContent']})
+                    ,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
+                    ,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
+                   ]
+
+    remove_tags = [
+                      dict(name=['meta','iframe','object','embed'])
+                     ,dict(attrs={'id':'mpStoryHeader'})
+                     ,dict(attrs={'class':'relatedTopics'})
+                  ]
+    remove_attributes=['lang','onclick','width','height','name']
+    remove_tags_before=dict(attrs={'class':'bloggerInfo'})
+    remove_tags_after=dict(attrs={'class':'relatedTopics'})
+
+
+    feeds = [
+              (u'All News'      , u'http://www.rollingstone.com/siteServices/rss/allNews'      )
+             ,(u'All Blogs'     , u'http://www.rollingstone.com/siteServices/rss/allBlogs'     )
+             ,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
+             ,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
+             ,(u'Song Reviews'  , u'http://www.rollingstone.com/siteServices/rss/songReviews'  )
+            ]
+
+    def preprocess_html(self, soup):
+        """Remove every inline style attribute from the page and return soup."""
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/scprint.recipe
+++ b/resources/recipes/scprint.recipe
@ -0,0 +1,73 @@
+from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed
+
+class SCPrintMagazine(BasicNewsRecipe):
+    # Recipe for the newest issue listed in the SC Magazine issue archive.
+    # The site requires a login (see get_browser).
+    title          = u'SC Print Magazine'
+    __author__ = u'Tony Maro'
+    description = u'Last print version of the data security magazine'
+    INDEX = "http://www.scmagazineus.com/issuearchive/"
+    no_stylesheets = True
+    language = 'en'
+    keep_only_tags = [dict(id=['article','review'])]
+    remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
+    LOG_IN = 'http://www.scmagazineus.com/login/'
+    tags = 'News,SC Magazine'
+    needs_subscription = True
+
+    def parse_index(self):
+        """Return [(section title, [article dict, ...]), ...] for the issue.
+
+        The first 'issueArchiveItem' div on the INDEX page supplies the issue
+        link and, when present, the cover image. Each 'PrintSection' div of
+        the issue page becomes one section. An empty list is returned when
+        the archive page has no usable issue link; sections without an h3
+        heading are skipped.
+        """
+        soup = self.index_to_soup(self.INDEX)
+        sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
+        if sectit is None:
+            return []
+
+        linkt = sectit.find('a')
+        if linkt is None or not linkt.has_key('href'):
+            return []
+
+        # the cover is optional: a missing image must not abort the download
+        imgt = sectit.find('img')
+        if imgt is not None and imgt.has_key('src'):
+            self.cover_url = imgt['src']
+
+        issue = self.index_to_soup(linkt['href'])
+
+        articles = []
+        for printsection in issue.findAll('div',attrs={'class':'PrintSection'}):
+            heading = printsection.find('h3')
+            if heading is None or not heading.contents:
+                continue
+            sectiontitle = heading.contents[0]
+            articles.append((sectiontitle, self._section_articles(printsection)))
+
+        return articles
+
+    def _section_articles(self, printsection):
+        """Collect the article dicts of one 'PrintSection' div."""
+        onesection = []
+        for onearticle in printsection.findAll('div',attrs={'class':'IssueArchiveFormat'}):
+            arttitlet = onearticle.find('h3')
+            if arttitlet is None:
+                continue
+            mylink = arttitlet.find('a')
+            if mylink is None or not mylink.has_key('href'):
+                continue
+            # NOTE(review): as before, entries without a 'deck' div are
+            # skipped entirely -- confirm that this is intended.
+            deck = onearticle.find('div',attrs={'class':'deck'})
+            if deck is None or not deck.contents:
+                continue
+
+            arttitle = mylink.get('title', 'unknown')
+            # link to the print view of the article / review
+            artlink = mylink['href']
+            artlink = artlink.replace("/article","/printarticle")
+            artlink = artlink.replace("/review","/printreview")
+            onesection.append({'title':arttitle, 'url':artlink, 'description':deck.contents[0],'date':''})
+        return onesection
+
+    def get_browser(self):
+        """Return a browser that has submitted the site's login form.
+
+        Raises LoginFailed when the response to the login request does not
+        contain a 'Logout' link.
+        """
+        br = BasicNewsRecipe.get_browser(self)
+        br.open(self.LOG_IN)
+        br.select_form(name='aspnetForm')
+        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
+        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
+        raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
+        if 'Logout</a>' not in raw:
+            raise LoginFailed(
+                    _('Failed to log in, check your username and password for'
+                    ' the calibre Periodicals service.'))
+        return br
+
--- a/resources/recipes/siol.recipe
+++ b/resources/recipes/siol.recipe
@ -0,0 +1,55 @@
+# coding: utf-8
+__license__   = 'GPL v3'
+__copyright__ = '2010, BlonG'
+'''
+www.siol.si
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+class Siol(BasicNewsRecipe):
+    # Recipe for siol.net, assembled from the RSS channels listed in `feeds`.
+    title                 = u'Siol.net'
+    __author__            = u'BlonG'
+    description           = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
+    oldest_article        = 3
+    language              = 'sl'
+    max_articles_per_feed = 20
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+
+    html2lrf_options = ['--base-font-size', '10']
+
+    # only the main content container of a page is kept
+    keep_only_tags = [dict(name='div', attrs={'id':'idContent'})]
+
+    # elements dropped from the kept content
+    remove_tags = [
+        dict(name='span', attrs={'class':'com1'}),
+        dict(name='div',  attrs={'class':'relation'}),
+        dict(name='p',    attrs={'class':'path'}),
+        dict(name='div',  attrs={'class':'clear_r'}),
+        dict(name='div',  attrs={'id':'appendix'}),
+        dict(name='div',  attrs={'id':'rail'}),
+        dict(name='div',  attrs={'id':'div_comments'}),
+        dict(name='div',  attrs={'class':'thumbs'}),
+    ]
+
+    feeds = [
+        (u'Slovenija',      u'http://www.siol.net/rss.aspx?path=Slovenija'),
+        (u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice'),
+        (u'EU',             u'http://www.siol.net/rss.aspx?path=EU'),
+        (u'Svet',           u'http://www.siol.net/rss.aspx?path=Svet'),
+        (u'Gospodarstvo',   u'http://www.siol.net/rss.aspx?path=Gospodarstvo'),
+        (u'Sportal',        u'http://www.siol.net/rss.aspx?path=Sportal'),
+        (u'Trendi',         u'http://www.siol.net/rss.aspx?path=Trendi'),
+        (u'Avtomoto',       u'http://www.siol.net/rss.aspx?path=Avtomoto'),
+        (u'Tehnologija',    u'http://www.siol.net/rss.aspx?path=Tehnologija'),
+        (u'TV / Film',      u'http://www.siol.net/rss.aspx?path=TV'),
+    ]
--- a/resources/recipes/tagesan.recipe
+++ b/resources/recipes/tagesan.recipe
@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    __author__ = 'noxxx'
    max_articles_per_feed = 100
    description = 'tagesanzeiger.ch: Nichts verpassen'
-    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
    language = 'de'

    conversion_options = {
--- a/resources/recipes/taggeschau_de.recipe
+++ b/resources/recipes/taggeschau_de.recipe
@ -4,7 +4,7 @@ class Tagesschau(BasicNewsRecipe):
     title          = 'Tagesschau'
     description    = 'Nachrichten der ARD'
     publisher      = 'ARD'
-     language       = 'de_DE'
+     language       = 'de'

     __author__     = 'Florian Andreas Pfaff'
     oldest_article = 7
--- a/resources/recipes/vedomosti.recipe
+++ b/resources/recipes/vedomosti.recipe
@ -0,0 +1,195 @@
+#!/usr/bin/env  python
+
+u'''
+Ведомости
+'''
+
+from calibre.web.feeds.feedparser import parse
+from calibre.ebooks.BeautifulSoup import Tag
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class VedomostiRecipe(BasicNewsRecipe):
+    # Recipe for the vedomosti.ru newspaper. A single RSS feed is split into
+    # several sections by parse_index, using the tags of each feed entry.
+    title = u'Ведомости'
+    __author__ = 'Nikolai Kotchetkov'
+    publisher = 'vedomosti.ru'
+    category = 'press, Russia'
+    description = u'Ежедневная деловая газета'
+    oldest_article = 3
+    max_articles_per_feed = 100
+
+    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
+    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
+
+    #Add feed names if you want them to be sorted (feeds of this list appear first)
+    #'_default' is the section parse_index uses for entries without a tag term
+    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']
+
+    encoding = 'cp1251'
+    language = 'ru'
+    no_stylesheets = True
+    remove_javascript = True
+    recursions = 0
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+
+    keep_only_tags = [dict(name='td', attrs={'class' : ['second_content']})]
+
+    remove_tags_after = [dict(name='div', attrs={'class' : 'article_text'})]
+
+    remove_tags = [dict(name='div', attrs={'class' : ['sep', 'choice', 'articleRightTbl']})]
+
+    #Single RSS source; parse_index reads it as self.feeds[0]
+    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']
+
+    #base URL for relative links
+    base_url = u'http://www.vedomosti.ru'
+
+    #The article_* classes below are the ones assigned in postprocess_html
+    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
+                'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
+                'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
+                '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
+                '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
+                '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
+                '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
+                '.article_desc {font-size: 1em; font-style:italic;}'
+
+    def parse_index(self):
+        """Split the single RSS feed into sections keyed by entry tag.
+
+        Every entry is filed under each of its tag terms. Entries without
+        tags, with an empty tag list, or with a tag lacking a term go to the
+        '_default' section (at most once per entry). Sections named in
+        self.sortOrder come first in the result, the others follow.
+
+        Returns a list of (section name, article list) tuples. Any failure
+        is logged and turned into NotImplementedError.
+        NOTE(review): presumably NotImplementedError makes calibre fall back
+        to its default feed handling -- confirm.
+        """
+        try:
+            feedData = parse(self.feeds[0])
+            if not feedData:
+                raise NotImplementedError
+            self.log("parse_index: Feed loaded successfully.")
+            if feedData.feed.has_key('title'):
+                self.title = feedData.feed.title
+                self.log("parse_index: Title updated to: ", self.title)
+            if feedData.feed.has_key('description'):
+                self.description = feedData.feed.description
+                self.log("parse_index: Description updated to: ", self.description)
+
+            #section name -> (section name, article list)
+            feeds = {}
+
+            def get_virtual_feed_articles(feed):
+                #Return the article list of a section, creating it on first use
+                if feeds.has_key(feed):
+                    return feeds[feed][1]
+                self.log("Adding new feed: ", feed)
+                articles = []
+                feeds[feed] = (feed, articles)
+                return articles
+
+            #Iterate feed items and distribute articles using tags
+            for item in feedData.entries:
+                link = item.get('link', '')
+                title = item.get('title', '')
+                if '' == link or '' == title:
+                    continue
+                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''}
+                #A missing or empty tag list must not drop the article
+                if not item.get('tags'):
+                    get_virtual_feed_articles('_default').append(article)
+                    continue
+                #Tracked per article so that several term-less tags add it only once
+                addedToDefault = False
+                for tag in item.tags:
+                    term = tag.get('term', '')
+                    if '' == term:
+                        if not addedToDefault:
+                            get_virtual_feed_articles('_default').append(article)
+                            addedToDefault = True
+                        continue
+                    get_virtual_feed_articles(term).append(article)
+
+            #Get feed list
+            #Select sorted feeds first of all
+            result = []
+            for feedName in self.sortOrder:
+                if not feeds.has_key(feedName):
+                    continue
+                result.append(feeds[feedName])
+                del feeds[feedName]
+            result = result + feeds.values()
+
+            return result
+
+        except Exception, err:
+            self.log(err)
+            raise NotImplementedError
+
+    def preprocess_html(self, soup):
+        """Hand the page to self.adeify_images and return what it returns."""
+        adeified = self.adeify_images(soup)
+        return adeified
+
+    def postprocess_html(self, soup, first_fetch):
+        """Rebuild the page around the 'article_text' div and return soup.
+
+        The title, the lead image with its caption paragraphs, the abstract
+        and the authors line are moved into the article div, links starting
+        with '/' are prefixed with self.base_url, and the 'second_content'
+        cell is replaced by the rebuilt article. The soup is returned
+        unchanged when no 'article_text' div is found. first_fetch is unused.
+        """
+        #Find article
+        contents = soup.find('div', {'class':['article_text']})
+        if not contents:
+            self.log('postprocess_html: article div not found!')
+            return soup
+        contents.extract()
+
+        #Find title
+        title = soup.find('h1')
+        if title:
+            contents.insert(0, title)
+
+        #Find article image
+        newstop = soup.find('div', {'class':['newstop']})
+        if newstop:
+            img = newstop.find('img')
+            if img:
+                imgDiv = Tag(soup, 'div')
+                imgDiv['class'] = 'article_img'
+
+                if img.has_key('width'):
+                    del(img['width'])
+                if img.has_key('height'):
+                    del(img['height'])
+
+                #find description (must be looked up before img is extracted)
+                element = img.parent.nextSibling
+
+                img.extract()
+                imgDiv.insert(0, img)
+
+                while element is not None:
+                    #Remember the successor first: extract() unlinks the element,
+                    #and text nodes between the tags must be stepped over too
+                    #(skipping them without advancing looped forever)
+                    nextElement = element.nextSibling
+                    if isinstance(element, Tag) and 'p' == element.name:
+                        element.extract()
+                        element['class'] = 'article_img_desc'
+                        imgDiv.insert(len(imgDiv.contents), element)
+                    element = nextElement
+
+                contents.insert(1, imgDiv)
+
+        #find article abstract
+        abstract = soup.find('p', {'class':['subhead']})
+        if abstract:
+            abstract['class'] = 'article_desc'
+            contents.insert(2, abstract)
+
+        #Find article authors
+        authorsDiv = soup.find('div', {'class':['autors']})
+        if authorsDiv:
+            authorsP = authorsDiv.find('p')
+            if authorsP:
+                authorsP['class'] = 'article_authors'
+                contents.insert(len(contents.contents), authorsP)
+
+        #Fix urls that use relative path
+        for url in contents.findAll('a'):
+            if not url.has_key('href'):
+                continue
+            #startswith also copes with an empty href (indexing [0] did not)
+            if url['href'].startswith('/'):
+                url['href'] = self.base_url + url['href']
+
+        body = soup.find('td', {'class':['second_content']})
+        if body:
+            body.replaceWith(contents)
+
+        #Full page dump is only useful while debugging the recipe
+        self.log.debug('Result: ', soup.prettify())
+        return soup
+
--- a/resources/recipes/wash_post.recipe
+++ b/resources/recipes/wash_post.recipe
@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
                ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
                ('Style',
                     'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
-                ('Sports',
-                     'http://feeds.washingtonpost.com/wp-dyn/rss/linkset/2010/08/19/LI2010081904067_xml'),
+                ('NFL Sports',
+                     'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
+                ('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
                ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
    ]

--- a/resources/recipes/ynet.recipe
+++ b/resources/recipes/ynet.recipe
@ -0,0 +1,72 @@
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import mechanize
+
+class AdvancedUserRecipe1283848012(BasicNewsRecipe):
+    # Recipe for ynet.co.il: articles are taken from the RSS feeds below and
+    # downloaded through the site's print-preview page (see print_version).
+    description   = 'This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement to not hurt the sites advertising income.'
+    cover_url      = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg'
+    title          = u'Ynet'
+    __author__ = 'marbs'
+    language              = 'he'
+    extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}'
+    remove_attributes = ['width']
+    simultaneous_downloads = 5
+    # a list of tag specifications, like in the other recipes
+    keep_only_tags = [dict(name='div', attrs={'id':'articleContainer'})]
+    remove_javascript     = True
+    timefmt        = '[%a, %d %b, %Y]'
+    oldest_article = 1
+    remove_tags = [dict(name='p', attrs={'text':['&nbsp;']})]
+    max_articles_per_feed = 100
+    # drop paragraphs that contain nothing but a non-breaking space
+    preprocess_regexps = [
+        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
+        ]
+
+    def preprocess_html(self, soup):
+        """Mark the html and body elements as right-to-left and return soup."""
+        soup.html['dir'] = 'rtl'
+        soup.body['dir'] = 'rtl'
+        return soup
+
+    feeds = [
+        (u'\u05d7\u05d3\u05e9\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss2.xml'),
+        (u'\u05db\u05dc\u05db\u05dc\u05d4', u'http://www.ynet.co.il/Integration/StoryRss6.xml'),
+        (u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss437.xml'),
+        (u'\u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.ynet.co.il/Integration/StoryRss3.xml'),
+        (u'\u05ea\u05e8\u05d1\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss538.xml'),
+        (u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4', u'http://www.ynet.co.il/Integration/StoryRss3262.xml'),
+        (u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss1208.xml'),
+        (u'\u05d9\u05e8\u05d5\u05e7', u'http://www.ynet.co.il/Integration/StoryRss4872.xml'),
+        (u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd', u'http://www.ynet.co.il/Integration/StoryRss544.xml'),
+        (u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'),
+        (u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss598.xml'),
+        (u'\u05d4\u05d5\u05e8\u05d9\u05dd', u'http://www.ynet.co.il/Integration/StoryRss3052.xml'),
+        (u'\u05d0\u05d5\u05db\u05dc', u'http://www.ynet.co.il/Integration/StoryRss975.xml'),
+        (u'\u05d9\u05d4\u05d3\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss4403.xml'),
+        (u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2', u'http://www.ynet.co.il/Integration/StoryRss2142.xml'),
+        (u'\u05d9\u05d7\u05e1\u05d9\u05dd', u'http://www.ynet.co.il/Integration/StoryRss3925.xml'),
+        (u'\u05d3\u05e2\u05d5\u05ea', u'http://www.ynet.co.il/Integration/StoryRss194.xml'),
+    ]
+
+    def print_version(self, url):
+        """Return the print-preview URL for the article at url.
+
+        The article page is opened and a link is followed first (the
+        advertisement click mentioned in the recipe description). The print
+        URL is built from the part of url after its first '-'. A url without
+        '-' is returned unchanged instead of raising IndexError.
+        """
+#remove from here
+        br = BasicNewsRecipe.get_browser()
+        br.open(url)
+        br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}]))
+#to here to stop supporting ynet...
+        split1 = url.split("-")
+        if len(split1) < 2:
+            # no article id to build a print URL from
+            return url
+        print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
+        return print_url
--- a/resources/recipes/zeitde.recipe
+++ b/resources/recipes/zeitde.recipe
@ -6,22 +6,25 @@ Fetch Die Zeit.
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class ZeitDe(BasicNewsRecipe):

-    title = 'ZEIT Online'
-    description = 'ZEIT Online'
+    title = 'Zeit Online'
+    description = 'Zeit Online'
    language = 'de'
-    lang = 'de_DE'

-    __author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
-    use_embedded_content   = False
+    __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
+
    max_articles_per_feed = 40
-    remove_empty_feeds = True
-    no_stylesheets = True
-    no_javascript = True
-    encoding = 'utf-8'
+
+    remove_tags = [
+	                    dict(name='iframe'),
+	                    dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
+	                    dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
+	                    dict(name='div', attrs={'id':["place_5","place_4","comments"]})
+	                  ]
+
+    keep_only_tags = [dict(id=['main'])]

    feeds =  [
               ('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
@ -40,71 +43,31 @@ class ZeitDe(BasicNewsRecipe):
               ('Sport', 'http://newsfeed.zeit.de/sport/index'),
             ]

-    extra_css = '''
-                .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-                .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
-                .title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
-                .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-                .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-                .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
-                .quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
-                .quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
-                .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
-                .inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
-                img.inline{float:none}
-                .intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
-                .ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
-                .infobox {border-style: solid; border-width: 1px;padding:8px;}
-                .infobox dt {font-weight:700;}
-                '''
+    extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
+
    #filter_regexps = [r'ad.de.doubleclick.net/']

-    keep_only_tags = [
-                        dict(name='div', attrs={'class':["article"]}) ,
-                        dict(name='ul', attrs={'class':["tools"]}) ,
-                         ]
-    remove_tags = [
-                    dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
-                    dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
-                    dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
-                    dict(name='div', attrs={'id':["place_5","place_4","comments"]})
-                  ]
-
-    remove_attributes = ['style', 'font']
-
    def get_article_url(self, article):
        ans = article.get('link',None)
-        ans += "?page=all"
+        ans += "?page=all&print=true"

-        if 'video' in ans or 'quiz' in ans :
+        if 'video' in ans or 'quiz' in ans or 'blog' in ans :
              ans = None
        return ans

+    def preprocess_html(self, soup):
+        '''
+        Flatten list markup and stamp language/charset information on the page.
+
+        Every <ul> and <li> tag is renamed to <div>, the <html> element gets
+        `xml:lang` and `lang` attributes, and a Content-Type <meta> string is
+        inserted as the first child of <head>.
+
+        :param soup: Parsed article; must expose `findAll`, `html` and `head`.
+        :return: The same `soup` object, modified in place.
+        '''
+        for tag in soup.findAll(name=['ul','li']):
+            tag.name = 'div'
+
+        # This recipe no longer defines its own `lang` or `encoding`
+        # attributes, so reading them directly can fail. Fall back to the
+        # recipe's `language` and to utf-8 (the encoding the recipe used to
+        # declare) when they are missing or empty.
+        lang = getattr(self, 'lang', None) or self.language
+        encoding = getattr(self, 'encoding', None) or 'utf-8'
+
+        soup.html['xml:lang'] = lang
+        soup.html['lang']     = lang
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + encoding + '">'
+        soup.head.insert(0,mtag)
+        return soup
+
    def get_cover_url(self):
        try:
            inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
            return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
        except:
            return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
-
-    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
-        soup.head.insert(0,mtag)
-        title = soup.find('h2', attrs={'class':'title'})
-        if title is None:
-            print "no title"
-            return soup
-        info = Tag(soup,'ul',[('class','ebinfobox')])
-        tools = soup.find('ul', attrs={'class':'tools'})
-        #author = tools.find('li','author first')
-        for tag in ['author first', 'date', 'date first', 'author', 'source']:
-            line = tools.find('li', tag)
-            if line:
-                info.insert(0,line)
-        title.parent.insert(0,info)
-        tools.extract()
-        return soup
-
-
--- a/resources/templates/html_export_default.css
+++ b/resources/templates/html_export_default.css
@ -0,0 +1,60 @@
+/* Default stylesheet for the HTML export templates. */
+
+/* Page-wide defaults */
+body{
+  margin:0px;
+  padding: 0.5em;
+  background-color:#F6F3E9;
+  font-size:12px;
+  font-family:Arial, Helvetica, sans-serif;
+}
+
+/* Header block holding the title(s) and author line */
+.calibreMeta{
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+}
+
+/* Links inside the dark header, navigation and TOC boxes stay white */
+.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
+  color:white;
+}
+
+.calibreMeta h1{
+  margin:0px;
+  font-size:18px;
+  background-color:#39322B;
+}
+
+/* Book content area; further declarations are added by the second
+   .calibreEbookContent rule at the end of this file */
+.calibreEbookContent{
+  padding:20px;
+}
+
+/* Navigation bars (top and bottom) with the previous/next links */
+.calibreEbNav, .calibreEbNavTop{
+  clear:both;
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+  text-align:center;
+}
+
+.calibreEbNavTop{
+  margin-bottom:20px;
+}
+
+.calibreEbNav a, .calibreEbNavTop a{
+  padding:0px 5px;
+}
+
+/* Table of contents as shown on the index page */
+.calibreTocIndex{
+  line-height:18px;
+}
+
+/* Table of contents box shown next to the content on book pages */
+.calibreToc{
+  float:left;
+  margin:20px;
+  width:300px;
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+}
+/* Second rule for the same selector as above: both apply, this one adds
+   the fixed width and float */
+.calibreEbookContent{
+  width:600px;
+  float:left;
+}
--- a/resources/templates/html_export_default.tmpl
+++ b/resources/templates/html_export_default.tmpl
@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+${head_content}$
+
+<link href="${cssLink}$" type="text/css" rel="stylesheet" />
+
+</head>
+<body>
+
+<div class="calibreMeta">
+  <div class="calibreMetaTitle">
+  ${pos1=1}$
+  ${for title in meta.titles():}$
+    ${if pos1:}$
+    <h1>
+      <a href="${tocUrl}$">${print title}$</a>
+    </h1>
+    ${:else:}$
+    <div class="calibreMetaSubtitle">${print title}$</div>
+    ${:endif}$
+    ${pos1=0}$
+  ${:endfor}$
+  </div>
+  <div class="calibreMetaAuthor">
+    ${print ', '.join(meta.creators())}$
+  </div>
+</div>
+
+<div class="calibreMain">
+
+  <div class="calibreEbookContent">
+    ${if prevLink or nextLink:}$
+      <div class="calibreEbNavTop">
+        ${if prevLink:}$
+          <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
+        ${:else:}$
+          <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
+        ${:endif}$
+
+        ${if nextLink:}$
+          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+        ${:endif}$
+      </div>
+    ${:endif}$
+
+    ${ebookContent}$
+  </div>
+
+  ${if has_toc:}$
+  <div class="calibreToc">
+    <h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
+    ${print toc()}$
+  </div>
+  ${:endif}$
+
+  <div class="calibreEbNav">
+    ${if prevLink:}$
+      <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
+    ${:else:}$
+      <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
+    ${:endif}$
+
+    <a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>
+
+    ${if nextLink:}$
+      <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+    ${:endif}$
+  </div>
+
+</div>
+
+</body>
+</html>
--- a/resources/templates/html_export_default_index.tmpl
+++ b/resources/templates/html_export_default_index.tmpl
@ -0,0 +1,61 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+
+<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
+<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
+
+<title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>
+
+${for item in meta:}$
+  <meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
+${:endfor}$
+
+<link href="${cssLink}$" type="text/css" rel="stylesheet" />
+</head>
+<body>
+
+<div class="calibreMeta">
+  <div class="calibreMetaTitle">
+  ${pos1=1}$
+  ${for title in meta.titles():}$
+    ${if pos1:}$
+    <h1>
+      <a href="${tocUrl}$">${print title}$</a>
+    </h1>
+    ${:else:}$
+    <div class="calibreMetaSubtitle">${print title}$</div>
+    ${:endif}$
+    ${pos1=0}$
+  ${:endfor}$
+  </div>
+  <div class="calibreMetaAuthor">
+    ${print ', '.join(meta.creators()),}$
+  </div>
+</div>
+
+<div class="calibreMain">
+  <div class="calibreEbookContent">
+
+    ${if has_toc:}$
+      <div class="calibreTocIndex">
+        <h2>${print _('Table of contents'),}$</h2>
+        ${toc}$
+      </div>
+    ${:else:}$
+        <h2>${print _('No table of contents present'),}$</h2>
+        <div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
+    ${:endif}$
+
+  </div>
+
+  <div class="calibreEbNav">
+    ${if nextLink:}$
+      <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+    ${:endif}$
+  </div>
+</div>
+
+</body>
+</html>
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -348,8 +348,12 @@ class Build(Command):
                VERSION  = 1.0.0
                CONFIG   += %s
            ''')%(ext.name, ' '.join(ext.headers), ' '.join(ext.sources), archs)
+            pro = pro.replace('\\', '\\\\')
            open(ext.name+'.pro', 'wb').write(pro)
-            subprocess.check_call([QMAKE, '-o', 'Makefile', ext.name+'.pro'])
+            qmc = [QMAKE, '-o', 'Makefile']
+            if iswindows:
+                qmc += ['-spec', 'win32-msvc2008']
+            subprocess.check_call(qmc + [ext.name+'.pro'])
            subprocess.check_call([make, '-f', 'Makefile'])
            objects = glob.glob(obj_pat)
        return list(map(self.a, objects))
--- a/setup/installer/init.py
+++ b/setup/installer/init.py
@ -11,7 +11,7 @@ import subprocess, tempfile, os, time
 from setup import Command, installer_name
 from setup.build_environment import HOST, PROJECT

-BASE_RSYNC = 'rsync -avz --delete'.split()
+BASE_RSYNC = ['rsync', '-avz', '--delete']
 EXCLUDES = []
 for x in [
    'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
@ -42,13 +42,13 @@ class Push(Command):
        threads = []
        for host in (
            r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
-            'kovid@ox:calibre'
+            'kovid@ox:calibre',
+            r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
            ):
            rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
            print '\n\nPushing to:', host, '\n'
            threads.append(Thread(target=subprocess.check_call, args=(rcmd,)))
            threads[-1].start()
-            subprocess.check_call(rcmd)
        for thread in threads:
            thread.join()

--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
 from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

-QT_DIR = 'C:\\Qt\\4.6.3'
+QT_DIR = 'Q:\\Qt\\4.7.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUSB_DIR       = 'C:\\libusb'
 LIBUNRAR         = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -28,15 +28,16 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

 Qt
 --------

 Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::

-    configure -opensource -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc -no-qt3support -webkit -xmlpatterns -no-phonon
-    nmake
+    configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license && nmake

 SIP
 -----
--- a/setup/server.py
+++ b/setup/server.py
@ -5,7 +5,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import subprocess, tempfile, os, time, sys
+import subprocess, tempfile, os, time, sys, telnetlib
 from threading import RLock

 from setup import Command
@ -28,7 +28,12 @@ else:
        def process_default(self, event):
            name = getattr(event,
                    'name', None)
-            if name and os.path.splitext(name)[1] == '.py':
+            if not name:
+                return
+            ext = os.path.splitext(name)[1]
+            reload = False
+            if ext == '.py':
+                reload = True
                print
                print name, 'changed'
                self.command.kill_server()
@ -36,6 +41,9 @@ else:
                print self.command.prompt,
                sys.stdout.flush()

+            if reload:
+                self.command.reload_browser(delay=1)
+

 class Server(Command):

@ -75,6 +83,19 @@ class Server(Command):
            self.notifier.start()
            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)

+    def reload_browser(self, delay=0.1):
+        '''
+        Ask a browser listening on localhost:4242 to reload the current page.
+
+        Waits `delay` seconds, connects via telnet, waits for the ``repl>``
+        prompt, sends ``BrowserReload();`` and waits for the prompt again.
+        Failures are reported on stdout with a traceback and otherwise
+        ignored, so a missing browser never stops the server loop.
+
+        NOTE(review): port 4242 and the ``repl>`` prompt look like a
+        MozRepl-style JavaScript REPL -- confirm.
+
+        :param delay: Seconds to sleep before contacting the browser.
+        '''
+        time.sleep(delay)
+        t = None
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            t.read_until("repl>")
+            t.write('BrowserReload();')
+            t.read_until("repl>")
+        except Exception:
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+        finally:
+            # Close the connection even when talking to the REPL failed
+            # half way through, so the socket is not leaked.
+            if t is not None:
+                t.close()
+
    def run(self, opts):
        self.lock = RLock()
        tdir = tempfile.gettempdir()
@ -85,8 +106,13 @@ class Server(Command):
        print
        self.watch()

+        first = True
        while True:
            self.launch_server()
+            if not first:
+                self.reload_browser()
+            first = False
+
            try:
                raw_input(self.prompt)
            except:
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -21,8 +21,6 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
                              filesystem_encoding, plugins, config_dir
 from calibre.startup import winutil, winutilerror

-import mechanize
-
 uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo

 if False:
@ -269,7 +267,8 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
    :param honor_time: If True honors pause time in refresh requests
    :param max_time: Maximum time in seconds to wait during a refresh request
    '''
-    opener = mechanize.Browser()
+    from calibre.utils.browser import Browser
+    opener = Browser()
    opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
    opener.set_handle_robots(False)
    opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
@ -445,6 +444,9 @@ xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions = {
 def replace_entities(raw):
    return _ent_pat.sub(entity_to_unicode, raw)

+def xml_replace_entities(raw):
+    '''
+    Replace the entities in `raw`, using :func:`xml_entity_to_unicode`.
+
+    Works like :func:`replace_entities`, except that each match of the
+    module's entity pattern is substituted by `xml_entity_to_unicode`
+    (`entity_to_unicode` with a `result_exceptions` mapping) instead of plain
+    `entity_to_unicode`.
+
+    :param raw: The string to process
+    :return: `raw` with every matched entity substituted
+    '''
+    return _ent_pat.sub(xml_entity_to_unicode, raw)
+
 def prepare_string_for_xml(raw, attribute=False):
    raw = _ent_pat.sub(entity_to_unicode, raw)
    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.26'
+__version__   = '0.7.28'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
@ -105,7 +105,9 @@ else:
        os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
    except:
        pass
-    if not os.access(config_dir, os.W_OK) or not os.access(config_dir, os.X_OK):
+    if not os.path.exists(config_dir) or \
+            not os.access(config_dir, os.W_OK) or not \
+            os.access(config_dir, os.X_OK):
        print 'No write acces to', config_dir, 'using a temporary dir instead'
        import tempfile, atexit
        config_dir = tempfile.mkdtemp(prefix='calibre-config-')
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -2,9 +2,7 @@ import os.path
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import textwrap
-import os
-import glob
+import textwrap, os, glob, functools
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
@ -95,10 +93,12 @@ class ComicMetadataReader(MetadataReaderPlugin):

    def get_metadata(self, stream, ftype):
        if ftype == 'cbr':
-            from calibre.libunrar import extract_member as extract_first
+            from calibre.libunrar import extract_first_alphabetically as extract_first
            extract_first
        else:
-            from calibre.libunzip import extract_member as extract_first
+            from calibre.libunzip import extract_member
+            extract_first = functools.partial(extract_member,
+                    sort_alphabetically=True)
        from calibre.ebooks.metadata import MetaInformation
        ret = extract_first(stream)
        mi = MetaInformation(None, None)
@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
 from calibre.ebooks.rtf.output import RTFOutput
 from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
+from calibre.ebooks.html.output import HTMLOutput
 from calibre.ebooks.snb.output import SNBOutput

 from calibre.customize.profiles import input_profiles, output_profiles
@ -453,7 +454,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
 from calibre.devices.apple.driver import ITUNES
 from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
 from calibre.devices.blackberry.driver import BLACKBERRY
-from calibre.devices.cybook.driver import CYBOOK
+from calibre.devices.cybook.driver import CYBOOK, ORIZON
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
                BOOQ, ELONEX, POCKETBOOK301, MENTOR
@ -461,7 +462,7 @@ from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
-from calibre.devices.nook.driver import NOOK
+from calibre.devices.nook.driver import NOOK, NOOK_COLOR
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.android.driver import ANDROID, S60
 from calibre.devices.nokia.driver import N770, N810, E71X, E52
@ -475,7 +476,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
        SOVOS, PICO
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO

@ -525,6 +526,7 @@ plugins += [
    RTFOutput,
    TCROutput,
    TXTOutput,
+    HTMLOutput,
    SNBOutput,
 ]
 # Order here matters. The first matched device is the one used.
@ -533,6 +535,7 @@ plugins += [
    HANLINV5,
    BLACKBERRY,
    CYBOOK,
+    ORIZON,
    ILIAD,
    IREXDR1000,
    IREXDR800,
@ -546,6 +549,7 @@ plugins += [
    KINDLE2,
    KINDLE_DX,
    NOOK,
+    NOOK_COLOR,
    PRS505,
    ANDROID,
    S60,
@ -586,6 +590,7 @@ plugins += [
    AVANT,
    MENTOR,
    SWEEX,
+    Q600,
    KOGAN,
    PDNOVEL,
    SPECTRA,
@ -892,4 +897,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
        Email, Server, Plugins, Tweaks, Misc]

 #}}}
-
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -250,8 +250,11 @@ class OutputProfile(Plugin):
    #: If True, the date is appended to the title of downloaded news
    periodical_date_in_title = True

-    #: The character used to represent a star in ratings
+    #: Characters used in jackets and catalogs
+	missing_char = u'x'
    ratings_char = u'*'
+    empty_ratings_char = u' '
+    read_char = u'+'

    #: Unsupported unicode characters to be replaced during preprocessing
    unsupported_unicode_chars = []
@ -287,7 +290,12 @@ class iPadOutput(OutputProfile):
            'macros': {'border-width': '{length}|medium|thick|thin'}
        }
    ]
-    ratings_char = u'\u2605'
+
+	missing_char = u'\u2715\u200a'		# stylized 'x' plus hair space
+    ratings_char = u'\u2605'			# filled star
+	empty_ratings_char = u'\u2606'		# hollow star
+    read_char = u'\u2713'				# check mark
+
    touchscreen = True
    # touchscreen_news_css {{{
    touchscreen_news_css = u'''
@ -498,7 +506,6 @@ class SonyReaderLandscapeOutput(SonyReaderOutput):
    screen_size               = (784, 1012)
    comic_screen_size         = (784, 1012)

-
 class MSReaderOutput(OutputProfile):

    name        = 'Microsoft Reader'
@ -582,7 +589,12 @@ class KindleOutput(OutputProfile):
    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
    supports_mobi_indexing = True
    periodical_date_in_title = False
+
+	missing_char = u'x\u2009'
+	empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
+    read_char = u'\u2713'
+
    mobi_ems_per_blockquote = 2.0

    @classmethod
@ -603,6 +615,8 @@ class KindleDXOutput(OutputProfile):
    #comic_screen_size         = (741, 1022)
    supports_mobi_indexing = True
    periodical_date_in_title = False
+    ratings_char = u'\u2605'
+    read_char = u'\u2713'
    mobi_ems_per_blockquote = 2.0

    @classmethod
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -23,6 +23,9 @@ class ANDROID(USBMS):
                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
                0xc92 : [0x100]},

+            # Eken
+            0x040d : { 0x8510 : [0x0001] },
+
            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
                0x4285 : [0x216]},
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

@ -1221,12 +1221,19 @@ class ITUNES(DriverBase):
                    return thumb

                if isosx:
+                    # The following commands generate an error, but the artwork does in fact
+                    # get sent to the device.  Seems like a bug in Apple's automation interface?
+                    # Could also be a problem with the integrity of the cover data?
                    if lb_added:
+                        try:
                            lb_added.artworks[1].data_.set(cover_data)
+                        except:
+                            if DEBUG:
+                                self.log.warning("  iTunes automation interface reported an error"
+                                                 " when adding artwork to '%s' in the iTunes Library" % metadata.title)
+                            pass

                    if db_added:
-                        # The following command generates an error, but the artwork does in fact
-                        # get sent to the device.  Seems like a bug in Apple's automation interface
                        try:
                            db_added.artworks[1].data_.set(cover_data)
                        except:
@ -2521,11 +2528,11 @@ class ITUNES(DriverBase):
                        metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                                                   old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
                    else:
-                        metadata.timestamp = isoformat(now())
+                        metadata.timestamp = now()
                        if DEBUG:
                            self.log.info("   add timestamp: %s" % metadata.timestamp)
                else:
-                    metadata.timestamp = isoformat(now())
+                    metadata.timestamp = now()
                    if DEBUG:
                        self.log.warning("   missing <metadata> block in OPF file")
                        self.log.info("   add timestamp: %s" % metadata.timestamp)
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):

    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0210, 0x0201]
+    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]

    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
--- a/src/calibre/devices/cybook/driver.py
+++ b/src/calibre/devices/cybook/driver.py
@ -5,7 +5,7 @@ __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 '''
-Device driver for Bookeen's Cybook Gen 3 and Opus
+Device driver for Bookeen's Cybook Gen 3 and Opus and Orizon
 '''

 import os
@ -56,3 +56,23 @@ class CYBOOK(USBMS):
        if isunix:
            return device_info[3] == 'Bookeen' and (device_info[4] == 'Cybook Gen3' or device_info[4] == 'Cybook Opus')
        return True
+
+# Driver for the Bookeen Cybook Orizon. Everything not overridden here is
+# inherited from CYBOOK.
+class ORIZON(CYBOOK):
+
+    name           = 'Orizon Device Interface'
+    gui_name       = 'Orizon'
+    description    = _('Communicate with the Cybook Orizon eBook reader.')
+
+    # USB device revision value(s) this driver accepts
+    BCD         = [0x319]
+
+    # Patterns for the main memory and card A on Windows
+    # NOTE(review): presumably matched against Windows volume identifiers -- confirm
+    WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD')
+    WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD')
+
+    # Same directory name for both main memory and card A
+    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'
+
+    @classmethod
+    def can_handle(cls, device_info, debug=False):
+        # When isunix is set, accept the device only if fields 3 and 4 of
+        # device_info are exactly 'Bookeen' and 'Cybook Orizon'; otherwise
+        # always accept it.
+        if isunix:
+            return device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Orizon'
+        return True
+
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -74,9 +74,9 @@ class DevicePlugin(Plugin):
        if bcd is None or len(bcd) == 0:
            return True
        for c in bcd:
-            # Bug in winutil.get_usb_devices converts a to :
-            rev = ('rev_%4.4x'%c).replace('a', ':')
-            if rev in device_id:
+            rev = 'rev_%4.4x'%c
+            # Bug in winutil.get_usb_devices sometimes converts a to :
+            if rev in device_id or rev.replace('a', ':') in device_id:
                return True
        return False

--- a/src/calibre/devices/iriver/driver.py
+++ b/src/calibre/devices/iriver/driver.py
@ -17,15 +17,15 @@ class IRIVER_STORY(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
-    FORMATS     = ['epub', 'pdf', 'txt']
+    FORMATS     = ['epub', 'fb2', 'pdf', 'djvu', 'txt']

    VENDOR_ID   = [0x1006]
-    PRODUCT_ID  = [0x4023]
+    PRODUCT_ID  = [0x4023, 0x4025]
    BCD         = [0x0323]

    VENDOR_NAME = 'IRIVER'
-    WINDOWS_MAIN_MEM = 'STORY'
-    WINDOWS_CARD_A_MEM = 'STORY'
+    WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05']
+    WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']

    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
    #OSX_CARD_A_MEM = 'Kindle Card Storage Media'
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -503,7 +503,11 @@ class KOBO(USBMS):
                        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

                        ContentID = self.contentid_from_path(book.path, ContentType)
-                        datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
+
+                        t = (ContentID,)
+                        cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
+                        result = cursor.fetchone()
+                        datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00' 

                        t = (datelastread,ContentID,)

--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -72,6 +72,15 @@ class SWEEX(USBMS):
    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

+# Driver for the Digma Q600. Everything not overridden here is inherited
+# from SWEEX.
+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description    = _('Communicate with the Digma Q600')
+
+    # USB device revision value(s) this driver accepts
+    BCD = [0x325]
+    # File formats declared for this device
+    FORMATS     = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):

    name           = 'Kogan Device Interface'
@ -115,12 +124,15 @@ class PDNOVEL_KOBO(PDNOVEL):

    BCD         = [0x222]

-    EBOOK_DIR_MAIN = 'eBooks/Kobo'
+    EBOOK_DIR_MAIN = 'eBooks'

    def upload_cover(self, path, filename, metadata, filepath):
        coverdata = getattr(metadata, 'thumbnail', None)
        if coverdata and coverdata[2]:
-            with open(os.path.join(path, '.thumbnail', filename+'.jpg'), 'wb') as coverfile:
+            dirpath = os.path.join(path, '.thumbnail')
+            if not os.path.exists(dirpath):
+                os.makedirs(dirpath)
+            with open(os.path.join(dirpath, filename+'.jpg'), 'wb') as coverfile:
                coverfile.write(coverdata[2])


--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@ -80,3 +80,14 @@ class NOOK(USBMS):

    def sanitize_path_components(self, components):
        return [x.replace('#', '_') for x in components]
+
+# Driver for the Nook Color. Everything not overridden here is inherited
+# from NOOK.
+# NOTE(review): `name` is not overridden, so this class shares NOOK's `name`;
+# confirm that two plugins with the same name do not clash.
+class NOOK_COLOR(NOOK):
+    gui_name       = _('Nook Color')
+    description    = _('Communicate with the Nook Color eBook reader.')
+
+    # USB product id and device revision value(s) this driver accepts
+    PRODUCT_ID  = [0x002]
+    BCD         = [0x216]
+    # Main memory and card A use the same identifier on Windows
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_DISK'
+
+    # NOTE(review): presumably the on-device directory books are sent to -- confirm
+    EBOOK_DIR_MAIN = 'My Files/Books'
+
--- a/src/calibre/devices/prs505/sony_cache.py
+++ b/src/calibre/devices/prs505/sony_cache.py
@ -610,7 +610,11 @@ class XMLCache(object):
        # is not new, compare its Sony DB date against localtime and gmtime.
        # Count the matches. When we must set a date, use the one with the most
        # matches. Use localtime if the case of a tie, and hope it is right.
+        try:
            timestamp = os.path.getmtime(path)
+        except:
+            debug_print('Failed to get timestamp for:', path)
+            timestamp = time.time()
        rec_date = record.get('date', None)

        def clean(x):
@ -619,6 +623,13 @@ class XMLCache(object):
            x.replace(u'\0', '')
            return x

+        def record_set(k, v):
+            # Set attribute k of the record to the cleaned value of v. Any
+            # failure (in clean() or in record.set()) is swallowed, leaving
+            # the attribute as it was.
+            try:
+                record.set(k, clean(v))
+            except:
+                # v is not suitable for XML, ignore
+                pass
+
        if not getattr(book, '_new_book', False): # book is not new
            if record.get('tz', None) is not None:
                use_tz_var = True
@ -639,22 +650,25 @@ class XMLCache(object):
                debug_print("Use GMT TZ for new book", book.lpath)
            date = strftime(timestamp, zone=tz)
            record.set('date', clean(date))
+        try:
            record.set('size', clean(str(os.stat(path).st_size)))
+        except:
+            record.set('size', '0')
        title = book.title if book.title else _('Unknown')
-        record.set('title', clean(title))
+        record_set('title', title)
        ts = book.title_sort
        if not ts:
            ts = title_sort(title)
-        record.set('titleSorter', clean(ts))
+        record_set('titleSorter', ts)
        if self.use_author_sort:
            if book.author_sort:
                aus = book.author_sort
            else:
                debug_print('Author_sort is None for book', book.lpath)
                aus = authors_to_sort_string(book.authors)
-            record.set('author', clean(aus))
+            record_set('author', aus)
        else:
-            record.set('author', clean(authors_to_string(book.authors)))
+            record_set('author', authors_to_string(book.authors))
        ext = os.path.splitext(path)[1]
        if ext:
            ext = ext[1:].lower()
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -32,7 +32,7 @@ def detect(aBuf):
 ENCODING_PATS = [
                 re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
                            re.IGNORECASE),
-                 re.compile(r'''<meta\s+?[^<>]+?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
+                 re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
                            re.IGNORECASE)
                 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -144,7 +144,10 @@ class DocAnalysis(object):

        # Normalize the histogram into percents
        totalLines = len(self.lines)
+        if totalLines > 0:
            h = [ float(count)/totalLines for count in hRaw ]
+        else:
+            h = []
        #print "\nhRaw histogram lengths are: "+str(hRaw)
        #print "              percents are: "+str(h)+"\n"

--- a/src/calibre/ebooks/html/meta.py
+++ b/src/calibre/ebooks/html/meta.py
@ -0,0 +1,33 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
+
+class EasyMeta(object):
+    '''
+    Small read-only helper around a metadata object.
+
+    The wrapped object must expose an iterable ``items`` attribute of item
+    names and support ``meta[name]`` lookups that return sequences of
+    entries with ``term`` and ``value`` attributes.
+    '''
+
+    def __init__(self, meta):
+        self.meta = meta
+
+    def __iter__(self):
+        '''
+        Yield a ``{'name': ..., 'value': ...}`` dict for every entry whose
+        term lies in the ``DC11_NS`` namespace; other entries are skipped.
+        '''
+        source = self.meta
+        for key in source.items:
+            for entry in source[key]:
+                term = entry.term
+                if namespace(term) != DC11_NS:
+                    continue
+                yield { 'name': barename(term), 'value': entry.value }
+
+    def __len__(self):
+        '''Return the number of entries that iteration would yield.'''
+        return sum(1 for _ in self)
+
+    def titles(self):
+        '''Yield the ``value`` of every entry stored under ``'title'``.'''
+        for entry in self.meta['title']:
+            yield entry.value
+
+    def creators(self):
+        '''Yield the ``value`` of every entry stored under ``'creator'``.'''
+        for entry in self.meta['creator']:
+            yield entry.value
--- a/src/calibre/ebooks/html/output.py
+++ b/src/calibre/ebooks/html/output.py
@ -0,0 +1,209 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, shutil
+
+from calibre.utils import zipfile
+
+from os.path import dirname, abspath, relpath, exists, basename
+
+from lxml import etree
+from templite import Templite
+
+from calibre.ebooks.oeb.base import element
+from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
+from calibre import CurrentDir
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+from urllib import unquote
+
+from calibre.ebooks.html.meta import EasyMeta
+
+class HTMLOutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes a book as a ZIP archive of HTML files.
+
+    The archive contains an index page ``<name>.html`` and a directory
+    ``<name>_files`` holding the stylesheet, one templated page per spine
+    item and a copy of every other manifest item.
+    '''
+
+    name = 'HTML Output'
+    author = 'Fabian Grassl'
+    file_type = 'zip'
+
+    options = set([
+        OptionRecommendation(name='template_css',
+            help=_('CSS file used for the output instead of the default file')),
+
+        OptionRecommendation(name='template_html_index',
+            help=_('Template used for generation of the html index file instead of the default file')),
+
+        OptionRecommendation(name='template_html',
+            help=_('Template used for the generation of the html contents of the book instead of the default file')),
+
+        OptionRecommendation(name='extract_to',
+            help=_('Extract the contents of the generated ZIP file to the '
+                'specified directory. WARNING: The contents of the directory '
+                'will be deleted.')
+        ),
+    ])
+
+    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
+
+    def generate_toc(self, oeb_book, ref_url, output_dir):
+        '''
+        Generate table of contents
+
+        Builds nested ``ul``/``li``/``a`` elements from ``oeb_book.toc`` and
+        returns them wrapped in a ``div`` element. Node hrefs are resolved
+        with ``output_dir`` as the working directory and each link is made
+        relative to the directory of ``ref_url``.
+        '''
+        with CurrentDir(output_dir):
+            def build_node(current_node, parent=None):
+                if parent is None:
+                    # Top level call: start a fresh list
+                    parent = etree.Element('ul')
+                elif len(current_node.nodes):
+                    # Only open a nested list when there are children
+                    parent = element(parent, 'ul')
+                for node in current_node.nodes:
+                    point = element(parent, 'li')
+                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
+                    link = element(point, 'a', href=href)
+                    title = node.title
+                    if title:
+                        # Collapse runs of whitespace in the title
+                        title = re.sub(r'\s+', ' ', title)
+                    link.text=title
+                    build_node(node, point)
+                return parent
+            wrap = etree.Element('div')
+            wrap.append(build_node(oeb_book.toc))
+            return wrap
+
+    def generate_html_toc(self, oeb_book, ref_url, output_dir):
+        '''
+        Return the tree built by generate_toc() serialized as pretty-printed
+        UTF-8 encoded markup without an XML declaration.
+        '''
+        root = self.generate_toc(oeb_book, ref_url, output_dir)
+        return etree.tostring(root, pretty_print=True, encoding='utf-8',
+                xml_declaration=False)
+
+    def _read_template(self, path, default_resource):
+        '''
+        Return template text as unicode.
+
+        The data is read from the file ``path`` when it is not None,
+        otherwise from the bundled resource ``default_resource``. In both
+        cases it is decoded as UTF-8.
+        '''
+        if path is None:
+            raw = P(default_resource, data=True)
+        else:
+            # Context manager so the handle is closed instead of leaked
+            with open(path, 'rb') as f:
+                raw = f.read()
+        return raw.decode('utf-8')
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Render ``oeb_book`` as HTML and store the result in the ZIP file
+        ``output_path``.
+
+        :param oeb_book: book whose metadata, toc, manifest and spine are read
+        :param output_path: path of the ZIP file to create
+        :param input_plugin: not used by this method
+        :param opts: options object; ``template_html_index``,
+            ``template_html``, ``template_css`` and ``extract_to`` are read
+        :param log: stored as ``self.log`` and called to report extraction
+
+        When ``opts.extract_to`` is set, that directory is deleted if it
+        exists, recreated, and the archive is extracted into it.
+        '''
+
+        # read template files
+        template_html_index_data = self._read_template(
+                opts.template_html_index,
+                'templates/html_export_default_index.tmpl')
+        template_html_data = self._read_template(
+                opts.template_html, 'templates/html_export_default.tmpl')
+        template_css_data = self._read_template(
+                opts.template_css, 'templates/html_export_default.css')
+
+        self.log  = log
+        self.opts = opts
+        meta = EasyMeta(oeb_book.metadata)
+
+        tempdir = os.path.realpath(PersistentTemporaryDirectory())
+        # Strip only a trailing extension. An unanchored pattern would also
+        # remove '.zip' or '.html' from the middle of a name.
+        output_file = os.path.join(tempdir,
+                basename(re.sub(r'\.zip$', '', output_path)+'.html'))
+        output_dir = re.sub(r'\.html$', '', output_file)+'_files'
+
+        if not exists(output_dir):
+            os.makedirs(output_dir)
+
+        css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
+        with open(css_path, 'wb') as f:
+            f.write(template_css_data.encode('utf-8'))
+
+        # Index page: links are relative to the directory of the index file
+        with open(output_file, 'wb') as f:
+            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
+            templite = Templite(template_html_index_data)
+            nextLink = oeb_book.spine[0].href
+            nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
+            cssLink = relpath(abspath(css_path), dirname(output_file))
+            tocUrl = relpath(output_file, dirname(output_file))
+            t = templite.render(has_toc=bool(oeb_book.toc.count()),
+                    toc=html_toc, meta=meta, nextLink=nextLink,
+                    tocUrl=tocUrl, cssLink=cssLink,
+                    firstContentPageLink=nextLink)
+            # The file is opened in binary mode; writing non-ASCII unicode
+            # directly would raise UnicodeEncodeError.
+            if isinstance(t, unicode):
+                t = t.encode('utf-8')
+            f.write(t)
+
+        xhtml_ns = {'h': 'http://www.w3.org/1999/xhtml'}
+
+        with CurrentDir(output_dir):
+            for item in oeb_book.manifest:
+                path = abspath(unquote(item.href))
+                item_dir = dirname(path)
+                if not exists(item_dir):
+                    os.makedirs(item_dir)
+                if item.spine_position is not None:
+                    # Spine items only get an empty file here; their
+                    # content is written by the spine loop below.
+                    with open(path, 'wb') as f:
+                        pass
+                else:
+                    with open(path, 'wb') as f:
+                        f.write(str(item))
+                    item.unload_data_from_memory(memory=path)
+
+            for item in oeb_book.spine:
+                path = abspath(unquote(item.href))
+                item_dir = dirname(path)
+                root = item.data.getroottree()
+
+                # get & clean HTML <HEAD>-data
+                head = root.xpath('//h:head', namespaces=xhtml_ns)[0]
+                head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
+                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
+                head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)
+                head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>', head_content)
+
+                # get & clean HTML <BODY>-data
+                body = root.xpath('//h:body', namespaces=xhtml_ns)[0]
+                ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
+                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
+                ebook_content = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ebook_content)
+
+                # generate link to next page
+                if item.spine_position+1 < len(oeb_book.spine):
+                    nextLink = oeb_book.spine[item.spine_position+1].href
+                    nextLink = relpath(abspath(nextLink), item_dir)
+                else:
+                    nextLink = None
+
+                # generate link to previous page
+                if item.spine_position > 0:
+                    prevLink = oeb_book.spine[item.spine_position-1].href
+                    prevLink = relpath(abspath(prevLink), item_dir)
+                else:
+                    prevLink = None
+
+                cssLink = relpath(abspath(css_path), item_dir)
+                tocUrl = relpath(output_file, item_dir)
+                firstContentPageLink = oeb_book.spine[0].href
+
+                # render template
+                templite = Templite(template_html_data)
+                # The TOC is passed as a callable, so it is only generated
+                # if the template calls it
+                toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
+                t = templite.render(ebookContent=ebook_content,
+                        prevLink=prevLink, nextLink=nextLink,
+                        has_toc=bool(oeb_book.toc.count()), toc=toc,
+                        tocUrl=tocUrl, head_content=head_content,
+                        meta=meta, cssLink=cssLink,
+                        firstContentPageLink=firstContentPageLink)
+
+                # write html to file
+                if isinstance(t, unicode):
+                    t = t.encode('utf-8')
+                with open(path, 'wb') as f:
+                    f.write(t)
+                item.unload_data_from_memory(memory=path)
+
+        zfile = ZipFile(output_path, "w")
+        try:
+            zfile.add_dir(output_dir, basename(output_dir))
+            zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
+
+            if opts.extract_to:
+                if os.path.exists(opts.extract_to):
+                    shutil.rmtree(opts.extract_to)
+                os.makedirs(opts.extract_to)
+                zfile.extractall(opts.extract_to)
+                self.log('Zip file extracted to', opts.extract_to)
+        finally:
+            # Close the archive even when adding or extracting fails
+            zfile.close()
+
+        # cleanup temp dir
+        shutil.rmtree(tempdir)
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS
 import sys, re

 from lxml import html
+from lxml.html import soupparser

 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

 def find_asin(br, isbn):
    q = 'http://www.amazon.com/s?field-keywords='+isbn
@ -47,13 +49,12 @@ def get_social_metadata(title, authors, publisher, isbn):
        return mi
    br = browser()
    asin = to_asin(br, isbn)
-    if asin:
-        if get_metadata(br, asin, mi):
+    if asin and get_metadata(br, asin, mi):
        return mi
    from calibre.ebooks.metadata.xisbn import xisbn
    for i in xisbn.get_associated_isbns(isbn):
        asin = to_asin(br, i)
-        if get_metadata(br, asin, mi):
+        if asin and get_metadata(br, asin, mi):
            return mi
    return mi

@ -70,7 +71,10 @@ def get_metadata(br, asin, mi):
        return False
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
    if ratings:
        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
@ -95,13 +99,13 @@ def get_metadata(br, asin, mi):
        # remove all attributes from tags
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Collapse whitespace
-        desc = re.sub('\n+', '\n', desc)
-        desc = re.sub(' +', ' ', desc)
+        #desc = re.sub('\n+', '\n', desc)
+        #desc = re.sub(' +', ' ', desc)
        # Remove the notice about text referring to out of print editions
        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
        # Remove comments
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = desc
+        mi.comments = sanitize_comments_html(desc)

    return True

@ -112,7 +116,7 @@ def main(args=sys.argv):
    print

    # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
    print

    # Random tests
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -109,9 +109,11 @@ class OCFZipReader(OCFReader):
            raise EPubException("not a ZIP .epub OCF container")
        self.root = root
        if self.root is None:
+            name = getattr(stream, 'name', False)
+            if name:
+                self.root = os.path.abspath(os.path.dirname(name))
+            else:
                self.root = os.getcwdu()
-            if hasattr(stream, 'name'):
-                self.root = os.path.abspath(os.path.dirname(stream.name))
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -12,6 +12,7 @@ from calibre.utils.logging import default_log
 from calibre.utils.titlecase import titlecase
 from calibre.customize import Plugin
 from calibre.ebooks.metadata.covers import check_for_cover
+from calibre.utils.html2text import html2text

 metadata_config = None

@ -48,6 +49,11 @@ class MetadataSource(Plugin): # {{{
    #: member.
    string_customization_help = None

+    #: Set this to true if your plugin returns HTML markup in comments.
+    #: Then if the user disables HTML, calibre will automagically convert
+    #: the HTML to Markdown.
+    has_html_comments = False
+
    type = _('Metadata download')

    def __call__(self, title, author, publisher, isbn, verbose, log=None,
@ -79,6 +85,13 @@ class MetadataSource(Plugin): # {{{
                        mi.comments = None
                    if not c.get('tags', True):
                        mi.tags = []
+                    if self.has_html_comments and mi.comments and \
+                            c.get('textcomments', False):
+                        try:
+                            mi.comments = html2text(mi.comments)
+                        except:
+                            traceback.print_exc()
+                            mi.comments = None

        except Exception, e:
            self.exception = e
@ -132,11 +145,17 @@ class MetadataSource(Plugin): # {{{
            setattr(w, '_'+x, cb)
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)
+
+        cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
+        setattr(w, '_textcomments', cb)
+        cb.setChecked(c.get('textcomments', False))
+        w._layout.addWidget(cb)
+
        return w

    def save_settings(self, w):
        dl_settings = {}
-        for x in ('rating', 'tags', 'comments'):
+        for x in ('rating', 'tags', 'comments', 'textcomments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
@ -210,6 +229,8 @@ class Amazon(MetadataSource): # {{{
    metadata_type = 'social'
    description = _('Downloads social metadata from amazon.com')

+    has_html_comments = True
+
    def fetch(self):
        if not self.isbn:
            return
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@ -182,6 +182,7 @@ class TOC(list):
            except:
                play_order = 1
            href = fragment = text = None
+            nd = dest
            nl = nl_path(np)
            if nl:
                nl = nl[0]
@ -190,13 +191,10 @@ class TOC(list):
                    text += etree.tostring(txt, method='text',
                            encoding=unicode, with_tail=False)
                content = content_path(np)
-                if not content or not text:
-                    return
+                if content and text:
                    content = content[0]
                    src = get_attr(content, attr='src')
-                if src is None:
-                    return
-
+                    if src:
                        purl = urlparse(unquote(content.get('src')))
                        href, fragment = purl[2], purl[5]
                        nd = dest.add_item(href, fragment, text)
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -275,6 +275,14 @@ class MobiMLizer(object):
        # <mbp:frame-set/> does not exist lalalala
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
+            id_ = elem.get('id', None)
+            if id_:
+                # Keep anchors so people can use display:none
+                # to generate hidden TOCs
+                elem.clear()
+                elem.text = None
+                elem.set('id', id_)
+            else:
                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
@ -355,11 +363,15 @@ class MobiMLizer(object):
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
+                        # Amazon's renderer does not support
+                        # img sizes in units other than px
+                        # See #7520 for test case
                        try:
-                            ems = int(round(float(value) / self.profile.fbase))
+                            pixs = int(round(float(value) / \
+                                (72./self.profile.dpi)))
                        except:
                            continue
-                        result = "%dem" % ems
+                        result = "%d"%pixs
                    istate.attrib[prop] = result
        elif tag == 'hr' and asfloat(style['width']) > 0:
            prop = style['width'] / self.profile.width
@ -378,6 +390,15 @@ class MobiMLizer(object):
            for attr in ('rowspan', 'colspan','width','border','scope'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
+        if tag == 'q':
+            t = elem.text
+            if not t:
+                t = ''
+            elem.text = u'\u201c' + t
+            t = elem.tail
+            if not t:
+                t = ''
+            elem.tail = u'\u201d' + t
        text = None
        if elem.text:
            if istate.preserve:
@ -406,6 +427,12 @@ class MobiMLizer(object):
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
+                # Add anchors
+                for child in vbstate.body:
+                    if child is not vbstate.para:
+                        vtag.append(child)
+                    else:
+                        break
                for child in vbstate.para:
                    vtag.append(child)
                return
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -221,7 +221,10 @@ class MetadataHeader(BookHeader):
        else:
            end = self.section_offset(number + 1)
        self.stream.seek(start)
+        try:
            return self.stream.read(end - start)
+        except OverflowError:
+            return self.stream.read(os.stat(self.stream.name).st_size - start)


 class MobiReader(object):
@ -398,6 +401,8 @@ class MobiReader(object):
            elem.getparent().remove(elem)
        fname = self.name.encode('ascii', 'replace')
        fname = re.sub(r'[\x08\x15\0]+', '', fname)
+        if not fname:
+            fname = 'dummy'
        htmlfile = os.path.join(output_dir,
            ascii_filename(fname) + '.html')
        try:
@ -564,6 +569,10 @@ class MobiReader(object):
                for attr in self.IMAGE_ATTRS:
                    recindex = attrib.pop(attr, None) or recindex
                if recindex is not None:
+                    try:
+                        recindex = '%05d'%int(recindex)
+                    except:
+                        pass
                    attrib['src'] = 'images/%s.jpg' % recindex
                for attr in ('width', 'height'):
                    if attr in attrib:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -787,6 +787,8 @@ class Manifest(object):
            # Convert to Unicode and normalize line endings
            data = self.oeb.decode(data)
            data = self.oeb.html_preprocessor(data)
+            # There could be null bytes in data if it had &#0; entities in it
+            data = data.replace('\0', '')

            # Remove DOCTYPE declaration as it messes up parsing
            # In particular, it causes tostring to insert xmlns
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
                with open(path, 'wb') as f:
                    f.write(str(item))
                item.unload_data_from_memory(memory=path)
-
-
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@ -143,11 +143,17 @@ def render_jacket(mi, output_profile,
    if comments:
        comments = comments_to_html(comments)

+    try:
+        author = mi.format_authors()
+    except:
+        author = ''
+
    def generate_html(comments):
        args = dict(xmlns=XHTML_NS,
                    title_str=title_str,
                    css=css,
                    title=title,
+                    author=author,
                    pubdate_label=_('Published'), pubdate=pubdate,
                    series_label=_('Series'), series=series,
                    rating_label=_('Rating'), rating=rating,
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@ -133,7 +133,11 @@ class DetectStructure(object):


    def elem_to_link(self, item, elem, counter):
-        text = xml2text(elem)
+        text = xml2text(elem).strip()
+        if not text:
+            text = elem.get('title', '')
+        if not text:
+            text = elem.get('alt', '')
        text = text[:100].strip()
        id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', id)
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -223,7 +223,6 @@ class MessageBox(QMessageBox):
        if default_button is not None:
            self.setDefaultButton(default_button)

-
    def copy_to_clipboard(self):
        QApplication.clipboard().setText('%s: %s\n\n%s' %
                (self.title, self.msg, self.det_msg))
@ -715,13 +714,13 @@ def build_forms(srcdir, info=None):
            dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
            dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
            dat = pat.sub(sub, dat)
+            dat = dat.replace('from QtWebKit.QWebView import QWebView',
+                    'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')

            if form.endswith('viewer%smain.ui'%os.sep):
                info('\t\tPromoting WebView')
                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
                dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
-                dat = dat.replace('from QtWebKit.QWebView import QWebView',
-                        'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'

            open(compiled_form, 'wb').write(dat)
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@ -192,14 +192,15 @@ class EditMetadataAction(InterfaceAction):
                        _('At least two books must be selected for merging'),
                        show=True)
        dest_id, src_books, src_ids = self.books_to_merge(rows)
+        title = self.gui.library_view.model().db.title(dest_id, index_is_id=True)
        if safe_merge:
            if not confirm('<p>'+_(
                'Book formats and metadata from the selected books '
-                'will be added to the <b>first selected book.</b> '
+                'will be added to the <b>first selected book</b> (%s). '
                'ISBN will <i>not</i> be merged.<br><br> '
                'The second and subsequently selected books will not '
                'be deleted or changed.<br><br>'
-                'Please confirm you want to proceed.')
+                'Please confirm you want to proceed.')%title
            +'</p>', 'merge_books_safe', self.gui):
                return
            self.add_formats(dest_id, src_books)
@ -207,14 +208,14 @@ class EditMetadataAction(InterfaceAction):
        else:
            if not confirm('<p>'+_(
                'Book formats and metadata from the selected books will be merged '
-                'into the <b>first selected book</b>. '
+                'into the <b>first selected book</b> (%s). '
                'ISBN will <i>not</i> be merged.<br><br>'
                'After merger the second and '
                'subsequently selected books will be <b>deleted</b>. <br><br>'
                'All book formats of the first selected book will be kept '
                'and any duplicate formats in the second and subsequently selected books '
                'will be permanently <b>deleted</b> from your computer.<br><br>  '
-                'Are you <b>sure</b> you want to proceed?')
+                'Are you <b>sure</b> you want to proceed?')%title
            +'</p>', 'merge_books', self.gui):
                return
            if len(rows)>5:
@ -233,6 +234,7 @@ class EditMetadataAction(InterfaceAction):
            ci = self.gui.library_view.model().index(dest_row, 0)
            if ci.isValid():
                self.gui.library_view.setCurrentIndex(ci)
+                self.gui.library_view.model().current_changed(ci, ci)

    def add_formats(self, dest_id, src_books, replace=False):
        for src_book in src_books:
--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
        self.qaction.setMenu(self.view_menu)
        ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)

-
    def location_selected(self, loc):
        enabled = loc == 'library'
        for action in list(self.view_menu.actions())[1:]:
@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
        rows = self.gui.current_view().selectionModel().selectedRows()
        self._view_books(rows)

+    def view_triggered(self, index):
+        '''Hand ``index`` to ``_view_books`` as a one-item list.'''
+        rows = [index]
+        self._view_books(rows)
+
    def view_specific_book(self, index):
        self._view_books([index])

--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -5,11 +5,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, collections
+import os, collections, sys
+from Queue import Queue

-from PyQt4.Qt import QLabel, QPixmap, QSize, QWidget, Qt, pyqtSignal, \
-    QVBoxLayout, QScrollArea, QPropertyAnimation, QEasingCurve, \
-    QSizePolicy, QPainter, QRect, pyqtProperty
+from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, \
+    QPropertyAnimation, QEasingCurve, QThread, QApplication, QFontInfo, \
+    QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette
+from PyQt4.QtWebKit import QWebView

 from calibre import fit_image, prepare_string_for_xml
 from calibre.gui2.widgets import IMAGE_EXTENSIONS
@ -67,10 +69,7 @@ class CoverView(QWidget): # {{{

    def __init__(self, vertical, parent=None):
        QWidget.__init__(self, parent)
-        self.setMaximumSize(QSize(120, 120))
-        self.setMinimumSize(QSize(120 if vertical else 20, 120 if vertical else
-            20))
-        self._current_pixmap_size = self.maximumSize()
+        self._current_pixmap_size = QSize(120, 120)
        self.vertical = vertical

        self.animation = QPropertyAnimation(self, 'current_pixmap_size', self)
@ -79,8 +78,9 @@ class CoverView(QWidget): # {{{
        self.animation.setStartValue(QSize(0, 0))
        self.animation.valueChanged.connect(self.value_changed)

-        self.setSizePolicy(QSizePolicy.Expanding if vertical else
-                QSizePolicy.Minimum, QSizePolicy.Expanding)
+        self.setSizePolicy(
+                QSizePolicy.Expanding if vertical else QSizePolicy.Minimum,
+                QSizePolicy.Expanding)

        self.default_pixmap = QPixmap(I('book.png'))
        self.pixmap = self.default_pixmap
@ -109,20 +109,6 @@ class CoverView(QWidget): # {{{
        self.current_pixmap_size = QSize(self.pwidth, self.pheight)
        self.animation.setEndValue(self.current_pixmap_size)

-    def relayout(self, parent_size):
-        if self.vertical:
-            self.setMaximumSize(parent_size.width(),
-                min(int(parent_size.height()/2.),int(4/3. * parent_size.width())+1))
-        else:
-            self.setMaximumSize(1+int(3/4. * parent_size.height()),
-                    parent_size.height())
-        self.resize(self.maximumSize())
-        self.animation.stop()
-        self.do_layout()
-
-    def sizeHint(self):
-        return self.maximumSize()
-
    def show_data(self, data):
        self.animation.stop()
        same_item = data.get('id', True) == self.data.get('id', False)
@ -165,70 +151,187 @@ class CoverView(QWidget): # {{{
    # }}}

 # Book Info {{{
-class Label(QLabel):

-    mr = pyqtSignal(object)
+class RenderComments(QThread):
+
+    rdone = pyqtSignal(object, object)
+
+    def __init__(self, parent):
+        QThread.__init__(self, parent)
+        self.queue = Queue()
+        self.start()
+
+    def run(self):
+        while True:
+            try:
+                rows, comments = self.queue.get()
+            except:
+                break
+            import time
+            time.sleep(0.001)
+            oint = sys.getcheckinterval()
+            sys.setcheckinterval(5)
+            try:
+                self.rdone.emit(rows, comments_to_html(comments))
+            except:
+                pass
+            sys.setcheckinterval(oint)
+
+
+class BookInfo(QWebView):
+
    link_clicked = pyqtSignal(object)

-    def __init__(self):
-        QLabel.__init__(self)
-        self.setTextFormat(Qt.RichText)
-        self.setText('')
-        self.setWordWrap(True)
-        self.setAlignment(Qt.AlignTop)
-        self.linkActivated.connect(self.link_activated)
+    def __init__(self, vertical, parent=None):
+        QWebView.__init__(self, parent)
+        self.vertical = vertical
+        self.renderer = RenderComments(self)
+        self.renderer.rdone.connect(self._show_data, type=Qt.QueuedConnection)
+        self.page().setLinkDelegationPolicy(self.page().DelegateAllLinks)
+        self.linkClicked.connect(self.link_activated)
        self._link_clicked = False
-        self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)

    def link_activated(self, link):
        self._link_clicked = True
-        link = unicode(link)
+        link = unicode(link.toString())
        self.link_clicked.emit(link)

-    def mouseReleaseEvent(self, ev):
-        QLabel.mouseReleaseEvent(self, ev)
-        if not self._link_clicked:
-            self.mr.emit(ev)
-        self._link_clicked = False
-
-class BookInfo(QScrollArea):
-
-    def __init__(self, vertical, parent=None):
-        QScrollArea.__init__(self, parent)
-        self.vertical = vertical
-        self.setWidgetResizable(True)
-        self.label = Label()
-        self.setWidget(self.label)
-        self.link_clicked = self.label.link_clicked
-        self.mr = self.label.mr
-        self.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
+    def turnoff_scrollbar(self, *args):
+        self.page().mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

    def show_data(self, data):
-        self.label.setText('')
        rows = render_rows(data)
        rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
            k, t in rows])
-        comments = ''
-        if data.get(_('Comments'), '') not in ('', u'None'):
-            comments = data[_('Comments')]
-            comments = comments_to_html(comments)
+        comments = data.get(_('Comments'), '')
+        if comments and comments != u'None':
+            self.renderer.queue.put((rows, comments))
+        self._show_data(rows, '')
+
+
+    def _show_data(self, rows, comments):
+        f = QFontInfo(QApplication.font(self.parent())).pixelSize()
+        p = unicode(QApplication.palette().color(QPalette.Normal,
+            QPalette.Window).name())
+        c = unicode(QApplication.palette().color(QPalette.Normal,
+                        QPalette.WindowText).name())
+        templ = u'''\
+        <html>
+            <head>
+            <style type="text/css">
+                body, td {background-color: %s; font-size: %dpx; color: %s }
+                a { text-decoration: none; color: blue }
+            </style>
+            </head>
+            <body>
+            %%s
+            </body>
+        <html>
+        '''%(p, f, c)
        if self.vertical:
            if comments:
                rows += u'<tr><td colspan="2">%s</td></tr>'%comments
-            self.label.setText(u'<table>%s</table>'%rows)
+            self.setHtml(templ%(u'<table>%s</table>'%rows))
        else:
            left_pane = u'<table>%s</table>'%rows
            right_pane = u'<div>%s</div>'%comments
-            self.label.setText(u'<table><tr><td valign="top" '
+            self.setHtml(templ%(u'<table><tr><td valign="top" '
                    'style="padding-right:2em">%s</td><td valign="top">%s</td></tr></table>'
-                    % (left_pane, right_pane))
+                    % (left_pane, right_pane)))

+    def mouseDoubleClickEvent(self, ev):
+        ev.ignore()
+
+# }}}
+
+class DetailsLayout(QLayout): # {{{
+
+    def __init__(self, vertical, parent):
+        QLayout.__init__(self, parent)
+        self.vertical = vertical
+        self._children = []
+
+        self.min_size = QSize(190, 200) if vertical else QSize(120, 120)
+        self.setContentsMargins(0, 0, 0, 0)
+
+    def minimumSize(self):
+        return QSize(self.min_size)
+
+    def addItem(self, child):
+        if len(self._children) > 2:
+            raise ValueError('This layout can only manage two children')
+        self._children.append(child)
+
+    def itemAt(self, i):
+        try:
+            return self._children[i]
+        except:
+            pass
+        return None
+
+    def takeAt(self, i):
+        try:
+            self._children.pop(i)
+        except:
+            pass
+        return None
+
+    def count(self):
+        return len(self._children)
+
+    def sizeHint(self):
+        return QSize(self.min_size)
+
+    def setGeometry(self, r):
+        QLayout.setGeometry(self, r)
+        self.do_layout(r)
+
+    def cover_height(self, r):
+        mh = min(int(r.height()/2.), int(4/3. * r.width())+1)
+        try:
+            ph = self._children[0].widget().pixmap.height()
+        except:
+            ph = 0
+        if ph > 0:
+            mh = min(mh, ph)
+        return mh
+
+    def cover_width(self, r):
+        mw = 1 + int(3/4. * r.height())
+        try:
+            pw = self._children[0].widget().pixmap.width()
+        except:
+            pw = 0
+        if pw > 0:
+            mw = min(mw, pw)
+        return mw
+
+
+    def do_layout(self, rect):
+        if len(self._children) != 2:
+            return
+        left, top, right, bottom = self.getContentsMargins()
+        r = rect.adjusted(+left, +top, -right, -bottom)
+        x = r.x()
+        y = r.y()
+        cover, details = self._children
+        if self.vertical:
+            ch = self.cover_height(r)
+            cover.setGeometry(QRect(x, y, r.width(), ch))
+            cover.widget().do_layout()
+            y += ch + 5
+            details.setGeometry(QRect(x, y, r.width(), r.height()-ch-5))
+        else:
+            cw = self.cover_width(r)
+            cover.setGeometry(QRect(x, y, cw, r.height()))
+            cover.widget().do_layout()
+            x += cw + 5
+            details.setGeometry(QRect(x, y, r.width() - cw - 5, r.height()))

 # }}}

 class BookDetails(QWidget): # {{{

-    resized = pyqtSignal(object)
    show_book_info = pyqtSignal()
    open_containing_folder = pyqtSignal(int)
    view_specific_format = pyqtSignal(int, object)
@ -269,23 +372,14 @@ class BookDetails(QWidget): # {{{
    def __init__(self, vertical, parent=None):
        QWidget.__init__(self, parent)
        self.setAcceptDrops(True)
-        self._layout = QVBoxLayout()
-        if not vertical:
-            self._layout.setDirection(self._layout.LeftToRight)
+        self._layout = DetailsLayout(vertical, self)
        self.setLayout(self._layout)

        self.cover_view = CoverView(vertical, self)
-        self.cover_view.relayout(self.size())
-        self.resized.connect(self.cover_view.relayout, type=Qt.QueuedConnection)
        self._layout.addWidget(self.cover_view)
        self.book_info = BookInfo(vertical, self)
        self._layout.addWidget(self.book_info)
        self.book_info.link_clicked.connect(self._link_clicked)
-        self.book_info.mr.connect(self.mouseReleaseEvent)
-        if vertical:
-            self.setMinimumSize(QSize(190, 200))
-        else:
-            self.setMinimumSize(120, 120)
        self.setCursor(Qt.PointingHandCursor)

    def _link_clicked(self, link):
@ -299,17 +393,15 @@ class BookDetails(QWidget): # {{{
            open_local_file(val)


-    def mouseReleaseEvent(self, ev):
+    def mouseDoubleClickEvent(self, ev):
        ev.accept()
        self.show_book_info.emit()

-    def resizeEvent(self, ev):
-        self.resized.emit(self.size())
-
    def show_data(self, data):
-        self.cover_view.show_data(data)
        self.book_info.show_data(data)
-        self.setToolTip('<p>'+_('Click to open Book Details window') +
+        self.cover_view.show_data(data)
+        self._layout.do_layout(self.rect())
+        self.setToolTip('<p>'+_('Double-click to open Book Details window') +
                '<br><br>' + _('Path') + ': ' + data.get(_('Path'), ''))

    def reset_info(self):
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -23,7 +23,9 @@ class PluginWidget(QWidget,Ui_Form):
                     ('generate_recently_added', True),
                     ('note_tag','*'),
                     ('numbers_as_text', False),
-                     ('read_tag','+')]
+                     ('read_tag','+'),
+                     ('wishlist_tag','Wishlist'),
+                     ]


    # Output synced to the connected device?
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@ -42,28 +42,28 @@
     </property>
    </widget>
   </item>
-   <item row="2" column="0">
+   <item row="3" column="0">
    <widget class="QLabel" name="label_4">
     <property name="text">
      <string>Additional note tag prefix:</string>
     </property>
    </widget>
   </item>
-   <item row="2" column="1">
+   <item row="3" column="1">
    <widget class="QLineEdit" name="note_tag">
     <property name="toolTip">
      <string extracomment="Default: *"/>
     </property>
    </widget>
   </item>
-   <item row="4" column="1">
+   <item row="5" column="1">
    <widget class="QLineEdit" name="exclude_genre">
     <property name="toolTip">
      <string extracomment="Default: \[[\w]*\]"/>
     </property>
    </widget>
   </item>
-   <item row="4" column="0">
+   <item row="5" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Regex pattern describing tags to exclude as genres:</string>
@ -76,7 +76,7 @@
     </property>
    </widget>
   </item>
-   <item row="5" column="1">
+   <item row="6" column="1">
    <widget class="QLabel" name="label_6">
     <property name="text">
      <string>Regex tips:
@ -88,7 +88,7 @@
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -101,34 +101,44 @@
     </property>
    </spacer>
   </item>
-   <item row="8" column="0">
+   <item row="9" column="0">
    <widget class="QCheckBox" name="generate_titles">
     <property name="text">
      <string>Include 'Titles' Section</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="generate_recently_added">
     <property name="text">
      <string>Include 'Recently Added' Section</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="numbers_as_text">
     <property name="text">
      <string>Sort numbers as text</string>
     </property>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="generate_series">
     <property name="text">
      <string>Include 'Series' Section</string>
     </property>
    </widget>
   </item>
+   <item row="2" column="1">
+    <widget class="QLineEdit" name="wishlist_tag"/>
+   </item>
+   <item row="2" column="0">
+    <widget class="QLabel" name="label_5">
+     <property name="text">
+      <string>Wishlist tag:</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/comments_editor.py
+++ b/src/calibre/gui2/comments_editor.py
--- a/src/calibre/gui2/convert/bulk.py
+++ b/src/calibre/gui2/convert/bulk.py
@ -47,6 +47,8 @@ class BulkConfig(Config):
                self.show_pane)
        self.connect(self.groups, SIGNAL('entered(QModelIndex)'),
                self.show_group_help)
+        rb = self.buttonBox.button(self.buttonBox.RestoreDefaults)
+        rb.setVisible(False)
        self.groups.setMouseTracking(True)


--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
    if log is None:
        log = Log()
    from calibre.library import db
+    from calibre.utils.config import prefs
+    prefs.refresh()
    db = db()
    db.catalog_plugin_on_device_temp_mapping = dbspec

--- a/Show More
+++ b/Show More