Merge from trunk

Changelog.yaml | 171 lines
@@ -4,6 +4,176 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+- version: 0.7.29
+  date: 2010-11-19
+
+  new features:
+    - title: "OSX binary build is now based on Qt 4.7. Also, the build is now Intel only and requires at least OS X 10.5.2. If you are on a PowerPC machine or an older OS X version, do not upgrade"
+
+    - title: "Content server: Allow direct navigation to a set of books in the book list."
+      tickets: [7453]
+
+    - title: "OS X: When deleting books, put the files into the recycle bin instead of deleting them permanently"
+
+    - title: "Add a button to easily configure Hotmail as an email relay. Also improve usability of the easy-config buttons"
+
+    - title: "Kobo driver: Support Currently_Reading category"
+
+    - title: "Catalog generation: Thumbnail caching, wishlist, improved description layout."
+      tickets: [7376]
+
+    - title: "Support for the Cybook Orizon"
+
+  bug fixes:
+    - title: "Fix restore to defaults in preferences incorrectly setting PDF unwrap factor to 0.0"
+
+    - title: "PDF Input: Fix unwrapping of accented characters"
+
+    - title: "Do not display dialogs asking for confirmation or showing conversion errors when calibre is minimized to the system tray"
+      tickets: [7549]
+
+    - title: "calibre server: Fix regression that broke digest authentication when the calibre interface language was set to non-English"
+
+    - title: "EPUB Output: Do not raise an error for invalid embedded fonts in the input document."
+      tickets: [7567]
+
+    - title: "RTF Input: Improved conversion of tables, with support for border styles on table cells"
+
+    - title: "E-book viewer: Fix regression that broke hyphenation. Also add more language patterns for hyphenation"
+
+    - title: "SONY driver: Fix cover thumbnails being uploaded to the wrong directory on Windows"
+
+    - title: "Fix UnicodeDecodeError when displaying a failed metadata fetch message"
+      tickets: [7560]
+
+    - title: "Bulk metadata edit: Speed up the remove-all-tags operation"
+
+    - title: "MOBI Output: Specify image sizes in pixels instead of em to accommodate Amazon's @#$%#@! MOBI renderer"
+
+    - title: "Fix bug preventing customization of builtin recipes if they are not ASCII encoded"
+
+    - title: "SONY XML cache: Gracefully handle the case where the XML db contains a reference to a file that does not exist"
+
+  improved recipes:
+    - Al Jazeera
+    - The Moscow Times
+    - Globe and Mail
+    - Washington Post
+
+  new recipes:
+    - title: "Hannoversche Allgemeine Zeitung"
+      author: "Artemis"
+
+    - title: "globes.co.il"
+      author: "marbs"
+
+    - title: "THN and RDS"
+      author: "Nexus"
+
+    - title: "pclab.pl"
+      author: "ravcio"
+
+    - title: "Now Toronto"
+      author: "Starson17"
+
+    - title: "Press releases of the German government and EU Commission"
+      author: "malfi"
+
+
+- version: 0.7.28
+  date: 2010-11-12
+
+  new features:
+    - title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on Windows and Linux. This should result in a significant speed up for the calibre ebook viewer"
+
+    - title: "Driver for Nook Color, Eken M001"
+
+    - title: "Add a tweak to turn off double-clicking to open the viewer"
+
+    - title: "Catalog generation: Add an indication when a book has no formats"
+      tickets: [7376]
+
+    - title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
+
+    - title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
+
+  bug fixes:
+    - title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
+      tickets: [7488]
+
+    - title: "Bulk convert dialog: Hide the useless restore defaults button."
+      tickets: [7471]
+
+    - title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
+      tickets: [7355]
+
+    - title: "Fix some SONY readers not being detected on Windows"
+      tickets: [7413]
+
+    - title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
+      tickets: [7455]
+
+    - title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
+      tickets: [7506]
+
+    - title: "Sony driver: Ignore invalid strings when updating the XML database"
+
+    - title: "Content Server: Add the day to the displayed date in the /mobile book listing"
+
+    - title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
+      tickets: [7481]
+
+    - title: "MOBI Input: Handle files that have the record size incorrectly set to a long integer"
+      tickets: [7472]
+
+    - title: "Fix not enough vertical space for text in the preferences dialog category listing"
+
+    - title: "Remove 'sort' from the Search and replace destination fields and add it to the source fields. S&R is no longer marked experimental"
+
+    - title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
+
+    - title: "E-book viewer: Fix clicking entries in the TOC that point to the currently loaded flow not scrolling the view to the top of the document"
+
+    - title: "Fix bug in the regex used to extract the charset from <meta> tags"
+
+    - title: "MOBI Output: Add support for the <q> tag"
+
+  improved recipes:
+    - Zeit Online
+    - Gamespot Review
+    - Politika
+    - Pagina12
+    - Irish Times
+    - elektrolese
+
+  new recipes:
+    - title: "Handelsblatt and European Voice"
+      author: "malfi"
+
+    - title: "Polityka and Newsweek"
+      author: "Mateusz Kielar"
+
+    - title: "MarcTV"
+      author: "Marc Toensings"
+
+    - title: "Rolling Stone"
+      author: "Darko Miletic"
+
+    - title: "Vedomosti"
+      author: "Nikolai Kotchetkov"
+
+    - title: "Hola.com"
+      author: "bmsleight"
+
+    - title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazon"
+      author: "BlonG"
+
+    - title: "SC Print Magazine"
+      author: "Tony Maro"
+
+    - title: "Diario Sport"
+      author: "Jefferson Frantz"
+
 - version: 0.7.27
   date: 2010-11-05
@@ -44,6 +214,7 @@
       tickets: [7356]

   - title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
+    tickets: [7321]

   - title: "Convert comments to HTML for book details panel in separate thread to make scrolling through the book list faster when large comments are present"
@@ -12,8 +12,8 @@ p.title {
 p.author {
     margin-top:0em;
     margin-bottom:0em;
-    text-align: left;
-    text-indent: 1em;
+    text-align: center;
+    text-indent: 0em;
     font-size:large;
 }

@@ -27,17 +27,28 @@ p.author_index {
 }

 p.tags {
-    margin-top:0em;
+    margin-top:0.5em;
     margin-bottom:0em;
     text-align: left;
-    text-indent: 1em;
-    font-size:small;
+    text-indent: 0.0in;
 }

-p.description {
-    text-align:left;
-    font-style:normal;
-    margin-top: 0em;
+p.formats {
+    font-size:90%;
+    margin-top:0em;
+    margin-bottom:0.5em;
+    text-align: left;
+    text-indent: 0.0in;
+}
+
+div.description > p:first-child {
+    margin: 0 0 0 0;
+    text-indent: 0em;
+}
+
+div.description {
+    margin: 0 0 0 0;
+    text-indent: 1em;
 }

 p.date_index {

@@ -81,6 +92,14 @@ p.unread_book {
     text-indent:-2em;
 }

+p.wishlist_item {
+    text-align:left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:2em;
+    text-indent:-2em;
+}
+
 p.date_read {
     text-align:left;
     margin-top:0px;

@@ -104,3 +123,14 @@ hr.annotations_divider {
     margin-top:0em;
     margin-bottom:0em;
 }
+
+td.publisher, td.date {
+    font-weight:bold;
+    text-align:center;
+}
+td.rating {
+    text-align: center;
+}
+td.thumbnail img {
+    -webkit-box-shadow: 4px 4px 12px #999;
+}
@@ -355,6 +355,25 @@ h2.library_name {
     color: red;
 }

+#booklist > #pagelist { display: none; }
+
+#goto_page_dialog ul {
+    list-style-type: none;
+    font-size: medium;
+}
+
+#goto_page_dialog li {
+    margin-bottom: 1.5ex;
+}
+
+#goto_page_dialog a {
+    text-decoration: none;
+    color: blue;
+}
+
+#goto_page_dialog a:hover {
+    color: red;
+}
+
 #booklist .left .ui-button-text {
     font-size: medium;

@@ -96,5 +96,6 @@
         </div>
     </div>
     <div id="book_details_dialog"></div>
+    <div id="goto_page_dialog"></div>
 </body>
 </html>
@@ -202,6 +202,23 @@ function previous_page() {
     else last_page();
 }

+function gp_internal(id) {
+    var gp = $('#goto_page_dialog');
+    gp.dialog('close');
+    var elem = $("#booklist #" + id);
+    load_page(elem);
+}
+
+function goto_page() {
+    var gp = $('#goto_page_dialog');
+    var pl = $('#booklist > #pagelist');
+    gp.html(pl.html());
+    gp.dialog('option', 'title', pl.attr('title'));
+    gp.dialog('option', 'height', $(window).height() - 100);
+    gp.dialog('open');
+}
+
 function load_page(elem) {
     if (elem.is(":visible")) return;
     var ld = elem.find('.load_data');

@@ -251,6 +268,12 @@ function booklist(hide_sort) {
         modal: true,
         show: 'slide'
     });
+    $("#goto_page_dialog").dialog({
+        autoOpen: false,
+        modal: true,
+        show: 'slide'
+    });

     first_page();
 }
@@ -38,6 +38,7 @@ Monocle.Browser.on = {
   iPad: navigator.userAgent.indexOf("iPad") != -1,
   BlackBerry: navigator.userAgent.indexOf("BlackBerry") != -1,
   Android: navigator.userAgent.indexOf('Android') != -1,
+  MacOSX: navigator.userAgent.indexOf('Mac OS X') != -1,
   Kindle3: navigator.userAgent.match(/Kindle\/3/)
 }

@@ -162,12 +163,23 @@ Monocle.Browser.has.transform3d = Monocle.Browser.CSSProps.isSupported([
   'OPerspective',
   'msPerspective'
 ]) && Monocle.Browser.CSSProps.supportsMediaQueryProperty('transform-3d');

+Monocle.Browser.has.embedded = (top != self);
+
 Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2");

 Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");

 Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
 Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf;

 Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;

+Monocle.Browser.has.relativeIframeWidthBug = Monocle.Browser.on.Android;
+
+Monocle.Browser.has.jumpFlickerBug =
+  Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;
+
 if (typeof window.console == "undefined") {
   window.console = {

@@ -1091,11 +1103,29 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
       cmpt.dom.setStyles(Monocle.Styles.component);
       Monocle.Styles.applyRules(cmpt.contentDocument.body, Monocle.Styles.body);
     }
+    lockFrameWidths();
     dom.find('overlay').dom.setStyles(Monocle.Styles.overlay);
     dispatchEvent('monocle:styles');
   }

+  function lockingFrameWidths() {
+    if (!Monocle.Browser.has.relativeIframeWidthBug) { return; }
+    for (var i = 0, cmpt; cmpt = dom.find('component', i); ++i) {
+      cmpt.style.display = "none";
+    }
+  }
+
+  function lockFrameWidths() {
+    if (!Monocle.Browser.has.relativeIframeWidthBug) { return; }
+    for (var i = 0, cmpt; cmpt = dom.find('component', i); ++i) {
+      cmpt.style.width = cmpt.parentNode.offsetWidth+"px";
+      cmpt.style.display = "block";
+    }
+  }
+
   function setBook(bk, place, callback) {
     p.book = bk;
     var pageCount = 0;

@@ -1121,12 +1151,14 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
     if (!p.initialized) {
       console.warn('Attempt to resize book before initialization.');
     }
+    lockingFrameWidths();
     if (!dispatchEvent("monocle:resizing", {}, true)) {
       return;
     }
     clearTimeout(p.resizeTimer);
     p.resizeTimer = setTimeout(
       function () {
+        lockFrameWidths();
         p.flipper.moveTo({ page: pageNumber() });
         dispatchEvent("monocle:resize");
       },

@@ -1765,12 +1797,7 @@ Monocle.Book = function (dataSource) {
   function componentIdMatching(str) {
-    for (var i = 0; i < p.componentIds.length; ++i) {
-      if (str.indexOf(p.componentIds[i]) > -1) {
-        return p.componentIds[i];
-      }
-    }
-    return null;
+    return p.componentIds.indexOf(str) >= 0 ? str : null;
   }

@@ -2018,6 +2045,12 @@ Monocle.Component = function (book, id, index, chapters, source) {
   function loadFrameFromURL(url, frame, callback) {
+    if (!url.match(/^\//)) {
+      var link = document.createElement('a');
+      link.setAttribute('href', url);
+      url = link.href;
+      delete(link);
+    }
     frame.onload = function () {
       frame.onload = null;
       Monocle.defer(callback);

@@ -2460,7 +2493,7 @@ Monocle.Flippers.Legacy = function (reader) {
   function moveTo(locus, callback) {
     var fn = frameToLocus;
     if (typeof callback == "function") {
-      fn = function () { frameToLocus(); callback(); }
+      fn = function (locus) { frameToLocus(locus); callback(locus); }
     }
     p.reader.getBook().setOrLoadPageAt(page(), locus, fn);
   }

@@ -2794,7 +2827,9 @@ Monocle.Dimensions.Columns = function (pageDiv) {
   function scrollerWidth() {
     var bdy = p.page.m.activeFrame.contentDocument.body;
     if (Monocle.Browser.has.iframeDoubleWidthBug) {
-      if (Monocle.Browser.iOSVersion < "4.1") {
+      if (Monocle.Browser.on.Android) {
+        return bdy.scrollWidth * 1.5; // I actually have no idea why 1.5.
+      } else if (Monocle.Browser.iOSVersion < "4.1") {
        var hbw = bdy.scrollWidth / 2;
        var sew = scrollerElement().scrollWidth;
        return Math.max(sew, hbw);

@@ -2969,6 +3004,7 @@ Monocle.Flippers.Slider = function (reader) {
   function setPage(pageDiv, locus, callback) {
+    ensureWaitControl();
     p.reader.getBook().setOrLoadPageAt(
       pageDiv,
       locus,

@@ -3048,6 +3084,7 @@ Monocle.Flippers.Slider = function (reader) {
     checkPoint(boxPointX);

     p.turnData.releasing = true;
+    showWaitControl(lowerPage());

     if (dir == k.FORWARDS) {
       if (

@@ -3088,14 +3125,18 @@ Monocle.Flippers.Slider = function (reader) {
   function onGoingBackward(x) {
-    var lp = lowerPage();
+    var lp = lowerPage(), up = upperPage();
+    showWaitControl(up);
     jumpOut(lp, // move lower page off-screen
       function () {
         flipPages(); // flip lower to upper
         setPage( // set upper page to previous
           lp,
           getPlace(lowerPage()).getLocus({ direction: k.BACKWARDS }),
-          function () { lifted(x); }
+          function () {
+            lifted(x);
+            hideWaitControl(up);
+          }
         );
       }
     );

@@ -3103,8 +3144,10 @@ Monocle.Flippers.Slider = function (reader) {
   function afterGoingForward() {
-    var up = upperPage();
+    var up = upperPage(), lp = lowerPage();
     if (p.interactive) {
+      showWaitControl(up);
+      showWaitControl(lp);
       setPage( // set upper (off screen) to current
         up,
         getPlace().getLocus({ direction: k.FORWARDS }),

@@ -3113,6 +3156,7 @@ Monocle.Flippers.Slider = function (reader) {
         }
       );
     } else {
+      showWaitControl(lp);
       flipPages();
       jumpIn(up, function () { prepareNextPage(announceTurn); });
     }

@@ -3171,6 +3215,8 @@ Monocle.Flippers.Slider = function (reader) {
   function announceTurn() {
+    hideWaitControl(upperPage());
+    hideWaitControl(lowerPage());
     p.reader.dispatchEvent('monocle:turn');
     resetTurnData();
   }

@@ -3319,12 +3365,14 @@ Monocle.Flippers.Slider = function (reader) {
   function jumpIn(pageDiv, callback) {
-    setX(pageDiv, 0, { duration: 1 }, callback);
+    var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
+    setX(pageDiv, 0, { duration: dur }, callback);
   }

   function jumpOut(pageDiv, callback) {
-    setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: 1 }, callback);
+    var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
+    setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
   }

@@ -3357,6 +3405,28 @@ Monocle.Flippers.Slider = function (reader) {
   }

+  function ensureWaitControl() {
+    if (p.waitControl) { return; }
+    p.waitControl = {
+      createControlElements: function (holder) {
+        return holder.dom.make('div', 'flippers_slider_wait');
+      }
+    }
+    p.reader.addControl(p.waitControl, 'page');
+  }
+
+  function showWaitControl(page) {
+    var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
+    ctrl.style.opacity = 0.5;
+  }
+
+  function hideWaitControl(page) {
+    var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
+    ctrl.style.opacity = 0;
+  }
+
   API.pageCount = p.pageCount;
   API.addPage = addPage;
   API.getPlace = getPlace;
BIN  resources/images/format-text-bold.png (new file, 5.0 KiB)
BIN  resources/images/format-text-italic.png (new file, 4.1 KiB)
BIN  resources/images/format-text-strikethrough.png (new file, 5.9 KiB)
BIN  resources/images/format-text-underline.png (new file, 4.4 KiB)
BIN  resources/images/hotmail.png (new file, 2.6 KiB)
BIN  resources/images/news/cnetjapan.png (new file, 892 B)
BIN  resources/images/news/deutsche_welle_bs.png (new file, 445 B)
BIN  resources/images/news/deutsche_welle_en.png (new file, 445 B)
BIN  resources/images/news/deutsche_welle_es.png (new file, 445 B)
BIN  resources/images/news/deutsche_welle_hr.png (new file, 445 B)
BIN  resources/images/news/deutsche_welle_pt.png (new file, 445 B)
BIN  resources/images/news/deutsche_welle_sr.png (new file, 445 B)
BIN  resources/images/news/endgadget_ja.png (new file, 698 B)
BIN  resources/images/news/jijinews.png (new file, 919 B)
BIN  resources/images/news/moscow_times.png (new file, 1.0 KiB)
BIN  resources/images/news/msnsankei.png (new file, 543 B)
BIN  resources/images/news/nikkei_free.png (new file, 948 B)
BIN  resources/images/news/nikkei_sub_economy.png (new file, 948 B)
BIN  resources/images/news/nikkei_sub_industory.png (new file, 948 B)
BIN  resources/images/news/nikkei_sub_life.png (new file, 948 B)
BIN  resources/images/news/nikkei_sub_main.png (new file, 948 B)
BIN  resources/images/news/nikkei_sub_sports.png (new file, 948 B)
BIN  resources/images/news/reuters.png (new file, 693 B)
BIN  resources/images/news/reuters_ja.png (new file, 693 B)
BIN  resources/images/news/rollingstone.png (new file, 1.3 KiB)
BIN  resources/images/news/the_workingham_times.png (new file, 1011 B)
resources/recipes/180.recipe | 50 lines (new file)
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+180.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title = '180.com.uy'
+    __author__ = 'Gustavo Azambuja'
+    description = 'Noticias de Uruguay'
+    language = 'es'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf-8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
+    remove_tags = [
+        dict(name=['object','link'])
+    ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Titulares', u'http://www.180.com.uy/feed.php')
+    ]
+
+    def get_cover_url(self):
+        return 'http://www.180.com.uy/tplef/img/logo.gif'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
@@ -1,10 +1,8 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

 '''
-aljazeera.net
+english.aljazeera.net
 '''
 from calibre.web.feeds.news import BasicNewsRecipe

@@ -12,41 +10,59 @@ class AlJazeera(BasicNewsRecipe):
     title = 'Al Jazeera in English'
     __author__ = 'Darko Miletic'
     description = 'News from Middle East'
     language = 'en'

     publisher = 'Al Jazeera'
     category = 'news, politics, middle east'
-    simultaneous_downloads = 1
-    delay = 4
-    oldest_article = 1
+    delay = 1
+    oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'iso-8859-1'
-    remove_javascript = True
     use_embedded_content = False
+    extra_css = """
+        body{font-family: Arial,sans-serif}
+        #ctl00_cphBody_dvSummary{font-weight: bold}
+        #dvArticleDate{font-size: small; color: #999999}
+    """
+    conversion_options = {
+          'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }

-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-        , '--ignore-tables'
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
-
-    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
+    keep_only_tags = [
+        dict(attrs={'id':['DetailedTitle','ctl00_cphBody_dvSummary','dvArticleDate']})
+        ,dict(name='td',attrs={'class':'DetailedSummary'})
+    ]

     remove_tags = [
-        dict(name=['object','link'])
+        dict(name=['object','link','table','meta','base','iframe','embed'])
         ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
     ]

     feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]

+    def get_article_url(self, article):
+        artlurl = article.get('link', None)
+        return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
+
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
         for item in soup.findAll(face=True):
             del item['face']
+        td = soup.find('td',attrs={'class':'DetailedSummary'})
+        if td:
+            td.name = 'div'
+        spn = soup.find('span',attrs={'id':'DetailedTitle'})
+        if spn:
+            spn.name = 'h1'
+        for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
+            itm.name = 'div'
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
         return soup
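Since remove_tags now strips table, meta and iframe elements, preprocess_html above first rescues the parts worth keeping by renaming the summary td (and the title span) to plain block tags. A minimal standalone illustration of that renaming trick, assuming calibre's bundled BeautifulSoup module; the markup is a made-up fragment, not real Al Jazeera HTML:

    # Hypothetical fragment; shows why td.name = 'div' saves content from
    # a later name-based tag filter such as remove_tags' 'table'/'td' entries.
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<td class="DetailedSummary">article summary</td>')
    td = soup.find('td', attrs={'class': 'DetailedSummary'})
    if td:
        td.name = 'div'  # same element, same children, new tag name
    print soup           # -> <div class="DetailedSummary">article summary</div>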
resources/recipes/arcamax.recipe | 110 lines (new file)
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'Copyright 2010 Starson17'
+'''
+www.arcamax.com
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Arcamax(BasicNewsRecipe):
+    title = 'Arcamax'
+    __author__ = 'Starson17'
+    __version__ = '1.03'
+    __date__ = '25 November 2010'
+    description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
+    category = 'news, comics'
+    language = 'en'
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+    cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
+
+    ####### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ########
+    num_comics_to_get = 7
+    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
+
+    conversion_options = {'linearize_tables' : True
+                        , 'comment' : description
+                        , 'tags' : category
+                        , 'language' : language
+                        }
+
+    keep_only_tags = [dict(name='div', attrs={'class':['toon']}),
+                     ]
+
+    def parse_index(self):
+        feeds = []
+        for title, url in [
+                ######## COMICS - GENERAL ########
+                #(u"9 Chickweed Lane", u"http://www.arcamax.com/ninechickweedlane"),
+                #(u"Agnes", u"http://www.arcamax.com/agnes"),
+                #(u"Andy Capp", u"http://www.arcamax.com/andycapp"),
+                (u"BC", u"http://www.arcamax.com/bc"),
+                #(u"Baby Blues", u"http://www.arcamax.com/babyblues"),
+                #(u"Beetle Bailey", u"http://www.arcamax.com/beetlebailey"),
+                (u"Blondie", u"http://www.arcamax.com/blondie"),
+                #(u"Boondocks", u"http://www.arcamax.com/boondocks"),
+                #(u"Cathy", u"http://www.arcamax.com/cathy"),
+                #(u"Daddys Home", u"http://www.arcamax.com/daddyshome"),
+                (u"Dilbert", u"http://www.arcamax.com/dilbert"),
+                #(u"Dinette Set", u"http://www.arcamax.com/thedinetteset"),
+                (u"Dog Eat Doug", u"http://www.arcamax.com/dogeatdoug"),
+                (u"Doonesbury", u"http://www.arcamax.com/doonesbury"),
+                #(u"Dustin", u"http://www.arcamax.com/dustin"),
+                (u"Family Circus", u"http://www.arcamax.com/familycircus"),
+                (u"Garfield", u"http://www.arcamax.com/garfield"),
+                #(u"Get Fuzzy", u"http://www.arcamax.com/getfuzzy"),
+                #(u"Girls and Sports", u"http://www.arcamax.com/girlsandsports"),
+                #(u"Hagar the Horrible", u"http://www.arcamax.com/hagarthehorrible"),
+                #(u"Heathcliff", u"http://www.arcamax.com/heathcliff"),
+                #(u"Jerry King Cartoons", u"http://www.arcamax.com/humorcartoon"),
+                #(u"Luann", u"http://www.arcamax.com/luann"),
+                #(u"Momma", u"http://www.arcamax.com/momma"),
+                #(u"Mother Goose and Grimm", u"http://www.arcamax.com/mothergooseandgrimm"),
+                (u"Mutts", u"http://www.arcamax.com/mutts"),
+                #(u"Non Sequitur", u"http://www.arcamax.com/nonsequitur"),
+                #(u"Pearls Before Swine", u"http://www.arcamax.com/pearlsbeforeswine"),
+                #(u"Pickles", u"http://www.arcamax.com/pickles"),
+                #(u"Red and Rover", u"http://www.arcamax.com/redandrover"),
+                #(u"Rubes", u"http://www.arcamax.com/rubes"),
+                #(u"Rugrats", u"http://www.arcamax.com/rugrats"),
+                (u"Speed Bump", u"http://www.arcamax.com/speedbump"),
+                (u"Wizard of Id", u"http://www.arcamax.com/wizardofid"),
+                (u"Dilbert", u"http://www.arcamax.com/dilbert"),
+                (u"Zits", u"http://www.arcamax.com/zits"),
+            ]:
+            articles = self.make_links(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def make_links(self, url):
+        title = 'Temp'
+        current_articles = []
+        pages = range(1, self.num_comics_to_get+1)
+        for page in pages:
+            page_soup = self.index_to_soup(url)
+            if page_soup:
+                title = page_soup.find(name='div', attrs={'class':'toon'}).p.img['alt']
+                page_url = url
+                prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'next'}, text='Previous').parent['href']
+                current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
+                url = prev_page_url
+        current_articles.reverse()
+        return current_articles
+
+    def preprocess_html(self, soup):
+        main_comic = soup.find('p',attrs={'class':'m0'})
+        if main_comic.a['target'] == '_blank':
+            main_comic.a.img['id'] = 'main_comic'
+        return soup
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        img#main_comic {max-width:100%; min-width:100%;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
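make_links above collects a strip's history by fetching the current page, hopping to the 'Previous' link, and repeating num_comics_to_get times, then reversing the list so the feed reads oldest to newest. A standalone sketch of that same pattern, assuming only the BasicNewsRecipe.index_to_soup helper used throughout these recipes (collect_history and its arguments are illustrative names, not part of the recipe):

    # Sketch: generic walk-the-'Previous'-link pagination, oldest strip first.
    def collect_history(recipe, url, days=7):
        items = []
        for _ in range(days):
            soup = recipe.index_to_soup(url)       # fetch and parse one strip page
            if soup is None:
                break
            items.append({'title': url, 'url': url, 'description': '', 'date': ''})
            # the 'Previous' text node's parent is the anchor, as in make_links
            prev = soup.find('a', attrs={'class': 'next'}, text='Previous')
            if prev is None:                       # no older strip to walk to
                break
            url = 'http://www.arcamax.com' + prev.parent['href']
        items.reverse()                            # feed order: oldest to newest
        return items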
@@ -13,6 +13,7 @@ class Dnevnik(BasicNewsRecipe):
     labguage = 'sl'
     no_stylesheets = True
     use_embedded_content = False
+    language = 'sl'

     conversion_options = {'linearize_tables' : True}
resources/recipes/bangkok_biz.recipe | 25 lines (new file)
@@ -0,0 +1,25 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1290689337(BasicNewsRecipe):
+    __author__ = 'Anat R.'
+    language = 'th'
+    title = u'Bangkok Biz News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    feeds = [(u'Headlines',
+              u'http://www.bangkokbiznews.com/home/services/rss/home.xml'),
+             (u'Politics', u'http://www.bangkokbiznews.com/home/services/rss/politics.xml'),
+             (u'Business', u'http://www.bangkokbiznews.com/home/services/rss/business.xml'),
+             (u'Finance', u'http://www.bangkokbiznews.com/home/services/rss/finance.xml'),
+             (u'Technology', u'http://www.bangkokbiznews.com/home/services/rss/it.xml')]
+    remove_tags_before = dict(name='div', attrs={'class':'box-Detailcontent'})
+    remove_tags_after = dict(name='p', attrs={'class':'allTags'})
+    remove_tags = []
+    remove_tags.append(dict(name = 'div', attrs = {'id': 'content-tools'}))
+    remove_tags.append(dict(name = 'p', attrs = {'class':'allTags'}))
+    remove_tags.append(dict(name = 'div', attrs = {'id':'morePic'}))
+    remove_tags.append(dict(name = 'ul', attrs = {'class':'tabs-nav'}))
resources/recipes/bitacora.recipe | 58 lines (new file)
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+bitacora.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'bitacora.com.uy'
+    __author__ = 'Gustavo Azambuja'
+    description = 'Noticias de Uruguay'
+    language = 'es'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'iso-8859-1'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['txt'])]
+    remove_tags = [
+        dict(name='div', attrs={'class':'tablafoot'}),
+        dict(name=['object','h4']),
+        dict(name=['object','link'])
+    ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
+    ]
+
+    def get_cover_url(self):
+        cover_url = None
+        index = 'http://www.bitacora.com.uy'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'imgtapa'})
+        if link_item:
+            cover_url = "http://www.bitacora.com.uy/"+link_item['src']
+        return cover_url
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
resources/recipes/biz_portal.recipe | 40 lines (new file)
@@ -0,0 +1,40 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1283848012(BasicNewsRecipe):
+    description = 'This is a recipe of BizPortal.co.il.'
+    cover_url = 'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'
+    title = u'BizPortal'
+    language = 'he'
+    __author__ = 'marbs'
+    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
+    simultaneous_downloads = 5
+    remove_javascript = True
+    timefmt = '[%a, %d %b, %Y]'
+    remove_empty_feeds = True
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_attributes = ['width']
+    # keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
+    remove_tags = [dict(name='img', attrs={'src':['images/bizlogo_nl.gif']})]
+    #preprocess_regexps = [
+    #    (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
+    #    ]
+
+    feeds = [(u'חדשות שוק ההון', u'http://www.bizportal.co.il/shukhahon/messRssUTF2.xml'),
+             (u'חדשות וול סטריט בעברית', u'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'),
+             (u'שיווק ופרסום', u'http://www.bizportal.co.il/shukhahon/messRssUTF145.xml'),
+             (u'משפט', u'http://www.bizportal.co.il/shukhahon/messRssUTF3.xml'),
+             (u'ניתוח טכני', u'http://www.bizportal.co.il/shukhahon/messRssUTF5.xml'),
+             (u'דיני עבודה ושכר', u'http://www.bizportal.co.il/shukhahon/messRssUTF6.xml'),
+             (u'מיסוי', u'http://www.bizportal.co.il/shukhahon/messRssUTF7.xml'),
+             (u'טאבו', u'http://www.bizportal.co.il/shukhahon/messRssUTF8.xml'),
+             (u'נדל"ן', u'http://www.bizportal.co.il/shukhahon/messRssUTF160.xml'),
+            ]
+
+    def print_version(self, url):
+        split1 = url.split("=")
+        print_url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02print.shtml?mid=' + split1[1]
+        return print_url
@@ -1,18 +1,22 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 mode: python -*-
+
+# Find the newest version of this recipe here:
+# https://github.com/consti/BrandEins-Recipe/raw/master/brandeins.recipe

 __license__ = 'GPL v3'
-__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>'
-__version__ = '0.95'
+__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
+__version__ = '0.96'

 ''' http://brandeins.de - Wirtschaftsmagazin '''
 import re
 import string
 from calibre.web.feeds.recipes import BasicNewsRecipe


 class BrandEins(BasicNewsRecipe):

-    title = u'Brand Eins'
+    title = u'brand eins'
     __author__ = 'Constantin Hofstetter'
     description = u'Wirtschaftsmagazin'
     publisher ='brandeins.de'

@@ -22,11 +26,14 @@ class BrandEins(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf-8'
     language = 'de'
+    publication_type = 'magazine'
+    needs_subscription = 'optional'

     # 2 is the last full magazine (default)
     # 1 is the newest (but not full)
     # 3 is one before 2 etc.
-    which_ausgabe = 2
+    # This value can be set via the username field.
+    default_issue = 2

     keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})]

@@ -61,17 +68,31 @@ class BrandEins(BasicNewsRecipe):

         return soup

+    def get_cover(self, soup):
+        cover_url = None
+        cover_item = soup.find('div', attrs = {'class': 'cover_image'})
+        if cover_item:
+            cover_url = 'http://www.brandeins.de/' + cover_item.img['src']
+        return cover_url
+
     def parse_index(self):
         feeds = []

         archive = "http://www.brandeins.de/archiv.html"

+        issue = self.default_issue
+        if self.username:
+            try:
+                issue = int(self.username)
+            except:
+                pass
+
         soup = self.index_to_soup(archive)
         latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
-        pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe]
+        pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
         url = pre_latest_issue.get('href', False)
-        # Get the title for the magazin - build it out of the title of the cover - take the issue and year;
-        self.title = "Brand Eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date')
+        # Get month and year of the magazine issue - build it out of the title of the cover
+        self.timefmt = " " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
         url = 'http://brandeins.de/'+url

         # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"

@@ -83,6 +104,7 @@ class BrandEins(BasicNewsRecipe):

     def brand_eins_parse_latest_issue(self, url):
         soup = self.index_to_soup(url)
+        self.cover_url = self.get_cover(soup)
         article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})]

         titles_and_articles = []

@@ -123,3 +145,4 @@ class BrandEins(BasicNewsRecipe):
             current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
         titles_and_articles.append([chapter_title, current_articles])
         return titles_and_articles
+
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'en'
|
|
||||||
masthead_url = 'http://www.cbc.ca/includes/gfx/cbcnews_logo_09.gif'
|
masthead_url = 'http://www.cbc.ca/includes/gfx/cbcnews_logo_09.gif'
|
||||||
cover_url = 'http://img692.imageshack.us/img692/2814/cbc.png'
|
cover_url = 'http://img692.imageshack.us/img692/2814/cbc.png'
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['storyhead','storybody']})]
|
keep_only_tags = [dict(name='div', attrs={'id':['storyhead','storybody']})]
|
||||||
|
resources/recipes/cnetjapan.recipe | 32 lines (new file)
@@ -0,0 +1,32 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CNetJapan(BasicNewsRecipe):
+    title = u'CNET Japan'
+    oldest_article = 3
+    max_articles_per_feed = 30
+    __author__ = 'Hiroshi Miura'
+
+    feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
+    language = 'ja'
+    encoding = 'Shift_JIS'
+    remove_javascript = True
+
+    preprocess_regexps = [
+        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
+         lambda match: '</body>'),
+        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
+         lambda match: '</body>'),
+        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+         lambda match: '<!-- removed -->'),
+    ]
+
+    remove_tags_before = dict(name="h2")
+    remove_tags = [
+        {'class':"social_bkm_share"},
+        {'class':"social_bkm_print"},
+        {'class':"block20 clearfix"},
+        dict(name="div",attrs={'id':'bookreview'}),
+    ]
+    remove_tags_after = {'class':"block20"}
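Each preprocess_regexps pair above deletes everything from a marker comment through </body>, trimming ad and footer markup before the HTML is parsed. A standalone sketch of how one such rule fires, on toy HTML with the same compile flags as the recipe:

    import re

    html = '<html><body>article text<!--AD_ELU_HEADER-->ads and footer</body></html>'
    rules = [(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
              lambda match: '</body>')]
    for pattern, replacement in rules:
        html = pattern.sub(replacement, html)  # replacement callable gets the match
    print html  # -> <html><body>article text</body></html>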
@@ -347,6 +347,7 @@ class Comics(BasicNewsRecipe):
                 title = strip_tag['title']
                 print 'title: ', title
                 current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
+        current_articles.reverse()
         return current_articles

     extra_css = '''
resources/recipes/cosmopolitan.recipe | 69 lines (new file)
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+Cosmopolitan
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Cosmopolitan'
+    __author__ = 'Gustavo Azambuja'
+    description = 'Revista Cosmopolitan, Edicion Espanola'
+    language = 'es'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 1
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+    conversion_options = {'linearize_tables': True}
+
+    oldest_article = 180
+    max_articles_per_feed = 100
+    keep_only_tags = [
+        dict(id=['contenido']),
+        dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name='table', attrs={'class':'pagenav'}),
+        dict(name=['object','link'])
+    ]
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        img {float:left; clear:both; margin:10px}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
+    ]
+
+    def preprocess_html(self, soup):
+        attribs = [ 'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                    del item[attrib]
+        return soup
+
+    def get_cover_url(self):
+        cover_url = None
+        index = 'http://www.cosmohispano.com/revista'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'img_portada'})
+        if link_item:
+            cover_url = "http://www.cosmohispano.com"+link_item['src']
+        return cover_url
61
resources/recipes/deredactie.recipe
Normal file
@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class deredactie(BasicNewsRecipe):
    title = u'Deredactie.be'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlehead'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlebody'}))
    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'id': 'story'}))
    remove_tags.append(dict(name = 'div', attrs = {'id': 'useractions'}))
    remove_tags.append(dict(name = 'hr'))

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
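    # Build the section list from the German-language navigation menu, then
    # collect each section's article links, skipping duplicate URLs.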
    def parse_index(self):
        categories = []
        catnames = {}
        soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
        for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
            a = elem.find('a', href=True)
            m = re.search('(?<=/)[^/]*$', a['href'])
            cat = str(m.group(0))
            categories.append(cat)
            catnames[cat] = a['title']
            self.log("found cat %s\n" % catnames[cat])

        feeds = []

        for cat in categories:
            articles = []
            soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
            for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
                skip_this_article = False
                url = a['href'].strip()
                if url.startswith('/'):
                    url = 'http://www.deredactie.be' + url
                myarticle = ({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
                for article in articles:
                    if article['url'] == url:
                        skip_this_article = True
                        self.log("SKIPPING DUP %s" % url)
                        break
                if skip_this_article:
                    continue
                articles.append(myarticle)
                self.log("Adding URL %s\n" % url)
            if articles:
                feeds.append((catnames[cat], articles))
        return feeds
76
resources/recipes/deutsche_welle_bs.recipe
Normal file
@ -0,0 +1,76 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_bs(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'Vijesti iz Njemacke i svijeta'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'bs'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                               body{font-family: Arial,sans1,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """
    # Map U+0110 to U+00D0 so fonts lacking the former still render it
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [
        (u'Politika'          , u'http://rss.dw-world.de/rdf/rss-bos-pol')
       ,(u'Evropa'            , u'http://rss.dw-world.de/rdf/rss-bos-eu' )
       ,(u'Kiosk'             , u'http://rss.dw-world.de/rdf/rss-bos-eu' )
       ,(u'Ekonomija i Nauka' , u'http://rss.dw-world.de/rdf/rss-bos-eco')
       ,(u'Kultura'           , u'http://rss.dw-world.de/rdf/rss-bos-cul')
       ,(u'Sport'             , u'http://rss.dw-world.de/rdf/rss-bos-sp' )
    ]
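    # DW serves a chrome-free print view keyed by the trailing article id.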
    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        # Unwrap anchors: keep plain text for text links and keep the image
        # (as a div) for image links, so no dead links remain in the output.
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
66
resources/recipes/deutsche_welle_en.recipe
Normal file
@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_en(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'News from Germany and World'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'en'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               body{font-family: Arial,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [(u'All news', u'http://rss.dw-world.de/rdf/rss-en-all')]

    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
66
resources/recipes/deutsche_welle_es.recipe
Normal file
@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_es(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'Noticias desde Alemania y mundo'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'es'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               body{font-family: Arial,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-sp-all')]

    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
74
resources/recipes/deutsche_welle_hr.recipe
Normal file
@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_hr(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'Vesti iz Njemacke i svijeta'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'hr'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                               body{font-family: Arial,sans1,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [
        (u'Svijet'    , u'http://rss.dw-world.de/rdf/rss-cro-svijet')
       ,(u'Europa'    , u'http://rss.dw-world.de/rdf/rss-cro-eu'    )
       ,(u'Njemacka'  , u'http://rss.dw-world.de/rdf/rss-cro-ger'   )
       ,(u'Vijesti'   , u'http://rss.dw-world.de/rdf/rss-cro-all'   )
    ]

    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
66
resources/recipes/deutsche_welle_pt.recipe
Normal file
@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_pt(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'Noticias desde Alemania y mundo'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'pt'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               body{font-family: Arial,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-br-all')]

    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
79
resources/recipes/deutsche_welle_sr.recipe
Normal file
@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class DeutscheWelle_sr(BasicNewsRecipe):
    title                 = 'Deutsche Welle'
    __author__            = 'Darko Miletic'
    description           = 'Vesti iz Nemacke i sveta'
    publisher             = 'Deutsche Welle'
    category              = 'news, politics, Germany'
    oldest_article        = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'sr'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css             = """
                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                               body{font-family: Arial,sans1,sans-serif}
                               img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
                               .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
                            """
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    conversion_options = {
        'comment'  : description
      , 'tags'     : category
      , 'publisher': publisher
      , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','form','base','meta','link'])
       ,dict(attrs={'class':'actionFooter'})
    ]
    keep_only_tags    = [dict(attrs={'class':'ArticleDetail detail'})]
    remove_attributes = ['height','width','onclick','border','lang']

    feeds = [
        (u'Politika'               , u'http://rss.dw-world.de/rdf/rss-ser-pol'       )
       ,(u'Srbija'                 , u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'   )
       ,(u'Region'                 , u'http://rss.dw-world.de/rdf/rss-ser-pol-region')
       ,(u'Evropa'                 , u'http://rss.dw-world.de/rdf/rss-ser-pol-eu'    )
       ,(u'Nemacka'                , u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'   )
       ,(u'Svet'                   , u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'   )
       ,(u'Pregled stampe'         , u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'   )
       ,(u'Nauka Tehnika Medicina' , u'http://rss.dw-world.de/rdf/rss-ser-science'   )
       ,(u'Kultura'                , u'http://rss.dw-world.de/rdf/rss-ser-cul'       )
    ]

    def print_version(self, url):
        artl = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + artl

    def preprocess_html(self, soup):
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    del item['href']
                    if item.has_key('target'):
                        del item['target']
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        return soup
80
resources/recipes/el_pais_uy.recipe
Normal file
@ -0,0 +1,80 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.elpais.com.uy/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'El Pais - Uruguay'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay y el resto del mundo'
    publisher = 'EL PAIS S.A.'
    category = 'news, politics, Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 2
    encoding = 'iso-8859-1'
    masthead_url = 'http://www.elpais.com.uy/Images/09/cabezal/logo_PDEP.png'
    publication_type = 'newspaper'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 200
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'id':'Contenido'})
    ]

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','form']),
        dict(name=['object','table'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
        body{font-family: Verdana,Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block;}
    '''
    feeds = [
        (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
        (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
        (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
        (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
        (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
        (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
        (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
        (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
    ]
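    # The cover is scraped from the front-page box that shows the day's
    # printed edition.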
    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elpais.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('div',attrs={'class':'boxmedio box257'})
        if link_item:
            cover_url = 'http://www.elpais.com.uy'+link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
22
resources/recipes/endgadget_ja.recipe
Normal file
@ -0,0 +1,22 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
japan.engadget.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class EndgadgetJapan(BasicNewsRecipe):
    title = u'Endgadget\u65e5\u672c\u7248'
    __author__ = 'Hiroshi Miura'
    language = 'ja'
    encoding = 'utf-8'
    cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
    masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
58
resources/recipes/eu_commission.recipe
Normal file
@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe

LANGUAGE = 'de'

def feedlink(num):
    # Each policy area has a numeric feed id on europa.eu/rapid; the lang
    # parameter selects the localized version of that feed.
    return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id='+\
           str(num)+'&lang='+ LANGUAGE

class EUCommissionPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
    language = LANGUAGE
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'pressReleaseContentMain'}))
    remove_tags = []

    feeds = [
        (u'Pressemitteilung des Tages',feedlink(64)),
        (u'Presidency',feedlink(137)),
        (u'Foreign affairs and security policy',feedlink(138)),
        (u'Agriculture and rural development',feedlink(139)),
        (u'Budget and financial programming',feedlink(140)),
        (u'Climate action',feedlink(141)),
        (u'Competition',feedlink(142)),
        (u'Development',feedlink(143)),
        (u'Digital agenda',feedlink(144)),
        (u'Economic and monetary affairs',feedlink(145)),
        (u'Education, culture, multilingualism and youth',feedlink(146)),
        (u'Employment, social affairs and inclusion',feedlink(147)),
        (u'Energy',feedlink(148)),
        (u'Enlargement and European neighbourhood policy',feedlink(149)),
        (u'Environment',feedlink(150)),
        (u'Health and consumer policy',feedlink(151)),
        (u'Home affairs',feedlink(152)),
        (u'Industry and entrepreneurship',feedlink(153)),
        (u'Inter-Institutional relations and administration',feedlink(154)),
        (u'Internal market and services',feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response',feedlink(156)),
        (u'Justice, fundamental rights and citizenship',feedlink(157)),
        (u'Maritime affairs and fisheries',feedlink(158)),
        (u'Regional policy',feedlink(159)),
        (u'Research and innovation',feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud',feedlink(161)),
        (u'Trade',feedlink(162)),
        (u'Transport',feedlink(163))
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
51
resources/recipes/european_voice.recipe
Normal file
@ -0,0 +1,51 @@
from calibre.web.feeds.news import BasicNewsRecipe

class EuropeanVoice(BasicNewsRecipe):
    title = u'European Voice'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
    feeds = [
        (u'Whole site', u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis', u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment', u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports', u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People', u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career', u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies', u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents', u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics', u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business', u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade', u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society', u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy', u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport', u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change', u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment', u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food', u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society', u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice', u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs', u'http://www.europeanvoice.com/Rss/27.xml')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url + '?bPrint=1'
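    # Skip subscriber-only articles instead of downloading a paywall stub.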
    def preprocess_html(self, soup):
        denied = soup.findAll(True, text='Subscribers')
        if denied:
            raise Exception('Article skipped: content is available to subscribers only')
        return soup
100
resources/recipes/freeway.recipe
Normal file
@ -0,0 +1,100 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class':'titulo_art_ppal'}),
        dict(name='img', attrs={'class':'recuadro'}),
        dict(name='td', attrs={'class':'txt_art_ppal'})
    ]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    def parse_index(self):
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds
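    # Articles live in td cells inside the 'ancho_articulos' blocks that
    # follow the #tbl_1 container on the magazine index page.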
    def art_parse_section(self, url):
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})

        current_articles = []
        for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}):
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs= {'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://freeway.com.uy'+url
                p = td.find('p', attrs= {'class': 'txt_articulos'})
                description = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                self.log('\t\t\t', description)
                current_articles.append({'title': title, 'url': url, 'description':description, 'date':''})

        return current_articles

    def preprocess_html(self, soup):
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'
@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__author__ = u'Marc T\xf6nsing'
+__author__ = u'Marc Toensing'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
     no_javascript = True
 
     feeds = [
+        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
         ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
         ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
         ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
 
     def get_article_url(self, article):
         return article.get('link') + '?print=1'
-
-
28
resources/recipes/german_gov.recipe
Normal file
@ -0,0 +1,28 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class GermanGovermentPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der Bundesregierung'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'h2'))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'textblack'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subtitle'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text'}))
    remove_tags = []
    feeds = [(u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        # The print view is reached by swapping the .html suffix for the
        # Druckansicht layout variant.
        m = re.search(r'^(.*).html$', url)
        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
 
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2010, Szing'
 __docformat__ = 'restructuredtext en'
 
 '''
@ -10,49 +10,52 @@ globeandmail.com
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class GlobeAndMail(BasicNewsRecipe):
-    title = u'Globe and Mail'
-    language = 'en_CA'
-    __author__ = 'Kovid Goyal'
+class AdvancedUserRecipe1287083651(BasicNewsRecipe):
+    title = u'Globe & Mail'
+    __license__ = 'GPL v3'
+    __author__ = 'Szing'
     oldest_article = 2
-    max_articles_per_feed = 10
     no_stylesheets = True
-    extra_css = '''
-        h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
-        h4 {margin-top: 0px;}
-        #byline { font-family: monospace; font-weight:bold; }
-        #placeline {font-weight:bold;}
-        #credit {margin-top:0px;}
-        .tag {font-size: 22pt;}'''
-    description = 'Canada\'s national newspaper'
-    keep_only_tags = [dict(name='article')]
-    remove_tags = [dict(name='aside'),
-        dict(name='footer'),
-        dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
-        dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
-        ]
-    feeds = [
-        (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
-        (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
-        (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
-        (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
-        (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
-        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
-        (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
-        (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
-        (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
-        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
-        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
-        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
-        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
-        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
-        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
-        ]
-
-    def get_article_url(self, article):
-        url = BasicNewsRecipe.get_article_url(self, article)
-        if '/video/' not in url:
-            return url
+    max_articles_per_feed = 100
+    encoding = 'utf8'
+    publisher = 'Globe & Mail'
+    language = 'en_CA'
+    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
+
+    feeds = [
+        (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
+        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
+        (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
+        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
+        (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
+        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
+        (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
+        (u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
+        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
+        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
+        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
+        (u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
+        (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
+    ]
+
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='h2', attrs={'id':'articletitle'}),
+        dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
+        dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
+        dict(name='id', attrs={'class':'article'}),
+        dict(name='table', attrs={'class':'todays-market'}),
+        dict(name='header', attrs={'id':'leadheader'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
+    ]
+
+    #this has to be here or the text in the article appears twice.
+    remove_tags_after = [dict(id='article')]
+
+    #Use the mobile version rather than the web version
+    def print_version(self, url):
+        return url + '&service=mobile'
47
resources/recipes/globes_co_il.recipe
Normal file
@ -0,0 +1,47 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    title = u'Globes'
    description = 'This is Globes.co.il.'
    __author__ = 'marbs'
    language = 'he'
    cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
    extra_css = 'img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width','style']

    feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
             (u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
             (u'וול סטריט ושווקי העולם', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
             (u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
             (u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
             (u'נתח שוק וצרכנות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
             (u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
             (u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
             (u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
             (u'קניון המניות - טור שבועי', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
             (u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]

    def print_version(self, url):
        split1 = url.split("=")
        print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + split1[1]
        return print_url
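    # The print view wraps the article in layout tables; drop the black
    # separator rows before conversion.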
    def preprocess_html(self, soup):
        soup.find('tr',attrs={'bgcolor':'black'}).findPrevious('tr').extract()
        soup.find('tr',attrs={'bgcolor':'black'}).extract()
        return soup

    def fixChars(self, string):
        # Replace lsquo (\x91) with a proper left single quotation mark
        fixed = re.sub(u'\x91', u'\u2018', string)
        return fixed
41
resources/recipes/handelsblatt.recipe
Normal file
@ -0,0 +1,41 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Handelsblatt(BasicNewsRecipe):
    title = u'Handelsblatt'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
    remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]

    feeds = [
        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
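    # The numeric id after the semicolon in the URL selects the print layout.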
    def print_version(self, url):
        m = re.search('(?<=;)[0-9]*', url)
        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
38
resources/recipes/hannoversche_zeitung.recipe
Normal file
@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1287519083(BasicNewsRecipe):
    title = u'Hannoversche Allgemeine Zeitung'
    __author__ = 'Artemis'
    oldest_article = 1
    max_articles_per_feed = 30
    language = 'de'
    no_stylesheets = True

    feeds = [
        #(u'Schlagzeilen', u'http://www.haz.de/rss/feed/haz_schlagzeilen'),
        (u'Politik', u'http://www.haz.de/rss/feed/haz_politik'),
        (u'Wirtschaft', u'http://www.haz.de/rss/feed/haz_wirtschaft'),
        (u'Panorama', u'http://www.haz.de/rss/feed/haz_panorama'),
        (u'Wissen', u'http://www.haz.de/rss/feed/haz_wissen'),
        (u'Kultur', u'http://www.haz.de/rss/feed/haz_kultur'),
        (u'Sp\xe4tvorstellung', u'http://www.haz.de/rss/feed/haz_spaetvorstellung'),
        (u'Hannover & Region', u'http://www.haz.de/rss/feed/haz_hannoverregion'),
        (u'Netzgefl\xfcster', u'http://www.haz.de/rss/feed/haz_netzgefluester'),
        (u'Meinung', u'http://www.haz.de/rss/feed/haz_meinung'),
        (u'ZiSH', u'http://www.haz.de/rss/feed/haz_zish'),
        (u'Medien', u'http://www.haz.de/rss/feed/haz_medien'),
        #(u'Sport', u'http://www.haz.de/rss/feed/haz_sport'),
        #(u'Hannover 96', u'http://www.haz.de/rss/feed/haz_hannover96')
    ]

    remove_tags_before = dict(id='modul_artikel')
    remove_tags_after  = dict(id='articlecontent')

    remove_tags = [
        dict(id='articlesidebar'),
        dict(name='div', attrs={'class':['articlecomment',
            'articlebookmark', 'teaser_anzeige', 'teaser_umfrage',
            'navigation', 'subnavigation']})
    ]
@ -13,7 +13,6 @@ class IrishTimes(BasicNewsRecipe):
     language = 'en_IE'
     timefmt = ' (%A, %B %d, %Y)'
 
-
     oldest_article = 3
     no_stylesheets = True
     simultaneous_downloads= 1
@ -33,13 +32,13 @@ class IrishTimes(BasicNewsRecipe):
         ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
     ]
 
     def print_version(self, url):
         if url.count('rss.feedsportal.com'):
-            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
+            u = 'http://www.irishtimes.com' + \
+                (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
         else:
             u = url.replace('.html','_pf.html')
         return u
 
     def get_article_url(self, article):
         return article.link
26
resources/recipes/jijinews.recipe
Normal file
@ -0,0 +1,26 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.jiji.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class JijiDotCom(BasicNewsRecipe):
    title = u'\u6642\u4e8b\u901a\u4fe1'
    __author__ = 'Hiroshi Miura'
    description = 'World News from Jiji Press'
    publisher = 'Jiji Press Ltd.'
    category = 'news'
    encoding = 'utf-8'
    oldest_article = 6
    max_articles_per_feed = 100
    language = 'ja'
    cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
    masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif'

    feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
    remove_tags_after = dict(id="ad_google")
48
resources/recipes/la_diaria.recipe
Normal file
@ -0,0 +1,48 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
ladiaria.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'La Diaria'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['article'])]
    remove_tags = [
        dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
        dict(name='div', attrs={'id':'discussion'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Articulos', u'http://ladiaria.com/feeds/articulos')
    ]

    def get_cover_url(self):
        return 'http://ladiaria.com/edicion/imagenportada/'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@ -54,10 +54,7 @@ class LaJornada_mx(BasicNewsRecipe):
     preprocess_regexps = [
         (re.compile( r'<div class="inicial">(.*)</div><p class="s-s">'
             ,re.DOTALL|re.IGNORECASE)
-            ,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">'),
-        (re.compile( r'<q>(.*?)</q>'
-            ,re.DOTALL|re.IGNORECASE)
-            ,lambda match: '"' + match.group(1) + '"')
+            ,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
     ]
 
     keep_only_tags = [
@ -8,7 +8,7 @@ from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class LaRazon_Bol(BasicNewsRecipe):
-    title = 'La Razón - Bolivia'
+    title = u'La Razón - Bolivia'
     __author__ = 'Darko Miletic'
     description = 'El diario nacional de Bolivia'
     publisher = 'Praxsis S.R.L.'
@ -20,11 +20,14 @@ class Lanacion(BasicNewsRecipe):
     publication_type = 'newspaper'
     remove_empty_feeds = True
     masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
     extra_css = """ h1{font-family: Georgia,serif}
+                    h2{color: #626262}
                     body{font-family: Arial,sans-serif}
-                    img{margin-top: 0.5em; margin-bottom: 0.2em}
+                    img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
+                    .notaFecha{color: #808080}
                     .notaEpigrafe{font-size: x-small}
-                    .topNota h1{font-family: Arial,sans-serif} """
+                    .topNota h1{font-family: Arial,sans-serif}
+                """
 
     conversion_options = {
@ -38,12 +41,12 @@ class Lanacion(BasicNewsRecipe):
     remove_tags = [
         dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
         ,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
-        ,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
-        ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix']})
-        ,dict(name=['iframe','embed','object','form','base','hr'])
+        ,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
+        ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
+        ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
     ]
     remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
-    remove_attributes = ['height','width','visible']
+    remove_attributes = ['height','width','visible','onclick','data-count','name']
 
     feeds = [
         (u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
26
resources/recipes/mainichi.recipe
Normal file
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.mainichi.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MainichiDailyNews(BasicNewsRecipe):
+    title          = u'\u6bce\u65e5\u65b0\u805e'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description    = 'Japanese traditional newspaper Mainichi Daily News'
+    publisher      = 'Mainichi Daily News'
+    category       = 'news, japan'
+    language       = 'ja'
+
+    feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
+
+    remove_tags_before = {'class':"NewsTitle"}
+    remove_tags        = [{'class':"RelatedArticle"}]
+    remove_tags_after  = {'class':"Credit"}
18
resources/recipes/mainichi_it_news.recipe
Normal file
@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MainichiDailyITNews(BasicNewsRecipe):
+    title          = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    description    = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
+    publisher      = 'Mainichi Daily News'
+    category       = 'news, Japan, IT, Electronics'
+    language       = 'ja'
+
+    feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
+
+    remove_tags_before = {'class':"NewsTitle"}
+    remove_tags        = [{'class':"RelatedArticle"}]
+    remove_tags_after  = {'class':"Credit"}
35
resources/recipes/marctv.recipe
Normal file
@@ -0,0 +1,35 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch MarcTV.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MarcTVde(BasicNewsRecipe):
+
+    title = 'Marc Toensings Visionen'
+
+    description = 'Marc Toensings Visionen'
+
+    language = 'de'
+
+    __author__ = 'Marc Toensing'
+
+    max_articles_per_feed = 40
+
+    oldest_article = 665
+
+    use_embedded_content = False
+
+    remove_tags = []
+
+    keep_only_tags = dict(name='div', attrs={'class':["content"]})
+
+    feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]
+
+    extra_css = '.#wrapper .entry p img{width:620px; height: 270px;}'
+
+    def get_cover_url(self):
+        return 'http://marctv.de/marctv.png'
22
resources/recipes/matichon.recipe
Normal file
@@ -0,0 +1,22 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1290412756(BasicNewsRecipe):
+    __author__ = 'Anat R.'
+    title = u'Matichon'
+    oldest_article = 7
+    language = 'th'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    feeds = [(u'News', u'http://www.matichon.co.th/rss/news_article.xml'),
+             (u'Columns', u'http://www.matichon.co.th/rss/news_columns.xml'),
+             (u'Politics', u'http://www.matichon.co.th/rss/news_politic.xml'),
+             (u'Business', u'http://www.matichon.co.th/rss/news_business.xml'),
+             (u'World', u'http://www.matichon.co.th/rss/news_world.xml'),
+             (u'Sports', u'http://www.matichon.co.th/rss/news_sport.xml'),
+             (u'Entertainment', u'http://www.matichon.co.th/rss/news_entertainment.xml')]
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'h3', attrs = {'class' : 'read-h'}))
+    keep_only_tags.append(dict(name = 'p', attrs = {'class' : 'read-time'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'}))
@@ -3,13 +3,28 @@ __copyright__ = '2010, Eddie Lau'
 '''
 modified from Singtao Toronto calibre recipe by rty
 Change Log:
+2010/11/22: add English section, remove eco-news section which is not updated daily, correct
+            ordering of articles
+2010/11/12: add news image and eco-news section
+2010/11/08: add parsing of finance section
+2010/11/06: temporary work-around for Kindle device having no capability to display unicode
+            in section/article list.
 2010/10/31: skip repeated articles in section pages
 '''

-import datetime
+import os, datetime, re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre import __appname__, strftime
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.date import now as nowf

-class AdvancedUserRecipe1278063072(BasicNewsRecipe):
+class MPHKRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     oldest_article = 1
     max_articles_per_feed = 100
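The new `from contextlib import nested` import exists to support the `with nested(open(opf_path, 'wb'), open(ncx_path, 'wb'))` call at the end of the `create_opf` override further down; `nested` is the Python 2 idiom for entering two context managers in a single `with` statement.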
@@ -24,27 +39,131 @@ class MPHKRecipe(BasicNewsRecipe):
     encoding = 'Big5-HKSCS'
     recursions = 0
     conversion_options = {'linearize_tables':True}
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
+    #extra_css = 'img {float:right; margin:4px;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
+                      #dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
+                      dict(attrs={'class':['photo']}),
+                      dict(attrs={'id':['newscontent']}),
                       dict(attrs={'id':['newscontent01','newscontent02']})]
+    remove_tags = [dict(name='style'),
+                   dict(attrs={'id':['newscontent135']})] # for the finance page
+    remove_attributes = ['width']
+    preprocess_regexps = [
+        (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
+         lambda match: '<h1>'),
+        (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
+         lambda match: '</h1>'),
+    ]
+
+    def image_url_processor(cls, baseurl, url):
+        # trick: break the url at the first occurrence of a digit, add an
+        # additional '_' at the front
+        # not working, may need to move this to preprocess_html() method
+        #minIdx = 10000
+        #for ch in '0123456789':
+        #    i = url.find(ch)
+        #    if i >= 0 and i < minIdx:
+        #        minIdx = i
+        #return url[0:minIdx] + '_' + url[minIdx+1:]
+        return url
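If the disabled digit search above is ever revived, it can be collapsed to a single regex substitution. A minimal sketch, assuming the intent is to replace the first digit in the image url with '_' exactly as the commented-out slicing would:

    import re

    def image_url_processor(cls, baseurl, url):
        # replace the first digit with '_', i.e. url[:i] + '_' + url[i+1:]
        # where i is the index of the first digit, if any
        return re.sub(r'\d', '_', url, count=1)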
     def get_fetchdate(self):
         dt_utc = datetime.datetime.utcnow()
-        # convert UTC to local hk time - at around HKT 5.30am, all news are available
-        dt_local = dt_utc - datetime.timedelta(-2.5/24)
+        # convert UTC to local hk time - at around HKT 6.00am, all news are available
+        dt_local = dt_utc - datetime.timedelta(-2.0/24)
         return dt_local.strftime("%Y%m%d")
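A note on the odd-looking delta: subtracting `datetime.timedelta(-2.0/24)` adds two hours, so the `%Y%m%d` string only rolls over to a new date at 22:00 UTC, which is 06:00 in Hong Kong (UTC+8), exactly the cut-off the comment describes. For example, 21:00 UTC on Nov 22 still yields '20101122', while 22:30 UTC already yields '20101123'.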
     def parse_index(self):
         feeds = []
         dateStr = self.get_fetchdate()
-        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
-            articles = self.parse_section(url)
-            if articles:
-                feeds.append((title, articles))
+        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                           (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+                           (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
+                           (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+                           (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
+                           (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
+                           ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+                           (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
+                           (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+                           (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        # special - finance
+        fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
+        if fin_articles:
+            feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+        # special - eco-friendly
+        # eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
+        # if eco_articles:
+        #     feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
+        # special - entertainment
+        #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+        #if ent_articles:
+        #    feeds.append(('Entertainment', ent_articles))
         return feeds

     def parse_section(self, url):
+        dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
+        current_articles = []
+        included_urls = []
+        divs.reverse()
+        for i in divs:
+            a = i.find('a', href = True)
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            url = 'http://news.mingpao.com/' + dateStr + '/' + url
+            if url not in included_urls and url.rfind('Redirect') == -1:
+                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+                included_urls.append(url)
+        current_articles.reverse()
+        return current_articles

+    def parse_fin_section(self, url):
         dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href= True)
+        current_articles = []
+        for i in a:
+            url = i.get('href', False)
+            if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
+                title = self.tag_to_string(i)
+                url = 'http://www.mpfinance.com/cfm/' + url
+                current_articles.append({'title': title, 'url': url, 'description':''})
+        return current_articles
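The filter in `parse_fin_section` reads more easily once you recall that `str.rfind` returns -1 when the substring is absent: `not url.rfind(dateStr) == -1 and url.rfind('index') == -1` keeps links whose href contains today's date string while skipping the section index pages themselves.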
+    def parse_eco_section(self, url):
         soup = self.index_to_soup(url)
         divs = soup.findAll(attrs={'class': ['bullet']})
         current_articles = []
@@ -53,9 +172,162 @@ class MPHKRecipe(BasicNewsRecipe):
             a = i.find('a', href = True)
             title = self.tag_to_string(a)
             url = a.get('href', False)
-            url = 'http://news.mingpao.com/' + dateStr + '/' +url
-            if url not in included_urls:
+            url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
+            if url not in included_urls and url.rfind('Redirect') == -1:
                 current_articles.append({'title': title, 'url': url, 'description':''})
                 included_urls.append(url)
         return current_articles
+
+    #def parse_ent_section(self, url):
+    #    dateStr = self.get_fetchdate()
+    #    soup = self.index_to_soup(url)
+    #    a = soup.findAll('a', href=True)
+    #    current_articles = []
+    #    included_urls = []
+    #    for i in a:
+    #        title = self.tag_to_string(i)
+    #        url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
+    #        if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
+    #            current_articles.append({'title': title, 'url': url, 'description': ''})
+    #    return current_articles
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(width=True):
+            del item['width']
+        for item in soup.findAll(align=True):
+            del item['align']   # e.g. align="absmiddle"
+        return soup
+    def create_opf(self, feeds, dir=None):
+        #super(MPHKRecipe,self).create_opf(feeds, dir)
+        if dir is None:
+            dir = self.output_dir
+        title = self.short_title()
+        if self.output_profile.periodical_date_in_title:
+            title += strftime(self.timefmt)
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        mi.publication_type = self.publication_type+':'+self.short_title()
+        mi.timestamp = nowf()
+        mi.comments = self.description
+        if not isinstance(mi.comments, unicode):
+            mi.comments = mi.comments.decode('utf-8', 'replace')
+        mi.pubdate = nowf()
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath = pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
+
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}
+
+        def feed_index(num, parent):
+            f = feeds[num]
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    adir = 'feed_%d/article_%d/'%(num, j)
+                    auth = a.author
+                    if not auth:
+                        auth = None
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
+                    else:
+                        desc = self.description_limiter(desc)
+                    entries.append('%sindex.html'%adir)
+                    po = self.play_order_map.get(entries[-1], None)
+                    if po is None:
+                        self.play_order_counter += 1
+                        po = self.play_order_counter
+                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+                                    play_order=po, author=auth, description=desc)
+                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+                    for sp in a.sub_pages:
+                        prefix = os.path.commonprefix([opf_path, sp])
+                        relp = sp[len(prefix):]
+                        entries.append(relp.replace(os.sep, '/'))
+                        last = sp
+
+                    if os.path.exists(last):
+                        with open(last, 'rb') as fi:
+                            src = fi.read().decode('utf-8')
+                        soup = BeautifulSoup(src)
+                        body = soup.find('body')
+                        if body is not None:
+                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
+                            templ = self.navbar.generate(True, num, j, len(f),
+                                                         not self.has_single_feed,
+                                                         a.orig_url, __appname__, prefix=prefix,
+                                                         center=self.center_navbar)
+                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+                            body.insert(len(body.contents), elem)
+                            with open(last, 'wb') as fi:
+                                fi.write(unicode(soup).encode('utf-8'))
+        if len(feeds) == 0:
+            raise Exception('All feeds are empty, aborting.')
+
+        if len(feeds) > 1:
+            for i, f in enumerate(feeds):
+                entries.append('feed_%d/index.html'%i)
+                po = self.play_order_map.get(entries[-1], None)
+                if po is None:
+                    self.play_order_counter += 1
+                    po = self.play_order_counter
+                auth = getattr(f, 'author', None)
+                if not auth:
+                    auth = None
+                desc = getattr(f, 'description', None)
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                                           f.title, play_order=po, description=desc, author=auth))
+        else:
+            entries.append('feed_%d/index.html'%0)
+            feed_index(0, toc)
+
+        for i, p in enumerate(entries):
+            entries[i] = os.path.join(dir, p.replace('/', os.sep))
+        opf.create_spine(entries)
+        opf.set_toc(toc)
+
+        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+            opf.render(opf_file, ncx_file)
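This `create_opf` override appears to be a near-verbatim copy of the stock `BasicNewsRecipe` implementation, with the `super(...)` call left commented out; carrying a local copy presumably gives the recipe a place to adjust OPF/NCX generation, in line with the 2010/11/06 change-log entry about unicode section and article titles on the Kindle.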
56
resources/recipes/montevideo_com.recipe
Normal file
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+http://www.montevideo.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title = 'Montevideo COMM'
+    __author__ = 'Gustavo Azambuja'
+    description = 'Noticias de Uruguay'
+    language = 'es'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf-8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['txt'])]
+    remove_tags = [
+        dict(name=['object','link'])
+    ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
+        (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
+        (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
+        (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
+        # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
+        # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
+        (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
+    ]
+
+    def get_cover_url(self):
+        return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
@@ -1,31 +1,33 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-moscowtimes.ru
+www.themoscowtimes.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

 class Moscowtimes(BasicNewsRecipe):
-    title = u'The Moscow Times'
+    title = 'The Moscow Times'
     __author__ = 'Darko Miletic and Sujata Raman'
-    description = 'News from Russia'
-    language = 'en'
-    lang = 'en'
-    oldest_article = 7
+    description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).'
+    category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg'
+    publisher = 'The Moscow Times'
+    language = 'en'
+    oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    #encoding = 'utf-8'
-    encoding = 'cp1252'
-    remove_javascript = True
+    remove_empty_feeds = True
+    encoding = 'cp1251'
+    masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
+    publication_type = 'newspaper'

     conversion_options = {
         'comment' : description
-        , 'language' : lang
-        }
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+        }

     extra_css = '''
         h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
@@ -35,39 +37,37 @@ class Moscowtimes(BasicNewsRecipe):
         .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
         '''
     feeds = [
-        (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
-        (u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
-        (u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
-        (u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
-        (u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
-        (u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
+        (u'Top Stories'   , u'http://www.themoscowtimes.com/rss/top'     )
+        ,(u'Current Issue' , u'http://www.themoscowtimes.com/rss/issue'   )
+        ,(u'News'          , u'http://www.themoscowtimes.com/rss/news'    )
+        ,(u'Business'      , u'http://www.themoscowtimes.com/rss/business')
+        ,(u'Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'     )
+        ,(u'Opinion'       , u'http://www.themoscowtimes.com/rss/opinion' )
     ]

-    keep_only_tags = [
-        dict(name='div', attrs={'class':['newstextblock']})
-    ]
+    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

     remove_tags = [
-        dict(name='div', attrs={'class':['photo_nav']})
-    ]
+        dict(name='div', attrs={'class':['photo_nav','phototext']})
+        ,dict(name=['iframe','meta','base','link','embed','object'])
+    ]

     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
-        soup.head.insert(0,mtag)
-        return self.adeify_images(soup)
+        for lnk in soup.findAll('a'):
+            if lnk.string is not None:
+                ind = self.tag_to_string(lnk)
+                lnk.replaceWith(ind)
+        return soup
+
+    def print_version(self, url):
+        return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/')
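Two things worth noting in the rewritten methods: `preprocess_html` now flattens every `<a>` whose string is set into plain text via `lnk.replaceWith(ind)`, so article bodies are not littered with live links, and the new `print_version` fetches the printer-friendly page simply by splicing `/print/` into the path after the host.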
     def get_cover_url(self):
+        cover_url = None
         href = 'http://www.themoscowtimes.com/pdf/'
         soup = self.index_to_soup(href)
         div = soup.find('div',attrs={'class':'left'})
-        a = div.find('a')
-        print a
+        if div:
+            a = div.find('a')
         if a :
-            cover_url = a.img['src']
+            cover_url = 'http://www.themoscowtimes.com' + a.img['src']
         return cover_url
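The reworked `get_cover_url` drops the stray `print a` debug statement, guards the lookup with `if div:`, and prefixes the site root because the `<img src>` on the pdf page is site-relative. One latent wrinkle remains: if the div is ever missing, `a` is unbound by the time `if a :` runs, so initialising `a = None` alongside `cover_url = None` would make the fallback truly safe.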
24
resources/recipes/msnsankei.recipe
Normal file
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+sankei.jp.msn.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MSNSankeiNewsProduct(BasicNewsRecipe):
+    title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
+    __author__ = 'Hiroshi Miura'
+    description = 'Products release from Japan'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    encoding = 'Shift_JIS'
+    language = 'ja'
+
+    feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
+
+    remove_tags_before = dict(id="__r_article_title__")
+    remove_tags_after = dict(id="ajax_release_news")
+    remove_tags = [{'class':"parent chromeCustom6G"}]
68
resources/recipes/newsweek_polska.recipe
Normal file
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Newsweek(BasicNewsRecipe):
+    EDITION = 0
+
+    title = u'Newsweek Polska'
+    __author__ = 'Mateusz Kielar'
+    description = 'Weekly magazine'
+    encoding = 'utf-8'
+    no_stylesheets = True
+    language = 'en'
+    remove_javascript = True
+
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))
+
+    remove_tags = []
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))
+
+    extra_css = '''
+        .body {font-size: small}
+        .author {font-size: x-small}
+        .lead {font-size: x-small}
+        .title{font-size: x-large; font-weight: bold}
+    '''
+
+    def print_version(self, url):
+        return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
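A hypothetical example of what `print_version` does, assuming `find_last_full_issue` below has set `EDITION` to 1234: `http://www.newsweek.pl/artykuly/wydanie/1234/some-article` becomes `http://www.newsweek.pl/artykuly/some-article/print`. Two small oddities: `language = 'en'` looks like an oversight for a Polish weekly, and the `a.name in "div"` test in `find_articles` further down is presumably meant as `a.name == 'div'`, since substring membership would also match an `<i>` tag, for example.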
+    def find_last_full_issue(self):
+        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
+        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
+        page = self.index_to_soup(issue)
+        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
+        page = self.index_to_soup(issue)
+        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+
+    def parse_index(self):
+        self.find_last_full_issue()
+        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
+        img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
+        self.cover_url = img['src']
+        feeds = []
+        parent = soup.find(id='content-left-big')
+        for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
+            section = self.tag_to_string(txt).capitalize()
+            articles = list(self.find_articles(txt))
+            feeds.append((section, articles))
+        return feeds
+
+    def find_articles(self, txt):
+        for a in txt.findAllNext( attrs={'class':['strong','hr']}):
+            if a.name in "div":
+                break
+            yield {
+                'title' : self.tag_to_string(a),
+                'url' : 'http://www.newsweek.pl'+a['href'],
+                'date' : '',
+                'description' : ''
+            }
60
resources/recipes/nikkei_free.recipe
Normal file
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NikkeiNet(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+
+    feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
+              (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
+              (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
+              (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
+              (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
+              (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
+              (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
+              (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
+              (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
+              (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
+              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
+              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
+              (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
+              (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
+              (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
+              (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
+              (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
+              (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
+              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
+              (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
+              (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
+              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
+              (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
+              (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
+              (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
+            ]
+
+    remove_tags_before = dict(id="CONTENTS")
+    remove_tags = [
+        dict(name="form"),
+        {'class':"cmn-hide"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
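Every feed here, and in the subscriber recipes that follow, uses the same gateway pattern, `http://www.zou3.net/php/rss/nikkei2rss.php?head=<section>`; zou3.net appears to be a third-party RSS bridge for nikkei.com section headlines, with the `head=` parameter naming the section (sangyo, seiji, market, and so on).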
125
resources/recipes/nikkei_sub.recipe
Normal file
@@ -0,0 +1,125 @@
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_subscription(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+    remove_javascript = False
+    temp_files = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email'] = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            raw = response3.get_data()
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
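Two mechanize workarounds in `get_browser` deserve a closer look. The paired `set_data()` calls rewrite the login page so the disabled image input is wrapped in an HTML comment before the form is parsed; schematically, `<input id="j_id48" ... src=".../gm_home_on.gif" />` becomes `<!--  ... -->`. And since the site's `redirectFlag` cookie is normally set from JavaScript, the recipe scrapes the value out of the page with `re.search(r"var checkValue = '(\d+)';", raw, re.M)`, writes it into a temporary LWP-format cookie file, loads that file into the cookie jar, and only then re-submits the redirect form. The same sequence is repeated verbatim in the three section-specific subscriber recipes below.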
+    feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
+              (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
+              (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
+              (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
+              (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
+              (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
+              (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
+              (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
+              (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
+              (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
+              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
+              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
+              (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
+              (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
+              (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
+              (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
+              (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
+              (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
+              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
+              (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
+              (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
+              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
+              (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
+              (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
+              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
+              (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
+            ]
109
resources/recipes/nikkei_sub_economy.recipe
Normal file
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+class NikkeiNet_sub_economy(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+    remove_javascript = False
+    temp_files = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [ (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
+              (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
+              (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
+              (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
+              (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
+              (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
+              (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
+              (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
+            ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email'] = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            raw = response3.get_data()
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
108
resources/recipes/nikkei_sub_industry.recipe
Normal file
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_sub_industory(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+    remove_javascript = False
+    temp_files = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
+              (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
+              (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
+              (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
+              (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
+            ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email'] = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            raw = response3.get_data()
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
109
resources/recipes/nikkei_sub_life.recipe
Normal file
@@ -0,0 +1,109 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile


class NikkeiNet_sub_life(BasicNewsRecipe):
    title                 = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
    __author__            = 'Hiroshi Miura'
    description           = 'News and current market affairs from Japan'
    cover_url             = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url          = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription    = True
    oldest_article        = 2
    max_articles_per_feed = 20
    language              = 'ja'
    remove_javascript     = False
    temp_files            = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
       {'class':"cmn-article_keyword cmn-clearfix"},
       {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
       (u'\u304f\u3089\u3057',             u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
       (u'\u30b9\u30dd\u30fc\u30c4',       u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
       (u'\u793e\u4f1a',                   u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
       (u'\u30a8\u30b3',                   u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
       (u'\u5065\u5eb7',                   u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
       (u'\u7279\u96c6',                   u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
       (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        #br.set_debug_http(True)
        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)

        if self.username is not None and self.password is not None:
            # open the login form
            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
            response = br.response()

            # comment out the disabled input that causes an error in mechanize
            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
            br.set_response(response)

            # fill in and send the login form
            br.select_form(name='LA0010Form01')
            br['LA0010Form01:LA0010Email']    = self.username
            br['LA0010Form01:LA0010Password'] = self.password
            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
            br.submit()
            br.response()

            # open the news site
            br.open('http://www.nikkei.com/')
            br.response()

            # the site force-redirects through an auto-submitted form
            br.select_form(nr=0)
            br.submit()
            response3 = br.response()
            raw = response3.get_data()

            # grab the cookie that would normally be set by JavaScript and
            # load it into the cookie jar via a hand-written LWP cookies file
            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
            br.select_form(nr=0)

            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].close()
            cj.load(self.temp_files[-1].name)

            br.submit()

        #br.set_debug_http(False)
        #br.set_debug_redirects(False)
        #br.set_debug_responses(False)
        return br
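A second mechanize workaround in get_browser() above: the login page contains a disabled input that mechanize's form parser chokes on, so the recipe rewrites the response body to hide the offending markup behind an HTML comment before selecting the form. As a reusable helper it would look roughly like this (hypothetical function name; the marker strings below are the ones the recipe targets):

    def hide_markup_from_mechanize(br, start_marker, end_marker):
        # wrap everything between the two markers in an HTML comment so
        # the form parser never sees it, then re-install the patched page
        response = br.response()
        html = response.get_data()
        html = html.replace(start_marker, '<!-- ')
        html = html.replace(end_marker, ' -->')
        response.set_data(html)
        br.set_response(response)

    # usage matching the recipe:
    # hide_markup_from_mechanize(br, '<input id="j_id48"', 'gm_home_on.gif" />')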
102
resources/recipes/nikkei_sub_main.recipe
Normal file
@@ -0,0 +1,102 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile


class NikkeiNet_sub_main(BasicNewsRecipe):
    title                 = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
    __author__            = 'Hiroshi Miura'
    description           = 'News and current market affairs from Japan'
    cover_url             = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url          = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription    = True
    oldest_article        = 2
    max_articles_per_feed = 20
    language              = 'ja'
    remove_javascript     = False
    temp_files            = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
       {'class':"cmn-article_keyword cmn-clearfix"},
       {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [ (u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        #br.set_debug_http(True)
        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)

        if self.username is not None and self.password is not None:
            # open the login form
            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
            response = br.response()

            # comment out the disabled input that causes an error in mechanize
            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
            br.set_response(response)

            # fill in and send the login form
            br.select_form(name='LA0010Form01')
            br['LA0010Form01:LA0010Email']    = self.username
            br['LA0010Form01:LA0010Password'] = self.password
            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
            br.submit()
            br.response()

            # open the news site
            br.open('http://www.nikkei.com/')
            br.response()

            # the site force-redirects through an auto-submitted form
            br.select_form(nr=0)
            br.submit()
            response3 = br.response()
            raw = response3.get_data()

            # grab the cookie that would normally be set by JavaScript and
            # load it into the cookie jar via a hand-written LWP cookies file
            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
            br.select_form(nr=0)

            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].close()
            cj.load(self.temp_files[-1].name)

            br.submit()

        #br.set_debug_http(False)
        #br.set_debug_redirects(False)
        #br.set_debug_responses(False)
        return br
109
resources/recipes/nikkei_sub_sports.recipe
Normal file
@@ -0,0 +1,109 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile


class NikkeiNet_sub_sports(BasicNewsRecipe):
    title                 = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
    __author__            = 'Hiroshi Miura'
    description           = 'News and current market affairs from Japan'
    cover_url             = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url          = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription    = True
    oldest_article        = 2
    max_articles_per_feed = 20
    language              = 'ja'
    remove_javascript     = False
    temp_files            = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
       {'class':"cmn-article_keyword cmn-clearfix"},
       {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5',       u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2',             u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
       (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac',             u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        #br.set_debug_http(True)
        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)

        if self.username is not None and self.password is not None:
            # open the login form
            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
            response = br.response()

            # comment out the disabled input that causes an error in mechanize
            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
            br.set_response(response)

            # fill in and send the login form
            br.select_form(name='LA0010Form01')
            br['LA0010Form01:LA0010Email']    = self.username
            br['LA0010Form01:LA0010Password'] = self.password
            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
            br.submit()
            br.response()

            # open the news site
            br.open('http://www.nikkei.com/')
            br.response()

            # the site force-redirects through an auto-submitted form
            br.select_form(nr=0)
            br.submit()
            response3 = br.response()
            raw = response3.get_data()

            # grab the cookie that would normally be set by JavaScript and
            # load it into the cookie jar via a hand-written LWP cookies file
            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
            br.select_form(nr=0)

            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            self.temp_files[-1].close()
            cj.load(self.temp_files[-1].name)

            br.submit()

        #br.set_debug_http(False)
        #br.set_debug_redirects(False)
        #br.set_debug_responses(False)
        return br
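The three nikkei recipes above differ only in class name, title and feed list; the login flow is repeated verbatim in each. A shared base class would remove the duplication. A sketch of that refactoring (hypothetical; not how the shipped recipes are organized):

    from calibre.web.feeds.recipes import BasicNewsRecipe

    class NikkeiNetBase(BasicNewsRecipe):
        __author__            = 'Hiroshi Miura'
        language              = 'ja'
        needs_subscription    = True
        oldest_article        = 2
        max_articles_per_feed = 20

        def get_browser(self):
            br = BasicNewsRecipe.get_browser()
            # ... the shared login flow from the recipes above ...
            return br

    class NikkeiNetSubSports(NikkeiNetBase):
        title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
        feeds = [(u'\u30b9\u30dd\u30fc\u30c4',
                  u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports')]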
36
resources/recipes/now_toronto.recipe
Normal file
@@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on Lars Jacob's Taz Digiabo recipe

__license__   = 'GPL v3'
__copyright__ = '2010, Starson17'

import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class NowToronto(BasicNewsRecipe):
    title       = u'Now Toronto'
    description = u'Now Toronto'
    __author__  = 'Starson17'
    language    = 'en_CA'
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name = 'feedburner:origlink').string
        f = urllib2.urlopen(url)
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0, _('downloading epub'))
        tmp.write(f.read())
        tmp.close()
        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0, _('extracting epub'))
        zfile.extractall(self.output_dir)
        zfile.close()
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1, _('epub downloaded and extracted'))
        return index
@@ -7,14 +7,22 @@ nytimes.com
 '''
 import re, string, time
 from calibre import entity_to_unicode, strftime
+from datetime import timedelta, date
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup


 class NYTimes(BasicNewsRecipe):

-    # set headlinesOnly to True for the headlines-only version
+    # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
     headlinesOnly = True

+    # set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
+    # number of days old an article can be for inclusion. If oldest_article = 0 all articles
+    # will be included. Note: oldest_article is ignored if webEdition = False
+    webEdition = False
+    oldest_article = 7
+
     # includeSections: List of sections to include. If empty, all sections found will be included.
     # Otherwise, only the sections named will be included. For example,
     #
@@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
     # from an article (if one exists). If one_picture_per_article = True, the image
     # will be moved to a location between the headline and the byline.
     # If one_picture_per_article = False, all images from the article will be included
     # and shown in their original location.
-    one_picture_per_article = True
+    one_picture_per_article = False

     # The maximum number of articles that will be downloaded
     max_articles_per_feed = 100

+    # Whether to omit duplicates of articles (typically arising when articles are indexed in
+    # more than one section). If True, only the first occurrence will be downloaded.
+    filterDuplicates = True
+
+    # Sections to collect for the Web edition.
+    # Delete any you don't want, or use includeSections or excludeSections
+    web_sections = [(u'World',u'world'),
+                    (u'U.S.',u'national'),
+                    (u'Politics',u'politics'),
+                    (u'New York',u'nyregion'),
+                    (u'Business','business'),
+                    (u'Technology',u'technology'),
+                    (u'Sports',u'sports'),
+                    (u'Science',u'science'),
+                    (u'Health',u'health'),
+                    (u'Opinion',u'opinion'),
+                    (u'Arts',u'arts'),
+                    (u'Books',u'books'),
+                    (u'Movies',u'movies'),
+                    (u'Music',u'arts/music'),
+                    (u'Television',u'arts/television'),
+                    (u'Style',u'style'),
+                    (u'Dining & Wine',u'dining'),
+                    (u'Fashion & Style',u'fashion'),
+                    (u'Home & Garden',u'garden'),
+                    (u'Travel',u'travel'),
+                    ('Education',u'education'),
+                    ('Multimedia',u'multimedia'),
+                    (u'Obituaries',u'obituaries'),
+                    (u'Sunday Magazine',u'magazine'),
+                    (u'Week in Review',u'weekinreview')]
+
     if headlinesOnly:
         title='New York Times Headlines'
         description = 'Headlines from the New York Times'
+        needs_subscription = False
+    elif webEdition:
+        title='New York Times (Web)'
+        description = 'New York Times on the Web'
+        needs_subscription = True
     else:
         title='New York Times'
         description = 'Today\'s New York Times'
+        needs_subscription = True
+
+    month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
+
+    def decode_us_date(self,datestr):
+        udate = datestr.strip().lower().split()
+        try:
+            m = self.month_list.index(udate[0])+1
+        except:
+            return date.today()
+        d = int(udate[1])
+        y = int(udate[2])
+        try:
+            d = date(y,m,d)
+        except:
+            d = date.today()
+        return d
+
+    earliest_date = date.today() - timedelta(days=oldest_article)
+
     __author__ = 'GRiker/Kovid Goyal/Nick Redding'
     language = 'en'
@@ -136,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
                 .image {text-align: center;}
                 .source {text-align: left; }'''

+    articles = {}
+    key = None
+    ans = []
+    url_list = []
+
     def filter_ans(self, ans) :
         total_article_count = 0
         idx = 0
@@ -164,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
         self.log( "Queued %d articles" % total_article_count )
         return ans

+    def exclude_url(self,url):
+        if not url.startswith("http"):
+            return True
+        if not url.endswith(".html"):
+            return True
+        if 'nytimes.com' not in url:
+            return True
+        if 'podcast' in url:
+            return True
+        if '/video/' in url:
+            return True
+        if '/slideshow/' in url:
+            return True
+        if '/magazine/index' in url:
+            return True
+        if '/interactive/' in url:
+            return True
+        if '/reference/' in url:
+            return True
+        if '/premium/' in url:
+            return True
+        return False
+
     def fixChars(self,string):
         # Replace lsquo (\x91)
         fixed = re.sub("\x91","‘",string)
@@ -249,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
             return BeautifulSoup(_raw, markupMassage=massage)

         # Entry point
-        print "index_to_soup()"
         soup = get_the_soup( self.encoding, url_or_raw )
         contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
         docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
@@ -273,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
         else:
             return description

-    def parse_todays_index(self):
+    def feed_title(self,div):
+        return ''.join(div.findAll(text=True, recursive=True)).strip()

-        def feed_title(div):
-            return ''.join(div.findAll(text=True, recursive=True)).strip()
-
-        articles = {}
-        key = None
-        ans = []
-        url_list = []
-
-        def handle_article(div):
-            a = div.find('a', href=True)
-            if not a:
-                return
-            url = re.sub(r'\?.*', '', a['href'])
-            if not url.startswith("http"):
-                return
-            if not url.endswith(".html"):
-                return
-            if 'podcast' in url:
-                return
-            if '/video/' in url:
-                return
-            url += '?pagewanted=all'
-            if url in url_list:
-                return
-            url_list.append(url)
-            title = self.tag_to_string(a, use_alt=True).strip()
-            description = ''
-            pubdate = strftime('%a, %d %b')
-            summary = div.find(True, attrs={'class':'summary'})
-            if summary:
-                description = self.tag_to_string(summary, use_alt=False)
-            author = ''
-            authorAttribution = div.find(True, attrs={'class':'byline'})
-            if authorAttribution:
-                author = self.tag_to_string(authorAttribution, use_alt=False)
-            else:
-                authorAttribution = div.find(True, attrs={'class':'byline'})
-                if authorAttribution:
-                    author = self.tag_to_string(authorAttribution, use_alt=False)
-            feed = key if key is not None else 'Uncategorized'
-            if not articles.has_key(feed):
-                ans.append(feed)
-                articles[feed] = []
-            articles[feed].append(
-                            dict(title=title, url=url, date=pubdate,
-                                description=description, author=author,
-                                content=''))
+    def handle_article(self,div):
+        thumbnail = div.find('div','thumbnail')
+        if thumbnail:
+            thumbnail.extract()
+        a = div.find('a', href=True)
+        if not a:
+            return
+        url = re.sub(r'\?.*', '', a['href'])
+        if self.exclude_url(url):
+            return
+        url += '?pagewanted=all'
+        if self.filterDuplicates:
+            if url in self.url_list:
+                return
+        self.url_list.append(url)
+        title = self.tag_to_string(a, use_alt=True).strip()
+        description = ''
+        pubdate = strftime('%a, %d %b')
+        summary = div.find(True, attrs={'class':'summary'})
+        if summary:
+            description = self.tag_to_string(summary, use_alt=False)
+        author = ''
+        authorAttribution = div.find(True, attrs={'class':'byline'})
+        if authorAttribution:
+            author = self.tag_to_string(authorAttribution, use_alt=False)
+        else:
+            authorAttribution = div.find(True, attrs={'class':'byline'})
+            if authorAttribution:
+                author = self.tag_to_string(authorAttribution, use_alt=False)
+        feed = self.key if self.key is not None else 'Uncategorized'
+        if not self.articles.has_key(feed):
+            self.ans.append(feed)
+            self.articles[feed] = []
+        self.articles[feed].append(
+                        dict(title=title, url=url, date=pubdate,
+                            description=description, author=author,
+                            content=''))
+
+    def parse_web_edition(self):
+
+        for (sec_title,index_url) in self.web_sections:
+            if self.includeSections != []:
+                if sec_title not in self.includeSections:
+                    print "SECTION NOT INCLUDED: ",sec_title
+                    continue
+            if sec_title in self.excludeSections:
+                print "SECTION EXCLUDED: ",sec_title
+                continue
+            print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
+            soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
+            self.key = sec_title
+            # Find each article
+            for div in soup.findAll(True,
+                attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
+                if div['class'] in ['story', 'story headline'] :
+                    self.handle_article(div)
+                elif div['class'] == 'headlinesOnly multiline flush':
+                    for lidiv in div.findAll('li'):
+                        self.handle_article(lidiv)
+
+        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        return self.filter_ans(self.ans)
+
+    def parse_todays_index(self):

         soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
+        skipping = False
         # Find each article
         for div in soup.findAll(True,
             attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

             if div['class'] in ['section-headline','sectionHeader']:
-                key = string.capwords(feed_title(div))
-                key = key.replace('Op-ed','Op-Ed')
-                key = key.replace('U.s.','U.S.')
+                self.key = string.capwords(self.feed_title(div))
+                self.key = self.key.replace('Op-ed','Op-Ed')
+                self.key = self.key.replace('U.s.','U.S.')
+                self.key = self.key.replace('N.y.','N.Y.')
+                skipping = False
+                if self.includeSections != []:
+                    if self.key not in self.includeSections:
+                        print "SECTION NOT INCLUDED: ",self.key
+                        skipping = True
+                if self.key in self.excludeSections:
+                    print "SECTION EXCLUDED: ",self.key
+                    skipping = True

             elif div['class'] in ['story', 'story headline'] :
-                handle_article(div)
+                if not skipping:
+                    self.handle_article(div)
             elif div['class'] == 'headlinesOnly multiline flush':
                 for lidiv in div.findAll('li'):
-                    handle_article(lidiv)
+                    if not skipping:
+                        self.handle_article(lidiv)

-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        return self.filter_ans(ans)
+        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        return self.filter_ans(self.ans)

     def parse_headline_index(self):

-        articles = {}
-        ans = []
-        url_list = []
-
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

         # Fetch the content table
@@ -363,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
         for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
             for div_sec in td_col.findAll('div',recursive=False):
                 for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):

                     section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                     section_name = re.sub(r'^ *$','',section_name)

                     if section_name == '':
                         continue
+                    if self.includeSections != []:
+                        if section_name not in self.includeSections:
+                            print "SECTION NOT INCLUDED: ",section_name
+                            continue
+                    if section_name in self.excludeSections:
+                        print "SECTION EXCLUDED: ",section_name
+                        continue
+
                     section_name=string.capwords(section_name)
-                    if section_name == 'U.s.':
-                        section_name = 'U.S.'
-                    elif section_name == 'Op-ed':
-                        section_name = 'Op-Ed'
+                    section_name = section_name.replace('Op-ed','Op-Ed')
+                    section_name = section_name.replace('U.s.','U.S.')
+                    section_name = section_name.replace('N.y.','N.Y.')
                     pubdate = strftime('%a, %d %b')

                     search_div = div_sec
@@ -392,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
                         if not a:
                             continue
                         url = re.sub(r'\?.*', '', a['href'])
-                        if not url.startswith("http"):
-                            continue
-                        if not url.endswith(".html"):
-                            continue
-                        if 'podcast' in url:
-                            continue
-                        if 'video' in url:
+                        if self.exclude_url(url):
                             continue
                         url += '?pagewanted=all'
-                        if url in url_list:
-                            continue
-                        url_list.append(url)
-                        self.log("URL %s" % url)
+                        if self.filterDuplicates:
+                            if url in self.url_list:
+                                continue
+                        self.url_list.append(url)
                         title = self.tag_to_string(a, use_alt=True).strip()
                         desc = h3_item.find('p')
                         if desc is not None:
                             description = self.tag_to_string(desc,use_alt=False)
                         else:
                             description = ''
-                        if not articles.has_key(section_name):
-                            ans.append(section_name)
-                            articles[section_name] = []
-                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+                        if not self.articles.has_key(section_name):
+                            self.ans.append(section_name)
+                            self.articles[section_name] = []
+                        self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        return self.filter_ans(ans)
+        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        return self.filter_ans(self.ans)

     def parse_index(self):
         if self.headlinesOnly:
             return self.parse_headline_index()
+        elif self.webEdition:
+            return self.parse_web_edition()
         else:
             return self.parse_todays_index()
@@ -438,6 +561,21 @@ class NYTimes(BasicNewsRecipe):

     def preprocess_html(self, soup):

+        if self.webEdition & (self.oldest_article>0):
+            date_tag = soup.find(True,attrs={'class': ['dateline','date']})
+            if date_tag:
+                date_str = self.tag_to_string(date_tag,use_alt=False)
+                date_str = date_str.replace('Published:','')
+                date_items = date_str.split(',')
+                try:
+                    datestring = date_items[0]+' '+date_items[1]
+                    article_date = self.decode_us_date(datestring)
+                except:
+                    article_date = date.today()
+                if article_date < self.earliest_date:
+                    self.log("Skipping article dated %s" % date_str)
+                    return None
+
         kicker_tag = soup.find(attrs={'class':'kicker'})
         if kicker_tag: # remove Op_Ed author head shots
             tagline = self.tag_to_string(kicker_tag)
@@ -462,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
                 for inlineImg in inlineImgs[1:]:
                     inlineImg.extract()
                 # Move firstImg before article body
-                #article_body = soup.find(True, {'id':'articleBody'})
                 cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                 if cgFirst:
                     # Strip all sibling NavigableStrings: noise
@@ -548,4 +685,3 @@ class NYTimes(BasicNewsRecipe):
             divTag.replaceWith(tag)

         return soup
-
@ -7,14 +7,22 @@ nytimes.com
|
|||||||
'''
|
'''
|
||||||
import re, string, time
|
import re, string, time
|
||||||
from calibre import entity_to_unicode, strftime
|
from calibre import entity_to_unicode, strftime
|
||||||
|
from datetime import timedelta, date
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
||||||
|
|
||||||
|
|
||||||
class NYTimes(BasicNewsRecipe):
|
class NYTimes(BasicNewsRecipe):
|
||||||
|
|
||||||
# set headlinesOnly to True for the headlines-only version
|
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
|
||||||
headlinesOnly = False
|
headlinesOnly = False
|
||||||
|
|
||||||
|
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
|
||||||
|
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
|
||||||
|
# will be included. Note: oldest_article is ignored if webEdition = False
|
||||||
|
webEdition = False
|
||||||
|
oldest_article = 7
|
||||||
|
|
||||||
# includeSections: List of sections to include. If empty, all sections found will be included.
|
# includeSections: List of sections to include. If empty, all sections found will be included.
|
||||||
# Otherwise, only the sections named will be included. For example,
|
# Otherwise, only the sections named will be included. For example,
|
||||||
#
|
#
|
||||||
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
# from an article (if one exists). If one_picture_per_article = True, the image
|
# from an article (if one exists). If one_picture_per_article = True, the image
|
||||||
# will be moved to a location between the headline and the byline.
|
# will be moved to a location between the headline and the byline.
|
||||||
# If one_picture_per_article = False, all images from the article will be included
|
# If one_picture_per_article = False, all images from the article will be included
|
||||||
|
|
||||||
# and shown in their original location.
|
# and shown in their original location.
|
||||||
one_picture_per_article = True
|
one_picture_per_article = False
|
||||||
|
|
||||||
# The maximum number of articles that will be downloaded
|
# The maximum number of articles that will be downloaded
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
# Whether to omit duplicates of articles (typically arsing when articles are indexed in
|
||||||
|
# more than one section). If True, only the first occurance will be downloaded.
|
||||||
|
filterDuplicates = True
|
||||||
|
|
||||||
|
# Sections to collect for the Web edition.
|
||||||
|
# Delete any you don't want, or use includeSections or excludeSections
|
||||||
|
web_sections = [(u'World',u'world'),
|
||||||
|
(u'U.S.',u'national'),
|
||||||
|
(u'Politics',u'politics'),
|
||||||
|
(u'New York',u'nyregion'),
|
||||||
|
(u'Business','business'),
|
||||||
|
(u'Technology',u'technology'),
|
||||||
|
(u'Sports',u'sports'),
|
||||||
|
(u'Science',u'science'),
|
||||||
|
(u'Health',u'health'),
|
||||||
|
(u'Opinion',u'opinion'),
|
||||||
|
(u'Arts',u'arts'),
|
||||||
|
(u'Books',u'books'),
|
||||||
|
(u'Movies',u'movies'),
|
||||||
|
(u'Music',u'arts/music'),
|
||||||
|
(u'Television',u'arts/television'),
|
||||||
|
(u'Style',u'style'),
|
||||||
|
(u'Dining & Wine',u'dining'),
|
||||||
|
(u'Fashion & Style',u'fashion'),
|
||||||
|
(u'Home & Garden',u'garden'),
|
||||||
|
(u'Travel',u'travel'),
|
||||||
|
('Education',u'education'),
|
||||||
|
('Multimedia',u'multimedia'),
|
||||||
|
(u'Obituaries',u'obituaries'),
|
||||||
|
(u'Sunday Magazine',u'magazine'),
|
||||||
|
(u'Week in Review',u'weekinreview')]
|
||||||
|
|
||||||
|
|
||||||
if headlinesOnly:
|
if headlinesOnly:
|
||||||
title='New York Times Headlines'
|
title='New York Times Headlines'
|
||||||
description = 'Headlines from the New York Times'
|
description = 'Headlines from the New York Times'
|
||||||
|
needs_subscription = False
|
||||||
|
elif webEdition:
|
||||||
|
title='New York Times (Web)'
|
||||||
|
description = 'New York Times on the Web'
|
||||||
|
needs_subscription = True
|
||||||
else:
|
else:
|
||||||
title='New York Times'
|
title='New York Times'
|
||||||
description = 'Today\'s New York Times'
|
description = 'Today\'s New York Times'
|
||||||
|
needs_subscription = True
|
||||||
|
|
||||||
|
|
||||||
|
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
|
||||||
|
|
||||||
|
def decode_us_date(self,datestr):
|
||||||
|
udate = datestr.strip().lower().split()
|
||||||
|
try:
|
||||||
|
m = self.month_list.index(udate[0])+1
|
||||||
|
except:
|
||||||
|
return date.today()
|
||||||
|
d = int(udate[1])
|
||||||
|
y = int(udate[2])
|
||||||
|
try:
|
||||||
|
d = date(y,m,d)
|
||||||
|
except:
|
||||||
|
d = date.today
|
||||||
|
return d
|
||||||
|
|
||||||
|
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||||
|
|
||||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
@ -60,7 +124,6 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
timefmt = ''
|
timefmt = ''
|
||||||
needs_subscription = True
|
|
||||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||||
cover_margins = (18,18,'grey99')
|
cover_margins = (18,18,'grey99')
|
||||||
|
|
||||||
@ -137,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
.image {text-align: center;}
|
.image {text-align: center;}
|
||||||
.source {text-align: left; }'''
|
.source {text-align: left; }'''
|
||||||
|
|
||||||
|
|
||||||
|
articles = {}
|
||||||
|
key = None
|
||||||
|
ans = []
|
||||||
|
url_list = []
|
||||||
|
|
||||||
def filter_ans(self, ans) :
|
def filter_ans(self, ans) :
|
||||||
total_article_count = 0
|
total_article_count = 0
|
||||||
idx = 0
|
idx = 0
|
||||||
@ -165,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
self.log( "Queued %d articles" % total_article_count )
|
self.log( "Queued %d articles" % total_article_count )
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def exclude_url(self,url):
|
||||||
|
if not url.startswith("http"):
|
||||||
|
return True
|
||||||
|
if not url.endswith(".html"):
|
||||||
|
return True
|
||||||
|
if 'nytimes.com' not in url:
|
||||||
|
return True
|
||||||
|
if 'podcast' in url:
|
||||||
|
return True
|
||||||
|
if '/video/' in url:
|
||||||
|
return True
|
||||||
|
if '/slideshow/' in url:
|
||||||
|
return True
|
||||||
|
if '/magazine/index' in url:
|
||||||
|
return True
|
||||||
|
if '/interactive/' in url:
|
||||||
|
return True
|
||||||
|
if '/reference/' in url:
|
||||||
|
return True
|
||||||
|
if '/premium/' in url:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def fixChars(self,string):
|
def fixChars(self,string):
|
||||||
# Replace lsquo (\x91)
|
# Replace lsquo (\x91)
|
||||||
fixed = re.sub("\x91","‘",string)
|
fixed = re.sub("\x91","‘",string)
|
||||||
@ -250,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
return BeautifulSoup(_raw, markupMassage=massage)
|
return BeautifulSoup(_raw, markupMassage=massage)
|
||||||
|
|
||||||
# Entry point
|
# Entry point
|
||||||
print "index_to_soup()"
|
|
||||||
soup = get_the_soup( self.encoding, url_or_raw )
|
soup = get_the_soup( self.encoding, url_or_raw )
|
||||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||||
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||||
@ -274,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
else:
|
else:
|
||||||
return description
|
return description
|
||||||
|
|
||||||
def parse_todays_index(self):
|
def feed_title(self,div):
|
||||||
|
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
||||||
|
|
||||||
def feed_title(div):
|
def handle_article(self,div):
|
||||||
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
thumbnail = div.find('div','thumbnail')
|
||||||
|
if thumbnail:
|
||||||
articles = {}
|
thumbnail.extract()
|
||||||
key = None
|
a = div.find('a', href=True)
|
||||||
ans = []
|
if not a:
|
||||||
url_list = []
|
return
|
||||||
|
url = re.sub(r'\?.*', '', a['href'])
|
||||||
def handle_article(div):
|
if self.exclude_url(url):
|
||||||
a = div.find('a', href=True)
|
return
|
||||||
if not a:
|
url += '?pagewanted=all'
|
||||||
|
if self.filterDuplicates:
|
||||||
|
if url in self.url_list:
|
||||||
return
|
return
|
||||||
url = re.sub(r'\?.*', '', a['href'])
|
self.url_list.append(url)
|
||||||
if not url.startswith("http"):
|
title = self.tag_to_string(a, use_alt=True).strip()
|
||||||
return
|
description = ''
|
||||||
if not url.endswith(".html"):
|
pubdate = strftime('%a, %d %b')
|
||||||
return
|
summary = div.find(True, attrs={'class':'summary'})
|
||||||
if 'podcast' in url:
|
if summary:
|
||||||
return
|
description = self.tag_to_string(summary, use_alt=False)
|
||||||
if '/video/' in url:
|
author = ''
|
||||||
return
|
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||||
url += '?pagewanted=all'
|
if authorAttribution:
|
||||||
if url in url_list:
|
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||||
return
|
else:
|
||||||
url_list.append(url)
|
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
|
||||||
description = ''
|
|
||||||
pubdate = strftime('%a, %d %b')
|
|
||||||
summary = div.find(True, attrs={'class':'summary'})
|
|
||||||
if summary:
|
|
||||||
description = self.tag_to_string(summary, use_alt=False)
|
|
||||||
author = ''
|
|
||||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||||
if authorAttribution:
|
if authorAttribution:
|
||||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||||
else:
|
feed = self.key if self.key is not None else 'Uncategorized'
|
||||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
if not self.articles.has_key(feed):
|
||||||
if authorAttribution:
|
self.ans.append(feed)
|
||||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
self.articles[feed] = []
|
||||||
feed = key if key is not None else 'Uncategorized'
|
self.articles[feed].append(
|
||||||
if not articles.has_key(feed):
|
dict(title=title, url=url, date=pubdate,
|
||||||
ans.append(feed)
|
description=description, author=author,
|
||||||
articles[feed] = []
|
content=''))
|
||||||
articles[feed].append(
|
|
||||||
dict(title=title, url=url, date=pubdate,
|
|
||||||
description=description, author=author,
|
|
||||||
content=''))
|
|
||||||
|
|
||||||
|
|
||||||
|
def parse_web_edition(self):
|
||||||
|
|
||||||
|
for (sec_title,index_url) in self.web_sections:
|
||||||
|
if self.includeSections != []:
|
||||||
|
if sec_title not in self.includeSections:
|
||||||
|
print "SECTION NOT INCLUDED: ",sec_title
|
||||||
|
continue
|
||||||
|
if sec_title in self.excludeSections:
|
||||||
|
print "SECTION EXCLUDED: ",sec_title
|
||||||
|
continue
|
||||||
|
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
|
||||||
|
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
|
||||||
|
self.key = sec_title
|
||||||
|
# Find each article
|
||||||
|
for div in soup.findAll(True,
|
||||||
|
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||||
|
if div['class'] in ['story', 'story headline'] :
|
||||||
|
self.handle_article(div)
|
||||||
|
elif div['class'] == 'headlinesOnly multiline flush':
|
||||||
|
for lidiv in div.findAll('li'):
|
||||||
|
self.handle_article(lidiv)
|
||||||
|
|
||||||
|
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||||
|
return self.filter_ans(self.ans)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_todays_index(self):
|
||||||
|
|
||||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||||
|
|
||||||
|
skipping = False
|
||||||
# Find each article
|
# Find each article
|
||||||
for div in soup.findAll(True,
|
for div in soup.findAll(True,
|
||||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||||
|
|
||||||
if div['class'] in ['section-headline','sectionHeader']:
|
if div['class'] in ['section-headline','sectionHeader']:
|
||||||
key = string.capwords(feed_title(div))
|
self.key = string.capwords(self.feed_title(div))
|
||||||
key = key.replace('Op-ed','Op-Ed')
|
self.key = self.key.replace('Op-ed','Op-Ed')
|
||||||
key = key.replace('U.s.','U.S.')
|
self.key = self.key.replace('U.s.','U.S.')
|
||||||
|
self.key = self.key.replace('N.y.','N.Y.')
|
||||||
|
skipping = False
|
||||||
|
if self.includeSections != []:
|
||||||
|
if self.key not in self.includeSections:
|
||||||
|
print "SECTION NOT INCLUDED: ",self.key
|
||||||
|
skipping = True
|
||||||
|
if self.key in self.excludeSections:
|
||||||
|
print "SECTION EXCLUDED: ",self.key
|
||||||
|
skipping = True
|
||||||
|
|
||||||
elif div['class'] in ['story', 'story headline'] :
|
elif div['class'] in ['story', 'story headline'] :
|
||||||
handle_article(div)
|
if not skipping:
|
||||||
|
self.handle_article(div)
|
||||||
elif div['class'] == 'headlinesOnly multiline flush':
|
elif div['class'] == 'headlinesOnly multiline flush':
|
||||||
for lidiv in div.findAll('li'):
|
for lidiv in div.findAll('li'):
|
||||||
handle_article(lidiv)
|
if not skipping:
|
||||||
|
self.handle_article(lidiv)
|
||||||
|
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||||
return self.filter_ans(ans)
|
return self.filter_ans(self.ans)
|
||||||
|
|
||||||
def parse_headline_index(self):
|
def parse_headline_index(self):
|
||||||
|
|
||||||
articles = {}
|
|
||||||
ans = []
|
|
||||||
url_list = []
|
|
||||||
|
|
||||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
||||||
|
|
||||||
# Fetch the content table
|
# Fetch the content table
|
||||||
@ -364,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
|
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
|
||||||
for div_sec in td_col.findAll('div',recursive=False):
|
for div_sec in td_col.findAll('div',recursive=False):
|
||||||
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
|
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
|
||||||
|
|
||||||
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
|
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
|
||||||
section_name = re.sub(r'^ *$','',section_name)
|
section_name = re.sub(r'^ *$','',section_name)
|
||||||
|
|
||||||
if section_name == '':
|
if section_name == '':
|
||||||
continue
|
continue
|
||||||
|
if self.includeSections != []:
|
||||||
|
if section_name not in self.includeSections:
|
||||||
|
print "SECTION NOT INCLUDED: ",section_name
|
||||||
|
continue
|
||||||
|
if section_name in self.excludeSections:
|
||||||
|
print "SECTION EXCLUDED: ",section_name
|
||||||
|
continue
|
||||||
|
|
||||||
section_name=string.capwords(section_name)
|
section_name=string.capwords(section_name)
|
||||||
if section_name == 'U.s.':
|
section_name = section_name.replace('Op-ed','Op-Ed')
|
||||||
section_name = 'U.S.'
|
section_name = section_name.replace('U.s.','U.S.')
|
||||||
elif section_name == 'Op-ed':
|
section_name = section_name.replace('N.y.','N.Y.')
|
||||||
section_name = 'Op-Ed'
|
|
||||||
pubdate = strftime('%a, %d %b')
|
pubdate = strftime('%a, %d %b')
|
||||||
|
|
||||||
search_div = div_sec
|
search_div = div_sec
|
||||||
@ -393,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
if not a:
|
if not a:
|
||||||
continue
|
continue
|
||||||
url = re.sub(r'\?.*', '', a['href'])
|
url = re.sub(r'\?.*', '', a['href'])
|
||||||
if not url.startswith("http"):
|
if self.exclude_url(url):
|
||||||
continue
|
|
||||||
if not url.endswith(".html"):
|
|
||||||
continue
|
|
||||||
if 'podcast' in url:
|
|
||||||
continue
|
|
||||||
if 'video' in url:
|
|
||||||
continue
|
continue
|
||||||
url += '?pagewanted=all'
|
url += '?pagewanted=all'
|
||||||
if url in url_list:
|
if self.filterDuplicates:
|
||||||
continue
|
if url in self.url_list:
|
||||||
url_list.append(url)
|
continue
|
||||||
self.log("URL %s" % url)
|
self.url_list.append(url)
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
title = self.tag_to_string(a, use_alt=True).strip()
|
||||||
desc = h3_item.find('p')
|
desc = h3_item.find('p')
|
||||||
if desc is not None:
|
if desc is not None:
|
||||||
description = self.tag_to_string(desc,use_alt=False)
|
description = self.tag_to_string(desc,use_alt=False)
|
||||||
else:
|
else:
|
||||||
description = ''
|
description = ''
|
||||||
if not articles.has_key(section_name):
|
if not self.articles.has_key(section_name):
|
||||||
ans.append(section_name)
|
self.ans.append(section_name)
|
||||||
articles[section_name] = []
|
self.articles[section_name] = []
|
||||||
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||||
|
|
||||||
|
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
return self.filter_ans(self.ans)
|
||||||
return self.filter_ans(ans)
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
if self.headlinesOnly:
|
if self.headlinesOnly:
|
||||||
return self.parse_headline_index()
|
return self.parse_headline_index()
|
||||||
|
elif self.webEdition:
|
||||||
|
return self.parse_web_edition()
|
||||||
else:
|
else:
|
||||||
return self.parse_todays_index()
|
return self.parse_todays_index()
|
||||||
|
|
||||||
@@ -439,6 +561,21 @@ class NYTimes(BasicNewsRecipe):

     def preprocess_html(self, soup):

+        if self.webEdition & (self.oldest_article>0):
+            date_tag = soup.find(True,attrs={'class': ['dateline','date']})
+            if date_tag:
+                date_str = self.tag_to_string(date_tag,use_alt=False)
+                date_str = date_str.replace('Published:','')
+                date_items = date_str.split(',')
+                try:
+                    datestring = date_items[0]+' '+date_items[1]
+                    article_date = self.decode_us_date(datestring)
+                except:
+                    article_date = date.today()
+                if article_date < self.earliest_date:
+                    self.log("Skipping article dated %s" % date_str)
+                    return None
+
         kicker_tag = soup.find(attrs={'class':'kicker'})
         if kicker_tag: # remove Op_Ed author head shots
             tagline = self.tag_to_string(kicker_tag)

@@ -463,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
                 for inlineImg in inlineImgs[1:]:
                     inlineImg.extract()
             # Move firstImg before article body
-            #article_body = soup.find(True, {'id':'articleBody'})
             cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
             if cgFirst:
                 # Strip all sibling NavigableStrings: noise
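The new date gate calls decode_us_date(), which is defined elsewhere in the recipe. A minimal sketch of what the call above assumes it does (hypothetical body, for a "Published: November 19, 2010" style dateline):

    from datetime import datetime

    def decode_us_date(self, datestring):
        # 'November 19  2010' -> datetime.date(2010, 11, 19)
        return datetime.strptime(datestring.strip(), '%B %d %Y').date()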
resources/recipes/observa_digital.recipe (new file, 63 lines)
@@ -0,0 +1,63 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
observa.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Observa Digital'
    __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
    description = 'Noticias desde Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['contenido'])]
    remove_tags = [
        dict(name='div', attrs={'id':'contenedorVinculadas'}),
        dict(name='p', attrs={'id':'nota_firma'}),
        dict(name=['object','link'])
    ]

    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
        soup = self.index_to_soup(index)
        link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'})
        if link_item:
            cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip()

        print cover_url

        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es'
     remove_empty_feeds = True
+    publication_type = 'newspaper'
     masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        #autor{font-weight: bold}
+        #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
+        #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
+        .fgprincipal{font-size: large; font-weight: bold}
+    """

     conversion_options = {
         'comment' : description

@@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
         , 'language' : language
     }

-    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
+    remove_tags = [
+        dict(name=['meta','link'])
+        ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
+    ]
+    remove_attributes=['lang']

     feeds = [

@@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
+        for item in soup.findAll('span', attrs={'id':'seccion'}):
+            it = item.a
+            it.name='span'
+            del it['href']
+            del it['title']
+        for item in soup.findAll('p'):
+            it = item.find('h3')
+            if it:
+                it.name='span'
         return soup
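A standalone check of what the new preprocess_html() loop does to a section label (toy markup, using calibre's bundled BeautifulSoup):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<span id="seccion"><a href="/sec" title="x">EL PAIS</a></span>')
    for item in soup.findAll('span', attrs={'id':'seccion'}):
        it = item.a
        it.name = 'span'   # demote the link to plain inline text
        del it['href']
        del it['title']
    print soup   # roughly: <span id="seccion"><span>EL PAIS</span></span>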
resources/recipes/pc_lab.recipe (new file, 70 lines)
@@ -0,0 +1,70 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from PC Lab website"
    language = 'pl'
    oldest_article = 30.0
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'class':['substance']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['chapters']})
        ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['navigation']})
    ]

    # links to RSS feeds
    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]

    # load second and subsequent page content
    # in:  soup - full page with 'next' button
    # out: appendtag - tag to which new page is to be added
    def append_page(self, soup, appendtag):
        # find the 'Next' button
        pager = soup.find('div', attrs={'class':'next'})

        if pager:
            # search for 'a' element with link to next page (exit if not found)
            a = pager.find('a')
            if a:
                nexturl = a['href']

                soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)

                pagetext_substance = soup2.find('div', attrs={'class':'substance'})
                pagetext = pagetext_substance.find('div', attrs={'class':'data'})
                pagetext.extract()

                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                pos = len(appendtag.contents)

                self.append_page(soup2, appendtag)

    def preprocess_html(self, soup):
        # soup.body contains no title and no navigator, they are in soup
        self.append_page(soup, soup.body)

        # finally remove some tags
        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
        [tag.extract() for tag in tags]

        return soup
@@ -1,13 +1,10 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Politika(BasicNewsRecipe):
     title = 'Politika Online'

@@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    remove_javascript = True
     encoding = 'utf8'
-    language = 'sr'
-    lang = 'sr-Latn-RS'
-    direction = 'ltr'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    delay = 1
+    language = 'sr'
+    publication_type = 'newspaper'
+    masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
+    extra_css = """
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: Arial,Helvetica,sans1,sans-serif}
+        h1{font-family: "Times New Roman",Times,serif1,serif}
+        .articledescription{font-family: sans1, sans-serif}
+    """

     conversion_options = {
         'comment' : description
         , 'tags' : category
         , 'publisher' : publisher
-        , 'language' : lang
-        , 'pretty_print' : True
+        , 'language' : language
     }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
-    remove_tags = [
-        dict(name='div', attrs={'class':['send_print','txt-komentar']})
-        ,dict(name=['object','link','a'])
-        ,dict(name='h1', attrs={'class':'box_header-tags'})
-    ]
+    keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
+    remove_tags_after = dict(attrs={'class':'online_date'})
+    remove_tags = [dict(name=['link','meta','iframe','embed','object'])]

     feeds = [
         (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
         ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
-        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
+        ,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
         ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
-        ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
+        ,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
         ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
         ,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
-        ,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
+        ,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
     ]

     def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
         for item in soup.findAll(style=True):
             del item['style']
-        ftag = soup.find('div',attrs={'class':'content_center_border'})
-        if ftag.has_key('align'):
-            del ftag['align']
-        return self.adeify_images(soup)
+        for item in soup.findAll('a', attrs={'class':'category'}):
+            item.name='span'
+            if item.has_key('href'):
+                del item['href']
+            if item.has_key('title'):
+                del item['title']
+        return soup
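The preprocess_regexps entry kept in this hunk swaps every U+0110 (LATIN CAPITAL LETTER D WITH STROKE) for the visually near-identical U+00D0 (ETH), which more reader fonts of the period included. Worked example:

    import re

    pattern, repl = re.compile(u'\u0110'), lambda match: u'\u00D0'
    print pattern.sub(repl, u'\u0110akovica')   # -> u'\u00D0akovica'; lowercase u'\u0111' is untouched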
68
resources/recipes/polityka.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Polityka(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = u'Polityka'
|
||||||
|
__author__ = 'Mateusz Kielar'
|
||||||
|
description = 'Weekly magazine. Last archive issue'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'en'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
|
||||||
|
remove_tags_after = dict(dict(name = 'div', attrs = {'class' : 'box_footer'}))
|
||||||
|
|
||||||
|
remove_tags =[]
|
||||||
|
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
|
||||||
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))
|
||||||
|
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1 {font-size: x-large; font-weight: bold}
|
||||||
|
'''
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
soup = self.index_to_soup('http://archiwum.polityka.pl/')
|
||||||
|
box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
|
||||||
|
feeds = []
|
||||||
|
last = 0
|
||||||
|
self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
|
||||||
|
last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']
|
||||||
|
|
||||||
|
while True:
|
||||||
|
index = self.index_to_soup(last_edition)
|
||||||
|
|
||||||
|
|
||||||
|
box_list = index.findAll('div', attrs={'class' : 'box_list'})
|
||||||
|
if len(box_list) == 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
articles = {}
|
||||||
|
for box in box_list:
|
||||||
|
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
|
||||||
|
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
|
||||||
|
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
|
||||||
|
print section
|
||||||
|
if not articles.has_key(section):
|
||||||
|
articles[section] = []
|
||||||
|
articles[section].append( {
|
||||||
|
'title' : self.tag_to_string(div.a),
|
||||||
|
'url' : 'http://archiwum.polityka.pl' + div.a['href'],
|
||||||
|
'date' : '',
|
||||||
|
'description' : ''
|
||||||
|
})
|
||||||
|
|
||||||
|
for section in articles:
|
||||||
|
feeds.append((section, articles[section]))
|
||||||
|
|
||||||
|
last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
|
||||||
|
last = last + 1
|
||||||
|
|
||||||
|
return feeds
|
||||||
|
|
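parse_index() above steps through archive editions by rewriting the edition number in the URL until a page yields no div.box_list. The URL arithmetic in isolation (edition URLs of the shape .../wydanie/N are an assumption read off the replace() call):

    last = 0
    last_edition = 'http://archiwum.polityka.pl/wydanie/0'   # assumed shape
    last_edition = last_edition.replace(
        'http://archiwum.polityka.pl/wydanie/' + str(last),
        'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
    last = last + 1
    # last_edition is now 'http://archiwum.polityka.pl/wydanie/1'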
resources/recipes/rds.recipe (new file, 18 lines)
@@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1290013720(BasicNewsRecipe):
    title = u'RDS'
    __author__ = 'Nexus'
    language = 'en_CA'
    description = 'Hockey News'
    oldest_article = 7
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_tags = [dict(name='div', attrs={'id':'rdsWrap'}),
                   dict(name='table', attrs={'id':'aVoir'}),
                   dict(name='div', attrs={'id':'imageChronique'})]
    keep_only_tags = [dict(name='div', attrs={'id':['enteteChronique']}),
                      dict(name='div', attrs={'id':['contenuChronique']})]

    feeds = [(u'RDS', u'http://www.rds.ca/hockey/fildepresse_rds.xml')]
resources/recipes/reuters_ja.recipe (new file, 37 lines)
@@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

class ReutersJa(BasicNewsRecipe):

    title = 'Reuters(Japan)'
    description = 'Global news in Japanese'
    __author__ = 'Hiroshi Miura'
    use_embedded_content = False
    language = 'ja'
    max_articles_per_feed = 10
    remove_javascript = True

    feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'),
              ('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'),
              ('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'),
              ('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'),
              ('Oddly Enough News', 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml')
            ]

    remove_tags_before = {'class':"article primaryContent"}
    remove_tags = [ dict(id="banner"),
                    dict(id="autilities"),
                    dict(id="textSizer"),
                    dict(id="shareFooter"),
                    dict(id="relatedNews"),
                    dict(id="editorsChoice"),
                    dict(id="ecArticles"),
                    {'class':"secondaryContent"},
                    {'class':"module"},
                  ]
    remove_tags_after = {'class':"assetBuddy"}

    def print_version(self, url):
        m = re.search('(.*idJPJAPAN-[0-9]+)', url)
        return m.group(0)+'?sp=true'
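print_version() keeps everything up to the numeric article id and asks for the single-page view. With an illustrative URL (assumed shape, not taken from the feed):

    import re

    url = 'http://jp.reuters.com/article/topNews/idJPJAPAN-18361020101119?feedType=RSS'
    m = re.search('(.*idJPJAPAN-[0-9]+)', url)
    print m.group(0) + '?sp=true'
    # http://jp.reuters.com/article/topNews/idJPJAPAN-18361020101119?sp=true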
resources/recipes/revista_bla.recipe (new file, 54 lines)
@@ -0,0 +1,54 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.revistabla.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Revista Bla'
    __author__ = 'Gustavo Azambuja'
    description = 'Moda | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 20
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['body_container'])]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','link']) ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Articulos', u'http://www.revistabla.com/feed/')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.revistabla.com'
        soup = self.index_to_soup(index)
        link_item = soup.find('div',attrs={'class':'header_right'})
        if link_item:
            cover_url = link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
             feeds.append((title, articles))
         return feeds
+
+    def get_cover_url(self):
+        index = 'http://www.muyinteresante.es/revista'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'img_portada'})
+        if link_item:
+            cover_url = "http://www.muyinteresante.es"+link_item['src']
+        return cover_url
resources/recipes/rollingstone.recipe (new file, 69 lines)
@@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class RollingStone(BasicNewsRecipe):
    title = 'Rolling Stone Magazine - free content'
    __author__ = 'Darko Miletic'
    description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
    publisher = 'Werner Media inc.'
    category = 'news, music, USA, world'
    oldest_article = 15
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
    extra_css = """
        body{font-family: Georgia,Times,serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    preprocess_regexps = [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
        ,(re.compile(r'</title>.*?</head>' , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n' )
    ]

    keep_only_tags=[
        dict(attrs={'class':['headerImgHolder','headerContent']})
        ,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
        ,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
    ]

    remove_tags = [
        dict(name=['meta','iframe','object','embed'])
        ,dict(attrs={'id':'mpStoryHeader'})
        ,dict(attrs={'class':'relatedTopics'})
    ]
    remove_attributes=['lang','onclick','width','height','name']
    remove_tags_before=dict(attrs={'class':'bloggerInfo'})
    remove_tags_after=dict(attrs={'class':'relatedTopics'})

    feeds = [
        (u'All News' , u'http://www.rollingstone.com/siteServices/rss/allNews' )
        ,(u'All Blogs' , u'http://www.rollingstone.com/siteServices/rss/allBlogs' )
        ,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
        ,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
        ,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
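The two preprocess_regexps entries cut everything between the opening tag and <head>, and between </title> and </head>, before the page is parsed. A toy before/after:

    import re

    html = '<html xml:lang="en"><!--ads--><head><title>T</title><script>x()</script></head>'
    for pat, fn in [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE), lambda m: 'xml:lang="en">\n<head>\n'),
        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda m: '</title>\n</head>\n'),
    ]:
        html = pat.sub(fn, html)
    print html   # <html xml:lang="en">\n<head>\n<title>T</title>\n</head>\n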
@@ -6,6 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 spiegel.de
 '''

+from time import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class Spiegel_ger(BasicNewsRecipe):

@@ -44,3 +45,6 @@ class Spiegel_ger(BasicNewsRecipe):
         rmain, rsep, rrest = main.rpartition(',')
         purl = rmain + ',druck-' + rrest + ',' + rest
         return purl
+
+    def get_cover_url(self):
+        return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")
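The added get_cover_url() derives the cover path from the build date: %Y/%W/%j is year, Monday-based week number and day of year. For 19 November 2010 that gives 2010/46/323:

    from time import strftime

    print 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")
    # on 2010-11-19: http://wissen.spiegel.de/wissen/titel/SP/2010/46/323/titel.jpg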
@@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
     __author__ = 'noxxx'
     max_articles_per_feed = 100
     description = 'tagesanzeiger.ch: Nichts verpassen'
-    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
     language = 'de'

     conversion_options = {
@@ -3,12 +3,12 @@
 __license__ = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

-''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class TelepolisNews(BasicNewsRecipe):
-    title = u'Telepolis (News)'
+    title = u'Telepolis (News+Artikel)'
     __author__ = 'Gerhard Aigner'
     publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
     description = 'News from telepolis'

@@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe):
     encoding = "utf-8"
     language = 'de_AT'

-    use_embedded_content = False
+    use_embedded_content =False
     remove_empty_feeds = True

     preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
         (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

-    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
+    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]

-    feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

     html2lrf_options = [
         '--comment' , description

@@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):

     def get_article_url(self, article):
         '''if the linked article is of kind artikel don't take it'''
-        if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 1) :
             return None
         return article.link

@@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
         mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
         soup.head.insert(0,mtag)
         return soup
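Effect of relaxing the get_article_url() test from '> 0' to '> 1': with the new Atom feed, a normal article link containing 'artikel' once is now kept, and only links with two occurrences are dropped. Illustrative URL (assumed shape, not taken from the feed):

    link = 'http://www.heise.de/tp/artikel/33/33801/1.html'
    print link.count('artikel')   # 1: dropped under '> 0', kept under '> 1'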