Merge from trunk

This commit is contained in:
Charles Haley 2010-11-26 16:46:56 +00:00
commit fde541d817
269 changed files with 40785 additions and 22072 deletions

View File

@ -4,6 +4,176 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.29
date: 2010-11-19
new features:
- title: "OSX binary build is now based on Qt 4.7. Also, the build is now Intel only and requires at least OS X 10.5.2. If you are on a PowerPC machine or an older OS X version, do not upgrade"
- title: "Content server: Allow direct navigation to a set of books in the book list."
tickets: [7453]
- title: "OS X: When deleting books, put the files into the recycle bin instead of deleting them permanently"
- title: "Add button to easily configure Hotmail as an email relay. Also improve usability of the easy config buttons"
- title: "Kobo driver: Support Currently_Reading category"
- title: "Catalog generation: Thumbnail caching, wishlist, improved description layout."
tickets: [7376]
- title: "Support for the Cybook Orizon"
bug fixes:
- title: "Fix restore to defaults in preferences incorrectly setting PDF unwrap factor to 0.0"
- title: "PDF Input: Fix unwrapping of accented characters"
- title: "Do not display dialogs asking for confirmation or showing conversion errors when calibre is minimized to system tray"
tickets: [7549]
- title: "calibre server: Fix regression that broke digest authentication when the calibre interface language was set to non-English"
- title: "EPUB Output: Do not raise an error for invalid embedded fonts in the input document."
tickets: [7567]
- title: "RTF Input: Improved conversion of tables, with support for border styles on table cells"
- title: "E-book viewer: Fix regression that broke hyphenation. Also add more language patterns for hyphenation"
- title: "SONY driver: Fix cover thumbnails being uploaded to wrong directory on windows"
- title: "Fix UnicodeDecodeError when displaying a failed metadata fetch message"
tickets: [7560]
- title: "Bulk metadata edit: Speed up remove all tags operation"
- title: "MOBI Output: Specify image sizes in pixels instead of em to accommodate Amazon's @#$%#@! MOBI renderer"
- title: "Fix bug preventing customization of builtin recipes if they are not ASCII encoded"
- title: "SONY XML cache: Gracefully handle the case where the XML db contains a reference to a file that does not exist"
improved recipes:
- Al Jazeera
- The Moscow Times
- Globe and Mail
- Washington Post
new recipes:
- title: "Hannoversche Allgemeine Zeitung"
author: "Artemis"
- title: "globes.co.il"
author: "marbs"
- title: "THN and RDS"
author: "Nexus"
- title: "pclab.pl"
author: "ravcio"
- title: "Now Toronto"
author: "Starson17"
- title: "Press releases of the German government and EU Commission"
author: "malfi"
- version: 0.7.28
date: 2010-11-12
new features:
- title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
- title: "Driver for Nook Color, Eken M001"
- title: "Add a tweak to turn off double clicking to open viewer"
- title: "Catalog generation: Add indication when a book has no formats"
tickets: [7376]
- title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
- title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
bug fixes:
- title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
tickets: [7488]
- title: "Bulk convert dialog: Hide useless restore defaults button."
tickets: [7471]
- title: "Conversion pipeline: Correctly handle input documents that encode null bytes as HTML entities"
tickets: [7355]
- title: "Fix some SONY readers not being detected on windows"
tickets: [7413]
- title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
tickets: [7455]
- title: "ODT Input: Correctly handle hyperlinks to headings that have truncated destination specifiers"
tickets: [7506]
- title: "Sony driver: Ignore invalid strings when updating XML database"
- title: "Content Server: Add day to displayed date in /mobile book listing"
- title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
tickets: [7481]
- title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
tickets: [7472]
- title: "Fix insufficient vertical space for text in the preferences dialog category listing"
- title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
- title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
- title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
- title: "Fix bug in regex used to extract charset from <meta> tags"
- title: "MOBI Output: Add support for the <q> tag"
improved recipes:
- Zeit Online
- Gamespot Review
- Politika
- Pagina12
- Irish Times
- elektrolese
new recipes:
- title: "Handelsblatt and European Voice"
author: "malfi"
- title: "Polityka and Newsweek"
author: "Mateusz Kielar"
- title: "MarcTV"
author: "Marc Toensings"
- title: "Rolling Stone"
author: "Darko Miletic"
- title: "Vedomosti"
author: "Nikolai Kotchetkov"
- title: "Hola.com"
author: "bmsleight"
- title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazon"
author: "BlonG"
- title: "SC Print Magazine"
author: "Tony Maro"
- title: "Diario Sport"
author: "Jefferson Frantz"
- version: 0.7.27
date: 2010-11-05
@ -44,6 +214,7 @@
tickets: [7356]
- title: "News download: Work around lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
tickets: [7321]
- title: "Convert comments to HTML for book details panel in separate thread to make scrolling through the book list faster when large comments are present"

View File

@ -12,8 +12,8 @@ p.title {
p.author {
margin-top:0em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
text-align: center;
text-indent: 0em;
font-size:large;
}
@ -27,17 +27,28 @@ p.author_index {
}
p.tags {
margin-top:0em;
margin-top:0.5em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
font-size:small;
text-indent: 0.0in;
}
p.description {
text-align:left;
font-style:normal;
margin-top: 0em;
p.formats {
font-size:90%;
margin-top:0em;
margin-bottom:0.5em;
text-align: left;
text-indent: 0.0in;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
p.date_index {
@ -81,6 +92,14 @@ p.unread_book {
text-indent:-2em;
}
p.wishlist_item {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-indent:-2em;
}
p.date_read {
text-align:left;
margin-top:0px;
@ -104,3 +123,14 @@ hr.annotations_divider {
margin-top:0em;
margin-bottom:0em;
}
td.publisher, td.date {
font-weight:bold;
text-align:center;
}
td.rating {
text-align: center;
}
td.thumbnail img {
-webkit-box-shadow: 4px 4px 12px #999;
}

View File

@ -355,6 +355,25 @@ h2.library_name {
color: red;
}
#booklist > #pagelist { display: none; }
#goto_page_dialog ul {
list-style-type: none;
font-size: medium;
}
#goto_page_dialog li {
margin-bottom: 1.5ex;
}
#goto_page_dialog a {
text-decoration: none;
color: blue;
}
#goto_page_dialog a:hover {
color: red;
}
#booklist .left .ui-button-text {
font-size: medium;

View File

@ -96,5 +96,6 @@
</div>
</div>
<div id="book_details_dialog"></div>
<div id="goto_page_dialog"></div>
</body>
</html>

View File

@ -202,6 +202,23 @@ function previous_page() {
else last_page();
}
function gp_internal(id) {
var gp = $('#goto_page_dialog');
gp.dialog('close');
var elem = $("#booklist #" + id);
load_page(elem);
}
function goto_page() {
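// Clone the hidden #pagelist markup into the dialog, size it to the window, and open it.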
var gp = $('#goto_page_dialog');
var pl = $('#booklist > #pagelist');
gp.html(pl.html());
gp.dialog('option', 'title', pl.attr('title'));
gp.dialog('option', 'height', $(window).height() - 100);
gp.dialog('open');
}
function load_page(elem) {
if (elem.is(":visible")) return;
var ld = elem.find('.load_data');
@ -251,6 +268,12 @@ function booklist(hide_sort) {
modal: true,
show: 'slide'
});
$("#goto_page_dialog").dialog({
autoOpen: false,
modal: true,
show: 'slide'
});
first_page();
}

View File

@ -38,6 +38,7 @@ Monocle.Browser.on = {
iPad: navigator.userAgent.indexOf("iPad") != -1,
BlackBerry: navigator.userAgent.indexOf("BlackBerry") != -1,
Android: navigator.userAgent.indexOf('Android') != -1,
MacOSX: navigator.userAgent.indexOf('Mac OS X') != -1,
Kindle3: navigator.userAgent.match(/Kindle\/3/)
}
@ -162,12 +163,23 @@ Monocle.Browser.has.transform3d = Monocle.Browser.CSSProps.isSupported([
'OPerspective',
'msPerspective'
]) && Monocle.Browser.CSSProps.supportsMediaQueryProperty('transform-3d');
Monocle.Browser.has.embedded = (top != self);
Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2");
Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");
Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf;
Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;
Monocle.Browser.has.relativeIframeWidthBug = Monocle.Browser.on.Android;
Monocle.Browser.has.jumpFlickerBug =
Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;
if (typeof window.console == "undefined") {
window.console = {
@ -1091,11 +1103,29 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
cmpt.dom.setStyles(Monocle.Styles.component);
Monocle.Styles.applyRules(cmpt.contentDocument.body, Monocle.Styles.body);
}
lockFrameWidths();
dom.find('overlay').dom.setStyles(Monocle.Styles.overlay);
dispatchEvent('monocle:styles');
}
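// Android cannot lay out iframes with relative widths reliably: hide the components while resizing, then restore them with explicit pixel widths.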
function lockingFrameWidths() {
if (!Monocle.Browser.has.relativeIframeWidthBug) { return; }
for (var i = 0, cmpt; cmpt = dom.find('component', i); ++i) {
cmpt.style.display = "none";
}
}
function lockFrameWidths() {
if (!Monocle.Browser.has.relativeIframeWidthBug) { return; }
for (var i = 0, cmpt; cmpt = dom.find('component', i); ++i) {
cmpt.style.width = cmpt.parentNode.offsetWidth+"px";
cmpt.style.display = "block";
}
}
function setBook(bk, place, callback) {
p.book = bk;
var pageCount = 0;
@ -1121,12 +1151,14 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
if (!p.initialized) {
console.warn('Attempt to resize book before initialization.');
}
lockingFrameWidths();
if (!dispatchEvent("monocle:resizing", {}, true)) {
return;
}
clearTimeout(p.resizeTimer);
p.resizeTimer = setTimeout(
function () {
lockFrameWidths();
p.flipper.moveTo({ page: pageNumber() });
dispatchEvent("monocle:resize");
},
@ -1765,12 +1797,7 @@ Monocle.Book = function (dataSource) {
function componentIdMatching(str) {
for (var i = 0; i < p.componentIds.length; ++i) {
if (str.indexOf(p.componentIds[i]) > -1) {
return p.componentIds[i];
}
}
return null;
return p.componentIds.indexOf(str) >= 0 ? str : null;
}
@ -2018,6 +2045,12 @@ Monocle.Component = function (book, id, index, chapters, source) {
function loadFrameFromURL(url, frame, callback) {
if (!url.match(/^\//)) {
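// Resolve a relative URL to an absolute one by bouncing it through a temporary anchor element.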
var link = document.createElement('a');
link.setAttribute('href', url);
url = link.href;
delete(link);
}
frame.onload = function () {
frame.onload = null;
Monocle.defer(callback);
@ -2460,7 +2493,7 @@ Monocle.Flippers.Legacy = function (reader) {
function moveTo(locus, callback) {
var fn = frameToLocus;
if (typeof callback == "function") {
fn = function () { frameToLocus(); callback(); }
fn = function (locus) { frameToLocus(locus); callback(locus); }
}
p.reader.getBook().setOrLoadPageAt(page(), locus, fn);
}
@ -2794,7 +2827,9 @@ Monocle.Dimensions.Columns = function (pageDiv) {
function scrollerWidth() {
var bdy = p.page.m.activeFrame.contentDocument.body;
if (Monocle.Browser.has.iframeDoubleWidthBug) {
if (Monocle.Browser.iOSVersion < "4.1") {
if (Monocle.Browser.on.Android) {
return bdy.scrollWidth * 1.5; // I actually have no idea why 1.5.
} else if (Monocle.Browser.iOSVersion < "4.1") {
var hbw = bdy.scrollWidth / 2;
var sew = scrollerElement().scrollWidth;
return Math.max(sew, hbw);
@ -2969,6 +3004,7 @@ Monocle.Flippers.Slider = function (reader) {
function setPage(pageDiv, locus, callback) {
ensureWaitControl();
p.reader.getBook().setOrLoadPageAt(
pageDiv,
locus,
@ -3048,6 +3084,7 @@ Monocle.Flippers.Slider = function (reader) {
checkPoint(boxPointX);
p.turnData.releasing = true;
showWaitControl(lowerPage());
if (dir == k.FORWARDS) {
if (
@ -3088,14 +3125,18 @@ Monocle.Flippers.Slider = function (reader) {
function onGoingBackward(x) {
var lp = lowerPage();
var lp = lowerPage(), up = upperPage();
showWaitControl(up);
jumpOut(lp, // move lower page off-screen
function () {
flipPages(); // flip lower to upper
setPage( // set upper page to previous
lp,
getPlace(lowerPage()).getLocus({ direction: k.BACKWARDS }),
function () { lifted(x); }
function () {
lifted(x);
hideWaitControl(up);
}
);
}
);
@ -3103,8 +3144,10 @@ Monocle.Flippers.Slider = function (reader) {
function afterGoingForward() {
var up = upperPage();
var up = upperPage(), lp = lowerPage();
if (p.interactive) {
showWaitControl(up);
showWaitControl(lp);
setPage( // set upper (off screen) to current
up,
getPlace().getLocus({ direction: k.FORWARDS }),
@ -3113,6 +3156,7 @@ Monocle.Flippers.Slider = function (reader) {
}
);
} else {
showWaitControl(lp);
flipPages();
jumpIn(up, function () { prepareNextPage(announceTurn); });
}
@ -3171,6 +3215,8 @@ Monocle.Flippers.Slider = function (reader) {
function announceTurn() {
hideWaitControl(upperPage());
hideWaitControl(lowerPage());
p.reader.dispatchEvent('monocle:turn');
resetTurnData();
}
@ -3319,12 +3365,14 @@ Monocle.Flippers.Slider = function (reader) {
function jumpIn(pageDiv, callback) {
setX(pageDiv, 0, { duration: 1 }, callback);
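// An instantaneous (0ms) jump flickers on Mac WebKit, so keep a 1ms transition there.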
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
setX(pageDiv, 0, { duration: dur }, callback);
}
function jumpOut(pageDiv, callback) {
setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: 1 }, callback);
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
}
@ -3357,6 +3405,28 @@ Monocle.Flippers.Slider = function (reader) {
}
function ensureWaitControl() {
if (p.waitControl) { return; }
p.waitControl = {
createControlElements: function (holder) {
return holder.dom.make('div', 'flippers_slider_wait');
}
}
p.reader.addControl(p.waitControl, 'page');
}
function showWaitControl(page) {
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
ctrl.style.opacity = 0.5;
}
function hideWaitControl(page) {
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
ctrl.style.opacity = 0;
}
API.pageCount = p.pageCount;
API.addPage = addPage;
API.getPlace = getPlace;

26 binary image files added (contents not shown; sizes range from 445 B to 5.9 KiB).

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
180.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = '180.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Titulares', u'http://www.180.com.uy/feed.php')
]
def get_cover_url(self):
return 'http://www.180.com.uy/tplef/img/logo.gif'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,10 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
aljazeera.net
english.aljazeera.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -12,41 +10,59 @@ class AlJazeera(BasicNewsRecipe):
title = 'Al Jazeera in English'
__author__ = 'Darko Miletic'
description = 'News from Middle East'
language = 'en'
language = 'en'
publisher = 'Al Jazeera'
category = 'news, politics, middle east'
simultaneous_downloads = 1
delay = 4
oldest_article = 1
delay = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'iso-8859-1'
remove_javascript = True
use_embedded_content = False
extra_css = """
body{font-family: Arial,sans-serif}
#ctl00_cphBody_dvSummary{font-weight: bold}
#dvArticleDate{font-size: small; color: #999999}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
keep_only_tags = [
dict(attrs={'id':['DetailedTitle','ctl00_cphBody_dvSummary','dvArticleDate']})
,dict(name='td',attrs={'class':'DetailedSummary'})
]
remove_tags = [
dict(name=['object','link'])
dict(name=['object','link','table','meta','base','iframe','embed'])
,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
]
feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
def get_article_url(self, article):
artlurl = article.get('link', None)
return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(face=True):
del item['face']
td = soup.find('td',attrs={'class':'DetailedSummary'})
if td:
td.name = 'div'
spn = soup.find('span',attrs={'id':'DetailedTitle'})
if spn:
spn.name='h1'
for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
itm.name = 'div'
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -0,0 +1,110 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.arcamax.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Arcamax(BasicNewsRecipe):
title = 'Arcamax'
__author__ = 'Starson17'
__version__ = '1.03'
__date__ = '25 November 2010'
description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
category = 'news, comics'
language = 'en'
use_embedded_content= False
no_stylesheets = True
remove_javascript = True
cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
####### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ########
num_comics_to_get = 7
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':['toon']}),
]
def parse_index(self):
feeds = []
for title, url in [
######## COMICS - GENERAL ########
#(u"9 Chickweed Lane", u"http://www.arcamax.com/ninechickweedlane"),
#(u"Agnes", u"http://www.arcamax.com/agnes"),
#(u"Andy Capp", u"http://www.arcamax.com/andycapp"),
(u"BC", u"http://www.arcamax.com/bc"),
#(u"Baby Blues", u"http://www.arcamax.com/babyblues"),
#(u"Beetle Bailey", u"http://www.arcamax.com/beetlebailey"),
(u"Blondie", u"http://www.arcamax.com/blondie"),
#(u"Boondocks", u"http://www.arcamax.com/boondocks"),
#(u"Cathy", u"http://www.arcamax.com/cathy"),
#(u"Daddys Home", u"http://www.arcamax.com/daddyshome"),
(u"Dilbert", u"http://www.arcamax.com/dilbert"),
#(u"Dinette Set", u"http://www.arcamax.com/thedinetteset"),
(u"Dog Eat Doug", u"http://www.arcamax.com/dogeatdoug"),
(u"Doonesbury", u"http://www.arcamax.com/doonesbury"),
#(u"Dustin", u"http://www.arcamax.com/dustin"),
(u"Family Circus", u"http://www.arcamax.com/familycircus"),
(u"Garfield", u"http://www.arcamax.com/garfield"),
#(u"Get Fuzzy", u"http://www.arcamax.com/getfuzzy"),
#(u"Girls and Sports", u"http://www.arcamax.com/girlsandsports"),
#(u"Hagar the Horrible", u"http://www.arcamax.com/hagarthehorrible"),
#(u"Heathcliff", u"http://www.arcamax.com/heathcliff"),
#(u"Jerry King Cartoons", u"http://www.arcamax.com/humorcartoon"),
#(u"Luann", u"http://www.arcamax.com/luann"),
#(u"Momma", u"http://www.arcamax.com/momma"),
#(u"Mother Goose and Grimm", u"http://www.arcamax.com/mothergooseandgrimm"),
(u"Mutts", u"http://www.arcamax.com/mutts"),
#(u"Non Sequitur", u"http://www.arcamax.com/nonsequitur"),
#(u"Pearls Before Swine", u"http://www.arcamax.com/pearlsbeforeswine"),
#(u"Pickles", u"http://www.arcamax.com/pickles"),
#(u"Red and Rover", u"http://www.arcamax.com/redandrover"),
#(u"Rubes", u"http://www.arcamax.com/rubes"),
#(u"Rugrats", u"http://www.arcamax.com/rugrats"),
(u"Speed Bump", u"http://www.arcamax.com/speedbump"),
(u"Wizard of Id", u"http://www.arcamax.com/wizardofid"),
(u"Zits", u"http://www.arcamax.com/zits"),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
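# Walk backwards through each strip's 'Previous' link to collect the last num_comics_to_get pages, then reverse them into chronological order.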
title = 'Temp'
current_articles = []
pages = range(1, self.num_comics_to_get+1)
for page in pages:
page_soup = self.index_to_soup(url)
if page_soup:
title = page_soup.find(name='div', attrs={'class':'toon'}).p.img['alt']
page_url = url
prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'next'}, text='Previous').parent['href']
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
url = prev_page_url
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
main_comic = soup.find('p',attrs={'class':'m0'})
if main_comic.a['target'] == '_blank':
main_comic.a.img['id'] = 'main_comic'
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img#main_comic {max-width:100%; min-width:100%;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -13,6 +13,7 @@ class Dnevnik(BasicNewsRecipe):
labguage = 'sl'
no_stylesheets = True
use_embedded_content = False
language = 'sl'
conversion_options = {'linearize_tables' : True}

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1290689337(BasicNewsRecipe):
__author__ = 'Anat R.'
language = 'th'
title = u'Bangkok Biz News'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
feeds = [(u'Headlines',
u'http://www.bangkokbiznews.com/home/services/rss/home.xml'),
(u'Politics', u'http://www.bangkokbiznews.com/home/services/rss/politics.xml'),
(u'Business', u'http://www.bangkokbiznews.com/home/services/rss/business.xml'),
(u'Finance', u'http://www.bangkokbiznews.com/home/services/rss/finance.xml'),
(u'Technology', u'http://www.bangkokbiznews.com/home/services/rss/it.xml')]
remove_tags_before = dict(name='div', attrs={'class':'box-Detailcontent'})
remove_tags_after = dict(name='p', attrs={'class':'allTags'})
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'id': 'content-tools'}))
remove_tags.append(dict(name = 'p', attrs = {'class':'allTags'}))
remove_tags.append(dict(name = 'div', attrs = {'id':'morePic'}))
remove_tags.append(dict(name = 'ul', attrs = {'class':'tabs-nav'}))

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
bitacora.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'bitacora.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'iso-8859-1'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['txt'])]
remove_tags = [
dict(name='div', attrs={'class':'tablafoot'}),
dict(name=['object','h4']),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.bitacora.com.uy'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'class':'imgtapa'})
if link_item:
cover_url = "http://www.bitacora.com.uy/"+link_item['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
description = 'This is a recipe of BizPortal.co.il.'
cover_url = 'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'
title = u'BizPortal'
language = 'he'
__author__ = 'marbs'
extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
simultaneous_downloads = 5
remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
remove_empty_feeds = True
oldest_article = 1
max_articles_per_feed = 100
remove_attributes = ['width']
# keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
remove_tags = [dict(name='img', attrs={'src':['images/bizlogo_nl.gif']})]
#preprocess_regexps = [
# (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
# ]
feeds = [(u'חדשות שוק ההון', u'http://www.bizportal.co.il/shukhahon/messRssUTF2.xml'),
(u'חדשות וול סטריט בעברית', u'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'),
(u'שיווק ופרסום', u'http://www.bizportal.co.il/shukhahon/messRssUTF145.xml'),
(u'משפט', u'http://www.bizportal.co.il/shukhahon/messRssUTF3.xml'),
(u'ניתוח טכני', u'http://www.bizportal.co.il/shukhahon/messRssUTF5.xml'),
(u'דיני עבודה ושכר', u'http://www.bizportal.co.il/shukhahon/messRssUTF6.xml'),
(u'מיסוי', u'http://www.bizportal.co.il/shukhahon/messRssUTF7.xml'),
(u'טאבו', u'http://www.bizportal.co.il/shukhahon/messRssUTF8.xml'),
(u'נדל"ן', u'http://www.bizportal.co.il/shukhahon/messRssUTF160.xml'),
]
def print_version(self, url):
split1 = url.split("=")
print_url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02print.shtml?mid=' + split1[1]
return print_url
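As an illustration, a hypothetical article URL (the mid value here is made up) would be rewritten as follows:
# url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02.shtml?mid=12345'
# url.split('=')[1] -> '12345'
# print_url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02print.shtml?mid=12345'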

View File

@ -1,18 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 mode: python -*-
# Find the newest version of this recipe here:
# https://github.com/consti/BrandEins-Recipe/raw/master/brandeins.recipe
__license__ = 'GPL v3'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>'
__version__ = '0.95'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
__version__ = '0.96'
''' http://brandeins.de - Wirtschaftsmagazin '''
import re
import string
from calibre.web.feeds.recipes import BasicNewsRecipe
class BrandEins(BasicNewsRecipe):
title = u'Brand Eins'
title = u'brand eins'
__author__ = 'Constantin Hofstetter'
description = u'Wirtschaftsmagazin'
publisher ='brandeins.de'
@ -22,11 +26,14 @@ class BrandEins(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
language = 'de'
publication_type = 'magazine'
needs_subscription = 'optional'
# 2 is the last full magazine (default)
# 1 is the newest (but not full)
# 3 is one before 2 etc.
which_ausgabe = 2
# This value can be set via the username field.
default_issue = 2
keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})]
@ -61,17 +68,31 @@ class BrandEins(BasicNewsRecipe):
return soup
def get_cover(self, soup):
cover_url = None
cover_item = soup.find('div', attrs = {'class': 'cover_image'})
if cover_item:
cover_url = 'http://www.brandeins.de/' + cover_item.img['src']
return cover_url
def parse_index(self):
feeds = []
archive = "http://www.brandeins.de/archiv.html"
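# The optional username field doubles as an issue selector: a numeric value there overrides default_issue.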
issue = self.default_issue
if self.username:
try:
issue = int(self.username)
except:
pass
soup = self.index_to_soup(archive)
latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe]
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
url = pre_latest_issue.get('href', False)
# Get the title for the magazin - build it out of the title of the cover - take the issue and year;
self.title = "Brand Eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date')
# Get month and year of the magazine issue - build it out of the title of the cover
self.timefmt = " " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
url = 'http://brandeins.de/'+url
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@ -83,6 +104,7 @@ class BrandEins(BasicNewsRecipe):
def brand_eins_parse_latest_issue(self, url):
soup = self.index_to_soup(url)
self.cover_url = self.get_cover(soup)
article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})]
titles_and_articles = []
@ -123,3 +145,4 @@ class BrandEins(BasicNewsRecipe):
current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
titles_and_articles.append([chapter_title, current_articles])
return titles_and_articles

View File

@ -11,7 +11,6 @@ class AdvancedUserRecipe1275798572(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
masthead_url = 'http://www.cbc.ca/includes/gfx/cbcnews_logo_09.gif'
cover_url = 'http://img692.imageshack.us/img692/2814/cbc.png'
keep_only_tags = [dict(name='div', attrs={'id':['storyhead','storybody']})]

View File

@ -0,0 +1,32 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CNetJapan(BasicNewsRecipe):
title = u'CNET Japan'
oldest_article = 3
max_articles_per_feed = 30
__author__ = 'Hiroshi Miura'
feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
language = 'ja'
encoding = 'Shift_JIS'
remove_javascript = True
preprocess_regexps = [
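# Strip page chrome: cut everything after the content-end marker and the ad header, and drop the related-tags block.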
(re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
lambda match: '</body>'),
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: '</body>'),
(re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
lambda match: '<!-- removed -->'),
]
remove_tags_before = dict(name="h2")
remove_tags = [
{'class':"social_bkm_share"},
{'class':"social_bkm_print"},
{'class':"block20 clearfix"},
dict(name="div",attrs={'id':'bookreview'}),
]
remove_tags_after = {'class':"block20"}

View File

@ -347,6 +347,7 @@ class Comics(BasicNewsRecipe):
title = strip_tag['title']
print 'title: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
current_articles.reverse()
return current_articles
extra_css = '''

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
cosmohispano.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'Cosmopolitan'
__author__ = 'Gustavo Azambuja'
description = 'Revista Cosmopolitan, Edicion Espanola'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 1
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables': True}
oldest_article = 180
max_articles_per_feed = 100
keep_only_tags = [
dict(id=['contenido']),
dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
]
remove_tags = [
dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
dict(name='div', attrs={'id':'comment'}),
dict(name='table', attrs={'class':'pagenav'}),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
img {float:left; clear:both; margin:10px}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
]
def preprocess_html(self, soup):
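# Linearize layout tables: turn table-related tags into divs and strip their presentational attributes.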
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_cover_url(self):
index = 'http://www.cosmohispano.com/revista'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'class':'img_portada'})
if link_item:
cover_url = "http://www.cosmohispano.com"+link_item['src']
return cover_url

View File

@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class deredactie(BasicNewsRecipe):
title = u'Deredactie.be'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
language = 'de'
keep_only_tags = []
__author__ = 'malfi'
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlehead'}))
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlebody'}))
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'id': 'story'}))
remove_tags.append(dict(name = 'div', attrs = {'id': 'useractions'}))
remove_tags.append(dict(name = 'hr'))
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def parse_index(self):
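# Discover the category pages from the navigation menu, then collect de-duplicated article links for each category.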
categories = []
catnames = {}
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
a = elem.find('a', href=True)
m = re.search('(?<=/)[^/]*$', a['href'])
cat = str(m.group(0))
categories.append(cat)
catnames[cat] = a['title']
self.log("found cat %s\n" % catnames[cat])
feeds = []
for cat in categories:
articles = []
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
skip_this_article = False
url = a['href'].strip()
if url.startswith('/'):
url = 'http://www.deredactie.be' + url
myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
for article in articles :
if article['url'] == url :
skip_this_article = True
self.log("SKIPPING DUP %s" % url)
break
if skip_this_article :
continue;
articles.append(myarticle)
self.log("Adding URL %s\n" %url)
if articles:
feeds.append((catnames[cat], articles))
return feeds

View File

@ -0,0 +1,76 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_bs(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'Vijesti iz Njemacke i svijeta'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'bs'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,sans1,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [
(u'Politika' , u'http://rss.dw-world.de/rdf/rss-bos-pol')
,(u'Evropa' , u'http://rss.dw-world.de/rdf/rss-bos-eu' )
,(u'Kiosk' , u'http://rss.dw-world.de/rdf/rss-bos-eu' )
,(u'Ekonomija i Nauka' , u'http://rss.dw-world.de/rdf/rss-bos-eco')
,(u'Kultura' , u'http://rss.dw-world.de/rdf/rss-bos-cul')
,(u'Sport' , u'http://rss.dw-world.de/rdf/rss-bos-sp' )
]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
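# Flatten anchors: text-only links are replaced by their text, image links become plain divs, and anything else is replaced by its string content.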
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_en(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'News from Germany and World'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'en'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [(u'All news', u'http://rss.dw-world.de/rdf/rss-en-all')]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_es(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'Noticias desde Alemania y mundo'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'es'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-sp-all')]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_hr(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'Vijesti iz Njemacke i svijeta'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'hr'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,sans1,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [
(u'Svijet' , u'http://rss.dw-world.de/rdf/rss-cro-svijet')
,(u'Europa' , u'http://rss.dw-world.de/rdf/rss-cro-eu' )
,(u'Njemacka' , u'http://rss.dw-world.de/rdf/rss-cro-ger' )
,(u'Vijesti' , u'http://rss.dw-world.de/rdf/rss-cro-all' )
]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_pt(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'Noticias desde Alemania y mundo'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'pt'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-br-all')]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
dw-world.de
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle_sr(BasicNewsRecipe):
title = 'Deutsche Welle'
__author__ = 'Darko Miletic'
description = 'Vesti iz Nemacke i sveta'
publisher = 'Deutsche Welle'
category = 'news, politics, Germany'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
language = 'sr'
publication_type = 'newsportal'
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,sans1,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.caption{font-size: x-small; display: block; margin-bottom: 0.4em}
"""
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
remove_tags = [
dict(name=['iframe','embed','object','form','base','meta','link'])
,dict(attrs={'class':'actionFooter'})
]
keep_only_tags=[dict(attrs={'class':'ArticleDetail detail'})]
remove_attributes = ['height','width','onclick','border','lang']
feeds = [
(u'Politika' , u'http://rss.dw-world.de/rdf/rss-ser-pol' )
,(u'Srbija' , u'http://rss.dw-world.de/rdf/rss-ser-pol-ser' )
,(u'Region' , u'http://rss.dw-world.de/rdf/rss-ser-pol-region' )
,(u'Evropa' , u'http://rss.dw-world.de/rdf/rss-ser-pol-eu' )
,(u'Nemacka' , u'http://rss.dw-world.de/rdf/rss-ser-pol-ger' )
,(u'Svet' , u'http://rss.dw-world.de/rdf/rss-ser-pol-ger' )
,(u'Pregled stampe', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger')
,(u'Nauka Tehnika Medicina', u'http://rss.dw-world.de/rdf/rss-ser-science')
,(u'Kultura' , u'http://rss.dw-world.de/rdf/rss-ser-cul' )
]
def print_version(self, url):
artl = url.rpartition('/')[2]
return 'http://www.dw-world.de/popups/popup_printcontent/' + artl
def preprocess_html(self, soup):
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
del item['href']
if item.has_key('target'):
del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.elpais.com.uy/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'El Pais - Uruguay'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay y el resto del mundo'
publisher = 'EL PAIS S.A.'
category = 'news, politics, Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 2
encoding = 'iso-8859-1'
masthead_url = 'http://www.elpais.com.uy/Images/09/cabezal/logo_PDEP.png'
publication_type = 'newspaper'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 200
keep_only_tags = [
dict(name='h1'),
dict(name='div', attrs={'id':'Contenido'})
]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
dict(name='p', attrs={'class':'FacebookLikeButton'}),
dict(name=['object','form']),
dict(name=['object','table']) ]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
body{font-family: Verdana,Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block;}
'''
feeds = [
(u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
(u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
(u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
(u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
(u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
(u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
(u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
(u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.elpais.com.uy'
soup = self.index_to_soup(index)
link_item = soup.find('div',attrs={'class':'boxmedio box257'})
print link_item
if link_item:
cover_url = 'http://www.elpais.com.uy'+link_item.img['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
japan.engadget.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EngadgetJapan(BasicNewsRecipe):
title = u'Engadget\u65e5\u672c\u7248'
language = 'ja'
__author__ = 'Hiroshi Miura'
cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]

View File

@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe
LANGUAGE = 'de'
def feedlink(num):
return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id='+\
str(num)+'&lang='+ LANGUAGE
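For example, feedlink(64), used by the 'Pressemitteilung des Tages' feed below, expands to:
# 'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=64&lang=de'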
class EUCommissionPress(BasicNewsRecipe):
title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
__author__ = 'malfi'
language = LANGUAGE
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'pressReleaseContentMain'}))
remove_tags = []
feeds = [
(u'Pressemitteilung des Tages',feedlink(64)),
(u'Presidency',feedlink(137)),
(u'Foreign affairs and security policy',feedlink(138)),
(u'Agriculture and rural development',feedlink(139)),
(u'Budget and financial programming ',feedlink(140)),
(u'Climate action',feedlink(141)),
(u'Competition',feedlink(142)),
(u'Development',feedlink(143)),
(u'Digital agenda',feedlink(144)),
(u'Economic and monetary affairs',feedlink(145)),
(u'Education, culture, multilingualism and youth ',feedlink(146)),
(u'Employment, social Affairs and inclusion ',feedlink(147)),
(u'Energy',feedlink(148)),
(u'Enlargement and European neighbourhood policy',feedlink(149)),
(u'Environment',feedlink(150)),
(u'Health and consumer policy',feedlink(151)),
(u'Home affairs',feedlink(152)),
(u'Industry and entrepreneurship',feedlink(153)),
(u'Inter-Institutional relations and administration',feedlink(154)),
(u'Internal market and services',feedlink(155)),
(u'International cooperation, humanitarian aid and crisis response',feedlink(156)),
(u'Justice, fundamental rights and citizenship',feedlink(157)),
(u'Maritime affairs and fisheries',feedlink(158)),
(u'Regional policy',feedlink(159)),
(u'Research and innovation',feedlink(160)),
(u'Taxation and customs union, audit and anti-fraud',feedlink(161)),
(u'Trade',feedlink(162)),
(u'Transport',feedlink(163))
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -0,0 +1,51 @@
from calibre.web.feeds.news import BasicNewsRecipe
class EuropeanVoice(BasicNewsRecipe):
title = u'European Voice'
__author__ = 'malfi'
oldest_article = 14
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
language = 'en'
keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
feeds = [
(u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
(u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
(u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
(u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
(u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
(u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
(u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
(u'Events',u'http://www.europeanvoice.com/Rss/10.xml'),
(u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
(u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
(u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
(u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
(u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
(u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
(u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
(u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
(u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
(u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
(u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
(u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
return url + '?bPrint=1'
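# e.g. a hypothetical article URL http://www.europeanvoice.com/article/foo.aspx
# becomes http://www.europeanvoice.com/article/foo.aspx?bPrint=1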
def preprocess_html(self, soup):
denied = soup.findAll(True,text='Subscribers')
if denied:
raise Exception('Skipping subscriber-only article')
return soup

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'freeway.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Revista Freeway, Montevideo, Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 1
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables': True}
oldest_article = 180
max_articles_per_feed = 100
keep_only_tags = [
dict(id=['contenido']),
dict(name='a', attrs={'class':'titulo_art_ppal'}),
dict(name='img', attrs={'class':'recuadro'}),
dict(name='td', attrs={'class':'txt_art_ppal'})
]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
img {float:left; clear:both; margin:10px}
p {font-family:Arial,Helvetica,sans-serif;}
'''
def parse_index(self):
feeds = []
for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
articles = self.art_parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
def art_parse_section(self, url):
soup = self.index_to_soup(url)
div = soup.find(attrs={'id': 'tbl_1'})
current_articles = []
for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}):
if tag.get('class') == 'link-list-heading':
break
for td in tag.findAll('td'):
a = td.find('a', attrs= {'class': 'titulo_articulos'})
if a is None:
continue
title = self.tag_to_string(a)
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://freeway.com.uy'+url
p = td.find('p', attrs= {'class': 'txt_articulos'})
description = self.tag_to_string(p)
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', description)
current_articles.append({'title': title, 'url': url, 'description':description, 'date':''})
return current_articles
def preprocess_html(self, soup):
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_cover_url(self):
return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__author__ = u'Marc T\xf6nsing'
__author__ = u'Marc Toensing'
from calibre.web.feeds.news import BasicNewsRecipe
@@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
no_javascript = True
feeds = [
('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('link') + '?print=1'

View File

@@ -0,0 +1,28 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GermanGovermentPress(BasicNewsRecipe):
title = u'Pressemitteilungen der Bundesregierung'
oldest_article = 14
__author__ = 'malfi'
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
language = 'de'
keep_only_tags = []
keep_only_tags.append(dict(name = 'h2'))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'textblack'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subtitle'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text'}))
remove_tags = []
feeds = [ (u'Pressemitteilungen',u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf') ]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
m = re.search(r'^(.*)\.html$', url)
return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
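# e.g. a hypothetical release URL .../Content/DE/Pressemitteilung/beispiel.html
# becomes .../Content/DE/Pressemitteilung/beispiel,layoutVariant=Druckansicht.html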

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__copyright__ = '2010, Szing'
__docformat__ = 'restructuredtext en'
'''
@@ -10,49 +10,52 @@ globeandmail.com
from calibre.web.feeds.news import BasicNewsRecipe
class GlobeAndMail(BasicNewsRecipe):
title = u'Globe and Mail'
language = 'en_CA'
__author__ = 'Kovid Goyal'
class AdvancedUserRecipe1287083651(BasicNewsRecipe):
title = u'Globe & Mail'
__license__ = 'GPL v3'
__author__ = 'Szing'
oldest_article = 2
max_articles_per_feed = 10
no_stylesheets = True
extra_css = '''
h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
h4 {margin-top: 0px;}
#byline { font-family: monospace; font-weight:bold; }
#placeline {font-weight:bold;}
#credit {margin-top:0px;}
.tag {font-size: 22pt;}'''
description = 'Canada\'s national newspaper'
keep_only_tags = [dict(name='article')]
remove_tags = [dict(name='aside'),
dict(name='footer'),
dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
]
feeds = [
(u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
(u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
(u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
(u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
(u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
(u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
(u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
(u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
(u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
]
max_articles_per_feed = 100
encoding = 'utf8'
publisher = 'Globe & Mail'
language = 'en_CA'
extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
if '/video/' not in url:
return url
feeds = [
(u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
(u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
(u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
(u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
(u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
(u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
(u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss'),
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss')
]
keep_only_tags = [
dict(name='h1'),
dict(name='h2', attrs={'id':'articletitle'}),
dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
dict(id='article'),
dict(name='table', attrs={'class':'todays-market'}),
dict(name='header', attrs={'id':'leadheader'})
]
remove_tags = [
dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
]
#this has to be here or the text in the article appears twice.
remove_tags_after = [dict(id='article')]
#Use the mobile version rather than the web version
def print_version(self, url):
return url + '&service=mobile'

View File

@@ -0,0 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
description = 'Globes, Israeli business and financial news (Hebrew)'
cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
title = u'Globes'
language = 'he'
__author__ = 'marbs'
extra_css='img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }'
simultaneous_downloads = 5
remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
remove_attributes = ['width','style']
feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
(u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
(u'וול סטריט ושווקי העולם', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
(u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
(u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
(u'נתח שוק וצרכנות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
(u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
(u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
(u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
(u'קניון המניות - טור שבועי', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
(u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]
def print_version(self, url):
split1 = url.split("=")
print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + split1[1]
return print_url
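# e.g. a hypothetical feed URL ending in '?did=1234' maps to
# http://www.globes.co.il/serve/globes/printwindow.asp?did=1234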
def preprocess_html(self, soup):
soup.find('tr',attrs={'bgcolor':'black'}).findPrevious('tr').extract()
soup.find('tr',attrs={'bgcolor':'black'}).extract()
return soup
def fixChars(self,string):
# Replace lsquo (\x91) with a proper left single quotation mark
fixed = re.sub(u'\x91', u'\u2018', string)
return fixed

View File

@@ -0,0 +1,41 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Handelsblatt(BasicNewsRecipe):
title = u'Handelsblatt'
__author__ = 'malfi'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
language = 'de'
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]
feeds = [
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
(u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
(u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
(u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
(u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
(u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
(u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
(u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
(u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
m = re.search('(?<=;)[0-9]*', url)
return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
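# e.g. a hypothetical article URL ending in ';2693759' yields
# http://www.handelsblatt.com/_b=2693759,_p=21,_t=ftprint,doc_page=0;printpage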

View File

@@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1287519083(BasicNewsRecipe):
title = u'Hannoversche Allgemeine Zeitung'
oldest_article = 1
__author__ = 'Artemis'
max_articles_per_feed = 30
language = 'de'
no_stylesheets = True
feeds = [
#(u'Schlagzeilen', u'http://www.haz.de/rss/feed/haz_schlagzeilen'),
(u'Politik', u'http://www.haz.de/rss/feed/haz_politik'),
(u'Wirtschaft', u'http://www.haz.de/rss/feed/haz_wirtschaft'),
(u'Panorama', u'http://www.haz.de/rss/feed/haz_panorama'),
(u'Wissen', u'http://www.haz.de/rss/feed/haz_wissen'),
(u'Kultur', u'http://www.haz.de/rss/feed/haz_kultur'),
(u'Sp\xe4tvorstellung', u'http://www.haz.de/rss/feed/haz_spaetvorstellung'),
(u'Hannover & Region', u'http://www.haz.de/rss/feed/haz_hannoverregion'),
(u'Netzgefl\xfcster', u'http://www.haz.de/rss/feed/haz_netzgefluester'),
(u'Meinung', u'http://www.haz.de/rss/feed/haz_meinung'),
(u'ZiSH', u'http://www.haz.de/rss/feed/haz_zish'),
(u'Medien', u'http://www.haz.de/rss/feed/haz_medien'),
#(u'Sport', u'http://www.haz.de/rss/feed/haz_sport'),
#(u'Hannover 96', u'http://www.haz.de/rss/feed/haz_hannover96')
]
remove_tags_before = dict(id='modul_artikel')
remove_tags_after = dict(id='articlecontent')
remove_tags = [
dict(id='articlesidebar'),
dict(name='div', attrs={'class':['articlecomment',
'articlebookmark', 'teaser_anzeige', 'teaser_umfrage',
'navigation', 'subnavigation']})
]

View File

@@ -13,7 +13,6 @@ class IrishTimes(BasicNewsRecipe):
language = 'en_IE'
timefmt = ' (%A, %B %d, %Y)'
oldest_article = 3
no_stylesheets = True
simultaneous_downloads= 1
@@ -33,13 +32,13 @@ class IrishTimes(BasicNewsRecipe):
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
]
def print_version(self, url):
if url.count('rss.feedsportal.com'):
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
else:
u = url.replace('.html','_pf.html')
return u
if url.count('rss.feedsportal.com'):
u = 'http://www.irishtimes.com' + \
(((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
else:
u = url.replace('.html','_pf.html')
return u
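# rss.feedsportal.com wraps the target path into its own URL, escaping '/'
# as '0C' and '0' as '0A' (inferred from the replacements above); decoding
# recovers the _pf.html print page on www.irishtimes.com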
def get_article_url(self, article):
return article.link

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.jiji.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JijiDotCom(BasicNewsRecipe):
title = u'\u6642\u4e8b\u901a\u4fe1'
__author__ = 'Hiroshi Miura'
description = 'World News from Jiji Press'
publisher = 'Jiji Press Ltd.'
category = 'news'
encoding = 'utf-8'
oldest_article = 6
max_articles_per_feed = 100
language = 'ja'
cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_after = dict(id="ad_google")

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
ladiaria.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'La Diaria'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['article'])]
remove_tags = [
dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
dict(name='div', attrs={'id':'discussion'}),
dict(name=['object','link'])
]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://ladiaria.com/feeds/articulos')
]
def get_cover_url(self):
return 'http://ladiaria.com/edicion/imagenportada/'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@@ -54,10 +54,7 @@ class LaJornada_mx(BasicNewsRecipe):
preprocess_regexps = [
(re.compile( r'<div class="inicial">(.*)</div><p class="s-s">'
,re.DOTALL|re.IGNORECASE)
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">'),
(re.compile( r'<q>(.*?)</q>'
,re.DOTALL|re.IGNORECASE)
,lambda match: '"' + match.group(1) + '"')
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
]
keep_only_tags = [

View File

@@ -8,7 +8,7 @@ from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaRazon_Bol(BasicNewsRecipe):
title = 'La Razón - Bolivia'
title = u'La Razón - Bolivia'
__author__ = 'Darko Miletic'
description = 'El diario nacional de Bolivia'
publisher = 'Praxsis S.R.L.'

View File

@@ -20,11 +20,14 @@ class Lanacion(BasicNewsRecipe):
publication_type = 'newspaper'
remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
extra_css = """ h1{font-family: Georgia,serif}
extra_css = """ h1{font-family: Georgia,serif}
h2{color: #626262}
body{font-family: Arial,sans-serif}
img{margin-top: 0.5em; margin-bottom: 0.2em}
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
.notaFecha{color: #808080}
.notaEpigrafe{font-size: x-small}
.topNota h1{font-family: Arial,sans-serif} """
.topNota h1{font-family: Arial,sans-serif}
"""
conversion_options = {
@@ -38,12 +41,12 @@ class Lanacion(BasicNewsRecipe):
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix']})
,dict(name=['iframe','embed','object','form','base','hr'])
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
]
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
remove_attributes = ['height','width','visible']
remove_attributes = ['height','width','visible','onclick','data-count','name']
feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )

View File

@@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Japanese traditional newspaper Mainichi Daily News'
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}

View File

@@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 100
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
publisher = 'Mainichi Daily News'
category = 'news, Japan, IT, Electronics'
language = 'ja'
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}

View File

@@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch MarcTV.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MarcTVde(BasicNewsRecipe):
title = 'Marc Toensings Visionen'
description = 'Marc Toensings Visionen'
language = 'de'
__author__ = 'Marc Toensing'
max_articles_per_feed = 40
oldest_article = 665
use_embedded_content = False
remove_tags = []
keep_only_tags = [dict(name='div', attrs={'class':["content"]})]
feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]
extra_css = '#wrapper .entry p img{width:620px; height: 270px;}'
def get_cover_url(self):
return 'http://marctv.de/marctv.png'

View File

@@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1290412756(BasicNewsRecipe):
__author__ = 'Anat R.'
title = u'Matichon'
oldest_article = 7
language = 'th'
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
feeds = [(u'News', u'http://www.matichon.co.th/rss/news_article.xml'),
(u'Columns', u'http://www.matichon.co.th/rss/news_columns.xml'),
(u'Politics', u'http://www.matichon.co.th/rss/news_politic.xml'),
(u'Business', u'http://www.matichon.co.th/rss/news_business.xml'),
(u'World', u'http://www.matichon.co.th/rss/news_world.xml'),
(u'Sports', u'http://www.matichon.co.th/rss/news_sport.xml'),
(u'Entertainment', u'http://www.matichon.co.th/rss/news_entertainment.xml')]
keep_only_tags = []
keep_only_tags.append(dict(name = 'h3', attrs = {'class' : 'read-h'}))
keep_only_tags.append(dict(name = 'p', attrs = {'class' : 'read-time'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'}))

View File

@@ -3,13 +3,28 @@ __copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import datetime
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
from calibre import __appname__, strftime
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.date import now as nowf
class MPHKRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
@@ -24,27 +39,131 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
#extra_css = 'img {float:right; margin:4px;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
#dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
dict(attrs={'class':['photo']}),
dict(attrs={'id':['newscontent']}),
dict(attrs={'id':['newscontent01','newscontent02']})]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']})] # for the finance page
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
]
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit and add an
# additional '_' in front of it
# not working, may need to move this to preprocess_html() method
# (the original draft scanned url.find(d) for every digit d in '0123456789',
# took the smallest non-negative index minIdx, and returned
# url[0:minIdx] + '_' + url[minIdx+1:])
return url
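# A minimal sketch of the same rewrite using re (editor's assumption, not
# part of the original recipe and untested against the image server):
#   m = re.search(r'\d', url)
#   if m is not None:
#       return url[:m.start()] + '_' + url[m.start()+1:]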
def get_fetchdate(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 5.30am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.5/24)
# convert UTC to local hk time - at around HKT 6.00am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
return dt_local.strftime("%Y%m%d")
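# with the +2 hour shift the returned date string rolls over at 22:00 UTC,
# i.e. 06:00 HKT, by which time the day's edition should be online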
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
feeds = []
dateStr = self.get_fetchdate()
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
# special - eco-friendly
# eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
# if eco_articles:
# feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
# special - entertainment
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append(('Entertainment', ent_articles))
return feeds
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
for i in a:
url = i.get('href', False)
if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
title = self.tag_to_string(i)
url = 'http://www.mpfinance.com/cfm/' +url
current_articles.append({'title': title, 'url': url, 'description':''})
return current_articles
def parse_eco_section(self, url):
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet']})
current_articles = []
@@ -53,9 +172,162 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls:
url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
#def parse_ent_section(self, url):
# dateStr = self.get_fetchdate()
# soup = self.index_to_soup(url)
# a = soup.findAll('a', href=True)
# current_articles = []
# included_urls = []
# for i in a:
# title = self.tag_to_string(i)
# url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
# if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
# current_articles.append({'title': title, 'url': url, 'description': ''})
# return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
# strip align="absmiddle" and similar alignment leftovers
for item in soup.findAll(align=True):
del item['align']
return soup
def create_opf(self, feeds, dir=None):
#super(MPHKRecipe,self).create_opf(feeds, dir)
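# customised copy of BasicNewsRecipe.create_opf; apparently part of the
# 2010/11/06 work-around noted in the change log above (Kindle cannot
# display unicode in the section/article list)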
if dir is None:
dir = self.output_dir
title = self.short_title()
if self.output_profile.periodical_date_in_title:
title += strftime(self.timefmt)
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.pubdate = nowf()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.montevideo.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Montevideo COMM'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['txt'])]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
(u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
(u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
(u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
# (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
# (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
(u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
]
def get_cover_url(self):
return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@@ -1,31 +1,33 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
moscowtimes.ru
www.themoscowtimes.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Moscowtimes(BasicNewsRecipe):
title = u'The Moscow Times'
title = 'The Moscow Times'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from Russia'
language = 'en'
lang = 'en'
oldest_article = 7
description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).'
category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg'
publisher = 'The Moscow Times'
language = 'en'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
#encoding = 'utf-8'
encoding = 'cp1252'
remove_javascript = True
remove_empty_feeds = True
encoding = 'cp1251'
masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
publication_type = 'newspaper'
conversion_options = {
'comment' : description
, 'language' : lang
}
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
extra_css = '''
h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
@@ -35,39 +37,37 @@ class Moscowtimes(BasicNewsRecipe):
.text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
'''
feeds = [
(u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
(u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
(u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
(u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
(u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
(u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
(u'Top Stories' , u'http://www.themoscowtimes.com/rss/top' )
,(u'Current Issue' , u'http://www.themoscowtimes.com/rss/issue' )
,(u'News' , u'http://www.themoscowtimes.com/rss/news' )
,(u'Business' , u'http://www.themoscowtimes.com/rss/business')
,(u'Art and Ideas' , u'http://www.themoscowtimes.com/rss/art' )
,(u'Opinion' , u'http://www.themoscowtimes.com/rss/opinion' )
]
keep_only_tags = [
dict(name='div', attrs={'class':['newstextblock']})
]
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [
dict(name='div', attrs={'class':['photo_nav']})
]
dict(name='div', attrs={'class':['photo_nav','phototext']})
,dict(name=['iframe','meta','base','link','embed','object'])
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return self.adeify_images(soup)
for lnk in soup.findAll('a'):
if lnk.string is not None:
ind = self.tag_to_string(lnk)
lnk.replaceWith(ind)
return soup
def print_version(self, url):
return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/')
def get_cover_url(self):
cover_url = None
href = 'http://www.themoscowtimes.com/pdf/'
soup = self.index_to_soup(href)
soup = self.index_to_soup(href)
div = soup.find('div',attrs={'class':'left'})
a = div.find('a')
print a
if a :
cover_url = a.img['src']
if div:
a = div.find('a')
if a :
cover_url = 'http://www.themoscowtimes.com' + a.img['src']
return cover_url

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
sankei.jp.msn.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MSNSankeiNewsProduct(BasicNewsRecipe):
title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
__author__ = 'Hiroshi Miura'
description = 'Products release from Japan'
oldest_article = 7
max_articles_per_feed = 100
encoding = 'Shift_JIS'
language = 'ja'
feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
remove_tags_before = dict(id="__r_article_title__")
remove_tags_after = dict(id="ajax_release_news")
remove_tags = [{'class':"parent chromeCustom6G"}]

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Newsweek(BasicNewsRecipe):
EDITION = 0
title = u'Newsweek Polska'
__author__ = 'Mateusz Kielar'
description = 'Polish weekly news magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))
extra_css = '''
.body {font-size: small}
.author {font-size: x-small}
.lead {font-size: x-small}
.title{font-size: x-large; font-weight: bold}
'''
def print_version(self, url):
return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
def find_last_full_issue(self):
page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
def parse_index(self):
self.find_last_full_issue()
soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
self.cover_url = img['src']
feeds = []
parent = soup.find(id='content-left-big')
for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
section = self.tag_to_string(txt).capitalize()
articles = list(self.find_articles(txt))
feeds.append((section, articles))
return feeds
def find_articles(self, txt):
for a in txt.findAllNext( attrs={'class':['strong','hr']}):
if a.name == "div":
break
yield {
'title' : self.tag_to_string(a),
'url' : 'http://www.newsweek.pl'+a['href'],
'date' : '',
'description' : ''
}

View File

@@ -0,0 +1,60 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NikkeiNet(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
(u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
(u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
(u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
(u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
(u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
]
remove_tags_before = dict(id="CONTENTS")
remove_tags = [
dict(name="form"),
{'class':"cmn-hide"},
]
remove_tags_after = {'class':"cmn-pr_list"}

View File

@@ -0,0 +1,125 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_subscription(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove disabled input which brings error on mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# forced redirect in default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# return some cookie which should be set by Javascript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
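# e.g. a response body containing "var checkValue = '12345';" (hypothetical
# value) yields redirectflag = '12345', replayed below as the redirectFlag cookie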
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
(u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
(u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
(u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
(u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
(u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
]

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_economy(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [ (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove the disabled input element, which causes an error in mechanize
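# The two replace() calls below wrap the offending markup in an HTML comment:
# the first opens '<!-- ' at the disabled j_id48 input, the second closes ' -->'
# just after the gm_home_on.gif image, so mechanize never sees the control.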
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# the site forces a redirect by default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# the response expects a cookie that would normally be set by JavaScript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
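# mechanize does not execute JavaScript, so the redirectFlag cookie the page
# would normally set client-side never reaches the cookie jar. The recipe
# scrapes checkValue out of the script text, writes it to a temporary file in
# LWP-Cookies-2.0 format, and loads that file into the jar by hand before
# resubmitting the form.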
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br

View File

@ -0,0 +1,108 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_industory(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove the disabled input element, which causes an error in mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# the site forces a redirect by default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# the response expects a cookie that would normally be set by JavaScript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_life(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove the disabled input element, which causes an error in mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# the site forces a redirect by default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# the response expects a cookie that would normally be set by JavaScript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br

View File

@ -0,0 +1,102 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_main(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [ (u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove the disabled input element, which causes an error in mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# the site forces a redirect by default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# the response expects a cookie that would normally be set by JavaScript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_sports(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove the disabled input element, which causes an error in mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# the site forces a redirect by default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# the response expects a cookie that would normally be set by JavaScript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on Lars Jacob's Taz Digiabo recipe
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class NowToronto(BasicNewsRecipe):
title = u'Now Toronto'
description = u'Now Toronto'
__author__ = 'Starson17'
language = 'en_CA'
conversion_options = {
'no_default_epub_cover' : True
}
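# This recipe does not scrape individual articles. Now Toronto publishes a
# ready-made EPUB edition, so build_index() is overridden to download that
# file, unpack it into calibre's output directory, and return its content.opf
# as the index.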
def build_index(self):
epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
soup = self.index_to_soup(epub_feed)
url = soup.find(name = 'feedburner:origlink').string
f = urllib2.urlopen(url)
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
tmp.close()
zfile = zipfile.ZipFile(tmp.name, 'r')
self.report_progress(0,_('extracting epub'))
zfile.extractall(self.output_dir)
zfile.close()
index = os.path.join(self.output_dir, 'content.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index

View File

@ -7,14 +7,22 @@ nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from datetime import timedelta, date
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
# set headlinesOnly to True for the headlines-only version
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
one_picture_per_article = False
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Sections to collect for the Web edition.
# Delete any you don't want, or use includeSections or excludeSections
web_sections = [(u'World',u'world'),
(u'U.S.',u'national'),
(u'Politics',u'politics'),
(u'New York',u'nyregion'),
(u'Business','business'),
(u'Technology',u'technology'),
(u'Sports',u'sports'),
(u'Science',u'science'),
(u'Health',u'health'),
(u'Opinion',u'opinion'),
(u'Arts',u'arts'),
(u'Books',u'books'),
(u'Movies',u'movies'),
(u'Music',u'arts/music'),
(u'Television',u'arts/television'),
(u'Style',u'style'),
(u'Dining & Wine',u'dining'),
(u'Fashion & Style',u'fashion'),
(u'Home & Garden',u'garden'),
(u'Travel',u'travel'),
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try:
m = self.month_list.index(udate[0])+1
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
except:
d = date.today()
return d
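# For example, decode_us_date('November 12 2010') yields date(2010, 11, 12);
# an unrecognized month or an invalid day falls back to today's date.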
earliest_date = date.today() - timedelta(days=oldest_article)
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
@ -136,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
.image {text-align: center;}
.source {text-align: left; }'''
articles = {}
key = None
ans = []
url_list = []
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
@ -164,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
self.log( "Queued %d articles" % total_article_count )
return ans
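# exclude_url() filters out links that are not regular nytimes.com article
# pages: podcasts, video, slideshows, interactive features and other
# non-.html resources are all rejected.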
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
return True
if 'nytimes.com' not in url:
return True
if 'podcast' in url:
return True
if '/video/' in url:
return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
return True
if '/interactive/' in url:
return True
if '/reference/' in url:
return True
if '/premium/' in url:
return True
return False
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -249,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
@ -273,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
else:
return description
def parse_todays_index(self):
def feed_title(self,div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
a = div.find('a', href=True)
if not a:
def handle_article(self,div):
thumbnail = div.find('div','thumbnail')
if thumbnail:
thumbnail.extract()
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
return
url += '?pagewanted=all'
if self.filterDuplicates:
if url in self.url_list:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
return
url += '?pagewanted=all'
if url in url_list:
return
url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
def parse_web_edition(self):
for (sec_title,index_url) in self.web_sections:
if self.includeSections != []:
if sec_title not in self.includeSections:
print "SECTION NOT INCLUDED: ",sec_title
continue
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
self.key = self.key.replace('U.s.','U.S.')
self.key = self.key.replace('N.y.','N.Y.')
skipping = False
if self.includeSections != []:
if self.key not in self.includeSections:
print "SECTION NOT INCLUDED: ",self.key
skipping = True
if self.key in self.excludeSections:
print "SECTION EXCLUDED: ",self.key
skipping = True
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
if not skipping:
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
if not skipping:
self.handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
@ -363,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
if self.includeSections != []:
if section_name not in self.includeSections:
print "SECTION NOT INCLUDED: ",section_name
continue
if section_name in self.excludeSections:
print "SECTION EXCLUDED: ",section_name
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
section_name = section_name.replace('Op-ed','Op-Ed')
section_name = section_name.replace('U.s.','U.S.')
section_name = section_name.replace('N.y.','N.Y.')
pubdate = strftime('%a, %d %b')
search_div = div_sec
@ -392,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
if self.exclude_url(url):
continue
url += '?pagewanted=all'
if url in url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
if self.filterDuplicates:
if url in self.url_list:
continue
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
if not self.articles.has_key(section_name):
self.ans.append(section_name)
self.articles[section_name] = []
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
elif self.webEdition:
return self.parse_web_edition()
else:
return self.parse_todays_index()
@ -438,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
if self.webEdition and (self.oldest_article > 0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
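# Returning None here causes calibre to treat the article as failed and drop
# it, which is how this recipe appears to enforce oldest_article for the web
# edition.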
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
@ -462,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
@ -548,4 +685,3 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag)
return soup

View File

@ -7,14 +7,22 @@ nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from datetime import timedelta, date
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
# set headlinesOnly to True for the headlines-only version
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
one_picture_per_article = False
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Sections to collect for the Web edition.
# Delete any you don't want, or use includeSections or excludeSections
web_sections = [(u'World',u'world'),
(u'U.S.',u'national'),
(u'Politics',u'politics'),
(u'New York',u'nyregion'),
(u'Business','business'),
(u'Technology',u'technology'),
(u'Sports',u'sports'),
(u'Science',u'science'),
(u'Health',u'health'),
(u'Opinion',u'opinion'),
(u'Arts',u'arts'),
(u'Books',u'books'),
(u'Movies',u'movies'),
(u'Music',u'arts/music'),
(u'Television',u'arts/television'),
(u'Style',u'style'),
(u'Dining & Wine',u'dining'),
(u'Fashion & Style',u'fashion'),
(u'Home & Garden',u'garden'),
(u'Travel',u'travel'),
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try:
m = self.month_list.index(udate[0])+1
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
except:
d = date.today()
return d
earliest_date = date.today() - timedelta(days=oldest_article)
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
@ -60,7 +124,6 @@ class NYTimes(BasicNewsRecipe):
timefmt = ''
needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
@ -137,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
.image {text-align: center;}
.source {text-align: left; }'''
articles = {}
key = None
ans = []
url_list = []
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
@ -165,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
self.log( "Queued %d articles" % total_article_count )
return ans
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
return True
if 'nytimes.com' not in url:
return True
if 'podcast' in url:
return True
if '/video/' in url:
return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
return True
if '/interactive/' in url:
return True
if '/reference/' in url:
return True
if '/premium/' in url:
return True
return False
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -250,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
@ -274,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
else:
return description
def parse_todays_index(self):
def feed_title(self,div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
a = div.find('a', href=True)
if not a:
def handle_article(self,div):
thumbnail = div.find('div','thumbnail')
if thumbnail:
thumbnail.extract()
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
return
url += '?pagewanted=all'
if self.filterDuplicates:
if url in self.url_list:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
return
url += '?pagewanted=all'
if url in url_list:
return
url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
def parse_web_edition(self):
for (sec_title,index_url) in self.web_sections:
if self.includeSections != []:
if sec_title not in self.includeSections:
print "SECTION NOT INCLUDED: ",sec_title
continue
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
self.key = self.key.replace('U.s.','U.S.')
self.key = self.key.replace('N.y.','N.Y.')
skipping = False
if self.includeSections != []:
if self.key not in self.includeSections:
print "SECTION NOT INCLUDED: ",self.key
skipping = True
if self.key in self.excludeSections:
print "SECTION EXCLUDED: ",self.key
skipping = True
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
if not skipping:
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
if not skipping:
self.handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
@ -364,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
if self.includeSections != []:
if section_name not in self.includeSections:
print "SECTION NOT INCLUDED: ",section_name
continue
if section_name in self.excludeSections:
print "SECTION EXCLUDED: ",section_name
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
section_name = section_name.replace('Op-ed','Op-Ed')
section_name = section_name.replace('U.s.','U.S.')
section_name = section_name.replace('N.y.','N.Y.')
pubdate = strftime('%a, %d %b')
search_div = div_sec
@ -393,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
if self.exclude_url(url):
continue
url += '?pagewanted=all'
if url in url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
if self.filterDuplicates:
if url in self.url_list:
continue
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
if not self.articles.has_key(section_name):
self.ans.append(section_name)
self.articles[section_name] = []
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
elif self.webEdition:
return self.parse_web_edition()
else:
return self.parse_todays_index()
@ -439,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
if self.webEdition and (self.oldest_article > 0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
@ -463,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
observa.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Observa Digital'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
description = 'Noticias desde Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['contenido'])]
remove_tags = [
dict(name='div', attrs={'id':'contenedorVinculadas'}),
dict(name='p', attrs={'id':'nota_firma'}),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
(u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
(u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
(u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'})
if link_item:
cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip()
print cover_url
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
#autor{font-weight: bold}
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
.fgprincipal{font-size: large; font-weight: bold}
"""
conversion_options = {
'comment' : description
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
remove_tags = [
dict(name=['meta','link'])
,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
]
remove_attributes=['lang']
feeds = [
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
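# Demote section links and stray h3 headings to plain spans so they render as
# inline text rather than hyperlinks or headings in the generated book.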
for item in soup.findAll('span', attrs={'id':'seccion'}):
it = item.a
it.name='span'
del it['href']
del it['title']
for item in soup.findAll('p'):
it = item.find('h3')
if it:
it.name='span'
return soup

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class PCLab(BasicNewsRecipe):
cover_url = 'http://pclab.pl/img/logo.png'
title = u"PC Lab"
__author__ = 'ravcio - rlelusz[at]gmail.com'
description = u"Articles from PC Lab website"
language = 'pl'
oldest_article = 30.0
max_articles_per_feed = 100
recursions = 0
encoding = 'iso-8859-2'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
keep_only_tags = [
dict(name='div', attrs={'class':['substance']})
]
remove_tags = [
dict(name='div', attrs={'class':['chapters']})
,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['navigation']})
]
#links to RSS feeds
feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]
# load the second and subsequent pages of a multi-page article
# in:  soup      - the full page, including the 'next' button
# out: appendtag - the tag to which the new page content is appended
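# The method recurses: each fetched page is searched for its own 'next'
# button, so an article split across many pages is stitched into a single
# document.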
def append_page(self, soup, appendtag):
# find the 'Next' button
pager = soup.find('div', attrs={'class':'next'})
if pager:
#search for 'a' element with link to next page (exit if not found)
a = pager.find('a')
if a:
nexturl = a['href']
soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)
pagetext_substance = soup2.find('div', attrs={'class':'substance'})
pagetext = pagetext_substance.find('div', attrs={'class':'data'})
pagetext.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
pos = len(appendtag.contents)
self.append_page(soup2, appendtag)
def preprocess_html(self, soup):
# soup.body contains neither the title nor the page navigation; both live elsewhere in the soup
self.append_page(soup, soup.body)
# finally remove some tags
tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
[tag.extract() for tag in tags]
return soup

View File

@ -1,13 +1,10 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Politika(BasicNewsRecipe):
title = 'Politika Online'
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
language = 'sr'
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
delay = 1
language = 'sr'
publication_type = 'newspaper'
masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
extra_css = """
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,Helvetica,sans1,sans-serif}
h1{font-family: "Times New Roman",Times,serif1,serif}
.articledescription{font-family: sans1, sans-serif}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
remove_tags = [
dict(name='div', attrs={'class':['send_print','txt-komentar']})
,dict(name=['object','link','a'])
,dict(name='h1', attrs={'class':'box_header-tags'})
]
keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
remove_tags_after = dict(attrs={'class':'online_date'})
remove_tags = [dict(name=['link','meta','iframe','embed','object'])]
feeds = [
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
]
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'})
if ftag.has_key('align'):
del ftag['align']
return self.adeify_images(soup)
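# Demote category links to plain spans, stripping href and title, so they do
# not render as hyperlinks in the output.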
for item in soup.findAll('a', attrs={'class':'category'}):
item.name='span'
if item.has_key('href'):
del item['href']
if item.has_key('title'):
del item['title']
return soup

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Polityka(BasicNewsRecipe):
title = u'Polityka'
__author__ = 'Mateusz Kielar'
description = 'Polish weekly news magazine; downloads the most recent archived issue'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
remove_tags_before = dict(name = 'h2', attrs = {'class' : 'box_nag'})
remove_tags_after = dict(name = 'div', attrs = {'class' : 'box_footer'})
remove_tags =[]
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))
extra_css = '''
h1 {font-size: x-large; font-weight: bold}
'''
def parse_index(self):
soup = self.index_to_soup('http://archiwum.polityka.pl/')
box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
feeds = []
last = 0
self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']
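# Walk through the archive one edition at a time by rewriting the edition
# number embedded in the URL; the loop ends when an edition page yields no
# article list (box_list is empty).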
while True:
index = self.index_to_soup(last_edition)
box_list = index.findAll('div', attrs={'class' : 'box_list'})
if len(box_list) == 0:
break
articles = {}
for box in box_list:
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].strip()
print section
if not articles.has_key(section):
articles[section] = []
articles[section].append( {
'title' : self.tag_to_string(div.a),
'url' : 'http://archiwum.polityka.pl' + div.a['href'],
'date' : '',
'description' : ''
})
for section in articles:
feeds.append((section, articles[section]))
last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
last = last + 1
return feeds

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1290013720(BasicNewsRecipe):
title = u'RDS'
__author__ = 'Nexus'
language = 'en_CA'
description = 'Hockey News'
oldest_article = 7
max_articles_per_feed = 25
no_stylesheets = True
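    # Keep only the column header and body (enteteChronique / contenuChronique);
    # the remove_tags entries drop leftover page furniture around and inside them.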
remove_tags = [dict(name='div', attrs={'id':'rdsWrap'}),
dict(name='table', attrs={'id':'aVoir'}),
dict(name='div', attrs={'id':'imageChronique'})]
keep_only_tags = [dict(name='div', attrs={'id':['enteteChronique']}),
dict(name='div', attrs={'id':['contenuChronique']})]
feeds = [(u'RDS', u'http://www.rds.ca/hockey/fildepresse_rds.xml')]

View File

@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ReutersJa(BasicNewsRecipe):
title = 'Reuters(Japan)'
description = 'Global news in Japanese'
__author__ = 'Hiroshi Miura'
use_embedded_content = False
language = 'ja'
max_articles_per_feed = 10
remove_javascript = True
feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'),
('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'),
('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'),
('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'),
('Oddly Enough News', 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml')
]
remove_tags_before = {'class':"article primaryContent"}
remove_tags = [ dict(id="banner"),
dict(id="autilities"),
dict(id="textSizer"),
dict(id="shareFooter"),
dict(id="relatedNews"),
dict(id="editorsChoice"),
dict(id="ecArticles"),
{'class':"secondaryContent"},
{'class':"module"},
]
remove_tags_after = {'class':"assetBuddy"}
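    # Article URLs end in an idJPJAPAN-<number> slug; appending ?sp=true appears
    # to request the single-page (print friendly) rendering.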
def print_version(self, url):
        m = re.search('(.*idJPJAPAN-[0-9]+)', url)
        if m is None:
            return url
        return m.group(0)+'?sp=true'

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.revistabla.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Revista Bla'
__author__ = 'Gustavo Azambuja'
description = 'Moda | Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
keep_only_tags = [dict(id=['body_container'])]
remove_tags = [
dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
dict(name='p', attrs={'class':'FacebookLikeButton'}),
dict(name=['object','link']) ]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://www.revistabla.com/feed/')
]
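    # The cover changes with every issue, so it is scraped from the header_right
    # block of the home page rather than hard-coded.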
def get_cover_url(self):
cover_url = None
index = 'http://www.revistabla.com'
soup = self.index_to_soup(index)
link_item = soup.find('div',attrs={'class':'header_right'})
if link_item:
cover_url = link_item.img['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
feeds.append((title, articles))
return feeds
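    # Fetch the current issue's cover from the img_portada image on the
    # magazine's /revista page.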
    def get_cover_url(self):
        cover_url = None
        index = 'http://www.muyinteresante.es/revista'
        soup = self.index_to_soup(index)
        link_item = soup.find('img',attrs={'class':'img_portada'})
        if link_item:
            cover_url = "http://www.muyinteresante.es"+link_item['src']
        return cover_url

View File

@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RollingStone(BasicNewsRecipe):
title = 'Rolling Stone Magazine - free content'
__author__ = 'Darko Miletic'
description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
publisher = 'Werner Media inc.'
category = 'news, music, USA, world'
oldest_article = 15
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
publication_type = 'magazine'
masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
extra_css = """
body{font-family: Georgia,Times,serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
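    # Drop everything between the opening <html ... xml:lang="en"> tag and
    # <head>, then empty the <head> of everything after </title>; this discards
    # script and meta clutter that the conversion does not need.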
preprocess_regexps = [
(re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
,(re.compile(r'</title>.*?</head>' , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n' )
]
keep_only_tags=[
dict(attrs={'class':['headerImgHolder','headerContent']})
,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
]
remove_tags = [
dict(name=['meta','iframe','object','embed'])
,dict(attrs={'id':'mpStoryHeader'})
,dict(attrs={'class':'relatedTopics'})
]
remove_attributes=['lang','onclick','width','height','name']
remove_tags_before=dict(attrs={'class':'bloggerInfo'})
remove_tags_after=dict(attrs={'class':'relatedTopics'})
feeds = [
(u'All News' , u'http://www.rollingstone.com/siteServices/rss/allNews' )
,(u'All Blogs' , u'http://www.rollingstone.com/siteServices/rss/allBlogs' )
,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -6,6 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
spiegel.de
'''
from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe):
@ -44,3 +45,6 @@ class Spiegel_ger(BasicNewsRecipe):
rmain, rsep, rrest = main.rpartition(',')
purl = rmain + ',druck-' + rrest + ',' + rest
return purl
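    # The cover image URL encodes the current year, week number (%W) and day of
    # year (%j), so it can be derived from today's date with strftime.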
def get_cover_url(self):
return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")

View File

@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
__author__ = 'noxxx'
max_articles_per_feed = 100
description = 'tagesanzeiger.ch: Nichts verpassen'
-    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
language = 'de'
conversion_options = {

View File

@ -3,12 +3,12 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisNews(BasicNewsRecipe):
-    title = u'Telepolis (News)'
+    title = u'Telepolis (News+Artikel)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis'
@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe):
encoding = "utf-8"
language = 'de_AT'
-    use_embedded_content = False
+    use_embedded_content =False
remove_empty_feeds = True
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
-    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
+    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
-    feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
html2lrf_options = [
'--comment' , description
@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):
def get_article_url(self, article):
'''if the linked article is of kind artikel don't take it'''
-        if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 1) :
return None
return article.link
@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup

Some files were not shown because too many files have changed in this diff